In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import pathlib
import sklearn
import time

from networkx.algorithms import bipartite
from networkx.algorithms.centrality import degree_centrality, subgraph_centrality
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
pd.__version__

'1.0.5'

In [2]:
import bokeh
from bokeh.io import output_file, show
from bokeh.models import (BoxZoomTool, Circle, HoverTool, Scatter, 
                          MultiLine, Plot, Range1d, ResetTool, 
                          ColumnDataSource, CustomJSTransform, LabelSet)
from bokeh.palettes import Spectral4, RdYlGn4
from bokeh.plotting import figure
from bokeh.transform import linear_cmap, factor_mark
from bokeh.models.graphs import from_networkx

bokeh.__version__

'2.1.1'

In [3]:
from pathlib import Path
Path.cwd()

PosixPath('/Users/dawnstaana/Documents/NUS/Year 4/NUS Fintech/Insurance')

In [4]:
from utils import (Bipartite, calculate_birank, 
preprocess, preprocess_new_claim, 
calculate_new_claim, get_subgraph, 
get_subgraph_regular, get_claim_features, get_fraud_features, make_graph)

In [5]:
data = pd.read_csv("df.csv", header=0, parse_dates=["LOSS_DATE"], infer_datetime_format=True).drop_duplicates()
data.head()

Unnamed: 0,months_as_customer,age,CUST_CODE,policy_bind_date,policy_state,policy_csl,policy_deductable,policy_annual_premium,umbrella_limit,insured_zip,...,auto_make,auto_model,auto_year,fraud_flag,age_car_incident,age_policy_incident,WORKSHOP_ID,CLAIM_HANDLER,CLAIM_NO,investigation_flag
0,328,48,customer_ 521585,17/10/14,OH,250/500,1000,1406.91,0,466132,...,Saab,92x,2004,1,11,1,workshop_ A,handler_ Frank,claim_ 1,1
1,228,42,customer_ 342868,27/6/06,IN,250/500,2000,1197.22,5000000,468176,...,Mercedes,E400,2007,1,8,9,workshop_ A,handler_ Frank,claim_ 2,1
2,134,29,customer_ 687698,9/6/00,OH,100/300,2000,1413.14,5000000,430632,...,Dodge,RAM,2007,0,8,15,workshop_ A,handler_ Frank,claim_ 3,0
3,256,41,customer_ 227811,25/5/90,IL,250/500,2000,1415.74,6000000,608117,...,Chevrolet,Tahoe,2014,1,1,25,workshop_ C,handler_ Harry,claim_ 4,1
4,228,44,customer_ 367455,6/6/14,IL,500/1000,1000,1583.91,6000000,610706,...,Accura,RSX,2009,0,6,1,workshop_ C,handler_ Harry,claim_ 5,0


In [6]:
relevant_cols = ["CLAIM_NO", "CUST_CODE", "CLAIM_HANDLER", "WORKSHOP_ID", "investigation_flag", "fraud_flag"]
df = data[relevant_cols]
df.head()
print(df.shape)
#test_claim_dict = {
    #'CLAIM_NO': ['claim_ 12345678'],
    #'CUST_CODE': ['customer_ 118333'],
    #'CLAIM_HANDLER': ['handler_ Harry'],
    #'WORKSHOP_ID': ['workshop_ 247'],
    #'investigation_flag': 1,
    #'fraud_flag': 1
#}
#test_claim = pd.DataFrame(test_claim_dict)
#test_claim

(1000, 6)


In [7]:
claims_df = df.copy()
#df = pd.concat([df, test_claim], axis=0).reset_index(drop=True)
print(df.shape)
df.tail()

(1000, 6)


Unnamed: 0,CLAIM_NO,CUST_CODE,CLAIM_HANDLER,WORKSHOP_ID,investigation_flag,fraud_flag
995,claim_ 996,customer_ 941851,handler_ Jeff,workshop_ E,0,0
996,claim_ 997,customer_ 186934,handler_ Jeff,workshop_ E,0,0
997,claim_ 998,customer_ 918516,handler_ Frank,workshop_ A,0,0
998,claim_ 999,customer_ 533940,handler_ Frank,workshop_ A,0,0
999,claim_ 1000,customer_ 556080,handler_ Frank,workshop_ A,0,0


In [8]:
policyholder_edgelist = df[['CLAIM_NO', 'CUST_CODE']].rename(columns={'CLAIM_NO':'Claims', 'CUST_CODE':'Parties'}).dropna()
expert_edgelist = df[['CLAIM_NO', 'CLAIM_HANDLER']].rename(columns={'CLAIM_NO':'Claims', 'CLAIM_HANDLER':'Parties'}).dropna()
garage_edgelist = df[['CLAIM_NO', 'WORKSHOP_ID']].rename(columns={'CLAIM_NO':'Claims', 'WORKSHOP_ID':'Parties'}).dropna()
edgelist_df = pd.concat([policyholder_edgelist, expert_edgelist, garage_edgelist], axis=0).sort_values(by='Claims').reset_index(drop=True)
edgelist_df = edgelist_df[['Parties', 'Claims']]
print(edgelist_df.shape)

(3000, 2)


In [9]:
policyholder_edgelist

Unnamed: 0,Claims,Parties
0,claim_ 1,customer_ 521585
1,claim_ 2,customer_ 342868
2,claim_ 3,customer_ 687698
3,claim_ 4,customer_ 227811
4,claim_ 5,customer_ 367455
...,...,...
995,claim_ 996,customer_ 941851
996,claim_ 997,customer_ 186934
997,claim_ 998,customer_ 918516
998,claim_ 999,customer_ 533940


In [10]:
edgelist_df

Unnamed: 0,Parties,Claims
0,customer_ 521585,claim_ 1
1,workshop_ A,claim_ 1
2,handler_ Frank,claim_ 1
3,workshop_ D,claim_ 10
4,handler_ Ivan,claim_ 10
...,...,...
2995,customer_ 918516,claim_ 998
2996,workshop_ A,claim_ 998
2997,workshop_ A,claim_ 999
2998,customer_ 533940,claim_ 999


In [11]:
mask = df['fraud_flag'] == 1
prior = df[mask]['CLAIM_NO'].values
prior

array(['claim_ 1', 'claim_ 2', 'claim_ 4', 'claim_ 6', 'claim_ 15',
       'claim_ 16', 'claim_ 23', 'claim_ 24', 'claim_ 26', 'claim_ 28',
       'claim_ 32', 'claim_ 36', 'claim_ 37', 'claim_ 40', 'claim_ 42',
       'claim_ 48', 'claim_ 61', 'claim_ 64', 'claim_ 65', 'claim_ 66',
       'claim_ 67', 'claim_ 71', 'claim_ 72', 'claim_ 80', 'claim_ 85',
       'claim_ 90', 'claim_ 92', 'claim_ 97', 'claim_ 98', 'claim_ 107',
       'claim_ 109', 'claim_ 110', 'claim_ 112', 'claim_ 116',
       'claim_ 118', 'claim_ 122', 'claim_ 123', 'claim_ 129',
       'claim_ 130', 'claim_ 136', 'claim_ 144', 'claim_ 146',
       'claim_ 147', 'claim_ 149', 'claim_ 150', 'claim_ 153',
       'claim_ 155', 'claim_ 156', 'claim_ 164', 'claim_ 167',
       'claim_ 172', 'claim_ 184', 'claim_ 186', 'claim_ 189',
       'claim_ 197', 'claim_ 207', 'claim_ 214', 'claim_ 215',
       'claim_ 216', 'claim_ 219', 'claim_ 221', 'claim_ 228',
       'claim_ 235', 'claim_ 238', 'claim_ 242', 'claim_ 246',
    

In [12]:
birank_df = calculate_birank(edgelist_df, prior)
print(birank_df.shape)
birank_df.tail()

No. of known prior fraudulent flags: 247.0 Length of prior_vector: 1000
(2010, 3)


Unnamed: 0,node,birank_score,birank_scaled
2005,claim_ 995,0.207558,0.032304
2006,claim_ 996,0.149816,0.016896
2007,claim_ 997,0.149816,0.016896
2008,claim_ 998,0.207558,0.032304
2009,claim_ 999,0.207558,0.032304


In [13]:
G, claims_attr, parties_attr = preprocess(claims_df, birank_df)

Name: 
Type: Graph
Number of nodes: 2010
Number of edges: 3000
Average degree:   2.9851
Graph is bipartite:  True


In [14]:
new_claim_dict = {
    'CLAIM_NO': ['claim_ 1234567'],
    'CUST_CODE': ['customer_ 773651'],
    'CLAIM_HANDLER': ['handler_ Harry'],
    'WORKSHOP_ID': ['workshop_ A']
}
new_claim = pd.DataFrame(new_claim_dict)
new_claim

Unnamed: 0,CLAIM_NO,CUST_CODE,CLAIM_HANDLER,WORKSHOP_ID
0,claim_ 1234567,customer_ 773651,handler_ Harry,workshop_ A


In [15]:
edgelist_df_with_new_claim, new_claim_edgelist_df = preprocess_new_claim(new_claim, G)

Name: 
Type: Graph
Number of nodes: 2012
Number of edges: 3003
Average degree:   2.9851
Graph Bipartite:  True


In [16]:
new_birank, new_df, birank_df, new_parties_attr, new_claims_attr = calculate_new_claim(claims_df, new_claim, edgelist_df_with_new_claim, new_claim_edgelist_df, parties_attr, claims_attr, prior)

No. of known prior fraudulent flags: 247.0 Length of prior_vector: 1001


In [17]:
G.nodes['claim_ 1234567']

{'bipartite': 1}

In [18]:
nx.set_node_attributes(G, new_claims_attr)
nx.set_node_attributes(G, new_parties_attr)

In [19]:
test_threshold = birank_df['birank_score'].quantile(0.995)
test_threshold

0.4181531773467503

In [20]:
subgraph_nodes, subgraph_edges = get_subgraph(new_claim, new_birank, new_claim_edgelist_df, new_df, birank_df, prior)
regular_nodes, regular_edges = get_subgraph_regular(new_birank, new_df, birank_df, test_threshold, prior)

In [21]:
subgraph = nx.Graph()
subgraph.add_nodes_from(subgraph_nodes)
subgraph.add_edges_from(subgraph_edges)
subgraph.add_nodes_from(regular_nodes)
subgraph.add_edges_from(regular_edges)
nx.set_node_attributes(subgraph, new_parties_attr)
nx.set_node_attributes(subgraph, new_claims_attr)
print(nx.info(subgraph))

Name: 
Type: Graph
Number of nodes: 193
Number of edges: 192
Average degree:   1.9896


In [22]:
make_graph(subgraph, new_birank)



In [183]:
from cookbook import (get_edgeweight, Bipartite,generate_n1_q1,generate_n1_med,generate_n1_max,generate_n2_q1,
                      generate_n2_med,generate_n2_max,generate_n1_size,generate_n2_size,generate_n2_ratioFraud,
                      generate_n2_ratioNonFraud,generate_n2_binFraud)

In [160]:
G.add_nodes_from(claims_attr)

In [161]:
G.add_nodes_from(parties_attr)

In [162]:
claims_attr

{'claim_ 1': {'fraudulent': 1,
  'birank_score': 0.4168603659756818,
  'birank': 0.08815,
  'type': 'Claim'},
 'claim_ 2': {'fraudulent': 1,
  'birank_score': 0.4168603659756818,
  'birank': 0.08815,
  'type': 'Claim'},
 'claim_ 3': {'fraudulent': 0,
  'birank_score': 0.20755804039428644,
  'birank': 0.0323,
  'type': 'Claim'},
 'claim_ 4': {'fraudulent': 1,
  'birank_score': 0.41948768294269495,
  'birank': 0.08886,
  'type': 'Claim'},
 'claim_ 5': {'fraudulent': 0,
  'birank_score': 0.21018535736129956,
  'birank': 0.03301,
  'type': 'Claim'},
 'claim_ 6': {'fraudulent': 1,
  'birank_score': 0.4168603659756818,
  'birank': 0.08815,
  'type': 'Claim'},
 'claim_ 7': {'fraudulent': 0,
  'birank_score': 0.20755804039428644,
  'birank': 0.0323,
  'type': 'Claim'},
 'claim_ 8': {'fraudulent': 0,
  'birank_score': 0.21018535736129956,
  'birank': 0.03301,
  'type': 'Claim'},
 'claim_ 9': {'fraudulent': 0,
  'birank_score': 0.14981632990865534,
  'birank': 0.0169,
  'type': 'Claim'},
 'claim

In [163]:
import numpy as np
import pandas as pd
import networkx as nx
import sklearn
import scipy
import scipy.sparse as spa
from networkx.algorithms import bipartite
import matplotlib.pyplot as plt
%matplotlib inline
np.set_printoptions(precision=3)
np.set_printoptions(threshold=999)

In [164]:
birank_df['node']

0       customer_ 100804
1       customer_ 101421
2       customer_ 104594
3       customer_ 106186
4       customer_ 106873
              ...       
2007          claim_ 606
2008          claim_ 559
2009          claim_ 674
2010          claim_ 383
2011      claim_ 1234567
Name: node, Length: 2012, dtype: object

In [165]:
epo = [u + (1,) for u in G.edges]

In [166]:
G.add_weighted_edges_from(epo)

In [167]:
for u, v, d in G.edges(data=True):
    d['weight'] = d['weight'] / np.sqrt(G.degree[u] * G.degree[v])
    print( (u, G.degree[u], v, G.degree[v]), d)

('claim_ 1', 3, 'workshop_ A', 641) {'weight': 0.02280396458583536}
('claim_ 1', 3, 'handler_ Frank', 640) {'weight': 0.02282177322938192}
('claim_ 1', 3, 'customer_ 521585', 1) {'weight': 0.5773502691896258}
('claim_ 2', 3, 'workshop_ A', 641) {'weight': 0.02280396458583536}
('claim_ 2', 3, 'handler_ Frank', 640) {'weight': 0.02282177322938192}
('claim_ 2', 3, 'customer_ 342868', 1) {'weight': 0.5773502691896258}
('claim_ 3', 3, 'workshop_ A', 641) {'weight': 0.02280396458583536}
('claim_ 3', 3, 'handler_ Frank', 640) {'weight': 0.02282177322938192}
('claim_ 3', 3, 'customer_ 687698', 1) {'weight': 0.5773502691896258}
('claim_ 4', 3, 'workshop_ C', 79) {'weight': 0.06495698024616309}
('claim_ 4', 3, 'handler_ Harry', 80) {'weight': 0.06454972243679027}
('claim_ 4', 3, 'customer_ 227811', 1) {'weight': 0.5773502691896258}
('claim_ 5', 3, 'workshop_ C', 79) {'weight': 0.06495698024616309}
('claim_ 5', 3, 'handler_ Harry', 80) {'weight': 0.06454972243679027}
('claim_ 5', 3, 'customer_ 36

('claim_ 303', 3, 'workshop_ A', 641) {'weight': 0.02280396458583536}
('claim_ 303', 3, 'handler_ Frank', 640) {'weight': 0.02282177322938192}
('claim_ 303', 3, 'customer_ 297816', 1) {'weight': 0.5773502691896258}
('claim_ 304', 3, 'workshop_ A', 641) {'weight': 0.02280396458583536}
('claim_ 304', 3, 'handler_ Frank', 640) {'weight': 0.02282177322938192}
('claim_ 304', 3, 'customer_ 426708', 1) {'weight': 0.5773502691896258}
('claim_ 305', 3, 'workshop_ A', 641) {'weight': 0.02280396458583536}
('claim_ 305', 3, 'handler_ Frank', 640) {'weight': 0.02282177322938192}
('claim_ 305', 3, 'customer_ 615047', 1) {'weight': 0.5773502691896258}
('claim_ 306', 3, 'workshop_ A', 641) {'weight': 0.02280396458583536}
('claim_ 306', 3, 'handler_ Frank', 640) {'weight': 0.02282177322938192}
('claim_ 306', 3, 'customer_ 771236', 1) {'weight': 0.5773502691896258}
('claim_ 307', 3, 'workshop_ A', 641) {'weight': 0.02280396458583536}
('claim_ 307', 3, 'handler_ Frank', 640) {'weight': 0.0228217732293819

('claim_ 638', 3, 'customer_ 808544', 1) {'weight': 0.5773502691896258}
('claim_ 639', 3, 'workshop_ A', 641) {'weight': 0.02280396458583536}
('claim_ 639', 3, 'handler_ Frank', 640) {'weight': 0.02282177322938192}
('claim_ 639', 3, 'customer_ 409074', 1) {'weight': 0.5773502691896258}
('claim_ 640', 3, 'workshop_ A', 641) {'weight': 0.02280396458583536}
('claim_ 640', 3, 'handler_ Frank', 640) {'weight': 0.02282177322938192}
('claim_ 640', 3, 'customer_ 824728', 1) {'weight': 0.5773502691896258}
('claim_ 641', 3, 'workshop_ A', 641) {'weight': 0.02280396458583536}
('claim_ 641', 3, 'handler_ Frank', 640) {'weight': 0.02282177322938192}
('claim_ 641', 3, 'customer_ 606037', 1) {'weight': 0.5773502691896258}
('claim_ 642', 3, 'workshop_ A', 641) {'weight': 0.02280396458583536}
('claim_ 642', 3, 'handler_ Frank', 640) {'weight': 0.02282177322938192}
('claim_ 642', 3, 'customer_ 636843', 1) {'weight': 0.5773502691896258}
('claim_ 643', 3, 'workshop_ D', 100) {'weight': 0.05773502691896257

('claim_ 1234567', 3, 'customer_ 773651', 1) {'weight': 0.5773502691896258}


In [168]:
G.edges[('claim_ 1', 'workshop_ A')]

{'weight': 0.02280396458583536}

In [169]:
print(bipartite.is_bipartite(G))
c = bipartite.color(G)
nodes = list(bipartite.color(G).keys())
# https://stackoverflow.com/questions/13517614/draw-different-color-for-nodes-in-networkx-based-on-their-node-value
color_map = {
    0: 'lightgreen',
    1: 'lightblue'
}
values = [color_map.get(value) for value in c.values()]
values

True


['lightblue',
 'lightgreen',
 'lightgreen',
 'lightgreen',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'lightblue',
 'l

In [170]:
G.edges

EdgeView([('claim_ 1', 'workshop_ A'), ('claim_ 1', 'handler_ Frank'), ('claim_ 1', 'customer_ 521585'), ('claim_ 2', 'workshop_ A'), ('claim_ 2', 'handler_ Frank'), ('claim_ 2', 'customer_ 342868'), ('claim_ 3', 'workshop_ A'), ('claim_ 3', 'handler_ Frank'), ('claim_ 3', 'customer_ 687698'), ('claim_ 4', 'workshop_ C'), ('claim_ 4', 'handler_ Harry'), ('claim_ 4', 'customer_ 227811'), ('claim_ 5', 'workshop_ C'), ('claim_ 5', 'handler_ Harry'), ('claim_ 5', 'customer_ 367455'), ('claim_ 6', 'workshop_ A'), ('claim_ 6', 'handler_ Frank'), ('claim_ 6', 'customer_ 104594'), ('claim_ 7', 'workshop_ A'), ('claim_ 7', 'handler_ Frank'), ('claim_ 7', 'customer_ 413978'), ('claim_ 8', 'workshop_ C'), ('claim_ 8', 'handler_ Harry'), ('claim_ 8', 'customer_ 429027'), ('claim_ 9', 'workshop_ E'), ('claim_ 9', 'handler_ Jeff'), ('claim_ 9', 'customer_ 485665'), ('claim_ 10', 'workshop_ D'), ('claim_ 10', 'handler_ Ivan'), ('claim_ 10', 'customer_ 636550'), ('claim_ 11', 'workshop_ A'), ('claim_ 

In [171]:
edgelist_df['Weight'] = edgelist_df.apply(lambda x: get_edgeweight(G, x), axis=1)
edgelist_df

Unnamed: 0,Parties,Claims,Weight
0,customer_ 521585,claim_ 1,0.577350
1,workshop_ A,claim_ 1,0.022804
2,handler_ Frank,claim_ 1,0.022822
3,workshop_ D,claim_ 10,0.057735
4,handler_ Ivan,claim_ 10,0.057735
...,...,...,...
2995,customer_ 918516,claim_ 998,0.577350
2996,workshop_ A,claim_ 998,0.022804
2997,workshop_ A,claim_ 999,0.022804
2998,customer_ 533940,claim_ 999,0.577350


In [172]:
bn = Bipartite()
bn.set_edgelist(edgelist_df,  parties_col='Parties', claims_col='Claims')

parties_birank, claims_birank = bn.generate_birank(normalizer="BiRank", alpha=0.85, beta=1, prior=prior, max_iter=500, tol=1.0e-4, verbose=False)
claims_birank

No. of known prior fraudulent flags: 247.0 Length of prior_vector: 1000


Unnamed: 0,Claims,birank_score
0,claim_ 1,0.416860
1,claim_ 10,0.150232
2,claim_ 100,0.193077
3,claim_ 1000,0.207558
4,claim_ 101,0.210185
...,...,...
995,claim_ 995,0.207558
996,claim_ 996,0.149816
997,claim_ 997,0.149816
998,claim_ 998,0.207558


In [173]:
new_cols = {"Claims": "node", "Parties": "node"}
df = parties_birank.rename(columns=new_cols).append(claims_birank.rename(columns=new_cols)).reset_index(drop=True)
df

Unnamed: 0,node,birank_score
0,customer_ 521585,0.240675
1,workshop_ A,3.834058
2,handler_ Frank,3.834058
3,workshop_ D,1.096966
4,handler_ Ivan,1.096966
...,...,...
2005,claim_ 995,0.207558
2006,claim_ 996,0.149816
2007,claim_ 997,0.149816
2008,claim_ 998,0.207558


In [176]:
df['n1_q1'] = df['node'].apply(lambda x: generate_n1_q1(G, df, x))

In [181]:
df['n1_med'] = df['node'].apply(lambda x: generate_n1_med(G, df, x))
df['n1_max'] = df['node'].apply(lambda x: generate_n1_max(G, df, x))
df['n2_q1'] = df['node'].apply(lambda x: generate_n2_q1(G, df, x))
df['n2_med'] = df['node'].apply(lambda x: generate_n2_med(G, df, x))
df['n2_max'] = df['node'].apply(lambda x: generate_n2_max(G, df, x))
df['n1_size'] = df['node'].apply(lambda x: generate_n1_size(G, df, x))
df['n2_size'] = df['node'].apply(lambda x: generate_n2_size(G, df, x))
df

Unnamed: 0,node,birank_score,n1_q1,n1_med,n1_max,n2_q1,n2_med,n2_max,n1_size,n2_size
0,customer_ 521585,0.240675,0.416860,0.416860,0.416860,3.834058,3.834058,3.834058,1,2
1,workshop_ A,3.834058,0.207558,0.207558,0.416860,0.119834,0.119834,3.834058,641,643
2,handler_ Frank,3.834058,0.207558,0.207558,0.416860,0.119834,0.119834,3.834058,640,641
3,workshop_ D,1.096966,0.150232,0.150232,0.359535,0.086737,0.086737,1.096966,100,101
4,handler_ Ivan,1.096966,0.150232,0.150232,0.359535,0.086737,0.086737,1.096966,100,101
...,...,...,...,...,...,...,...,...,...,...
2005,claim_ 995,0.207558,1.976946,3.834058,3.834058,0.207558,0.207558,0.416860,3,640
2006,claim_ 996,0.149816,0.576362,1.066228,1.066228,0.149816,0.149816,0.359119,3,94
2007,claim_ 997,0.149816,0.576362,1.066228,1.066228,0.149816,0.149816,0.359119,3,94
2008,claim_ 998,0.207558,1.976946,3.834058,3.834058,0.207558,0.207558,0.416860,3,640


In [186]:
df.to_csv('fraudscore.csv',index=False)