In [48]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pylab import rcParams

# 1. Pre-processing the datasets

In [49]:
# Loading all the training datasets
# Every raw CSV lives in one directory; keeping it in a single constant means
# the notebook can be pointed at another checkout by editing one line.
DATA_DIR = '/Users/suborna/Github/Capstone_Project/Data'

Train_Beneficiary = pd.read_csv(DATA_DIR + '/Train_Beneficiary.csv')
Train_Inpatient = pd.read_csv(DATA_DIR + '/Train_Inpatient.csv')
Train_Outpatient = pd.read_csv(DATA_DIR + '/Train_Outpatient.csv')
# Train.csv is the table later joined on 'Provider' to attach the fraud label.
Fraud_label = pd.read_csv(DATA_DIR + '/Train.csv')

In [50]:
Train_Beneficiary.shape # (rows, columns) of the beneficiary table

(138556, 25)

In [51]:
Train_Inpatient.shape # (rows, columns) of the inpatient claims table

(40474, 30)

In [52]:
Train_Outpatient.shape # (rows, columns) of the outpatient claims table

(517737, 27)

In [53]:
# The Train.csv frame is loaded as `Fraud_label`; the name `Test_label` is not
# assigned by any visible cell, so it raises NameError on a fresh kernel.
Fraud_label.shape # number of rows & columns

(5410, 2)

In [54]:
# Let pandas render up to 100 columns before truncating wide frames.
pd.options.display.max_columns = 100

### Columns dropped

In [55]:
# DOD is removed from the beneficiary table.
Train_Beneficiary = Train_Beneficiary.drop(columns=['DOD'])

# dropping because 100% data as NAs
Train_Inpatient = Train_Inpatient.drop(columns=['ClmProcedureCode_6'])
Train_Outpatient = Train_Outpatient.drop(columns=['ClmProcedureCode_5', 'ClmProcedureCode_6'])

### Checking for Missing Values

In [56]:
# Missing values per column (the beneficiary table has 138556 rows)
Train_Beneficiary.isna().sum()

BeneID                             0
DOB                                0
Gender                             0
Race                               0
RenalDiseaseIndicator              0
State                              0
County                             0
NoOfMonths_PartACov                0
NoOfMonths_PartBCov                0
ChronicCond_Alzheimer              0
ChronicCond_Heartfailure           0
ChronicCond_KidneyDisease          0
ChronicCond_Cancer                 0
ChronicCond_ObstrPulmonary         0
ChronicCond_Depression             0
ChronicCond_Diabetes               0
ChronicCond_IschemicHeart          0
ChronicCond_Osteoporasis           0
ChronicCond_rheumatoidarthritis    0
ChronicCond_stroke                 0
IPAnnualReimbursementAmt           0
IPAnnualDeductibleAmt              0
OPAnnualReimbursementAmt           0
OPAnnualDeductibleAmt              0
dtype: int64

In [57]:
# Missing values per column (the inpatient table has 40474 rows)
Train_Inpatient.isna().sum()

BeneID                        0
ClaimID                       0
ClaimStartDt                  0
ClaimEndDt                    0
Provider                      0
InscClaimAmtReimbursed        0
AttendingPhysician          112
OperatingPhysician        16644
OtherPhysician            35784
AdmissionDt                   0
ClmAdmitDiagnosisCode         0
DeductibleAmtPaid           899
DischargeDt                   0
DiagnosisGroupCode            0
ClmDiagnosisCode_1            0
ClmDiagnosisCode_2          226
ClmDiagnosisCode_3          676
ClmDiagnosisCode_4         1534
ClmDiagnosisCode_5         2894
ClmDiagnosisCode_6         4838
ClmDiagnosisCode_7         7258
ClmDiagnosisCode_8         9942
ClmDiagnosisCode_9        13497
ClmDiagnosisCode_10       36547
ClmProcedureCode_1        17326
ClmProcedureCode_2        35020
ClmProcedureCode_3        39509
ClmProcedureCode_4        40358
ClmProcedureCode_5        40465
dtype: int64

In [58]:
# Missing values per column (the outpatient table has 517737 rows)
Train_Outpatient.isna().sum()

BeneID                         0
ClaimID                        0
ClaimStartDt                   0
ClaimEndDt                     0
Provider                       0
InscClaimAmtReimbursed         0
AttendingPhysician          1396
OperatingPhysician        427120
OtherPhysician            322691
ClmDiagnosisCode_1         10453
ClmDiagnosisCode_2        195380
ClmDiagnosisCode_3        314480
ClmDiagnosisCode_4        392141
ClmDiagnosisCode_5        443393
ClmDiagnosisCode_6        468981
ClmDiagnosisCode_7        484776
ClmDiagnosisCode_8        494825
ClmDiagnosisCode_9        502899
ClmDiagnosisCode_10       516654
ClmProcedureCode_1        517575
ClmProcedureCode_2        517701
ClmProcedureCode_3        517733
ClmProcedureCode_4        517735
DeductibleAmtPaid              0
ClmAdmitDiagnosisCode     412312
dtype: int64

### Checking rows that have missing values in all 3 physician columns

In [59]:
# Row mask: True where attending, operating AND other physician are all missing.
outpatient_no_physician = (
    Train_Outpatient[['AttendingPhysician', 'OperatingPhysician', 'OtherPhysician']]
    .isna()
    .all(axis=1)
)
Train_Outpatient.loc[outpatient_no_physician].head()

Unnamed: 0,BeneID,ClaimID,ClaimStartDt,ClaimEndDt,Provider,InscClaimAmtReimbursed,AttendingPhysician,OperatingPhysician,OtherPhysician,ClmDiagnosisCode_1,ClmDiagnosisCode_2,ClmDiagnosisCode_3,ClmDiagnosisCode_4,ClmDiagnosisCode_5,ClmDiagnosisCode_6,ClmDiagnosisCode_7,ClmDiagnosisCode_8,ClmDiagnosisCode_9,ClmDiagnosisCode_10,ClmProcedureCode_1,ClmProcedureCode_2,ClmProcedureCode_3,ClmProcedureCode_4,DeductibleAmtPaid,ClmAdmitDiagnosisCode
941,BENE11270,CLM390618,2009-06-01,2009-06-01,PRV54885,200,,,,43391,V5861,,,,,,,,,,,,,0,43491
1674,BENE11483,CLM488391,2009-07-25,2009-08-02,PRV52071,50,,,,1740,73399,53081.0,73390.0,1629.0,,,,,,,,,,0,
2531,BENE11718,CLM388753,2009-05-31,2009-05-31,PRV56562,30,,,,,,,,,,,,,,,,,,0,7862
2540,BENE11721,CLM352199,2009-05-11,2009-05-11,PRV56016,50,,,,V289,,,,,,,,,,,,,,0,V7612
2569,BENE11727,CLM220164,2009-02-28,2009-02-28,PRV51021,200,,,,5184,,,,,,,,,,,,,,0,78609


In [60]:
# Row mask: True where attending, operating AND other physician are all missing.
inpatient_no_physician = (
    Train_Inpatient[['AttendingPhysician', 'OperatingPhysician', 'OtherPhysician']]
    .isna()
    .all(axis=1)
)
Train_Inpatient.loc[inpatient_no_physician].head()

Unnamed: 0,BeneID,ClaimID,ClaimStartDt,ClaimEndDt,Provider,InscClaimAmtReimbursed,AttendingPhysician,OperatingPhysician,OtherPhysician,AdmissionDt,ClmAdmitDiagnosisCode,DeductibleAmtPaid,DischargeDt,DiagnosisGroupCode,ClmDiagnosisCode_1,ClmDiagnosisCode_2,ClmDiagnosisCode_3,ClmDiagnosisCode_4,ClmDiagnosisCode_5,ClmDiagnosisCode_6,ClmDiagnosisCode_7,ClmDiagnosisCode_8,ClmDiagnosisCode_9,ClmDiagnosisCode_10,ClmProcedureCode_1,ClmProcedureCode_2,ClmProcedureCode_3,ClmProcedureCode_4,ClmProcedureCode_5
147,BENE11546,CLM47949,2009-04-22,2009-04-24,PRV56253,9000,,,,2009-04-22,5762,1068.0,2009-04-24,421,1977,56723,5849,56211,7824,2724,2720,53010,5990.0,,,,,,
855,BENE14117,CLM65737,2009-08-28,2009-08-31,PRV54894,10000,,,,2009-08-28,5939,1068.0,2009-08-31,672,25040,78791,40390,78904,41401,4280,59080,5789,,,,,,,
868,BENE14147,CLM47613,2009-04-19,2009-04-25,PRV54561,300,,,,2009-04-19,79902,1068.0,2009-04-25,261,4241,2768,4019,40390,42820,92401,60000,53081,,,,,,,
1335,BENE15713,CLM62650,2009-08-05,2009-08-22,PRV54986,19000,,,,2009-08-05,55010,,2009-08-22,370,2113,7804,56089,1962,2851,1888,41401,5609,5119.0,,4573.0,,,,
2152,BENE18544,CLM34474,2009-01-19,2009-01-25,PRV52021,3000,,,,2009-01-19,5770,1068.0,2009-01-25,433,5770,56210,3051,9992,79439,7856,57140,2859,,,3722.0,7994.0,4414.0,,


### Checking # of unique values in physicians columns (There are 3 types)

In [61]:
# Distinct attending-physician IDs in the outpatient claims (missing values are not counted)
Train_Outpatient['AttendingPhysician'].nunique()

74109

In [62]:
# Distinct operating-physician IDs in the outpatient claims (missing values are not counted)
Train_Outpatient['OperatingPhysician'].nunique()

28532

In [63]:
# Distinct other-physician IDs in the outpatient claims (missing values are not counted)
Train_Outpatient['OtherPhysician'].nunique()

44388

### Replacing NAs with 0s (code and amount columns) and with 'None' (physician columns)

In [64]:
# Column groups are built once so each list is not spelled out on both sides of
# every assignment.
diagnosis_code_cols = ['ClmDiagnosisCode_{}'.format(i) for i in range(1, 11)]
inpatient_zero_fill_cols = (
    ['DeductibleAmtPaid']
    + diagnosis_code_cols
    + ['ClmProcedureCode_{}'.format(i) for i in range(1, 6)]
)
outpatient_zero_fill_cols = (
    ['ClmAdmitDiagnosisCode']
    + diagnosis_code_cols
    + ['ClmProcedureCode_{}'.format(i) for i in range(1, 5)]
)
physician_id_cols = ['AttendingPhysician', 'OperatingPhysician', 'OtherPhysician']

# Missing deductible amounts and diagnosis/procedure codes become the integer 0.
# Diagnosis codes include strings such as 'V5789', so the 0 acts as a
# "no code" placeholder inside otherwise string-valued columns.
Train_Inpatient[inpatient_zero_fill_cols] = Train_Inpatient[inpatient_zero_fill_cols].fillna(0)
Train_Outpatient[outpatient_zero_fill_cols] = Train_Outpatient[outpatient_zero_fill_cols].fillna(0)

# Missing physician IDs become the literal string 'None' (not Python's None).
for claims in (Train_Inpatient, Train_Outpatient):
    claims[physician_id_cols] = claims[physician_id_cols].fillna('None')

### Merging Beneficiary & Fraud label data with inpatient & outpatient data

In [65]:
# Step 1: attach beneficiary attributes to every claim (join key: BeneID).
# Step 2: attach the label table loaded from Train.csv (join key: Provider),
#         which contributes the PotentialFraud column.
# Inner joins keep only claims that have a match in both lookups.
# The label frame is `Fraud_label`; the former `Test_label` reference is not
# assigned by any visible cell and raises NameError on a fresh kernel.
Merged_Inpatient = pd.merge(Train_Beneficiary, Train_Inpatient, how = 'inner', on = 'BeneID')
Merged_Inpatient = pd.merge(Merged_Inpatient, Fraud_label, how = 'inner', on = 'Provider')
Merged_Outpatient = pd.merge(Train_Beneficiary, Train_Outpatient, how = 'inner', on = 'BeneID')
Merged_Outpatient = pd.merge(Merged_Outpatient, Fraud_label, how = 'inner', on = 'Provider')

In [66]:
Merged_Inpatient.head() # first 5 rows of the merged inpatient data (beneficiary + claim + PotentialFraud)

Unnamed: 0,BeneID,DOB,Gender,Race,RenalDiseaseIndicator,State,County,NoOfMonths_PartACov,NoOfMonths_PartBCov,ChronicCond_Alzheimer,ChronicCond_Heartfailure,ChronicCond_KidneyDisease,ChronicCond_Cancer,ChronicCond_ObstrPulmonary,ChronicCond_Depression,ChronicCond_Diabetes,ChronicCond_IschemicHeart,ChronicCond_Osteoporasis,ChronicCond_rheumatoidarthritis,ChronicCond_stroke,IPAnnualReimbursementAmt,IPAnnualDeductibleAmt,OPAnnualReimbursementAmt,OPAnnualDeductibleAmt,ClaimID,ClaimStartDt,ClaimEndDt,Provider,InscClaimAmtReimbursed,AttendingPhysician,OperatingPhysician,OtherPhysician,AdmissionDt,ClmAdmitDiagnosisCode,DeductibleAmtPaid,DischargeDt,DiagnosisGroupCode,ClmDiagnosisCode_1,ClmDiagnosisCode_2,ClmDiagnosisCode_3,ClmDiagnosisCode_4,ClmDiagnosisCode_5,ClmDiagnosisCode_6,ClmDiagnosisCode_7,ClmDiagnosisCode_8,ClmDiagnosisCode_9,ClmDiagnosisCode_10,ClmProcedureCode_1,ClmProcedureCode_2,ClmProcedureCode_3,ClmProcedureCode_4,ClmProcedureCode_5,PotentialFraud
0,BENE11001,1943-01-01,1,1,0,39,230,12,12,1,2,1,2,2,1,1,1,2,1,1,36000,3204,60,70,CLM46614,2009-04-12,2009-04-18,PRV55912,26000,PHY390922,,,2009-04-12,7866,1068.0,2009-04-18,201,1970,4019,5853,7843,2768,71590,2724,19889,5849,0,0.0,0.0,0.0,0.0,0.0,Yes
1,BENE17521,1913-12-01,2,1,0,39,230,12,12,1,1,1,2,1,2,2,1,2,2,2,19000,1068,100,20,CLM34721,2009-01-20,2009-02-01,PRV55912,19000,PHY349293,PHY370861,PHY363291,2009-01-20,45340,1068.0,2009-02-01,987,4240,2639,2948,40390,45821,28489,5854,2753,E9305,0,7769.0,5849.0,0.0,0.0,0.0,Yes
2,BENE21718,1922-10-01,1,1,0,39,600,12,12,2,2,2,2,2,2,2,1,1,2,2,17000,1068,1050,540,CLM72336,2009-10-17,2009-11-04,PRV55912,17000,PHY334706,PHY334706,,2009-10-17,V5789,1068.0,2009-11-04,941,V5789,4168,73313,7812,7993,78830,72273,43812,4019,0,9338.0,0.0,0.0,0.0,0.0,Yes
3,BENE22934,1930-07-01,2,1,0,39,280,12,12,2,1,2,2,1,2,1,1,1,1,2,27000,2136,450,160,CLM73394,2009-10-25,2009-10-29,PRV55912,13000,PHY390614,PHY323689,PHY363291,2009-10-25,71946,1068.0,2009-10-29,506,71535,71960,4019,V1202,4240,2449,2768,0,0,0,8154.0,0.0,0.0,0.0,0.0,Yes
4,BENE24402,1925-09-01,2,2,0,39,510,12,12,1,2,2,1,2,2,2,1,2,2,2,3000,1068,8900,740,CLM32911,2009-01-08,2009-01-12,PRV55912,3000,PHY380413,PHY432598,,2009-01-08,1748,1068.0,2009-01-12,593,2330,0,0,0,0,0,0,0,0,0,8543.0,0.0,0.0,0.0,0.0,Yes


### Replacing values for some of the columns

In [67]:
# Columns whose raw values are 1/2; every 2 is recoded to 0 so they become 0/1 flags.
replace_two = [
    'Gender',
    'ChronicCond_Alzheimer',
    'ChronicCond_Heartfailure',
    'ChronicCond_KidneyDisease',
    'ChronicCond_Cancer',
    'ChronicCond_ObstrPulmonary',
    'ChronicCond_Depression',
    'ChronicCond_Diabetes',
    'ChronicCond_IschemicHeart',
    'ChronicCond_Osteoporasis',
    'ChronicCond_rheumatoidarthritis',
    'ChronicCond_stroke',
]

# The same two recodes are applied to the inpatient and then the outpatient frame.
for merged_claims in (Merged_Inpatient, Merged_Outpatient):
    # Only 'Y' is mapped (to 1); every other value is left as it is.
    # NOTE(review): if the non-'Y' entries were read from CSV as the string '0',
    # this column ends up holding both str and int values — confirm its dtype.
    merged_claims['RenalDiseaseIndicator'] = merged_claims['RenalDiseaseIndicator'].replace('Y', 1)
    merged_claims[replace_two] = merged_claims[replace_two].replace(2, 0)

In [68]:
# Sanity check (Inpatient): each recoded column should now contain only 0 and 1
for flag_col in replace_two:
    distinct_values = Merged_Inpatient[flag_col].unique()
    print(flag_col, distinct_values)

Gender [1 0]
ChronicCond_Alzheimer [1 0]
ChronicCond_Heartfailure [0 1]
ChronicCond_KidneyDisease [1 0]
ChronicCond_Cancer [0 1]
ChronicCond_ObstrPulmonary [0 1]
ChronicCond_Depression [1 0]
ChronicCond_Diabetes [1 0]
ChronicCond_IschemicHeart [1 0]
ChronicCond_Osteoporasis [0 1]
ChronicCond_rheumatoidarthritis [1 0]
ChronicCond_stroke [1 0]


In [69]:
# Sanity check (Outpatient): each recoded column should now contain only 0 and 1
for flag_col in replace_two:
    distinct_values = Merged_Outpatient[flag_col].unique()
    print(flag_col, distinct_values)

Gender [0 1]
ChronicCond_Alzheimer [0 1]
ChronicCond_Heartfailure [0 1]
ChronicCond_KidneyDisease [0 1]
ChronicCond_Cancer [0 1]
ChronicCond_ObstrPulmonary [0 1]
ChronicCond_Depression [0 1]
ChronicCond_Diabetes [0 1]
ChronicCond_IschemicHeart [0 1]
ChronicCond_Osteoporasis [0 1]
ChronicCond_rheumatoidarthritis [0 1]
ChronicCond_stroke [0 1]


### Adding new feature columns

In [70]:
# --- Claims per beneficiary ---------------------------------------------------
# Count ClaimID values per BeneID, then join that count back onto every claim row.
# NOTE(review): the resulting column is named 'Total_Visit_Count' for inpatient
# but 'Total_Claim_Count' for outpatient; the names are kept as they are because
# later cells may reference either one.
visit_count_per_patients_in = Merged_Inpatient.groupby('BeneID')['ClaimID'].agg(['count'])
Merged_Inpatient = (
    Merged_Inpatient
    .merge(visit_count_per_patients_in, how='inner', on='BeneID')
    .rename(columns={'count': 'Total_Visit_Count'})
)

visit_count_per_patients_out = Merged_Outpatient.groupby('BeneID')['ClaimID'].agg(['count'])
Merged_Outpatient = (
    Merged_Outpatient
    .merge(visit_count_per_patients_out, how='inner', on='BeneID')
    .rename(columns={'count': 'Total_Claim_Count'})
)

# --- Age ------------------------------------------------------------------------
# Age relative to 2009: 2009 minus the birth year, where the birth year is the
# text before the first '-' in DOB (month and day are ignored).
birth_year_in = Merged_Inpatient['DOB'].str.split('-').str[0].astype('int')
Merged_Inpatient['Age'] = 2009 - birth_year_in

birth_year_out = Merged_Outpatient['DOB'].str.split('-').str[0].astype('int')
Merged_Outpatient['Age'] = 2009 - birth_year_out

# --- Chronic condition count ----------------------------------------------------
# Row-wise sum over the chronic-condition columns.
chronic_conditions = [
    'ChronicCond_Alzheimer',
    'ChronicCond_Heartfailure',
    'ChronicCond_KidneyDisease',
    'ChronicCond_Cancer',
    'ChronicCond_ObstrPulmonary',
    'ChronicCond_Depression',
    'ChronicCond_Diabetes',
    'ChronicCond_IschemicHeart',
    'ChronicCond_Osteoporasis',
    'ChronicCond_rheumatoidarthritis',
    'ChronicCond_stroke',
]

Merged_Inpatient['Chronic_Condition_Count'] = Merged_Inpatient[chronic_conditions].sum(axis='columns')
Merged_Outpatient['Chronic_Condition_Count'] = Merged_Outpatient[chronic_conditions].sum(axis='columns')

In [71]:
# First 5 inpatient rows, now including Total_Visit_Count, Age and Chronic_Condition_Count
Merged_Inpatient.head()

Unnamed: 0,BeneID,DOB,Gender,Race,RenalDiseaseIndicator,State,County,NoOfMonths_PartACov,NoOfMonths_PartBCov,ChronicCond_Alzheimer,ChronicCond_Heartfailure,ChronicCond_KidneyDisease,ChronicCond_Cancer,ChronicCond_ObstrPulmonary,ChronicCond_Depression,ChronicCond_Diabetes,ChronicCond_IschemicHeart,ChronicCond_Osteoporasis,ChronicCond_rheumatoidarthritis,ChronicCond_stroke,IPAnnualReimbursementAmt,IPAnnualDeductibleAmt,OPAnnualReimbursementAmt,OPAnnualDeductibleAmt,ClaimID,ClaimStartDt,ClaimEndDt,Provider,InscClaimAmtReimbursed,AttendingPhysician,OperatingPhysician,OtherPhysician,AdmissionDt,ClmAdmitDiagnosisCode,DeductibleAmtPaid,DischargeDt,DiagnosisGroupCode,ClmDiagnosisCode_1,ClmDiagnosisCode_2,ClmDiagnosisCode_3,ClmDiagnosisCode_4,ClmDiagnosisCode_5,ClmDiagnosisCode_6,ClmDiagnosisCode_7,ClmDiagnosisCode_8,ClmDiagnosisCode_9,ClmDiagnosisCode_10,ClmProcedureCode_1,ClmProcedureCode_2,ClmProcedureCode_3,ClmProcedureCode_4,ClmProcedureCode_5,PotentialFraud,Total_Visit_Count,Age,Chronic_Condition_Count
0,BENE11001,1943-01-01,1,1,0,39,230,12,12,1,0,1,0,0,1,1,1,0,1,1,36000,3204,60,70,CLM46614,2009-04-12,2009-04-18,PRV55912,26000,PHY390922,,,2009-04-12,7866,1068.0,2009-04-18,201,1970,4019,5853,7843,2768,71590,2724,19889,5849,0,0.0,0.0,0.0,0.0,0.0,Yes,3,66,7
1,BENE11001,1943-01-01,1,1,0,39,230,12,12,1,0,1,0,0,1,1,1,0,1,1,36000,3204,60,70,CLM66048,2009-08-31,2009-09-02,PRV55907,5000,PHY318495,PHY318495,,2009-08-31,6186,1068.0,2009-09-02,750,6186,2948,56400,0,0,0,0,0,0,0,7092.0,0.0,0.0,0.0,0.0,No,3,66,7
2,BENE11001,1943-01-01,1,1,0,39,230,12,12,1,0,1,0,0,1,1,1,0,1,1,36000,3204,60,70,CLM68358,2009-09-17,2009-09-20,PRV56046,5000,PHY372395,,PHY324689,2009-09-17,29590,1068.0,2009-09-20,883,29623,30390,71690,34590,V1581,32723,0,0,0,0,0.0,0.0,0.0,0.0,0.0,No,3,66,7
3,BENE17521,1913-12-01,0,1,0,39,230,12,12,1,1,1,0,1,0,0,1,0,0,0,19000,1068,100,20,CLM34721,2009-01-20,2009-02-01,PRV55912,19000,PHY349293,PHY370861,PHY363291,2009-01-20,45340,1068.0,2009-02-01,987,4240,2639,2948,40390,45821,28489,5854,2753,E9305,0,7769.0,5849.0,0.0,0.0,0.0,Yes,1,96,5
4,BENE21718,1922-10-01,1,1,0,39,600,12,12,0,0,0,0,0,0,0,1,1,0,0,17000,1068,1050,540,CLM72336,2009-10-17,2009-11-04,PRV55912,17000,PHY334706,PHY334706,,2009-10-17,V5789,1068.0,2009-11-04,941,V5789,4168,73313,7812,7993,78830,72273,43812,4019,0,9338.0,0.0,0.0,0.0,0.0,Yes,1,87,2


In [72]:
# First 5 outpatient rows, now including Total_Claim_Count, Age and Chronic_Condition_Count
Merged_Outpatient.head()

Unnamed: 0,BeneID,DOB,Gender,Race,RenalDiseaseIndicator,State,County,NoOfMonths_PartACov,NoOfMonths_PartBCov,ChronicCond_Alzheimer,ChronicCond_Heartfailure,ChronicCond_KidneyDisease,ChronicCond_Cancer,ChronicCond_ObstrPulmonary,ChronicCond_Depression,ChronicCond_Diabetes,ChronicCond_IschemicHeart,ChronicCond_Osteoporasis,ChronicCond_rheumatoidarthritis,ChronicCond_stroke,IPAnnualReimbursementAmt,IPAnnualDeductibleAmt,OPAnnualReimbursementAmt,OPAnnualDeductibleAmt,ClaimID,ClaimStartDt,ClaimEndDt,Provider,InscClaimAmtReimbursed,AttendingPhysician,OperatingPhysician,OtherPhysician,ClmDiagnosisCode_1,ClmDiagnosisCode_2,ClmDiagnosisCode_3,ClmDiagnosisCode_4,ClmDiagnosisCode_5,ClmDiagnosisCode_6,ClmDiagnosisCode_7,ClmDiagnosisCode_8,ClmDiagnosisCode_9,ClmDiagnosisCode_10,ClmProcedureCode_1,ClmProcedureCode_2,ClmProcedureCode_3,ClmProcedureCode_4,DeductibleAmtPaid,ClmAdmitDiagnosisCode,PotentialFraud,Total_Claim_Count,Age,Chronic_Condition_Count
0,BENE11002,1936-09-01,0,1,0,39,280,12,12,0,0,0,0,0,0,0,0,0,0,0,0,0,30,50,CLM624349,2009-10-11,2009-10-11,PRV56011,30,PHY326117,,,78943,V5866,V1272,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,56409,Yes,1,73,0
1,BENE11004,1922-07-01,1,1,0,39,270,12,12,1,1,0,0,0,0,1,1,1,1,0,0,0,1810,760,CLM121801,2009-01-06,2009-01-06,PRV56011,40,PHY334319,,,71988,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0,Yes,12,87,6
2,BENE11004,1922-07-01,1,1,0,39,270,12,12,1,1,0,0,0,0,1,1,1,1,0,0,0,1810,760,CLM150998,2009-01-22,2009-01-22,PRV56011,200,PHY403831,,,82382,30000,72887,4280,7197,V4577,0,0,0,0,0.0,0.0,0.0,0.0,0,71947,Yes,12,87,6
3,BENE11004,1922-07-01,1,1,0,39,270,12,12,1,1,0,0,0,0,1,1,1,1,0,0,0,1810,760,CLM173224,2009-02-03,2009-02-03,PRV56011,20,PHY339887,,,20381,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0,Yes,12,87,6
4,BENE11004,1922-07-01,1,1,0,39,270,12,12,1,1,0,0,0,0,1,1,1,1,0,0,0,1810,760,CLM224741,2009-03-03,2009-03-03,PRV56011,40,PHY345721,,,V6546,4280,2449,V854,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0,Yes,12,87,6
