Notebook Creating a Graph using NetworkX

In [1]:
import networkx as nx
import pandas as pd
import numpy as np

In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

Loading the preprocessed data

In [3]:
# Preprocessed table, read straight from the raw-file URL of the hosting repository.
data = pd.read_csv(
    "https://raw.githubusercontent.com/"
    "EY-Tech-Consulting-Denmark/Graphathon-ATP/"
    "main/Data/clean_data/data.csv"
)

In [4]:
# Preview the first rows to sanity-check the load.
data.head()

Unnamed: 0,BeneID,DOB,DOD,Gender,Race,RenalDiseaseIndicator,State,County,NoOfMonths_PartACov,NoOfMonths_PartBCov,...,ClmProcedureCode_5,PotentialFraud,Age,IsDead,DaysAdmitted,DaysClaimLasted,ClaimEndAfterDischarged,TotalDiagnosis,TotalProcedures,TotalPhysicians
0,BENE11001,1943-01-01,,1,1,0,39,230,12,12,...,,1,66.0,0,7,7,0,9,0,1
1,BENE17521,1913-12-01,,2,1,0,39,230,12,12,...,,1,95.0,0,13,13,0,9,2,3
2,BENE21718,1922-10-01,,1,1,0,39,600,12,12,...,,1,87.0,0,19,19,0,9,1,2
3,BENE22934,1930-07-01,,2,1,0,39,280,12,12,...,,1,79.0,0,5,5,0,7,1,3
4,BENE24402,1925-09-01,,2,2,0,39,510,12,12,...,,1,83.0,0,5,5,0,1,1,2


In [99]:
# List the distinct values of the DOD column (missing entries show up as NaN).
data['DOD'].unique()

array([nan, '2009-09-01', '2009-03-01', '2009-11-01', '2009-10-01',
       '2009-08-01', '2009-07-01', '2009-12-01', '2009-04-01',
       '2009-06-01', '2009-05-01', '2009-02-01'], dtype=object)

In [5]:
# Show the column names available in the loaded table.
data.columns

Index(['BeneID', 'DOB', 'DOD', 'Gender', 'Race', 'RenalDiseaseIndicator',
       'State', 'County', 'NoOfMonths_PartACov', 'NoOfMonths_PartBCov',
       'ChronicCond_Alzheimer', 'ChronicCond_Heartfailure',
       'ChronicCond_KidneyDisease', 'ChronicCond_Cancer',
       'ChronicCond_ObstrPulmonary', 'ChronicCond_Depression',
       'ChronicCond_Diabetes', 'ChronicCond_IschemicHeart',
       'ChronicCond_Osteoporasis', 'ChronicCond_rheumatoidarthritis',
       'ChronicCond_stroke', 'IPAnnualReimbursementAmt',
       'IPAnnualDeductibleAmt', 'OPAnnualReimbursementAmt',
       'OPAnnualDeductibleAmt', 'ClaimID', 'ClaimStartDt', 'ClaimEndDt',
       'Provider', 'InscClaimAmtReimbursed', 'AttendingPhysician',
       'OperatingPhysician', 'OtherPhysician', 'AdmissionDt',
       'ClmAdmitDiagnosisCode', 'DeductibleAmtPaid', 'DischargeDt',
       'DiagnosisGroupCode', 'ClmDiagnosisCode_1', 'ClmDiagnosisCode_2',
       'ClmDiagnosisCode_3', 'ClmDiagnosisCode_4', 'ClmDiagnosisCode_5',
       'Cl

Creating the Graph

In [61]:
# Column groups of `data` that are pooled into ID lists further down.

# Indicator columns, one per condition.
conditionCode = [
    "RenalDiseaseIndicator",
    "ChronicCond_Alzheimer",
    "ChronicCond_Heartfailure",
    "ChronicCond_KidneyDisease",
    "ChronicCond_Cancer",
    "ChronicCond_ObstrPulmonary",
    "ChronicCond_Depression",
    "ChronicCond_Diabetes",
    "ChronicCond_IschemicHeart",
    "ChronicCond_Osteoporasis",
    "ChronicCond_rheumatoidarthritis",
    "ChronicCond_stroke",
]

# Procedure-code columns of a claim.
procedureCode = [
    "ClmProcedureCode_1",
    "ClmProcedureCode_2",
    "ClmProcedureCode_3",
    "ClmProcedureCode_4",
    "ClmProcedureCode_5",
]

# Physician columns of a claim.
physicianCode = [
    "AttendingPhysician",
    "OperatingPhysician",
    "OtherPhysician",
]

In [102]:
def _distinct_values(frame, columns):
    """Return the distinct non-missing values found in any of ``columns``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table to read the columns from.
    columns : list of str
        Names of the columns whose values are pooled together.

    Returns
    -------
    list
        Unique values in order of first appearance, with missing values removed.
        Empty when ``columns`` is empty.
    """
    if not columns:
        return []
    pooled = pd.concat([frame[col] for col in columns], ignore_index=True)
    # Missing values are dropped on purpose: NaN is not a usable ID, and building a
    # set() from Series.tolist() keeps a separate NaN for every missing row of a
    # float column, because NaN never compares equal to itself.
    return pooled.dropna().unique().tolist()


procedureID = _distinct_values(data, procedureCode)
conditionID = _distinct_values(data, conditionCode)
physicianID = _distinct_values(data, physicianCode)

Beneficiary: {
              "gender":"Gender", 
              "race":"Race",
              "dob":"DOB",
              "county":"County",
              "state":"State",
              "id":"BeneID",
              "age":"Age",
              "dod":"DOD"
              }
              
Claim :       {
              "reimbursedAmt":"InscClaimAmtReimbursed", 
              "claimEndAfterDischarged":"ClaimEndAfterDischarged",
              "admissionDate":"AdmissionDt",
              "daysClaimLasted":"DaysClaimLasted",
              "endDate":"ClaimEndDt",
              "dischargeDate":"DischargeDt",
              "deductibleAmt":"DeductibleAmtPaid",
              "id":"ClaimID",
              "startDate":"ClaimStartDt",
              "daysAdmitted":"DaysAdmitted"
              }
         
Provider: {
              "fraud":"PotentialFraud",
              "id": "Provider"
          }
         
Condition: {
               "id":"conditionID"
           }
        
Diagnosis: {
            "id":"ClmAdmitDiagnosisCode"
           }
           
Physician: {"id":"physicianID"
           }
          
Procedure: {"id":"procedureID"
           }      

In [None]:
# Preview only the first few values: rendering the whole column as a Python list
# floods the cell output without adding information.
list(data["Gender"].values[:10])

In [None]:
# Attributes for each node of the graph, keyed by node label.
# Column-backed properties hold the full column of `data` as a list (one entry
# per row); Condition, Physician and Procedure hold the ID lists built from the
# column groups.
attrs = {
    'Beneficiary': {
        "gender": list(data["Gender"]),
        "race": list(data["Race"]),
        "dob": list(data["DOB"]),
        "county": list(data["County"]),
        "dod": list(data["DOD"]),  # the comma was missing here, which made the whole cell a SyntaxError
        "state": list(data["State"]),
        "id": list(data["BeneID"]),
        "age": list(data["Age"]),
    },

    'Claim': {
        "reimbursedAmt": list(data["InscClaimAmtReimbursed"]),
        "claimEndAfterDischarged": list(data["ClaimEndAfterDischarged"]),
        "admissionDate": list(data["AdmissionDt"]),
        "daysClaimLasted": list(data["DaysClaimLasted"]),
        "endDate": list(data["ClaimEndDt"]),
        "dischargeDate": list(data["DischargeDt"]),
        "deductibleAmt": list(data["DeductibleAmtPaid"]),
        "id": list(data["ClaimID"]),
        "startDate": list(data["ClaimStartDt"]),
        "daysAdmitted": list(data["DaysAdmitted"]),
    },

    'Provider': {
        "fraud": list(data["PotentialFraud"]),
        "id": list(data["Provider"]),
    },

    'Condition': {
        "id": conditionID,
    },

    'Diagnosis': {
        "id": list(data["ClmAdmitDiagnosisCode"]),
    },

    'Physician': {
        "id": physicianID,
    },

    'Procedure': {
        "id": procedureID,
    },
}

In [None]:
# Directed multigraph: permits several parallel directed edges between the same two nodes.
G = nx.MultiDiGraph()

In [None]:
# Labels of the graph's nodes.
nodes = [
    "Beneficiary",
    "Claim",
    "Provider",
    "Condition",
    "Diagnosis",
    "Procedure",
    "Physician",
]

In [None]:
# Add one node to the graph for each label in `nodes`.
G.add_nodes_from(nodes)

In [None]:
# Inspect the nodes currently in the graph.
G.nodes()

In [None]:
# Attach each label's attribute dict from `attrs` to the node of the same name.
nx.set_node_attributes(G, attrs)