In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import ast
from sklearn.model_selection import train_test_split 
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder

patient = pd.read_csv('../../CombinedData/combined1.csv')
allergies = pd.read_csv('../../CombinedData/allergies.csv')
immunizations = pd.read_csv('../../CombinedData/immunizations.csv')
conditions = pd.read_csv('../../CombinedData/conditions.csv')
medications = pd.read_csv('../../CombinedData/medications.csv')
encounters = pd.read_csv('../../CombinedData/encounters.csv')
careplans = pd.read_csv('../../CombinedData/careplans.csv')
patient.head()

Unnamed: 0,ID,BIRTHDATE,DEATHDATE,PASSPORT,MARITAL,RACE,ETHNICITY,GENDER,BIRTHPLACE,ZIP,...,ALLERGIES_CODE,IMMUNIZATIONS_CODE,CAREPLANS_CODE,CAREPLANS_REASONCODE,BODY_WEIGHT,BODY_HEIGHT,BMI,SYSTOLIC_BP,DIASTOLIC_BP,OBSERVATIONS_CODE
0,660bec03-9e58-47f2-98b9-2f1c564f3838,1996-07-26,,false,,white,irish,F,Fitchburg MA US,20810.0,...,,"[140, 140, 140, 114, 140, 140, 140, 140]","[872781000000100, 266694003, 183051005, 539500...","[39848009.0, 39848009.0, 39848009.0, 10509002....",73.54,163.07,27.66,101.0,75.0,"['8302-2', '29463-7', '39156-5', '8480-6', '84..."
1,5125d2b2-3aef-4ae2-aa5c-335f7e206b92,1996-09-24,,false,,white,french_canadian,F,Westborough MA US,2638.0,...,"[91930004, 419263009, 424213003]","[140, 62, 140, 140, 114, 140, 140, 140, 140]","[326051000000105, 409002, 58332002, 3260510000...","[nan, nan, nan, nan, nan, nan, nan]",17.33,95.92,18.84,137.0,89.0,"['8302-2', '29463-7', '39156-5', '8480-6', '84..."
2,26626faf-cbd5-48d5-a3bf-a7b21ae08e4b,1944-09-01,2015-09-04,X19963891X,M,white,irish,M,Fall River MA US,23401.0,...,,"[133, 140, 33, 140, 140, 140, 140]","[133901003, 385949008, 440381005, 439830001, 3...","[403190006.0, 403190006.0, 403190006.0, 403190...",74.58,174.14,24.6,111.0,71.0,"['8302-2', '29463-7', '39156-5', '8480-6', '84..."
3,eed62b4a-1099-47ec-a2ac-d953830b44d6,1965-08-05,,false,M,hispanic,central_american,M,Shrewsbury MA US,2631.0,...,,"[140, 140, 140, 113]",,,113.23,164.74,41.72,131.0,86.0,"['8302-2', '29463-7', '39156-5', '8480-6', '84..."
4,6e9f8b3e-5a21-401e-868d-2d62e0e7f452,1979-11-03,,X29737332X,M,white,irish,F,Wilmington MA US,2630.0,...,[300913006],"[140, 113, 140]","[326051000000105, 409002, 58332002, 3260510000...","[nan, nan, nan, nan, nan, 72892002.0, 72892002...",106.81,166.13,38.7,131.0,74.0,"['8302-2', '29463-7', '39156-5', '8480-6', '84..."


In [2]:
patient.drop(columns=['CONDITIONS_CODE','MEDICATIONS_CODE','MEDICATIONS_REASONCODE','ENCOUNTERS_CODE','ENCOUNTERS_REASONCODE','ALLERGIES_CODE', 'IMMUNIZATIONS_CODE', 'CAREPLANS_CODE', 'CAREPLANS_REASONCODE', 'OBSERVATIONS_CODE'], inplace=True)
conditions.drop(columns=['START', 'STOP', 'ENCOUNTER', 'DESCRIPTION'], inplace=True)
medications.drop(columns=['START', 'STOP', 'ENCOUNTER', 'DESCRIPTION', 'REASONDESCRIPTION'], inplace=True)
immunizations.drop(columns=['DATE', 'ENCOUNTER', 'DESCRIPTION'], inplace=True)
allergies.drop(columns=['START', 'STOP', 'ENCOUNTER', 'DESCRIPTION'], inplace=True)
encounters.drop(columns=['DATE', 'ID', 'DESCRIPTION', 'REASONDESCRIPTION'], inplace=True)
careplans.drop(columns=['ID', 'START', 'STOP', 'ENCOUNTER', 'DESCRIPTION', 'REASONDESCRIPTION'], inplace=True)


In [3]:
patient.isna().sum()

ID                  0
BIRTHDATE           0
DEATHDATE       97023
PASSPORT            0
MARITAL         32230
RACE               37
ETHNICITY          19
GENDER             35
BIRTHPLACE         28
ZIP               113
BODY_WEIGHT         0
BODY_HEIGHT         0
BMI                 0
SYSTOLIC_BP         0
DIASTOLIC_BP        0
dtype: int64

In [4]:
columns_to_fill_with_mode = ['RACE', 'ETHNICITY', 'GENDER', 'BIRTHPLACE', 'ZIP']

for column in columns_to_fill_with_mode:
    mode_value = patient[column].mode()[0]
    patient[column].fillna(mode_value, inplace=True)
    
patient['MARITAL'] = patient['MARITAL'].fillna('S')
patient['DEATHDATE'] = patient['DEATHDATE'].fillna('False')
patient['PASSPORT'] = patient['PASSPORT'].apply(lambda x: x.lower() != 'false')
columns_to_normalize = ['RACE', 'ETHNICITY', 'PASSPORT', 'BIRTHPLACE'] 

# Create a LabelEncoder
label_encoder = LabelEncoder()

# Apply LabelEncoder to each column
patient[columns_to_normalize] = patient[columns_to_normalize].apply(label_encoder.fit_transform)

patient['MARITAL'] = patient['MARITAL'].apply(lambda x: 0 if x == 'S' else 1)

# Map 'M' to 1 and 'F' to 0 in the 'GENDER' column
patient['GENDER'] = patient['GENDER'].apply(lambda x: 0 if x == 'F' else 1)

patient.isna().sum()

ID              0
BIRTHDATE       0
DEATHDATE       0
PASSPORT        0
MARITAL         0
RACE            0
ETHNICITY       0
GENDER          0
BIRTHPLACE      0
ZIP             0
BODY_WEIGHT     0
BODY_HEIGHT     0
BMI             0
SYSTOLIC_BP     0
DIASTOLIC_BP    0
dtype: int64

In [5]:
# Convert 'BIRTHDATE' to datetime, handling errors by setting problematic values to NaN
patient['BIRTHDATE'] = pd.to_datetime(patient['BIRTHDATE'], errors='coerce')

# Drop rows with NaN values in 'BIRTHDATE'
patient = patient.dropna(subset=['BIRTHDATE'])

# Assuming you have cleaned the 'BIRTHDATE' column, you can proceed with the reference date
reference_date = pd.to_datetime('2023-01-01')

# Calculate age
patient['AGE'] = (reference_date - patient['BIRTHDATE']).dt.days // 365.25

# Convert 'DEATHDATE' to datetime, handling errors by setting problematic values to NaT
patient['DEATHDATE'] = pd.to_datetime(patient['DEATHDATE'], errors='coerce')

# Calculate age at death if available
patient['AGE_AT_DEATH'] = (patient['DEATHDATE'] - patient['BIRTHDATE']).dt.days // 365.25

# Replace NaT with 0 in 'AGE_AT_DEATH' for patients who are not deceased
patient['AGE_AT_DEATH'] = patient['AGE_AT_DEATH'].fillna(0)


patient.head()

  patient['DEATHDATE'] = pd.to_datetime(patient['DEATHDATE'], errors='coerce')


Unnamed: 0,ID,BIRTHDATE,DEATHDATE,PASSPORT,MARITAL,RACE,ETHNICITY,GENDER,BIRTHPLACE,ZIP,BODY_WEIGHT,BODY_HEIGHT,BMI,SYSTOLIC_BP,DIASTOLIC_BP,AGE,AGE_AT_DEATH
0,660bec03-9e58-47f2-98b9-2f1c564f3838,1996-07-26,NaT,0,0,43,59,0,119,20810.0,73.54,163.07,27.66,101.0,75.0,26.0,0.0
1,5125d2b2-3aef-4ae2-aa5c-335f7e206b92,1996-09-24,NaT,0,0,43,56,0,363,2638.0,17.33,95.92,18.84,137.0,89.0,26.0,0.0
2,26626faf-cbd5-48d5-a3bf-a7b21ae08e4b,1944-09-01,2015-09-04,1,1,43,59,1,117,23401.0,74.58,174.14,24.6,111.0,71.0,78.0,71.0
3,eed62b4a-1099-47ec-a2ac-d953830b44d6,1965-08-05,NaT,0,1,41,48,1,304,2631.0,113.23,164.74,41.72,131.0,86.0,57.0,0.0
4,6e9f8b3e-5a21-401e-868d-2d62e0e7f452,1979-11-03,NaT,1,1,43,59,0,377,2630.0,106.81,166.13,38.7,131.0,74.0,43.0,0.0


In [6]:
allergies = allergies.rename(columns={'CODE': 'ALLERGIES_CODE'})
allergies.isna().sum()

PATIENT           0
ALLERGIES_CODE    0
dtype: int64

In [7]:
conditions = conditions.rename(columns={'CODE': 'CONDITIONS_CODE'})
conditions.isna().sum()

PATIENT            0
CONDITIONS_CODE    0
dtype: int64

In [8]:
medications = medications.rename(columns={'CODE': 'MEDICATIONS_CODE'})
medications = medications.rename(columns={'REASONCODE': 'MEDICATIONS_REASONCODE'})
medications.isna().sum()

PATIENT                        0
MEDICATIONS_CODE               0
MEDICATIONS_REASONCODE    118792
dtype: int64

In [9]:
medications.fillna(0, inplace=True)
medications.isna().sum()

PATIENT                   0
MEDICATIONS_CODE          0
MEDICATIONS_REASONCODE    0
dtype: int64

In [10]:
immunizations = immunizations.rename(columns={'CODE': 'IMMUNIZATIONS_CODE'})
immunizations.isna().sum()

PATIENT               0
IMMUNIZATIONS_CODE    0
dtype: int64

In [11]:
encounters = encounters.rename(columns={'CODE': 'ENCOUNTERS_CODE'})
encounters = encounters.rename(columns={'REASONCODE': 'ENCOUNTERS_REASONCODE'})
encounters.isna().sum()

PATIENT                       0
ENCOUNTERS_CODE               0
ENCOUNTERS_REASONCODE    907763
dtype: int64

In [12]:
encounters.fillna(0, inplace=True)
encounters.isna().sum()

PATIENT                  0
ENCOUNTERS_CODE          0
ENCOUNTERS_REASONCODE    0
dtype: int64

In [13]:
careplans = careplans.rename(columns={'CODE': 'CAREPLANS_CODE'})
careplans = careplans.rename(columns={'REASONCODE': 'CAREPLANS_REASONCODE'})
careplans.isna().sum()

PATIENT                     0
CAREPLANS_CODE              0
CAREPLANS_REASONCODE    94889
dtype: int64

In [14]:
careplans.fillna(0, inplace=True)
careplans.isna().sum()

PATIENT                 0
CAREPLANS_CODE          0
CAREPLANS_REASONCODE    0
dtype: int64

In [15]:
allergies_immu_df = pd.merge(allergies, immunizations, on='PATIENT', how='left')

In [16]:
all_immu_encou_df = pd.merge(allergies_immu_df, encounters, on='PATIENT', how='left').merge(conditions, on='PATIENT', how='left')
all_immu_encou_df.head()

Unnamed: 0,PATIENT,ALLERGIES_CODE,IMMUNIZATIONS_CODE,ENCOUNTERS_CODE,ENCOUNTERS_REASONCODE,CONDITIONS_CODE
0,5125d2b2-3aef-4ae2-aa5c-335f7e206b92,91930004,140.0,371883000,0.0,232353008.0
1,5125d2b2-3aef-4ae2-aa5c-335f7e206b92,91930004,140.0,371883000,0.0,74400008.0
2,5125d2b2-3aef-4ae2-aa5c-335f7e206b92,91930004,140.0,371883000,0.0,428251008.0
3,5125d2b2-3aef-4ae2-aa5c-335f7e206b92,91930004,140.0,185347001,0.0,232353008.0
4,5125d2b2-3aef-4ae2-aa5c-335f7e206b92,91930004,140.0,185347001,0.0,74400008.0


In [17]:
med_care_df = pd.merge(careplans, medications, on='PATIENT', how='left').merge(conditions, on='PATIENT', how='left')
med_care_df.head()

Unnamed: 0,PATIENT,CAREPLANS_CODE,CAREPLANS_REASONCODE,MEDICATIONS_CODE,MEDICATIONS_REASONCODE,CONDITIONS_CODE
0,660bec03-9e58-47f2-98b9-2f1c564f3838,872781000000100,39848009.0,834060.0,43878008.0,39848009.0
1,660bec03-9e58-47f2-98b9-2f1c564f3838,872781000000100,39848009.0,834060.0,43878008.0,10509002.0
2,660bec03-9e58-47f2-98b9-2f1c564f3838,872781000000100,39848009.0,834060.0,43878008.0,444814009.0
3,660bec03-9e58-47f2-98b9-2f1c564f3838,872781000000100,39848009.0,834060.0,43878008.0,38341003.0
4,660bec03-9e58-47f2-98b9-2f1c564f3838,872781000000100,39848009.0,834060.0,43878008.0,283371005.0


In [18]:
patient_merge = pd.merge(patient, conditions, left_on='ID', right_on='PATIENT', how='left')
patient_merge.head()

Unnamed: 0,ID,BIRTHDATE,DEATHDATE,PASSPORT,MARITAL,RACE,ETHNICITY,GENDER,BIRTHPLACE,ZIP,BODY_WEIGHT,BODY_HEIGHT,BMI,SYSTOLIC_BP,DIASTOLIC_BP,AGE,AGE_AT_DEATH,PATIENT,CONDITIONS_CODE
0,660bec03-9e58-47f2-98b9-2f1c564f3838,1996-07-26,NaT,0,0,43,59,0,119,20810.0,73.54,163.07,27.66,101.0,75.0,26.0,0.0,660bec03-9e58-47f2-98b9-2f1c564f3838,39848009.0
1,660bec03-9e58-47f2-98b9-2f1c564f3838,1996-07-26,NaT,0,0,43,59,0,119,20810.0,73.54,163.07,27.66,101.0,75.0,26.0,0.0,660bec03-9e58-47f2-98b9-2f1c564f3838,10509002.0
2,660bec03-9e58-47f2-98b9-2f1c564f3838,1996-07-26,NaT,0,0,43,59,0,119,20810.0,73.54,163.07,27.66,101.0,75.0,26.0,0.0,660bec03-9e58-47f2-98b9-2f1c564f3838,444814009.0
3,660bec03-9e58-47f2-98b9-2f1c564f3838,1996-07-26,NaT,0,0,43,59,0,119,20810.0,73.54,163.07,27.66,101.0,75.0,26.0,0.0,660bec03-9e58-47f2-98b9-2f1c564f3838,38341003.0
4,660bec03-9e58-47f2-98b9-2f1c564f3838,1996-07-26,NaT,0,0,43,59,0,119,20810.0,73.54,163.07,27.66,101.0,75.0,26.0,0.0,660bec03-9e58-47f2-98b9-2f1c564f3838,283371005.0


In [21]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score, classification_report

# These codes are the ones that correlate to a type of lung cancer
conditions_code = set([162573006, 254637007, 432121009, 424132000, 422968005, 254632001, 67821000119109])
patient_merge.drop(columns=['DEATHDATE', 'BIRTHDATE', 'PATIENT', 'ID'], inplace=True)
med_care_df.drop(columns=['PATIENT'], inplace=True)
all_immu_encou_df.drop(columns=['PATIENT'], inplace=True)
# Create a 'CODE' column in conditions DataFrame
patient_merge['TARGET'] = conditions['CONDITIONS_CODE'].apply(lambda code: 1 if code in conditions_code else 0)
med_care_df['TARGET'] = conditions['CONDITIONS_CODE'].apply(lambda code: 1 if code in conditions_code else 0)
all_immu_encou_df['TARGET'] = conditions['CONDITIONS_CODE'].apply(lambda code: 1 if code in conditions_code else 0)

#fill any missing codes with 0 meaning there were no inputs
patient_merge.fillna(0, inplace=True)
med_care_df.fillna(0, inplace=True)
all_immu_encou_df.fillna(0, inplace=True)


In [22]:
from imblearn.over_sampling import SMOTE

X = patient_merge.drop(columns='TARGET')
y = patient_merge['TARGET']

# Instantiate the SMOTE
smote = SMOTE(sampling_strategy='auto', random_state=42)

# Fit and apply the SMOTE
X_resampled, y_resampled = smote.fit_resample(X, y)

# Create a new DataFrame with the balanced data
balanced_df1 = pd.concat([pd.DataFrame(X_resampled, columns=X.columns), pd.Series(y_resampled, name='TARGET')], axis=1)

# Check the balance
print(balanced_df1['TARGET'].value_counts())

TARGET
0    427910
1    427910
Name: count, dtype: int64


In [23]:
X = med_care_df.drop(columns='TARGET')
y = med_care_df['TARGET']

# Instantiate the SMOTE
smote = SMOTE(sampling_strategy='auto', random_state=42)

# Fit and apply the SMOTE
X_resampled, y_resampled = smote.fit_resample(X, y)

# Create a new DataFrame with the balanced data
balanced_df2 = pd.concat([pd.DataFrame(X_resampled, columns=X.columns), pd.Series(y_resampled, name='TARGET')], axis=1)

# Check the balance
print(balanced_df2['TARGET'].value_counts())

TARGET
0.0    22608821
1.0    22608821
Name: count, dtype: int64


In [24]:
X = all_immu_encou_df.drop(columns='TARGET')
y = all_immu_encou_df['TARGET']

# Instantiate the SMOTE
smote = SMOTE(sampling_strategy='auto', random_state=42)

# Fit and apply the SMOTE
X_resampled, y_resampled = smote.fit_resample(X, y)

# Create a new DataFrame with the balanced data
balanced_df3 = pd.concat([pd.DataFrame(X_resampled, columns=X.columns), pd.Series(y_resampled, name='TARGET')], axis=1)

# Check the balance
print(balanced_df3['TARGET'].value_counts())

TARGET
0.0    65963899
1.0    65963899
Name: count, dtype: int64


In [25]:
balanced_df1.head()

Unnamed: 0,PASSPORT,MARITAL,RACE,ETHNICITY,GENDER,BIRTHPLACE,ZIP,BODY_WEIGHT,BODY_HEIGHT,BMI,SYSTOLIC_BP,DIASTOLIC_BP,AGE,AGE_AT_DEATH,CONDITIONS_CODE,TARGET
0,0,0,43,59,0,119,20810.0,73.54,163.07,27.66,101.0,75.0,26.0,0.0,39848009.0,0
1,0,0,43,59,0,119,20810.0,73.54,163.07,27.66,101.0,75.0,26.0,0.0,10509002.0,0
2,0,0,43,59,0,119,20810.0,73.54,163.07,27.66,101.0,75.0,26.0,0.0,444814009.0,0
3,0,0,43,59,0,119,20810.0,73.54,163.07,27.66,101.0,75.0,26.0,0.0,38341003.0,0
4,0,0,43,59,0,119,20810.0,73.54,163.07,27.66,101.0,75.0,26.0,0.0,283371005.0,0


In [26]:
balanced_df2.head()

Unnamed: 0,CAREPLANS_CODE,CAREPLANS_REASONCODE,MEDICATIONS_CODE,MEDICATIONS_REASONCODE,CONDITIONS_CODE,TARGET
0,872781000000100,39848009.0,834060.0,43878008.0,39848009.0,0.0
1,872781000000100,39848009.0,834060.0,43878008.0,10509002.0,0.0
2,872781000000100,39848009.0,834060.0,43878008.0,444814009.0,0.0
3,872781000000100,39848009.0,834060.0,43878008.0,38341003.0,0.0
4,872781000000100,39848009.0,834060.0,43878008.0,283371005.0,0.0


In [27]:
balanced_df3.head()

Unnamed: 0,ALLERGIES_CODE,IMMUNIZATIONS_CODE,ENCOUNTERS_CODE,ENCOUNTERS_REASONCODE,CONDITIONS_CODE,TARGET
0,91930004,140.0,371883000,0.0,232353008.0,0.0
1,91930004,140.0,371883000,0.0,74400008.0,0.0
2,91930004,140.0,371883000,0.0,428251008.0,0.0
3,91930004,140.0,185347001,0.0,232353008.0,0.0
4,91930004,140.0,185347001,0.0,74400008.0,0.0


In [28]:
features1 = ['PASSPORT','MARITAL','RACE','ETHNICITY','GENDER','BIRTHPLACE','ZIP','BODY_WEIGHT','BODY_HEIGHT','BMI','SYSTOLIC_BP','DIASTOLIC_BP','AGE','AGE_AT_DEATH','CONDITIONS_CODE']
features2 = ['CAREPLANS_CODE', 'CAREPLANS_REASONCODE', 'MEDICATIONS_CODE', 'MEDICATIONS_REASONCODE', 'CONDITIONS_CODE']
features3 = ['ALLERGIES_CODE', 'IMMUNIZATIONS_CODE', 'ENCOUNTERS_CODE', 'ENCOUNTERS_REASONCODE', 'CONDITIONS_CODE']
Y1 = balanced_df1['TARGET']
Y2 = balanced_df2['TARGET']
Y3 = balanced_df3['TARGET']
X1 = balanced_df1[features1]
X2 = balanced_df2[features2]
X3 = balanced_df3[features3]
# Split the data into training and test sets
X_train1, X_test1, Y_train1, Y_test1 = train_test_split(X1, Y1, test_size=0.2, random_state=42)
X_train2, X_test2, Y_train2, Y_test2 = train_test_split(X2, Y2, test_size=0.2, random_state=42)
X_train3, X_test3, Y_train3, Y_test3 = train_test_split(X3, Y3, test_size=0.2, random_state=42)

In [29]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

X_train1_scaled = scaler.fit_transform(X_train1)
X_test_1scaled = scaler.transform(X_test1)

In [30]:
X_train2_scaled = scaler.fit_transform(X_train2)
X_test2_scaled = scaler.transform(X_test2)

In [31]:
X_train3_scaled = scaler.fit_transform(X_train3)
X_test3_scaled = scaler.transform(X_test3)

In [32]:
# Intitiate the logistic regression model
lr_model1 = LogisticRegression()
lr_model2 = LogisticRegression()

In [33]:
# Train the bagging model
lr_model1.fit(X_train1_scaled, Y_train1)

In [34]:
# Train the bagging model
lr_model2.fit(X_train2_scaled, Y_train2)

In [35]:

#crashed on LR model3 using SGDC to do a incremental training to fix this
from sklearn.linear_model import SGDClassifier
clf = SGDClassifier(loss='log_loss', random_state=42, max_iter=1000, tol=1e-3)
# Incremental training
batch_size = 1000
for i in range(0, len(X_train3), batch_size):
    X_batch = X_train3.iloc[i:i+batch_size, :]
    y_batch = Y_train3.iloc[i:i+batch_size]
    clf.partial_fit(X_batch, y_batch, classes=[0, 1])



In [36]:
# Predict on the test set
Y_pred1 = lr_model1.predict(X_test1)
Y_pred2 = lr_model2.predict(X_test2)
#Y_pred3 = lr_model3.predict(X_test3)
Y_pred3 = clf.predict(X_test3)



In [37]:
# Get each accuracy score from the 3 data sets
accuracy1 = accuracy_score(Y_test1, Y_pred1)
accuracy2 = accuracy_score(Y_test2, Y_pred2)
#accuracy3 = accuracy_score(Y_test3, Y_pred3)
accuracy3 = accuracy_score(Y_test3, Y_pred3)

In [38]:
print(f"Patients Model Report:")
print(classification_report(Y_test1, Y_pred1))
print("Accuracy:", accuracy1)
print("\n")

Patients Model Report:
              precision    recall  f1-score   support

           0       0.46      0.01      0.01     85518
           1       0.50      0.99      0.67     85646

    accuracy                           0.50    171164
   macro avg       0.48      0.50      0.34    171164
weighted avg       0.48      0.50      0.34    171164

Accuracy: 0.49984809889930126




In [39]:
print(f"Medications/CarePlans Model Report:")
print(classification_report(Y_test2, Y_pred2))
print("Accuracy:", accuracy2)
print("\n")

Medications/CarePlans Model Report:
              precision    recall  f1-score   support

         0.0       0.47      0.55      0.51   4520726
         1.0       0.46      0.39      0.42   4522803

    accuracy                           0.47   9043529
   macro avg       0.47      0.47      0.46   9043529
weighted avg       0.47      0.47      0.46   9043529

Accuracy: 0.46742681977356404




In [40]:
print(f"Allergies/Immunizations/Encounters Model Report:")
print(classification_report(Y_test3, Y_pred3))
print("Accuracy:", accuracy3)
print("\n")

Allergies/Immunizations/Encounters Model Report:


              precision    recall  f1-score   support

         0.0       0.48      0.67      0.56  13193700
         1.0       0.46      0.28      0.35  13191860

    accuracy                           0.47  26385560
   macro avg       0.47      0.47      0.45  26385560
weighted avg       0.47      0.47      0.45  26385560

Accuracy: 0.47260713814677424




In [41]:
coefficients1 = lr_model1.coef_
print(coefficients1)

[[-0.49817227 -0.51607132 -0.09014324 -0.07776971 -0.47747404  0.02547673
  -0.01950067  0.02857654  0.00519281  0.00408341  0.01273696 -0.00836325
   0.39176771 -0.08342518  0.03076314]]


In [42]:
coefficients2 = lr_model2.coef_
print(coefficients2)

[[ 0.02831056  0.04458448  0.17357429 -0.17208068 -0.0347646 ]]


In [43]:
coefficients3 = clf.coef_
print(coefficients3)

[[ 6.39293215e+09 -2.67621402e+03  5.13915011e+08 -2.78087108e+10
  -1.89094047e+10]]
