In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [None]:
# Read the two tab-separated drug-history exports (DIA and OBE2) into DataFrames.
dia_drug_histories = pd.read_csv("../Source/DIA Drug Histories.txt", sep="\t")
obe2_drug_histories = pd.read_csv("../Source/OBE2 Drug Histories.txt", sep="\t")

In [None]:
# Only the DIA export is used here; the combined alternative was
#   pd.concat([dia_drug_histories, obe2_drug_histories], ignore_index=True)
# Reshape wide -> long: every non-id column becomes a (Month, Drugs) row,
# then the 'disease' and 'weight' id columns are discarded.
drug_histories = (
    dia_drug_histories
    .melt(id_vars=['patient', 'disease', 'weight'], var_name='Month', value_name='Drugs')
    .drop(columns=['disease', 'weight'])
)

In [None]:
danu_demographics = pd.read_csv("../Source/DANU Demographics.txt", sep="\t")

# Keep the ids of patients whose 'diagnosis' text contains "Obesity"
# (missing diagnoses are treated as non-matching), exposed as 'patient'.
danu_demographics_filtered = (
    danu_demographics
    .loc[lambda demo: demo['diagnosis'].str.contains("Obesity", na=False), ['patid']]
    .rename(columns={"patid": "patient"})
)

# Inner join: only drug-history rows belonging to those patients survive.
drug_histories = danu_demographics_filtered.merge(drug_histories, on='patient', how='inner')


In [None]:
# Remove rows whose Drugs value is "-", forget the month, and keep each
# remaining patient/Drugs combination once.
drug_histories = (
    drug_histories
    .loc[drug_histories['Drugs'].ne("-")]
    .drop(columns=['Month'])
    .drop_duplicates()
)
drug_histories['patient'].nunique()

In [None]:
# Turn each comma-separated Drugs string into one row per listed drug.
drug_histories = (
    drug_histories
    .assign(Drugs=drug_histories['Drugs'].str.split(','))
    .explode('Drugs')
)

In [None]:
# Ingredient lookup, read entirely as strings.
danu_ingredients = pd.read_csv("../Source/DANU Ingredients.txt", sep="\t", dtype=str)

# drug_id is split on ':' into two parts, stored as 'class' and 'molecule'.
drug_id_parts = danu_ingredients['drug_id'].str.split(':', expand=True)
danu_ingredients[['class', 'molecule']] = drug_id_parts

# Only the molecule -> drug_class mapping is needed downstream.
danu_ingredients = danu_ingredients.loc[:, ['molecule', 'drug_class']]

In [None]:
# Cast the drug identifiers to str so they compare against the string-typed
# 'molecule' key, then look up each drug's class (unmatched drugs get NaN).
drug_histories = (
    drug_histories
    .assign(Drugs=drug_histories['Drugs'].astype(str))
    .merge(danu_ingredients, how="left", left_on="Drugs", right_on="molecule")
)


In [None]:
# Discard the per-drug identifiers and keep one row per remaining combination
# (patient / drug_class).
drug_histories = (
    drug_histories
    .drop(['Drugs', 'molecule'], axis=1)
    .drop_duplicates()
)

In [None]:
# Unique (patid, drug_ahfs_class) pairs from the full utilisation export.
danu_drug_utilizations_full = (
    pd.read_csv("../Source/DANU Drug Utilizations Full.txt", sep="\t")
    .loc[:, ['patid', 'drug_ahfs_class']]
    .drop_duplicates()
)
danu_drug_utilizations_full['drug_ahfs_class'].nunique()

In [None]:
# Align column names with drug_histories so the two frames can be stacked.
danu_drug_utilizations_full = danu_drug_utilizations_full.rename(
    columns={'patid': 'patient', 'drug_ahfs_class': 'drug_class'}
)

In [None]:
# Stack both sources row-wise into one long frame with a fresh 0..n-1 index.
drug_histories = pd.concat(
    [drug_histories, danu_drug_utilizations_full],
    axis=0,
    ignore_index=True,
)


In [None]:
# Build the patient x drug_class exposure matrix (1 = exposed, 0 = not).
#
# The long frame is the concatenation of two independently de-duplicated
# sources, so the same (patient, drug_class) pair can appear in both.
# DataFrame.pivot raises "Index contains duplicate entries" in that case,
# so the pairs are de-duplicated first. The exposure flag is added on a
# derived frame instead of mutating the long frame in place, which keeps
# this cell safe to re-run.
exposure_pairs = (
    drug_histories[['patient', 'drug_class']]
    .drop_duplicates()
    .assign(exp=1)
)
drug_histories = (
    exposure_pairs
    .pivot(index='patient', columns='drug_class', values='exp')
    .fillna(0)  # pairs that never occurred mean "not exposed"
)

In [None]:
# Replace spaces, hyphens, commas and ampersands in column labels with
# underscores, in a single rename pass.
drug_histories.rename(
    columns=lambda label: (
        label.replace(" ", "_")
             .replace("-", "_")
             .replace(",", "_")
             .replace("&", "_")
    ),
    inplace=True,
)

In [None]:
# Sanity check: show the name of the row index produced by the pivot.
print(drug_histories.index.name)

In [None]:
# Comma-separated comorbidity table; merged onto the exposure matrix by 'patient' below.
Comorbidity_Inventories = pd.read_csv("../Source/Comorbidity_Inventories.txt", sep=",")



In [None]:
# Remove the index's name. NOTE(review): presumably so that the 'patient'
# column added later does not clash with an index level of the same name
# when merging on 'patient' -- confirm.
drug_histories = drug_histories.rename_axis(None)

In [None]:
# Renames a column labelled 'patient' to 'drug_class', if one exists.
# NOTE(review): at this point 'patient' is the row index (from the pivot) and
# is only added as a column in a later cell, so this looks like a no-op --
# confirm and consider removing.
drug_histories = drug_histories.rename(columns={'patient': 'drug_class'})

In [None]:
# Snapshot of the current column labels as a one-column DataFrame, for inspection.
# NOTE(review): `columns` is not referenced again in the visible notebook.
columns = pd.DataFrame(drug_histories.columns)


In [None]:
# Expose the row index (the patient ids from the pivot) as a regular
# 'patient' column so it can serve as a merge key.
drug_histories = drug_histories.assign(patient=drug_histories.index)

In [None]:
# Left join: every patient row is kept; comorbidity columns are NaN where
# the patient has no entry in Comorbidity_Inventories.
drug_histories = drug_histories.merge(
    Comorbidity_Inventories,
    how="left",
    on="patient",
)


In [None]:
# Missing values left by the left join are replaced with 0.
drug_histories = drug_histories.fillna(value=0)

In [None]:
# The comorbidity table has been merged into drug_histories; drop the name
# so the kernel can release it.
del Comorbidity_Inventories

In [None]:
def glp(row):
    """Return 1 if the row is flagged for either GLP1 column, otherwise 0.

    Parameters
    ----------
    row : mapping-like
        Anything supporting ``row['GLP1_Oral']`` and ``row['GLP1_Injectable']``
        (e.g. a DataFrame row). 'GLP1_Injectable' is only read when
        'GLP1_Oral' is not equal to 1.

    Returns
    -------
    int
        1 when either value equals 1, else 0.
    """
    flagged = row['GLP1_Oral'] == 1 or row['GLP1_Injectable'] == 1
    return int(flagged)

In [None]:
# Target label: 1 if the patient is flagged for GLP1_Oral or GLP1_Injectable,
# else 0. This is the same rule as glp(), but evaluated column-wise; a
# row-wise DataFrame.apply(axis=1) builds a Series and makes a Python call
# for every row, which is very slow on a wide frame.
drug_histories['GLP'] = (
    (drug_histories['GLP1_Oral'] == 1) | (drug_histories['GLP1_Injectable'] == 1)
).astype(int)

In [None]:
# (rows, columns) of the modelling table after the GLP label was added.
drug_histories.shape

In [None]:
# The two columns the GLP label was derived from are removed so they are
# not available as features.
drug_histories = drug_histories.drop(['GLP1_Oral', 'GLP1_Injectable'], axis=1)

In [None]:
# Display the modelling table (the notebook renders a truncated preview).
drug_histories

In [None]:
# Full-population target and feature matrix.
# NOTE(review): X / y are not used again in the visible notebook; the model
# is trained on X_sampled / y_sampled instead.
y = drug_histories['GLP']
X = drug_histories.drop(columns='GLP')


In [None]:
# Draw sample_size rows from each GLP class (0 and 1) with a fixed seed.
sample_size = 1000
class_0, class_1 = (
    drug_histories.loc[drug_histories['GLP'] == label].sample(n=sample_size, random_state=42)
    for label in (0, 1)
)

In [None]:
# Balanced alternative (currently disabled), built from class_0 / class_1:
# sampled_data = pd.concat([class_0, class_1])

# Active choice: a plain random sample of 2000 rows with a fixed seed, so the
# class mix follows whatever the full table contains.
sampled_data = drug_histories.sample(n=2000, random_state=42)

In [None]:
# Wider exclusion list that was tried and is currently disabled:
#sampled_data = sampled_data.drop(columns=['SGLT2', 'DPP4', 'Incretin_Mimetics', 'Insulin_Long', 'Insulin_Therapy', 'Insulin_Short', 'Biguanide', 
#                                          'AGI', 'Sulfonylurea', 'Sulfonylureas', 'Biguanides',  'Antidiabetic', 'Glinide', 'Glitazone', 'Sodium_Gluc_Cotransport_2_(Sglt2)_Inhib',
#                                          'Alpha_Glucosidase_Inhibitors', 'Diabetes_Mellitus', 'Dipeptidyl_Peptidase_4(Dpp_4)_Inhibitors', 'Insulins', 'Intermediate_Acting_Insulins',
#                                          'Pancreatic_Function', 'Rapid_Acting_Insulins', 'Short_Acting_Insulins', 'Long_Acting_Insulins', 'patient'])

# Active exclusion: the patient identifier and the 'Incretin_Mimetics' column.
# NOTE(review): presumably 'Incretin_Mimetics' is dropped because it overlaps
# with the GLP target -- confirm.
sampled_data = sampled_data.drop(['Incretin_Mimetics', 'patient'], axis=1)

In [None]:
# Split the sample into the target (GLP) and the remaining feature columns.
y_sampled = sampled_data['GLP']
X_sampled = sampled_data.drop(columns=['GLP'])


In [None]:
# Hold out 20% of the sampled rows for testing; the fixed random_state keeps
# the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_sampled,
    y_sampled,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Learn the scaling statistics from the training rows only, then apply the
# same transform to both splits. X_train / X_test become NumPy arrays here.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Every other stochastic step in this notebook is pinned to seed 42; pin
# TensorFlow's global seed as well so weight initialisation and dropout
# masks do not change from one run to the next.
tf.random.set_seed(42)

# Feed-forward binary classifier: three ReLU hidden layers (128 -> 64 -> 32),
# each followed by 50% dropout, and a single sigmoid output unit.
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; 20% of the training rows are held back for validation
# (validation_split), and the per-epoch metrics end up in history.history.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

In [None]:
# Score the trained model on the held-out test split. evaluate() returns the
# loss followed by the compiled metrics (here: accuracy).
test_loss, test_accuracy = model.evaluate(X_test, y_test)

print(f'Test Accuracy: {test_accuracy}')

In [None]:
# Training vs. validation accuracy per epoch, taken from the fit history.
plt.plot(history.history['accuracy'], label='Train')
plt.plot(history.history['val_accuracy'], label='Validation')
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='upper left')
plt.show()

In [None]:
def rgba_to_hex(rgba):
    """Format an RGBA colour as an upper-case ``#RRGGBB`` hex string.

    Parameters
    ----------
    rgba : sequence of 4 numbers
        Red, green, blue and alpha components. Each colour component is
        multiplied by 255 and truncated to an int; alpha is ignored.

    Returns
    -------
    str
        The colour as ``#RRGGBB``.

    Raises
    ------
    ValueError
        If ``rgba`` does not unpack into exactly four values.
    """
    red, green, blue, _alpha = rgba
    channels = tuple(int(component * 255) for component in (red, green, blue))
    return "#{:02X}{:02X}{:02X}".format(*channels)

# Hex codes for the two ends of the diverging 'RdBu' colormap.
# 'RdBu' runs from red at 0.0 to blue at 1.0, so the red end is sampled at
# 0.0 and the blue end at 1.0 (the names were previously swapped).
cmap = plt.get_cmap('RdBu')
red_hex = rgba_to_hex(cmap(0.0))
blue_hex = rgba_to_hex(cmap(1.0))
print(red_hex, blue_hex)

In [None]:
# Sampled rows whose GLP label is 0.
sampled_data.loc[sampled_data['GLP'] == 0]

In [None]:
import shap

# X_train / X_test are plain NumPy arrays after scaling, so SHAP cannot
# recover column names by itself and its plots would show "Feature N".
# The arrays keep the column order of X_sampled, so pass those names on.
feature_names = list(X_sampled.columns)
explainer = shap.Explainer(model, X_train, feature_names=feature_names)

# The permutation explainer needs at least 2 * n_features + 1 model
# evaluations per row; keep the original budget of 1500 but raise it when
# the feature count requires more, instead of failing.
min_evals = 2 * X_test.shape[1] + 1
shap_values = explainer(X_test, max_evals=max(1500, min_evals))

In [None]:
# X_test was derived from X_sampled (sampled_data without the 'GLP' target),
# so the feature labels must come from X_sampled. sampled_data.columns has
# one extra entry ('GLP') and only lined up because that column is last.
shap.summary_plot(
    shap_values,
    X_test,
    feature_names=list(X_sampled.columns),
    max_display=20,
)

In [None]:
# Beeswarm view of the SHAP values, coloured with the 'RdBu' colormap.
shap.plots.beeswarm(shap_values,  color=plt.get_cmap("RdBu"))


In [None]:
# Look up the column at position 515 of sampled_data.
# NOTE(review): presumably this resolves an unnamed "Feature 515" from the
# SHAP plots back to its column -- confirm. The position is hard-coded and
# will point at a different column if the feature set changes.
sampled_data.iloc[:,515]

In [None]:
# Shape of the full table, then for J7 == 0 and J7 == 1 in turn: the shape
# of that subset followed by the shape of its GLP == 1 rows.
print(drug_histories.shape)
for j7_flag in (0, 1):
    j7_rows = drug_histories[drug_histories['J7'] == j7_flag]
    print(j7_rows.shape)
    print(j7_rows[j7_rows['GLP'] == 1].shape)

In [None]:
# Two ratios computed from hard-coded counts.
# NOTE(review): these look like the GLP == 1 share within J7 == 0 and within
# J7 == 1, copied by hand from the shapes printed in the previous cell --
# confirm; they will go stale whenever the data changes.
print(52060/1348392)
print(84/576)

In [None]:
# Heatmap of the SHAP values, limited to 10 displayed features, 'RdBu' colours.
shap.plots.heatmap(shap_values, max_display=10, plot_width=6, cmap=plt.get_cmap("RdBu"))


In [None]:
# Sampled rows whose GLP label is 1.
sampled_data.loc[sampled_data['GLP'] == 1]