In [None]:
import pandas as pd
import re
import numpy as np

import tensorflow as tf
import shap

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay


In [None]:
DANU_Ingredients = pd.read_csv("Source/DANU Ingredients.txt", sep='\t', dtype=str)

In [None]:
split_columns = DANU_Ingredients['drug_id'].str.split(':', n=1, expand=True)
split_columns

In [None]:
DANU_Ingredients['class'] = split_columns[0]

DANU_Ingredients['molecule'] = split_columns[1]

In [None]:
DANU_Ingredients

In [None]:
DANU_Ingredients['molecule'].nunique()

In [None]:
drug_group_filter = DANU_Ingredients['drug_group'].isin(["GLP1 Injectable", "GLP1 Oral"])

filtered_molecules = DANU_Ingredients.loc[drug_group_filter, 'molecule']

string_GLP1 = r'\b(' + '|'.join(re.escape(molecule) for molecule in filtered_molecules) + r')\b'

In [None]:
string_GLP1

In [None]:
DIA_Drug_Histories = pd.read_csv("Source/DIA Drug Histories.txt", sep='\t', dtype=str)
Treatment_exp_Vector = pd.read_csv("Source/Treatment_exp_Vector.txt", sep=',', dtype=str)

In [None]:
Treatment_exp_Vector

In [None]:

DIA_Drug_Histories = Treatment_exp_Vector.merge(DIA_Drug_Histories, on=['patient','weight'], how='left')

In [None]:
columns_to_drop = ['disease', 'weight']
DIA_Drug_Histories = DIA_Drug_Histories.drop(columns=columns_to_drop)

In [None]:
DIA_Drug_Histories

In [None]:
value_vars = DIA_Drug_Histories.columns[DIA_Drug_Histories.columns != 'patient']
value_vars

In [None]:
DIA_Drug_Histories = DIA_Drug_Histories.melt(id_vars=['patient'], var_name='Month', value_name='Drugs', 
                                             value_vars=value_vars, col_level=0)


In [None]:
DIA_Drug_Histories

In [None]:
DIA_Drug_Histories = DIA_Drug_Histories[DIA_Drug_Histories['Drugs'] != "-"]


In [None]:
DIA_Drug_Histories

In [None]:
patient = DIA_Drug_Histories[DIA_Drug_Histories['Drugs'].str.contains(string_GLP1, na=False)]
patient = patient[['patient']]
patient = patient.drop_duplicates()



In [None]:
patient

In [None]:
DIA_Drug_Histories

In [None]:
DIA_Drug_Histories = patient.merge(DIA_Drug_Histories, on='patient', how='left')

DIA_Drug_Histories

In [None]:
DIA_Drug_Histories['Month'] = DIA_Drug_Histories['Month'].str.replace('month', 'm')

In [None]:
DIA_Drug_Histories

In [None]:
# Zero-pad the single-digit month labels (m1..m9 -> m01..m09) so that the
# labels sort lexicographically in calendar order.
month_mapping = {f'm{digit}': f'm0{digit}' for digit in range(1, 10)}

In [None]:
DIA_Drug_Histories['Month'] = DIA_Drug_Histories['Month'].replace(month_mapping)


In [None]:
DIA_Drug_Histories

In [None]:
DIA_Drug_Histories['Drugs'] = DIA_Drug_Histories['Drugs'].str.split(',')

DIA_Drug_Histories

In [None]:
DIA_Drug_Histories = DIA_Drug_Histories.explode('Drugs', ignore_index=True)


In [None]:
DIA_Drug_Histories

In [None]:
print("Unique Drugs Count:", DIA_Drug_Histories['Drugs'].nunique())
print("Unique Month Count:", DIA_Drug_Histories['Month'].nunique())
print("Unique Patient Count:", DIA_Drug_Histories['patient'].nunique())

In [None]:
pd.DataFrame(DIA_Drug_Histories['Month'].unique()).rename(columns={0: "M"})

pd.DataFrame(DIA_Drug_Histories['Drugs'].unique()).rename(columns={0: "D"})

df = (pd.DataFrame(DIA_Drug_Histories['Month'].unique()).rename(columns={0: "M"}).assign(dummy=1)
    .merge(pd.DataFrame(DIA_Drug_Histories['Drugs'].unique()).rename(columns={0: "D"}).assign(dummy=1), on='dummy')
    .drop('dummy', axis=1)
)

df

In [None]:
df = df.assign(Var=df['M'] + '_' + df['D'])
df = df.drop(columns=['M', 'D'])
df

In [None]:
df['patient'] = 'PTxxxxxx'

df = df[['patient', 'Var']] 

df['Exp'] = 0

df

In [None]:
DIA_Drug_Histories

In [None]:
DIA_Drug_Histories = DIA_Drug_Histories.assign(Var=DIA_Drug_Histories['Month'] + '_' + DIA_Drug_Histories['Drugs'])

DIA_Drug_Histories = DIA_Drug_Histories.drop(columns=['Month', 'Drugs'])

In [None]:
DIA_Drug_Histories

In [None]:
DIA_Drug_Histories['Exp'] = 1

In [None]:
DIA_Drug_Histories = DIA_Drug_Histories.sort_values(by=['patient', 'Var'])


In [None]:
DIA_Drug_Histories

In [None]:
DIA_Drug_Histories['Var'].nunique()

In [None]:
df['Var'].nunique()

In [None]:
DIA_Drug_Histories = pd.concat([DIA_Drug_Histories, df], ignore_index=True)

In [None]:
DIA_Drug_Histories = DIA_Drug_Histories.pivot(index='patient', columns='Var', values='Exp')

In [None]:
DIA_Drug_Histories

In [None]:
DIA_Drug_Histories = DIA_Drug_Histories.fillna(0)


In [None]:
DIA_Drug_Histories

In [None]:
DIA_Drug_Histories = DIA_Drug_Histories.drop(['PTxxxxxx'])

In [None]:
DIA_Drug_Histories.shape

In [None]:
DIA_Drug_Histories.to_csv("DIA_Drug_Histories_All_Months_Processed.txt", sep='\t', index=False)


In [None]:
DIA_Drug_Histories.groupby('m60_47').size().reset_index(name='Count')

In [None]:
sample_df = DIA_Drug_Histories.groupby('m60_47').apply(lambda x: x.sample(1500)).reset_index(drop=True)


In [None]:
sample_df.columns

In [None]:
sample_df.dtypes
sample_df

In [None]:
X = sample_df.drop('m60_47', axis=1)  
y = sample_df['m60_47']  

X = X.values
y = y.values


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
y_test

In [None]:
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
     tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [None]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)


In [None]:
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5)  


In [None]:
accuracy = accuracy_score(y_test, y_pred)
accuracy

In [None]:
confusion = confusion_matrix(y_test, y_pred)
confusion

In [None]:
display = ConfusionMatrixDisplay(confusion)
display.plot()

In [None]:
explainer = shap.Explainer(model, X_train)
shap_values = explainer(X_test, max_evals=3000)

In [None]:

shap.summary_plot(shap_values, X_test, feature_names=sample_df.columns.drop('m60_47'), max_display=20)

In [None]:
confusion_matrix(sample_df['m60_47'].values, sample_df['m50_47'].values)

In [None]:
sample_df

In [None]:
m01_df = sample_df.filter(like='m01_', axis=1).values
m02_df = sample_df.filter(like='m02_', axis=1).values
m03_df = sample_df.filter(like='m03_', axis=1).values
m04_df = sample_df.filter(like='m04_', axis=1).values
m05_df = sample_df.filter(like='m05_', axis=1).values
m06_df = sample_df.filter(like='m06_', axis=1).values
m07_df = sample_df.filter(like='m07_', axis=1).values
m08_df = sample_df.filter(like='m08_', axis=1).values
m09_df = sample_df.filter(like='m09_', axis=1).values
m10_df = sample_df.filter(like='m10_', axis=1).values
m11_df = sample_df.filter(like='m11_', axis=1).values
m12_df = sample_df.filter(like='m12_', axis=1).values
m13_df = sample_df.filter(like='m13_', axis=1).values
m14_df = sample_df.filter(like='m14_', axis=1).values
m15_df = sample_df.filter(like='m15_', axis=1).values
m16_df = sample_df.filter(like='m16_', axis=1).values
m17_df = sample_df.filter(like='m17_', axis=1).values
m18_df = sample_df.filter(like='m18_', axis=1).values
m19_df = sample_df.filter(like='m19_', axis=1).values
m20_df = sample_df.filter(like='m20_', axis=1).values
m21_df = sample_df.filter(like='m21_', axis=1).values
m22_df = sample_df.filter(like='m22_', axis=1).values
m23_df = sample_df.filter(like='m23_', axis=1).values
m24_df = sample_df.filter(like='m24_', axis=1).values
m25_df = sample_df.filter(like='m25_', axis=1).values
m26_df = sample_df.filter(like='m26_', axis=1).values
m27_df = sample_df.filter(like='m27_', axis=1).values
m28_df = sample_df.filter(like='m28_', axis=1).values
m29_df = sample_df.filter(like='m29_', axis=1).values
m30_df = sample_df.filter(like='m30_', axis=1).values
m31_df = sample_df.filter(like='m31_', axis=1).values
m32_df = sample_df.filter(like='m32_', axis=1).values
m33_df = sample_df.filter(like='m33_', axis=1).values
m34_df = sample_df.filter(like='m34_', axis=1).values
m35_df = sample_df.filter(like='m35_', axis=1).values
m36_df = sample_df.filter(like='m36_', axis=1).values
m37_df = sample_df.filter(like='m37_', axis=1).values
m38_df = sample_df.filter(like='m38_', axis=1).values
m39_df = sample_df.filter(like='m39_', axis=1).values
m40_df = sample_df.filter(like='m40_', axis=1).values
m41_df = sample_df.filter(like='m41_', axis=1).values
m42_df = sample_df.filter(like='m42_', axis=1).values
m43_df = sample_df.filter(like='m43_', axis=1).values
m44_df = sample_df.filter(like='m44_', axis=1).values
m45_df = sample_df.filter(like='m45_', axis=1).values
m46_df = sample_df.filter(like='m46_', axis=1).values
m47_df = sample_df.filter(like='m47_', axis=1).values
m48_df = sample_df.filter(like='m48_', axis=1).values
m49_df = sample_df.filter(like='m49_', axis=1).values
m50_df = sample_df.filter(like='m50_', axis=1).values
m51_df = sample_df.filter(like='m51_', axis=1).values
m52_df = sample_df.filter(like='m52_', axis=1).values
m53_df = sample_df.filter(like='m53_', axis=1).values
m54_df = sample_df.filter(like='m54_', axis=1).values
m55_df = sample_df.filter(like='m55_', axis=1).values
m56_df = sample_df.filter(like='m56_', axis=1).values
m57_df = sample_df.filter(like='m57_', axis=1).values
m58_df = sample_df.filter(like='m58_', axis=1).values
m59_df = sample_df.filter(like='m59_', axis=1).values
m60_df = sample_df.filter(like='m60_', axis=1).values

In [None]:
np.stack([m01_df, m02_df, m03_df, m04_df, m05_df, m06_df, m07_df, m08_df, m09_df, m10_df, m11_df, m12_df, m13_df, m14_df, m15_df,
          m16_df, m17_df, m18_df, m19_df, m20_df, m21_df, m22_df, m23_df, m24_df, m25_df, m26_df, m27_df, m28_df, m29_df, m30_df,
          m31_df, m32_df, m33_df, m34_df, m35_df, m36_df, m37_df, m38_df, m39_df, m40_df, m41_df, m42_df, m43_df, m44_df, m45_df,
          m46_df, m47_df, m48_df, m49_df, m50_df, m51_df, m52_df, m53_df, m54_df, m55_df, m56_df, m57_df, m58_df, m59_df, m60_df], axis = -1).shape

In [None]:
sample_df = np.stack([m01_df, m02_df, m03_df, m04_df, m05_df, m06_df, m07_df, m08_df, m09_df, m10_df, m11_df, m12_df, m13_df, m14_df, m15_df,
          m16_df, m17_df, m18_df, m19_df, m20_df, m21_df, m22_df, m23_df, m24_df, m25_df, m26_df, m27_df, m28_df, m29_df, m30_df,
          m31_df, m32_df, m33_df, m34_df, m35_df, m36_df, m37_df, m38_df, m39_df, m40_df, m41_df, m42_df, m43_df, m44_df, m45_df,
          m46_df, m47_df, m48_df, m49_df, m50_df, m51_df, m52_df, m53_df, m54_df, m55_df, m56_df, m57_df, m58_df, m59_df, m60_df], axis = -1)

In [None]:
sample_df

In [None]:
X = np.delete(sample_df, 30, axis=1)
X = X[:, :, :-1] 
X.shape

In [None]:
sample_df.shape

In [None]:
y = sample_df[:, 30, :] 

In [None]:
y.shape

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
X_test.shape

In [None]:
(X_train.shape[1], X_train.shape[2])

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(512, activation='relu', input_shape=((X_train.shape[1], X_train.shape[2]))),
     tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(y_train.shape[1], activation='sigmoid')  
])

In [None]:
print(model.summary())

In [None]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) 

In [None]:
model.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.2)


In [None]:
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

In [None]:
y_pred = model.predict(X_test)
y_pred.shape


In [None]:
y_pred = (y_pred[:,-1] > 0.5)  

In [None]:
y_test[:,-1] 

In [None]:

accuracy = accuracy_score(y_test[:,-1] , y_pred)
accuracy


In [None]:

confusion = confusion_matrix(y_test[:,-1], y_pred)
confusion


In [None]:
display = ConfusionMatrixDisplay(confusion)
display.plot()

In [None]:
import tensorflow as tf

# Report the default GPU device name, or say that execution falls back to CPU.
# The name is looked up once and reused.
gpu_name = tf.test.gpu_device_name()
if not gpu_name:
    print("GPU not found. Defaulting to CPU.")
else:
    print('Default GPU Device: {}'.format(gpu_name))