In [None]:
import pandas as pd
import re
import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed
from tensorflow.keras.models import load_model


In [None]:
# Load the ingredient reference table; every column is read as text.
DANU_Ingredients = pd.read_csv("Source/DANU Ingredients.txt", sep='\t', dtype=str)

# drug_id is split once on the first ':' into a class part and a molecule part.
split_columns = DANU_Ingredients['drug_id'].str.split(':', n=1, expand=True)
DANU_Ingredients = DANU_Ingredients.assign(
    **{'class': split_columns[0], 'molecule': split_columns[1]}
)

# Number of distinct molecule identifiers (shown as the cell output).
DANU_Ingredients['molecule'].nunique()

In [None]:
# Select the molecules whose drug_group is one of the two GLP1 groups.
drug_group_filter = DANU_Ingredients['drug_group'].isin(["GLP1 Injectable", "GLP1 Oral"])
filtered_molecules = DANU_Ingredients.loc[drug_group_filter, 'molecule']

# Build a word-bounded alternation of the (regex-escaped) molecule identifiers.
string_GLP1 = r'\b(' + '|'.join(map(re.escape, filtered_molecules)) + r')\b'

In [None]:
# Read both source tables as text.
DIA_Drug_Histories = pd.read_csv("Source/DIA Drug Histories.txt", sep='\t', dtype=str)
Treatment_exp_Vector = pd.read_csv("Source/Treatment_exp_Vector.txt", sep=',', dtype=str)

# Keep every row of Treatment_exp_Vector (left join on patient + weight),
# then discard the columns that are not needed downstream.
columns_to_drop = ['disease', 'weight']
DIA_Drug_Histories = (
    Treatment_exp_Vector
    .merge(DIA_Drug_Histories, how='left', on=['patient', 'weight'])
    .drop(columns=columns_to_drop)
)

In [None]:
# Every column except 'patient' becomes a (Month, Drugs) row: wide -> long.
is_value_column = DIA_Drug_Histories.columns != 'patient'
value_vars = DIA_Drug_Histories.columns[is_value_column]
DIA_Drug_Histories = pd.melt(
    DIA_Drug_Histories,
    id_vars=['patient'],
    value_vars=value_vars,
    var_name='Month',
    value_name='Drugs',
    col_level=0,
)

In [None]:
# Drop the rows whose Drugs value is the "-" placeholder. The explicit .copy()
# makes the result an independent frame, so the column assignment below does
# not write into a slice of the previous frame (SettingWithCopyWarning).
DIA_Drug_Histories = DIA_Drug_Histories[DIA_Drug_Histories['Drugs'] != "-"].copy()

# Shorten the month labels ('month' -> 'm'). regex=False keeps this a plain
# literal replacement regardless of the pandas version's default.
DIA_Drug_Histories['Month'] = DIA_Drug_Histories['Month'].str.replace('month', 'm', regex=False)

In [None]:
# Zero-pad the single-digit month labels ('m1' -> 'm01', ..., 'm9' -> 'm09')
# so that month labels sort correctly as strings.
month_mapping = {f'm{digit}': f'm0{digit}' for digit in range(1, 10)}

In [None]:
# Apply the zero-padded month labels, split the comma-separated drug string
# into a list, and emit one row per (patient, month, drug).
DIA_Drug_Histories = (
    DIA_Drug_Histories
    .assign(
        Month=lambda frame: frame['Month'].replace(month_mapping),
        Drugs=lambda frame: frame['Drugs'].str.split(','),
    )
    .explode('Drugs', ignore_index=True)
)

In [None]:
# Report the cardinality of each key column of the long table.
for message, column in (
    ("Unique Drugs Count:", 'Drugs'),
    ("Unique Month Count:", 'Month'),
    ("Unique Patient Count:", 'patient'),
):
    print(message, DIA_Drug_Histories[column].nunique())

In [None]:
#pd.DataFrame(DIA_Drug_Histories['Month'].unique()).rename(columns={0: "M"})
#pd.DataFrame(DIA_Drug_Histories['Drugs'].unique()).rename(columns={0: "D"})

#df = (pd.DataFrame(DIA_Drug_Histories['Month'].unique()).rename(columns={0: "M"}).assign(dummy=1)
#    .merge(pd.DataFrame(DIA_Drug_Histories['Drugs'].unique()).rename(columns={0: "D"}).assign(dummy=1), on='dummy')
#    .drop('dummy', axis=1))



In [None]:
# df['patient'] = 'PTxxxxxx'
# df['Exp'] = 0


In [None]:
# Every remaining row is an observed exposure: flag it with Exp = 1 and order
# the table by patient, month and drug.
DIA_Drug_Histories = (
    DIA_Drug_Histories
    .assign(Exp=1)
    .sort_values(by=['patient', 'Month', 'Drugs'])
)

In [None]:
# df = df[['patient', 'M', 'D', 'Exp']]
# df = df.rename(columns={"M": "Month", "D": "Drugs"})

In [None]:
# DIA_Drug_Histories = pd.concat([DIA_Drug_Histories, df], ignore_index=True)
# Keep a single row per (patient, Month, Drugs, Exp) combination.
dedup_keys = ['patient', 'Month', 'Drugs', 'Exp']
DIA_Drug_Histories = DIA_Drug_Histories.drop_duplicates(subset=dedup_keys)

In [None]:
# NOTE(review): a notebook cell only renders its final expression, so this
# drug count is computed but not shown.
DIA_Drug_Histories['Drugs'].nunique()
# Display the de-duplicated long table.
DIA_Drug_Histories

In [None]:
# Restrict the analysis to a reproducible random subset of patients
# (random_state fixes the draw).
unique_patients = DIA_Drug_Histories['patient'].drop_duplicates()

# Series.sample raises ValueError when n exceeds the population size, so cap
# the request at the number of patients actually available.
sample_size = min(50000, len(unique_patients))
sampled_patients = unique_patients.sample(n=sample_size, random_state=42)

DIA_Drug_Histories = DIA_Drug_Histories[DIA_Drug_Histories['patient'].isin(sampled_patients)]
DIA_Drug_Histories

In [None]:
# Distinct values of each key column in the sampled long table.
patients = DIA_Drug_Histories['patient'].unique()
months = DIA_Drug_Histories['Month'].unique()
drugs = DIA_Drug_Histories['Drugs'].unique()

# Full patient x month x drug grid, one row per combination.
key_columns = ['patient', 'Month', 'Drugs']
all_combinations = pd.MultiIndex.from_product([patients, months, drugs], names=key_columns)
all_combinations_df = pd.DataFrame(index=all_combinations).reset_index()

# Left-join the observed exposures onto the grid; combinations that were never
# observed have no match and get 0 from fillna.
merged_df = all_combinations_df.merge(DIA_Drug_Histories, how='left', on=key_columns)
merged_df = merged_df.fillna(0)


In [None]:
# Display the dense patient x month x drug table built in the previous cell.
merged_df

In [None]:
# Order rows by patient, then month, then drug (all ascending).
sort_keys = ['patient', 'Month', "Drugs"]
merged_df = merged_df.sort_values(sort_keys, ascending=True)

# Discard rows whose Drugs entry equals 0.
merged_df = merged_df.loc[merged_df['Drugs'].ne(0)]

In [None]:
# One row per patient, one column per (Month, Drugs) pair holding the Exp
# value; columns are then ordered by month first and drug second.
pivot_df = pd.pivot_table(
    merged_df,
    values='Exp',
    index='patient',
    columns=['Month', 'Drugs'],
    fill_value=0,
).sort_index(axis=1, level=[0, 1])


In [None]:
# Display the wide patient x (Month, Drugs) exposure matrix.
pivot_df

In [None]:
# Distinct values along each axis of the exposure cube.
patients = merged_df['patient'].unique()
months = merged_df['Month'].unique()
drugs = merged_df['Drugs'].unique()

# Derive every dimension from the data. The month count was previously
# hard-coded to 60, which makes the later reshape fail as soon as the source
# files cover a different number of months.
n_patients = len(patients)
n_months = len(months)
n_drugs = len(drugs)


In [None]:
# Display the array of distinct drug identifiers.
drugs

In [None]:
# Fold the flat (Month, Drugs) column axis of the pivot into two axes, giving
# an array indexed as [patient, month, drug].
data_3d = pivot_df.to_numpy().reshape(n_patients, n_months, n_drugs)
data_3d

In [None]:
# Re-read the three dimensions from the array itself and report them.
n_patients, n_months, n_drugs = data_3d.shape
print((n_patients, n_months, n_drugs))

In [None]:
# Next-month prediction setup: the inputs are months 0..T-2 and the targets
# are the same sequence shifted forward by one month (months 1..T-1).
X, y = data_3d[:, :-1], data_3d[:, 1:]

In [None]:
# Seed TensorFlow so weight initialisation and shuffling are repeatable
# between runs of this cell.
tf.random.set_seed(42)

model = Sequential()
# The time dimension is declared as None so the network accepts sequences of
# any length: it is trained on n_months-1 steps, but the forecasting cell
# calls predict() with a different number of steps.
model.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(None, n_drugs)))
# One sigmoid unit per drug at every time step (independent binary outputs).
model.add(TimeDistributed(Dense(n_drugs, activation='sigmoid')))
# Binary cross-entropy matches the per-drug 0/1 targets.
model.compile(optimizer='adam', loss='binary_crossentropy')


# Train the model
history = model.fit(X, y, epochs=20, batch_size=32, verbose=1)


In [None]:
import matplotlib.pyplot as plt

# Training loss per epoch, as recorded by model.fit.
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.set_title('Model Loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(['Train'], loc='upper right')
plt.show()

In [None]:
# Loss of the fitted model on the same X / y it was trained on.
loss = model.evaluate(x=X, y=y, verbose=0)
print(f"Model Loss: {loss}")

In [None]:
# model.save('drug_usage_lstm_model.h5')

# loaded_model = load_model('drug_usage_lstm_model.h5')

In [None]:
# Autoregressive forecast: repeatedly predict the next month and feed the
# prediction back in as the newest observation.

# Probability above which a drug is counted as used in the forecast month.
PREDICTION_THRESHOLD = 0.1

# Number of future months to predict
n_future_months = 12

# Sequence length the network was trained on (X holds n_months - 1 steps).
window = n_months - 1

# Rolling history, seeded with the observed data; it keeps a constant length
# while sliding forward one month per iteration.
input_data = data_3d.copy()

# One (n_patients, 1, n_drugs) array per forecast month.
all_predictions = []

for _ in range(n_future_months):
    # Feed the most recent `window` months rather than only the last one, so
    # the LSTM sees the history it was trained to use and the input matches
    # the shape the model was built with.
    sequence_output = model.predict(input_data[:, -window:, :])

    # With return_sequences=True the output at the final time step is the
    # forecast for the month that follows the window; keep a length-1 time
    # axis and binarise it.
    next_month_predictions = np.where(sequence_output[:, -1:, :] > PREDICTION_THRESHOLD, 1, 0)

    # Append the predictions to the list of all predictions
    all_predictions.append(next_month_predictions)

    # Slide the window: drop the oldest month and append the new forecast.
    input_data = np.concatenate([input_data[:, 1:, :], next_month_predictions], axis=1)

# Convert the list of predictions to a numpy array
all_predictions = np.array(all_predictions)

# all_predictions will have shape (n_future_months, n_patients, 1, n_drugs)


In [None]:
# Display the binarised predictions produced by the last loop iteration.
next_month_predictions

In [None]:
# Display the shape of the stacked forecasts.
all_predictions.shape

In [None]:
# Display the stacked forecasts for all future months.
all_predictions

In [None]:
# 1 where the stored prediction exceeds 0.5, otherwise 0.
binary_predictions = (all_predictions > 0.5).astype(int)
binary_predictions

In [None]:
# NOTE(review): a notebook cell only renders its final expression, so only
# the last input_data.shape below is actually shown.
input_data.shape
binary_predictions.shape
input_data.shape

In [None]:
# Tally how many forecast cells are 0 and how many are 1.
count_zeros = int(np.sum(binary_predictions == 0))
count_ones = int(np.sum(binary_predictions == 1))

print("Number of 0s:", count_zeros)
print("Number of 1s:", count_ones)

In [None]:
# Display the array of distinct drug identifiers again for reference.
drugs

In [None]:
# Collapse the patient axis (axis 1): one row of per-drug totals per
# forecast month, still carrying the length-1 time axis.
sum_per_month_per_drug = all_predictions.sum(axis=1)

# Report, month by month, how many patients are predicted on each drug.
for month_number, counts_per_drug in enumerate(sum_per_month_per_drug, start=1):
    print(f"Month {month_number}:")
    for drug_number, count in enumerate(counts_per_drug[0], start=1):
        print(f"  Drug {drug_number}: {count} 1s")

In [None]:
# Reorder all_predictions from (future_month, patient, 1, drug) to
# (patient, future_month, 1, drug) so patients lead, as in data_3d.
transposed_predictions = all_predictions.transpose((1, 0, 2, 3))

# Drop the length-1 time axis: (patient, future_month, drug).
predicted_months = transposed_predictions[:, :, 0, :]

# Observed history followed by the forecast months. The history must come
# from data_3d: input_data has been slid forward by the forecasting loop and
# already contains predictions in place of the earliest observed months.
# Concatenating also removes the hard-coded (50000, 72, 38) dimensions.
appended_data = np.concatenate([data_3d, predicted_months], axis=1).astype(float)

# Verify the shape of the appended data
print("Shape of appended data:", appended_data.shape)


In [None]:
# Total users per (month, drug), summed over the patient axis. It is computed
# in this cell because the name is otherwise first assigned in a later cell,
# which makes this cell raise NameError on a fresh top-to-bottom run.
counts_per_drug_per_month = np.sum(appended_data, axis=0)

print("Shape of counts_per_drug_per_month:", counts_per_drug_per_month.shape)
print("Shape of appended_data:", appended_data.shape)

In [None]:
# Total users per (month, drug): summing over the patient axis already yields
# a (n_total_months, n_drugs) array, so no reshape is needed.
counts_per_drug_per_month = np.sum(appended_data, axis=0)

# Derive the axis lengths from the data instead of hard-coding 72 and 38.
n_total_months, n_plotted_drugs = counts_per_drug_per_month.shape
month_axis = range(1, n_total_months + 1)

# Plot the counts for each drug over time
import matplotlib.pyplot as plt

# Set up the figure and axis
plt.figure(figsize=(12, 6))
plt.title('Drug Usage Over Time')
plt.xlabel('Month')
plt.ylabel('Count of 1s')

# Plot each drug
for drug_index in range(n_plotted_drugs):
    plt.plot(month_axis, counts_per_drug_per_month[:, drug_index], label=f'Drug {drug_index + 1}')

# Add legend and show plot
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Plot the counts over time for the drugs at or after a given zero-based
# column index of counts_per_drug_per_month.
first_drug_index = 31

# Month axis derived from the data rather than a hard-coded 1..72.
month_axis = range(1, counts_per_drug_per_month.shape[0] + 1)

plt.figure(figsize=(12, 6))
plt.title('Drug Usage Over Time')
plt.xlabel('Month')
plt.ylabel('Count of 1s')

# Plot each drug with index >= first_drug_index
for drug_index in range(first_drug_index, counts_per_drug_per_month.shape[1]):
    plt.plot(month_axis, counts_per_drug_per_month[:, drug_index], label=f'Drug {drug_index + 1}')

# Add legend and show plot
plt.legend()
plt.grid(True)
plt.show()