In [None]:
import pandas as pd
import numpy as np

# Load the patient demographics table and parse its 'date' column into datetimes.
demography_df = pd.read_csv("patient_demographic.csv").assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

In [None]:
# Random seed reused below for the data splits, for SMOTE, and in the saved model's filename.
seed = 5571

In [None]:
from sklearn.model_selection import train_test_split

def split_dats(df, seed):
    """Split ``df`` into train / early-stopping / test frames, stratified on 'Label'.

    20% of the rows are held out as the test set. An early-stopping set with
    the same number of rows as the test set is then taken from the remaining
    rows. Both splits use ``seed`` as ``random_state``.

    Returns:
        ``(train_df, early_df, test_df)``, each without the 'Label' column.
    """
    remainder_df, test_df = train_test_split(
        df, test_size=0.2, random_state=seed, stratify=df['Label']
    )
    train_df, early_df = train_test_split(
        remainder_df,
        test_size=len(test_df),
        random_state=seed,
        stratify=remainder_df['Label'],
    )

    # 'Label' is only needed for stratification; strip it from every split.
    train_df, early_df, test_df = (
        part.drop(columns='Label') for part in (train_df, early_df, test_df)
    )
    return train_df, early_df, test_df

In [None]:
# Split the demographics table into train / early-stopping / test frames using the fixed seed.
train_df, early_df, test_df = split_dats(demography_df, seed)

In [None]:
def calc_age(row):
    """Return ``row['age']`` shifted by the calendar-year gap between ``row['date']`` and ``row['timestamp']``."""
    years_elapsed = row['timestamp'].year - row['date'].year
    return years_elapsed + row['age']
    
# Feature table; 'timestamp' is parsed to datetimes so calc_age can read its year.
df = pd.read_csv("feature_selected.csv")
df['timestamp'] = pd.to_datetime(df['timestamp'])


# Attach the training patients' demographic columns to their feature rows.
train_df = pd.merge(df, train_df, on='patient_id')

# Put the per-row age (via calc_age) and the sex at column positions 4 and 5,
# then remove the original demographic columns and restore the plain names.
train_df.insert(4, 'age2', train_df.apply(calc_age, axis=1))
train_df.insert(5, 'sex2', train_df['sex'])
train_df = (
    train_df
    .drop(columns=['age', 'sex', 'date'])
    .rename(columns={'age2': 'age', 'sex2': 'sex'})
)

In [None]:
def _attach_demographics(features_df, split_df):
    """Join one split's demographic rows onto the feature table.

    The frames are merged on 'patient_id'. 'age' is recomputed per row with
    ``calc_age``, and 'age' and 'sex' are moved to column positions 4 and 5.
    The demographic 'date' column is dropped.

    Args:
        features_df: Feature table with 'patient_id' and 'timestamp' columns.
        split_df: Demographics for one split, with 'patient_id', 'date',
            'age' and 'sex' columns.

    Returns:
        A new merged DataFrame; neither input is modified.
    """
    merged = pd.merge(features_df, split_df, on='patient_id')
    # Insert under temporary names first: 'age' and 'sex' still exist at this point.
    merged.insert(4, 'age2', merged.apply(calc_age, axis=1))
    merged.insert(5, 'sex2', merged['sex'])
    return (
        merged
        .drop(columns=['age', 'sex', 'date'])
        .rename(columns={'age2': 'age', 'sex2': 'sex'})
    )


# Same preprocessing for the early-stopping and test splits.
early_df = _attach_demographics(df, early_df)
test_df = _attach_demographics(df, test_df)

In [None]:
def make_feature(df, window=7, horizon=7):
    """Build fixed-length sliding windows and their look-ahead targets per patient.

    Rows are grouped by 'patient_id' and kept in their existing order. For
    each position ``i`` the feature window is the ``window`` rows before ``i``
    (columns 2 through the second-to-last). The target is the last column of
    the row ``horizon`` rows after the final window row, i.e. row
    ``i + horizon - 1``. Samples whose target or window contains a missing
    value are skipped.

    Args:
        df: DataFrame with a 'patient_id' column. Columns ``2:-1`` are used as
            features and the last column as the target.
        window: Number of consecutive rows in each feature window.
        horizon: Number of rows between the last window row and the target row.

    Returns:
        ``(features, targets)``. ``features`` is a float array of shape
        ``(n_samples, window, n_feature_columns)`` and ``targets`` has shape
        ``(n_samples, 1)``. With no valid sample, both are empty but keep
        those ranks.

    Raises:
        ValueError: If ``window`` or ``horizon`` is smaller than 1.
    """
    if window < 1 or horizon < 1:
        raise ValueError("window and horizon must both be at least 1")

    windows = []
    targets = []

    # A single groupby pass replaces one full-frame boolean scan per patient.
    # Groups are yielded in sorted patient_id order with row order preserved.
    for _, patient_rows in df.groupby('patient_id'):
        arr = patient_rows.values

        # NOTE(review): this upper bound means the patient's final row is never
        # used as a target (the last target index is len(arr) - 2). Kept as in
        # the original; confirm whether `len(arr) - horizon + 1` was intended.
        for i in range(window, len(arr) - horizon):
            tmp_f = arr[i - window:i, 2:-1]
            tmp_t = arr[i + horizon - 1, -1]

            if pd.isna(tmp_t) or pd.isna(tmp_f).any():
                continue

            windows.append(tmp_f)
            targets.append(tmp_t)

    if not windows:
        # Keep the documented ranks so callers can still read shape[1] and shape[2].
        n_feature_columns = max(df.shape[1] - 3, 0)
        return np.empty((0, window, n_feature_columns), dtype=float), np.empty((0, 1))

    return np.array(windows).astype(float), np.array(targets).reshape(-1, 1)

# Window every split with the same routine; each call yields (features, targets).
(
    (train_feature, train_target),
    (earlystop_feature, earlystop_target),
    (test_feature, test_target),
) = (make_feature(split_df) for split_df in (train_df, early_df, test_df))

In [None]:
# tmp[0] holds the distinct training target values (sorted), tmp[1] their occurrence counts.
tmp = np.unique(train_target, return_counts=True)

In [None]:
import math

# Size of the most frequent class among the training targets.
max_count = np.max(tmp[1])

# Requested number of samples per class, later passed to SMOTE as sampling_strategy.
my_dict = {}
n_classes = len(tmp[0])
for position, (label, count) in enumerate(zip(tmp[0], tmp[1])):
    if position == 0:
        # The first (smallest) label is scaled by the class count times 4.
        my_dict[label] = count * n_classes * 4
    else:
        # Other labels are scaled by ceil(max_count / count) times 5.5.
        my_dict[label] = round(count * math.ceil(max_count / count) * 5.5)

In [None]:
# Remember the windowed array's shape; shape[1] and shape[2] are used later to
# restore the flattened arrays and to size the model input.
shape = train_feature.shape

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Flatten every sample to a single row so the scaler receives 2-D input.
train_feature, earlystop_feature, test_feature = (
    windows.reshape(len(windows), -1)
    for windows in (train_feature, earlystop_feature, test_feature)
)

# Fit the min-max ranges on the training split only, then reuse them for the other splits.
scaler = MinMaxScaler()
train_feature = scaler.fit_transform(train_feature)
earlystop_feature, test_feature = (
    scaler.transform(windows) for windows in (earlystop_feature, test_feature)
)

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample the training set to the per-class counts in my_dict.
# SMOTE needs more samples in a class than k_neighbors, so start from the
# library default (5) and step down to 1 until resampling succeeds.
# Only ValueError is retried, so KeyboardInterrupt and unrelated failures
# propagate instead of being swallowed by a bare `except:`.
smote_error = None
for k_neighbors in range(5, 0, -1):
    sm = SMOTE(random_state=seed, sampling_strategy=my_dict, k_neighbors=k_neighbors)
    try:
        train_feature_SMOTE, train_target_SMOTE = sm.fit_resample(train_feature, train_target)
    except ValueError as err:
        smote_error = err
        continue
    # Restore the (n_samples, 1) column layout used by the rest of the notebook.
    train_target_SMOTE = train_target_SMOTE.reshape(len(train_target_SMOTE), 1)
    break
else:
    # Every neighbourhood size failed: surface the last error.
    raise smote_error

In [None]:
# Restore the (samples, shape[1], shape[2]) layout that was flattened for scaling and SMOTE.
window_dims = (shape[1], shape[2])
train_feature_SMOTE, earlystop_feature, test_feature = (
    flat.reshape((len(flat),) + window_dims).astype(float)
    for flat in (train_feature_SMOTE, earlystop_feature, test_feature)
)

In [None]:
def make_multi_label(arr_target):
    """Collapse the class labels in the first column of ``arr_target`` to binary.

    Label 0 maps to 0; labels 1, 2 and 4 map to 1.

    Args:
        arr_target: Array-like of shape ``(n_samples, k)``; only the first
            column is read.

    Returns:
        Integer array of shape ``(n_samples, 1)``.

    Raises:
        ValueError: If a label other than 0, 1, 2 or 4 is present. Such labels
            previously produced an empty row, leaving the output ragged or
            misaligned with the features.
    """
    targets = np.asarray(arr_target)
    if targets.size == 0:
        return np.zeros((0, 1), dtype=int)

    labels = targets.reshape(len(targets), -1)[:, 0]

    unknown = ~np.isin(labels, (0, 1, 2, 4))
    if unknown.any():
        raise ValueError(
            "make_multi_label: unexpected label value(s): {}".format(
                labels[unknown][:5].tolist()
            )
        )

    # Every remaining non-zero label (1, 2, 4) belongs to the positive class.
    return (labels != 0).astype(int).reshape(-1, 1)

# Binarize the targets of every split with the same mapping.
train_target_SMOTE, earlystop_target, test_target = (
    make_multi_label(targets)
    for targets in (train_target_SMOTE, earlystop_target, test_target)
)

In [None]:
from tensorflow.keras.layers import Input, Masking, LSTM, Dropout, MultiHeadAttention, Flatten, Dense, LayerNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import Precision, Recall, PrecisionAtRecall
from tensorflow.keras.losses import BinaryFocalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay

# Input: one window per sample, shaped like the training windows without the sample axis.
input_layer = Input(shape=shape[1:])

# Four stacked LSTM layers (32 -> 24 -> 16 -> 8 units), each followed by 20% dropout.
# return_sequences=True keeps the full sequence for the next layer.
layer_01_lstm = LSTM(32, return_sequences=True)(input_layer)
layer_02_dropout = Dropout(0.2)(layer_01_lstm)

layer_03_lstm = LSTM(24, return_sequences=True)(layer_02_dropout)
layer_04_dropout = Dropout(0.2)(layer_03_lstm)

layer_05_lstm = LSTM(16, return_sequences=True)(layer_04_dropout)
layer_06_dropout = Dropout(0.2)(layer_05_lstm)

layer_07_lstm = LSTM(8, return_sequences=True)(layer_06_dropout)
layer_08_dropout = Dropout(0.2)(layer_07_lstm)

# Self-attention over the LSTM output sequence (query and value are the same tensor),
# then flatten to a single vector per sample.
layer_attention = MultiHeadAttention(num_heads=16, key_dim=16)(layer_08_dropout, layer_08_dropout)
layer_flatten = Flatten()(layer_attention)

# Three Dense -> Dropout -> LayerNormalization blocks (96, 64, 32 units).
layer_09_dense = Dense(96,  activation='relu')(layer_flatten)
layer_10_dropout = Dropout(0.2)(layer_09_dense)
layer_11_normalization = LayerNormalization()(layer_10_dropout)

layer_12_dense = Dense(64,  activation='relu')(layer_11_normalization)
layer_13_dropout = Dropout(0.2)(layer_12_dense)
layer_14_normalization = LayerNormalization()(layer_13_dropout)

layer_15_dense = Dense(32,  activation='relu')(layer_14_normalization)
layer_16_dropout = Dropout(0.2)(layer_15_dense)
layer_17_normalization = LayerNormalization()(layer_16_dropout)

# Single sigmoid unit: probability of the positive class.
layer_depressive = Dense(1, activation='sigmoid', name='depressive')(layer_17_normalization)

model = Model(inputs=input_layer, outputs=layer_depressive)

# Metric names are pinned explicitly. Keras otherwise auto-suffixes the default
# names (e.g. 'recall_1') when the metrics are created again in the same kernel,
# so a callback monitoring 'val_recall' would silently stop matching after this
# cell is re-run. On a fresh kernel these names equal the defaults.
model.compile(
    optimizer=Adam(learning_rate=ExponentialDecay(0.000001, decay_steps=1000, decay_rate=0.99)),
    loss=BinaryFocalCrossentropy(),
    metrics=[
        Precision(name='precision'),
        Recall(name='recall'),
        PrecisionAtRecall(0.7, name='precision_at_recall'),
    ],
)

model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop when validation recall has not improved for 10 epochs (monitoring starts
# at epoch 5) and roll back to the best weights seen.
early_stop = EarlyStopping(
    monitor='val_recall',
    mode='max',
    patience=10,
    verbose=1,
    restore_best_weights=True,
    start_from_epoch=5,
)
model.fit(
    train_feature_SMOTE,
    train_target_SMOTE,
    epochs=1000,
    verbose=1,
    validation_data=(earlystop_feature, earlystop_target),
    callbacks=[early_stop],
)

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_curve, precision_recall_curve, auc


# Predicted probabilities on the test windows, rounded to the nearest integer
# to obtain hard 0/1 predictions.
y_pred_proba = model.predict(test_feature)
y_pred = np.round(y_pred_proba)

# Threshold-dependent metrics, computed from the hard predictions.
precision = precision_score(test_target, y_pred)
recall = recall_score(test_target, y_pred)
f1 = f1_score(test_target, y_pred)

# Threshold-free metrics, computed from the probabilities: area under the ROC
# curve and area under the precision-recall curve.
# Note: `thresholds` is rebound by the second call, so afterwards it holds the
# precision-recall thresholds, not the ROC ones.
fpr, tpr, thresholds = roc_curve(test_target, y_pred_proba)
auroc = auc(fpr, tpr)
precision_prc, recall_prc, thresholds = precision_recall_curve(test_target, y_pred_proba)
auprc = auc(recall_prc, precision_prc)

In [None]:
# Display the test-set metrics as one list: precision, recall, F1, AUROC, AUPRC.
[precision, recall, f1, auroc, auprc]

In [None]:
# Persist the trained model to an '.h5' file whose name embeds the seed.
model.save('LSTM_Feature_DLMO_CR_{}.h5'.format(seed))