In [1]:
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings 
warnings.filterwarnings('ignore')
data=pd.read_csv("data/train_data.csv")

In [2]:
print(data.columns)
print(data.shape)

Index(['HADM_ID', 'SUBJECT_ID', 'GENDER', 'AGE', 'Admission Weight (Kg)',
       'Height (cm)', 'Heart Rate', 'Non Invasive Blood Pressure diastolic',
       'Non Invasive Blood Pressure mean',
       'Non Invasive Blood Pressure systolic', 'Respiratory Rate', 'stroke',
       'Tachycardia', 'Bradycardia', 'Hypertension', 'Hypotension',
       'Tachypnea', 'Bradypnea'],
      dtype='object')
(207408, 18)


In [3]:
new_columns = [col for col in data.columns if col != 'stroke'] + ['stroke']
print(new_columns)

['HADM_ID', 'SUBJECT_ID', 'GENDER', 'AGE', 'Admission Weight (Kg)', 'Height (cm)', 'Heart Rate', 'Non Invasive Blood Pressure diastolic', 'Non Invasive Blood Pressure mean', 'Non Invasive Blood Pressure systolic', 'Respiratory Rate', 'Tachycardia', 'Bradycardia', 'Hypertension', 'Hypotension', 'Tachypnea', 'Bradypnea', 'stroke']


In [4]:
data=data[new_columns]

In [5]:
data

Unnamed: 0,HADM_ID,SUBJECT_ID,GENDER,AGE,Admission Weight (Kg),Height (cm),Heart Rate,Non Invasive Blood Pressure diastolic,Non Invasive Blood Pressure mean,Non Invasive Blood Pressure systolic,Respiratory Rate,Tachycardia,Bradycardia,Hypertension,Hypotension,Tachypnea,Bradypnea,stroke
0,100018,58128,0,55,125.0,178.0,65.5,70.2,81.8,122.4,14.00,0,0,0,0,0,0,0
1,100018,58128,0,55,125.0,178.0,66.0,70.2,81.8,122.4,15.75,0,0,0,0,0,0,0
2,100018,58128,0,55,125.0,178.0,70.0,70.2,81.8,122.4,15.75,0,0,0,0,0,0,0
3,100018,58128,0,55,125.0,178.0,82.5,69.4,81.6,123.2,15.00,0,0,0,0,0,0,0
4,100018,58128,0,55,125.0,178.0,99.0,69.4,80.8,124.0,17.50,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207403,178565,46857,1,86,80.0,160.0,69.0,44.6,46.0,127.0,22.00,0,0,0,0,1,0,0
207404,178565,46857,1,86,80.0,160.0,64.0,49.0,73.0,142.0,24.00,0,0,0,0,1,0,0
207405,178565,46857,1,86,80.0,160.0,66.0,43.0,67.0,132.0,16.00,0,0,0,0,0,0,0
207406,178565,46857,1,86,80.0,160.0,67.0,69.6,86.4,140.0,22.00,0,0,0,0,1,0,0


In [6]:
#extractins subset of the top 10 patients with the most measurements
patient_counts = data['SUBJECT_ID'].value_counts()
top_10_patients = patient_counts.nlargest(10).index
subset_df_top10 = data[data['SUBJECT_ID'].isin(top_10_patients)]
subset_df_top10['SUBJECT_ID'].value_counts()

48872    1453
72908    1440
73695    1270
74438    1124
70191    1069
71059    1068
60393     988
70330     949
51385     928
40013     927
Name: SUBJECT_ID, dtype: int64

In [7]:
from sklearn.preprocessing import StandardScaler,RobustScaler
def static_scale(df):
    data=df.copy()
    scaler=RobustScaler()
    static_cols=["AGE",'Admission Weight (Kg)',"Height (cm)"]
    data[static_cols]=scaler.fit_transform(data[static_cols])
    return data,scaler

In [8]:
static_cols=["AGE",'Admission Weight (Kg)',"Height (cm)"]
scaled,scaler=static_scale(subset_df_top10)

In [9]:
from sklearn.preprocessing import StandardScaler,RobustScaler
from tqdm import tqdm
def dynamic_scale(df):
    final_df=pd.DataFrame()
    cols=['Heart Rate',
                  'Non Invasive Blood Pressure diastolic', 'Non Invasive Blood Pressure mean',
                 'Non Invasive Blood Pressure systolic', 'Respiratory Rate']
    for hadm in tqdm(df['HADM_ID'].unique()):
        scaler=RobustScaler()
        #extract the dataframe
        hadm_df=df[df['HADM_ID']==hadm]
        #scale the float cols
        hadm_df[cols]=scaler.fit_transform(hadm_df[cols])
#         print(hadm_df)
        final_df = pd.concat([final_df,hadm_df])
    return final_df

In [10]:
scaled=dynamic_scale(scaled)

100%|██████████| 10/10 [00:00<00:00, 222.97it/s]


In [24]:
#apply this function for eacxh unique admission
#so we won't mix timestamps from different admission in the same training sequence
def create_window(df):
    n_observation=4
    n_forecast=2
    n_target=1
    X,y=[],[]
    for adm_id in df['HADM_ID'].unique():
        data=df[df['HADM_ID']==adm_id]
#         target_cols=["stroke"]
#         target_df=data[target_cols]
    #     data.drop(target_cols,axis=1,inplace=True)
        for i in range(len(data)-6):
            X.append(data.iloc[i:i+n_observation,2:])
            y.append(data.iloc[i+n_observation+n_forecast:i+n_observation+n_forecast+n_target,-1])
    return np.array(X),np.array(y)

In [12]:
neg,pos=np.bincount(subset_df_top10['stroke'])
total=neg+pos
print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total, pos, 100 * pos / total))
# Scaling by total/2 helps keep the loss to a similar magnitude.
# The sum of the weights of all examples stays the same.
weight_for_0 = (1 / neg) * (total / 2.0)
weight_for_1 = (1 / pos) * (total / 2.0)

class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))

Examples:
    Total: 11216
    Positive: 992 (8.84% of total)

Weight for class 0: 0.55
Weight for class 1: 5.65


In [37]:
# Function to train and evaluate the model based on the provided architecture
from tensorflow import keras
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Bidirectional, LSTM, Dropout, Dense, Concatenate
from tensorflow.keras.regularizers import l2

def train_model(X_train, y_train, X_val, y_val, class_weight):
    # Define the model architecture
    n_static = 4
    n_timesteps = 4
    n_dynamic = 12
    n_output = 1

    # Define input layer
    recurrent_input = Input(shape=(n_timesteps, n_dynamic), name="TIMESERIES_INPUT")
    static_input = Input(shape=(n_static,), name="STATIC_INPUT")

    # RNN Layers
    # Layer - 1
    rec_layer_one = Bidirectional(LSTM(128, kernel_regularizer=l2(0.01), recurrent_regularizer=l2(0.01), return_sequences=True), name="BIDIRECTIONAL_LAYER_1")(recurrent_input)
    rec_layer_one = Dropout(0.1, name="DROPOUT_LAYER_1")(rec_layer_one)

    # Layer - 2
    rec_layer_two = Bidirectional(LSTM(64, kernel_regularizer=l2(0.01), recurrent_regularizer=l2(0.01)), name="BIDIRECTIONAL_LAYER_2")(rec_layer_one)
    rec_layer_two = Dropout(0.1, name="DROPOUT_LAYER_2")(rec_layer_two)

    # SLP Layers
    static_layer_one = Dense(64, kernel_regularizer=l2(0.001), activation='relu', name="DENSE_LAYER_1")(static_input)

    # Combine layers - RNN + SLP
    combined = Concatenate(axis=1, name="CONCATENATED_TIMESERIES_STATIC")([rec_layer_two, static_layer_one])
    combined_dense_two = Dense(64, activation='relu', name="DENSE_LAYER_2")(combined)
    output = Dense(n_output, activation='sigmoid', name="OUTPUT_LAYER")(combined_dense_two)

    # Compile Model
    model = Model(inputs=[recurrent_input, static_input], outputs=[output])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[
        keras.metrics.BinaryAccuracy(name='accuracy'),
        #keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall')
        #keras.metrics.AUC(name='auc'),
        #keras.metrics.AUC(name='prc', curve='PR')  # precision-recall curve
    ])

    # Extract static features for training and validation sets
    X_train_static = X_train[:, 0, :4]
    X_val_static = X_val[:, 0, :4]

    # Fit the model
    history = model.fit([X_train[:, :, 4:], X_train_static], y_train,
                        epochs=10,
                        batch_size=32,
                        class_weight=class_weight)
    results=model.evaluate([X_val[:, :, 4:], X_val_static],y_val)
    print(results)
    return results




In [14]:
min_hours=subset_df_top10['SUBJECT_ID'].value_counts().min()
n_blocks=5
block_size=int(min_hours/n_blocks)
print(block_size)

185


In [15]:
subset_df_top10[~subset_df_top10['SUBJECT_ID'].isin([40013])]

Unnamed: 0,HADM_ID,SUBJECT_ID,GENDER,AGE,Admission Weight (Kg),Height (cm),Heart Rate,Non Invasive Blood Pressure diastolic,Non Invasive Blood Pressure mean,Non Invasive Blood Pressure systolic,Respiratory Rate,Tachycardia,Bradycardia,Hypertension,Hypotension,Tachypnea,Bradypnea,stroke
21384,108174,73695,1,44,77.3,168.0,105.0,82.0,85.0,96.0,23.0,1,0,0,0,1,0,0
21385,108174,73695,1,44,77.3,168.0,101.0,57.0,67.0,95.0,21.0,1,0,0,0,1,0,0
21386,108174,73695,1,44,77.3,168.0,88.0,58.0,66.0,87.0,23.0,0,0,0,1,1,0,0
21387,108174,73695,1,44,77.3,168.0,91.0,62.0,68.0,84.0,16.0,0,0,0,0,0,0,0
21388,108174,73695,1,44,77.3,168.0,94.0,60.0,65.0,94.0,16.0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201921,176846,72908,1,42,67.8,165.0,87.0,83.0,95.0,133.0,29.0,0,0,1,0,1,0,0
201922,176846,72908,1,42,67.8,165.0,90.0,102.0,112.0,150.0,28.0,0,0,1,0,1,0,1
201923,176846,72908,1,42,67.8,165.0,88.0,99.0,118.0,169.0,26.0,0,0,1,0,1,0,1
201924,176846,72908,1,42,67.8,165.0,94.0,70.0,90.0,148.0,29.0,0,0,0,0,1,0,0


In [16]:
def get_val_patient(df):
    counts_df=df['SUBJECT_ID'].value_counts()
    min_index=counts_df.idxmin()
    val_patient=df[df['SUBJECT_ID']==min_index]
    return val_patient,val_patient['SUBJECT_ID'].unique()[0]

In [17]:
subset_df_top10.iloc[0:185]

Unnamed: 0,HADM_ID,SUBJECT_ID,GENDER,AGE,Admission Weight (Kg),Height (cm),Heart Rate,Non Invasive Blood Pressure diastolic,Non Invasive Blood Pressure mean,Non Invasive Blood Pressure systolic,Respiratory Rate,Tachycardia,Bradycardia,Hypertension,Hypotension,Tachypnea,Bradypnea,stroke
21384,108174,73695,1,44,77.3,168.0,105.0,82.0,85.0,96.0,23.0,1,0,0,0,1,0,0
21385,108174,73695,1,44,77.3,168.0,101.0,57.0,67.0,95.0,21.0,1,0,0,0,1,0,0
21386,108174,73695,1,44,77.3,168.0,88.0,58.0,66.0,87.0,23.0,0,0,0,1,1,0,0
21387,108174,73695,1,44,77.3,168.0,91.0,62.0,68.0,84.0,16.0,0,0,0,0,0,0,0
21388,108174,73695,1,44,77.3,168.0,94.0,60.0,65.0,94.0,16.0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21564,108174,73695,1,44,92.0,168.0,87.0,61.0,67.0,89.0,34.0,0,0,0,0,1,0,0
21565,108174,73695,1,44,92.0,168.0,83.0,68.0,63.0,95.0,31.0,0,0,0,0,1,0,0
21566,108174,73695,1,44,92.0,168.0,84.0,73.0,79.0,97.0,38.0,0,0,0,0,1,0,0
21567,108174,73695,1,44,92.0,168.0,83.0,73.0,78.0,95.0,32.0,0,0,0,0,1,0,0


In [18]:
# Function to compute class weights for imbalanced data, with handling for single-class dataframes
def compute_weights(df, target_column='stroke'):
    bincount_result = np.bincount(df[target_column])
    
    # Handle the case where the DataFrame contains only one class
    if len(bincount_result) == 1:
        if df[target_column].iloc[0] == 0:
            neg = bincount_result[0]
            pos = 0
        else:
            pos = bincount_result[0]
            neg = 0
    else:
        neg, pos = bincount_result
    
    total = neg + pos
    
    # Avoid division by zero when calculating weights
    weight_for_0 = (1 / (neg if neg > 0 else 1)) * (total / 2.0)
    weight_for_1 = (1 / (pos if pos > 0 else 1)) * (total / 2.0)
    
    # Create a dictionary to store the class weights
    class_weight = {0: weight_for_0, 1: weight_for_1}
    
    # Print and return the computed class weights
    print(f'Examples:\n    Total: {total}\n    Positive: {pos} ({100 * pos / total if total > 0 else 0:.2f}% of total)\n')
    print(f'Weight for class 0: {weight_for_0:.2f}')
    print(f'Weight for class 1: {weight_for_1:.2f}')
    
    return class_weight

# Test the function with a DataFrame containing only one class
dummy_df_single_class = pd.DataFrame({'stroke': [0, 0, 0, 0, 0]})
compute_weights(dummy_df_single_class)


Examples:
    Total: 5
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 2.50


{0: 0.5, 1: 2.5}

In [66]:
def custom_cross_val(df,val_id,n_blocks=5):
    block_metrics=[]
    val_patient=df[df['SUBJECT_ID']==val_id]
    print(val_patient['stroke'].value_counts())
    #calculate the block_size
    min_hours=subset_df_top10['SUBJECT_ID'].value_counts().min()
    block_size=int(min_hours/n_blocks)
    start = 0
    end = block_size
    #print(subset_df_top10['stroke'].iloc[0:740].value_counts())
    #print("#######################")
    #print(f"Cross validation for patient ID: {val_id}")
    for fold in range(n_blocks):
        #initialize the x_train and x_test X_train y_train y_test numpy arrays 
        train_list,label_list=[],[]
        train_df=pd.DataFrame()
        print("#######################")
        print(f"Fold number: {fold + 1}")
        print("#######################")
        #train on all patients and validate on ony one 
        # Determine 80-20 split index within the block
        split_idx = int(block_size * 0.8)
        #print(start, end, split_idx)
        #print(end-(start+split_idx))
        #create the sequence for each patient separately then concatenate
        for p in subset_df_top10['SUBJECT_ID'].unique():
            patient_data=subset_df_top10[subset_df_top10['SUBJECT_ID']==p]
            # Create training subset for this patient
            train = patient_data.iloc[start:start+split_idx]
            #print('len_train_patient ',len(train))
            train_seq, train_label = create_window(train)
            #print(len(train_seq))
            #append to X_train and y_train and train_df
            train_list.append(train_seq)
            label_list.append(train_label)
            train_df=train_df.append(train)
        #append to X_train and y_train
        X_train=np.vstack(train_list)
        y_train=np.vstack(label_list)

        test=subset_df_top10[subset_df_top10['SUBJECT_ID']==val_id][start+split_idx:end]
        #print("train_df length ",len(test))
        X_test, y_test = create_window(test)
        #print("X_test length ",len(X_test))
        print(f"X_train shape: {X_train.shape}, y_train.shape: {y_train.shape}")
        print(f"X_test shape: {X_test.shape}, y_test.shape: {y_test.shape}")
        #print(f"train_df shape: {train_df.shape}")
        weights=compute_weights(train_df[train_df['SUBJECT_ID']==val_id])
        print(weights)
        results = train_model(X_train, y_train, X_test, y_test, weights)
        block_metrics.append({"fold":fold+1,
                             "results":results})
        start = end
        end += block_size
    return block_metrics

In [67]:
def nested_cross_val(df):
    global_metrics=[]
    for idx,val_id in enumerate(df['SUBJECT_ID'].unique()):
        print("##############")
        print(f"iteration {idx+1}")
        print("cross-validation for patient: ",val_id)
        print("##############")
        pat_res=custom_cross_val(df,val_id,n_blocks=5)
        global_metrics.append({"validation patient ID":val_id,
                             "results":pat_res})
    return global_metrics

In [68]:
final_metrics=nested_cross_val(subset_df_top10)

##############
iteration 1
cross-validation for patient:  73695
##############
0    1269
1       1
Name: stroke, dtype: int64
#######################
Fold number: 1
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[4.418652534484863, 0.0, 0.0]
#######################
Fold number: 2
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.37805941700935364, 1.0, 0.0]


X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 1 (0.68% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5034013605442177, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[5.760497093200684, 0.0, 0.0]
#######################
Fold number: 2
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.6409904360771179, 0.9677419066429138, 0.0]
#######################
Fold number: 3
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (

X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.19342175126075745, 1.0, 0.0]
#######################
Fold number: 2
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.270153284072876, 1.0, 0.0]
#######################
Fold number: 3
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 1

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.2534433603286743, 1.0, 0.0]
#######################
Fold number: 2
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.3366040289402008, 1.0, 0.0]
#######################
Fold number: 3
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 1 (0.68% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5034013605442177, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.8335959911346436, 0.29032257199287415, 0.0]
#######################
Fold number: 2
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 1 (0.68% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5034013605442177, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.37879300117492676, 0.9032257795333862, 0.0]
#######################
Fold number: 3
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Ep

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.2077270746231079, 1.0, 0.0]
#######################
Fold number: 2
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 1 (0.68% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5034013605442177, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[1.0055633783340454, 0.4516128897666931, 1.0]
#######################
Fold number: 3
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 2 (1.35% of total)

Weight for class 0: 0.51
Weight for class 1: 37.00
{0: 0.5068493150684932, 1: 37.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoc

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.6749556064605713, 0.6451612710952759, 1.0]
#######################
Fold number: 2
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 79 (53.38% of total)

Weight for class 0: 1.07
Weight for class 1: 0.94
{0: 1.0724637681159421, 1: 0.9367088607594937}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.3021685481071472, 0.9354838728904724, 0.0]
#######################
Fold number: 3
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 46 (31.08% of total)

Weight for class 0: 0.73
Weight for class 1: 1.61
{0: 0.7254901960784313, 1: 1.608695652173913}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoc

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.21969221532344818, 1.0, 0.0]
#######################
Fold number: 2
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.8494916558265686, 0.6774193644523621, 0.0]
#######################
Fold number: 3
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.6149103045463562, 0.9354838728904724, 0.0]
#######################
Fold number: 2
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.3854163885116577, 1.0, 0.0]
#######################
Fold number: 3
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 74.00
{0: 0.5, 1: 74.0}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.30935528

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[1.1659201383590698, 0.4193548262119293, 0.0]
#######################
Fold number: 2
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 36 (24.32% of total)

Weight for class 0: 0.66
Weight for class 1: 2.06
{0: 0.6607142857142857, 1: 2.0555555555555554}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.5575591921806335, 0.8064516186714172, 0.0]
#######################
Fold number: 3
#######################
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
Examples:
    Total: 148
    Positive: 38 (25.68% of total)

Weight for class 0: 0.67
Weight for class 1: 1.95
{0: 0.6727272727272727, 1: 1.9473684210526314}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epo

In [70]:
for metric_dic in final_metrics:
    for k,v in metric_dic.items():
        print(k)
        print(v)
    

validation patient ID
73695
results
[{'fold': 1, 'results': [4.418652534484863, 0.0, 0.0]}, {'fold': 2, 'results': [0.37805941700935364, 1.0, 0.0]}, {'fold': 3, 'results': [2.50274920463562, 0.0, 0.0]}, {'fold': 4, 'results': [0.3759647607803345, 1.0, 0.0]}, {'fold': 5, 'results': [0.6404802799224854, 1.0, 0.0]}]
validation patient ID
71059
results
[{'fold': 1, 'results': [5.760497093200684, 0.0, 0.0]}, {'fold': 2, 'results': [0.6409904360771179, 0.9677419066429138, 0.0]}, {'fold': 3, 'results': [2.6720709800720215, 0.0, 0.0]}, {'fold': 4, 'results': [0.7266538739204407, 0.6774193644523621, 0.0]}, {'fold': 5, 'results': [0.2965176999568939, 0.9677419066429138, 0.0]}]
validation patient ID
48872
results
[{'fold': 1, 'results': [0.19342175126075745, 1.0, 0.0]}, {'fold': 2, 'results': [0.270153284072876, 1.0, 0.0]}, {'fold': 3, 'results': [2.2693934440612793, 0.0, 0.0]}, {'fold': 4, 'results': [1.479731798171997, 0.29032257199287415, 0.0]}, {'fold': 5, 'results': [1.1136205196380615, 0.03

In [99]:
final_metrics[0]['results'][0]['results']

[4.418652534484863, 0.0, 0.0]

In [103]:
average_results = {}

for metric_dict in final_metrics:
    
    loss = []
    acc = []
    recall = []  
    # Loop through each fold's results
    for fold in metric_dict['results']:
        results = fold['results']
        loss.append(results[0])
        acc.append(results[1])
        recall.append(results[2])

    # Calculate average for each metric across all folds
    avg_loss = np.mean(loss)
    avg_acc = np.mean(acc)
    avg_recall = np.mean(recall)

    # Store average results for this validation patient ID
    average_results[metric_dict['validation patient ID']] = [avg_loss, avg_acc, avg_recall]

average_results


{73695: [1.6631812393665313, 0.6, 0.0],
 71059: [2.0193460166454313, 0.5225806355476379, 0.0],
 48872: [1.0652641594409942, 0.4645161271095276, 0.0],
 40013: [0.5590966939926147, 0.8451612889766693, 0.0],
 60393: [0.39842030107975007, 0.8129032194614411, 0.0],
 70330: [0.4796446353197098, 0.8193548321723938, 0.2],
 74438: [0.7917275905609131, 0.6774193465709686, 0.6],
 51385: [0.5824076861143113, 0.8580645084381103, 0.0],
 70191: [0.7392233699560166, 0.7870967745780945, 0.0],
 72908: [0.7280100345611572, 0.6903225839138031, 0.0]}

In [110]:
acc=[]
rec=[]
for metrics in average_results.values():
    acc.append(metrics[1])
print(np.mean(acc))
    

0.7077419316768645


- error analysis
1. iteration 1
cross-validation for patient 73695:  

0    1269

1       1

2. iteration 2
cross-validation for patient 71059:  

0    1054

1      14

3. iteration 3
cross-validation for patient 48872:  

0    1443

1      10

4. iteration 4
cross-validation for patient 40013:  

0    926

1      1

5. iteration 5
cross-validation for patient 60393:  

0    985

1      3

6. iteration 6
cross-validation for patient 70330:  

0    939

1     10

7. iteration 7
cross-validation for patient 74438:  

1    567

0    557

8. iteration 8
cross-validation for patient 51385:  

0    927

1      1

9. iteration 9
cross-validation for patient 70191:  

0    1067

1       2

10. iteration 10
cross-validation for patient 72908:  

0    1057

1     383
 


## computing weights across the entire stack of patients

In [134]:
def custom_cross_val_v2(df,val_id,n_blocks=5):
    block_metrics=[]
    val_patient=df[df['SUBJECT_ID']==val_id]
    print(val_patient['stroke'].value_counts())
    #calculate the block_size
    min_hours=subset_df_top10['SUBJECT_ID'].value_counts().min()
    block_size=int(min_hours/n_blocks)
    start = 0
    end = block_size
    #print(subset_df_top10['stroke'].iloc[0:740].value_counts())
    #print("#######################")
    #print(f"Cross validation for patient ID: {val_id}")
    for fold in range(n_blocks):
        #initialize the x_train and x_test X_train y_train y_test numpy arrays 
        train_list,label_list=[],[]
        train_df=pd.DataFrame()
        print("#######################")
        print(f"Fold number: {fold + 1}")
        print("#######################")
        print("start pointer: ",start)
        print("end pointer; ",end)
        #train on all patients and validate on ony one 
        # Determine 80-20 split index within the block
        split_idx = int(block_size * 0.8)
        #print(start, end, split_idx)
        #print(end-(start+split_idx))
        #create the sequence for each patient separately then concatenate
        for p in subset_df_top10['SUBJECT_ID'].unique():
            patient_data=subset_df_top10[subset_df_top10['SUBJECT_ID']==p]
            # Create training subset for this patient
            train = patient_data.iloc[start:start+split_idx]
            #print('len_train_patient ',len(train))
            train_seq, train_label = create_window(train)
            #print(len(train_seq))
            #append to X_train and y_train and train_df
            train_list.append(train_seq)
            label_list.append(train_label)
            train_df=train_df.append(train)
        #append to X_train and y_train
        X_train=np.vstack(train_list)
        y_train=np.vstack(label_list)

        test=subset_df_top10[subset_df_top10['SUBJECT_ID']==val_id][start+split_idx:end]
        #print("train_df length ",len(test))
        X_test, y_test = create_window(test)
        #print("X_test length ",len(X_test))
        print(f"X_train shape: {X_train.shape}, y_train.shape: {y_train.shape}")
        print(f"X_test shape: {X_test.shape}, y_test.shape: {y_test.shape}")
        #print(f"train_df shape: {train_df.shape}")
        print("class distribution for train_df")
        print(compute_weights(train_df))
        print("class distribution for test_df")
        print(compute_weights(test))
        results = train_model(X_train, y_train, X_test, y_test, compute_weights(train_df))
        block_metrics.append({"fold":fold+1,
                             "results":results})
        start = end
        end += block_size
    return block_metrics

In [135]:
def nested_cross_val_2(df):
    global_metrics=[]
    for idx,val_id in enumerate(df['SUBJECT_ID'].unique()):
        print("##############")
        print(f"iteration {idx+1}")
        print("cross-validation for patient: ",val_id)
        print("##############")
        pat_res=custom_cross_val_v2(df,val_id,n_blocks=5)
        global_metrics.append({"validation patient ID":val_id,
                             "results":pat_res})
    return global_metrics

In [136]:
final_metrics_v2=nested_cross_val_2(subset_df_top10)

##############
iteration 1
cross-validation for patient:  73695
##############
0    1269
1       1
Name: stroke, dtype: int64
#######################
Fold number: 1
#######################
start pointer:  0
end pointer;  185
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
class distribution for train_df
Examples:
    Total: 1480
    Positive: 107 (7.23% of total)

Weight for class 0: 0.54
Weight for class 1: 6.92
{0: 0.538965768390386, 1: 6.915887850467289}
class distribution for test_df
Examples:
    Total: 37
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 18.50
{0: 0.5, 1: 18.5}
Examples:
    Total: 1480
    Positive: 107 (7.23% of total)

Weight for class 0: 0.54
Weight for class 1: 6.92
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.054001566022634506, 1.0, 0.0]
#######################
Fold number: 2
#######################
start

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.028042413294315338, 1.0, 0.0]
##############
iteration 2
cross-validation for patient:  71059
##############
0    1054
1      14
Name: stroke, dtype: int64
#######################
Fold number: 1
#######################
start pointer:  0
end pointer;  185
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
class distribution for train_df
Examples:
    Total: 1480
    Positive: 107 (7.23% of total)

Weight for class 0: 0.54
Weight for class 1: 6.92
{0: 0.538965768390386, 1: 6.915887850467289}
class distribution for test_df
Examples:
    Total: 37
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 18.50
{0: 0.5, 1: 18.5}
Examples:
    Total: 1480
    Positive: 107 (7.23% of total)

Weight for class 0: 0.54
Weight for class 1: 6.92
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Ep

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.38926467299461365, 0.8387096524238586, 0.0]
#######################
Fold number: 5
#######################
start pointer:  740
end pointer;  925
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
class distribution for train_df
Examples:
    Total: 1480
    Positive: 126 (8.51% of total)

Weight for class 0: 0.55
Weight for class 1: 5.87
{0: 0.5465288035450517, 1: 5.873015873015873}
class distribution for test_df
Examples:
    Total: 37
    Positive: 2 (5.41% of total)

Weight for class 0: 0.53
Weight for class 1: 9.25
{0: 0.5285714285714286, 1: 9.25}
Examples:
    Total: 1480
    Positive: 126 (8.51% of total)

Weight for class 0: 0.55
Weight for class 1: 5.87
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.20707857608795166, 0.9677419066429138, 0.0]
##############
iteration 3


Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.32168757915496826, 1.0, 0.0]
#######################
Fold number: 4
#######################
start pointer:  555
end pointer;  740
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
class distribution for train_df
Examples:
    Total: 1480
    Positive: 124 (8.38% of total)

Weight for class 0: 0.55
Weight for class 1: 5.97
{0: 0.5457227138643068, 1: 5.967741935483871}
class distribution for test_df
Examples:
    Total: 37
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 18.50
{0: 0.5, 1: 18.5}
Examples:
    Total: 1480
    Positive: 124 (8.38% of total)

Weight for class 0: 0.55
Weight for class 1: 5.97
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.08172821253538132, 1.0, 0.0]
#######################
Fold number: 5
#######################
start pointer:  740

Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.0596129447221756, 1.0, 0.0]
#######################
Fold number: 3
#######################
start pointer:  370
end pointer;  555
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
class distribution for train_df
Examples:
    Total: 1480
    Positive: 88 (5.95% of total)

Weight for class 0: 0.53
Weight for class 1: 8.41
{0: 0.5316091954022989, 1: 8.40909090909091}
class distribution for test_df
Examples:
    Total: 37
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 18.50
{0: 0.5, 1: 18.5}
Examples:
    Total: 1480
    Positive: 88 (5.95% of total)

Weight for class 0: 0.53
Weight for class 1: 8.41
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.06318534165620804, 1.0, 0.0]
#######################
Fold number: 4
#######################
start pointer:  555
end pointer;  

Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.021406712010502815, 1.0, 0.0]
#######################
Fold number: 2
#######################
start pointer:  185
end pointer;  370
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
class distribution for train_df
Examples:
    Total: 1480
    Positive: 117 (7.91% of total)

Weight for class 0: 0.54
Weight for class 1: 6.32
{0: 0.5429200293470287, 1: 6.324786324786325}
class distribution for test_df
Examples:
    Total: 37
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 18.50
{0: 0.5, 1: 18.5}
Examples:
    Total: 1480
    Positive: 117 (7.91% of total)

Weight for class 0: 0.54
Weight for class 1: 6.32
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.14280150830745697, 1.0, 0.0]
#######################
Fold number: 3
#######################
start pointer:  370
end pointer;  555
X_

Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.043864425271749496, 1.0, 0.0]
##############
iteration 6
cross-validation for patient:  70330
##############
0    939
1     10
Name: stroke, dtype: int64
#######################
Fold number: 1
#######################
start pointer:  0
end pointer;  185
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
class distribution for train_df
Examples:
    Total: 1480
    Positive: 107 (7.23% of total)

Weight for class 0: 0.54
Weight for class 1: 6.92
{0: 0.538965768390386, 1: 6.915887850467289}
class distribution for test_df
Examples:
    Total: 37
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 18.50
{0: 0.5, 1: 18.5}
Examples:
    Total: 1480
    Positive: 107 (7.23% of total)

Weight for class 0: 0.54
Weight for class 1: 6.92
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.04489242658019066, 1.0, 0.0]
####

Epoch 9/10
Epoch 10/10
[0.1268790066242218, 1.0, 0.0]
#######################
Fold number: 5
#######################
start pointer:  740
end pointer;  925
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
class distribution for train_df
Examples:
    Total: 1480
    Positive: 126 (8.51% of total)

Weight for class 0: 0.55
Weight for class 1: 5.87
{0: 0.5465288035450517, 1: 5.873015873015873}
class distribution for test_df
Examples:
    Total: 37
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 18.50
{0: 0.5, 1: 18.5}
Examples:
    Total: 1480
    Positive: 126 (8.51% of total)

Weight for class 0: 0.55
Weight for class 1: 5.87
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.06348934024572372, 1.0, 0.0]
##############
iteration 7
cross-validation for patient:  74438
##############
1    567
0    557
Name: stroke, dtype: int64
##############

Epoch 10/10
[3.5375757217407227, 0.29032257199287415, 1.0]
#######################
Fold number: 4
#######################
start pointer:  555
end pointer;  740
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
class distribution for train_df
Examples:
    Total: 1480
    Positive: 124 (8.38% of total)

Weight for class 0: 0.55
Weight for class 1: 5.97
{0: 0.5457227138643068, 1: 5.967741935483871}
class distribution for test_df
Examples:
    Total: 37
    Positive: 14 (37.84% of total)

Weight for class 0: 0.80
Weight for class 1: 1.32
{0: 0.8043478260869565, 1: 1.3214285714285714}
Examples:
    Total: 1480
    Positive: 124 (8.38% of total)

Weight for class 0: 0.55
Weight for class 1: 5.97
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.8271055817604065, 0.6129032373428345, 0.8999999761581421]
#######################
Fold number: 5
#######################
start point

[0.056035444140434265, 1.0, 0.0]
#######################
Fold number: 3
#######################
start pointer:  370
end pointer;  555
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
class distribution for train_df
Examples:
    Total: 1480
    Positive: 88 (5.95% of total)

Weight for class 0: 0.53
Weight for class 1: 8.41
{0: 0.5316091954022989, 1: 8.40909090909091}
class distribution for test_df
Examples:
    Total: 37
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 18.50
{0: 0.5, 1: 18.5}
Examples:
    Total: 1480
    Positive: 88 (5.95% of total)

Weight for class 0: 0.53
Weight for class 1: 8.41
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.05883586406707764, 1.0, 0.0]
#######################
Fold number: 4
#######################
start pointer:  555
end pointer;  740
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test

[0.5876544713973999, 0.9354838728904724, 0.0]
#######################
Fold number: 2
#######################
start pointer:  185
end pointer;  370
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
class distribution for train_df
Examples:
    Total: 1480
    Positive: 117 (7.91% of total)

Weight for class 0: 0.54
Weight for class 1: 6.32
{0: 0.5429200293470287, 1: 6.324786324786325}
class distribution for test_df
Examples:
    Total: 37
    Positive: 0 (0.00% of total)

Weight for class 0: 0.50
Weight for class 1: 18.50
{0: 0.5, 1: 18.5}
Examples:
    Total: 1480
    Positive: 117 (7.91% of total)

Weight for class 0: 0.54
Weight for class 1: 6.32
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.05058278888463974, 1.0, 0.0]
#######################
Fold number: 3
#######################
start pointer:  370
end pointer;  555
X_train shape: (1420, 4, 16), y_train.shape: 

X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
class distribution for train_df
Examples:
    Total: 1480
    Positive: 107 (7.23% of total)

Weight for class 0: 0.54
Weight for class 1: 6.92
{0: 0.538965768390386, 1: 6.915887850467289}
class distribution for test_df
Examples:
    Total: 37
    Positive: 18 (48.65% of total)

Weight for class 0: 0.97
Weight for class 1: 1.03
{0: 0.9736842105263157, 1: 1.0277777777777777}
Examples:
    Total: 1480
    Positive: 107 (7.23% of total)

Weight for class 0: 0.54
Weight for class 1: 6.92
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.7745305299758911, 0.5806451439857483, 1.0]
#######################
Fold number: 2
#######################
start pointer:  185
end pointer;  370
X_train shape: (1420, 4, 16), y_train.shape: (1420, 1)
X_test shape: (31, 4, 16), y_test.shape: (31, 1)
class distribution for train_df
Examples:
   

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[2.239638566970825, 0.32258063554763794, 1.0]


In [127]:
average_results_2 = {}

for metric_dict in final_metrics_v2:
    
    loss = []
    acc = []
    recall = []  
    # Loop through each fold's results
    for fold in metric_dict['results']:
        results = fold['results']
        loss.append(results[0])
        acc.append(results[1])
        recall.append(results[2])

    # Calculate average for each metric across all folds
    avg_loss = np.mean(loss)
    avg_acc = np.mean(acc)
    avg_recall = np.mean(recall)

    # Store average results for this validation patient ID
    average_results_2[metric_dict['validation patient ID']] = [avg_loss, avg_acc, avg_recall]

average_results_2


{73695: [0.08168927133083344, 1.0, 0.0],
 71059: [0.49174468517303466, 0.7935483813285827, 0.0],
 48872: [0.07214270606637001, 1.0, 0.0],
 40013: [0.43570808619260787, 0.8193548381328583, 0.0],
 60393: [0.0496336467564106, 1.0, 0.0],
 70330: [0.20879362821578978, 0.9225806355476379, 0.0],
 74438: [2.0265417814254763, 0.4451612770557404, 0.9799999952316284],
 51385: [0.08599675446748734, 1.0, 0.0],
 70191: [0.15193893611431122, 0.9870967745780945, 0.0],
 72908: [1.1356263637542725, 0.5032258033752441, 0.6]}

In [128]:
avg_acc=np.mean([metrics[1] for metrics in average_results_2.values()])

In [129]:
res

0.8412903189659119