In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw


In [None]:
def preprocess_features(feature_set, column, number_of_windows):
    """
    Extract one feature column together with its subject identifiers.

    Parameters
    ----------
    feature_set : pandas.DataFrame
        Frame holding a 'SubjectId' column and the requested ``column``.
    column : str
        Name of the feature column to pull out.
    number_of_windows : int
        Accepted so the signature lines up with the call site; this
        function does not read it.

    Returns
    -------
    tuple
        ``(feature_set_b, client_id)``: a two-column frame
        ('SubjectId', 'value') that keeps the index of ``feature_set``, and
        a Series of the distinct subject ids in first-appearance order with
        a fresh 0..n-1 index.
    """
    subject_ids = feature_set['SubjectId']
    selected = pd.DataFrame(dict(SubjectId=subject_ids, value=feature_set[column]))

    # Keep the first row seen for every subject, then renumber from zero.
    is_first_occurrence = ~selected['SubjectId'].duplicated(keep='first')
    unique_subjects = selected['SubjectId'][is_first_occurrence].reset_index(drop=True)

    return selected, unique_subjects

In [None]:
def calculate_separation(feature_set_b, feature_mortality, number_of_windows):
    """
    Calculate the separation distance average (nrmSepDA) for the feature set.

    Every subject's ``number_of_windows`` rows of ``feature_set_b['value']``
    are folded into one series. The series are standardised per window
    position, split by the 'Death' label, and the result is the mean of
    (a) the average DTW distance from the Death == 0 centroid to each
    Death == 1 series and (b) the average DTW distance from the Death == 1
    centroid to each Death == 0 series.

    Parameters
    ----------
    feature_set_b : pandas.DataFrame
        Columns 'SubjectId' and 'value'. Each subject is expected to own
        exactly ``number_of_windows`` rows, in window order.
    feature_mortality : pandas.DataFrame
        Columns 'SubjectId' and 'Death' (compared against 0 and 1), one row
        per subject.
    number_of_windows : int
        Length of each subject's series.

    Returns
    -------
    float
        The separation value, or NaN when either class has no subjects.

    Raises
    ------
    ValueError
        If the number of labels does not match the number of reshaped
        series (for example duplicated subjects in ``feature_mortality`` or
        a subject without exactly ``number_of_windows`` rows).
    """
    # Keep only subjects that appear in the feature table. sort_values returns
    # a new frame, so the filtered slice is never mutated in place (the
    # original inplace sort raised SettingWithCopyWarning).
    has_features = feature_mortality['SubjectId'].isin(feature_set_b['SubjectId'])
    feature_mortality_filtered = feature_mortality[has_features].sort_values(
        by=['SubjectId'], ascending=True, kind='mergesort'
    )

    client_id = feature_mortality_filtered['SubjectId'].drop_duplicates(keep='first').reset_index(drop=True)

    # Sort the features by subject as well so that row i of x belongs to label
    # i of y. mergesort is stable, so the window order inside each subject is
    # preserved; input that is already sorted is left unchanged.
    has_label = feature_set_b['SubjectId'].isin(client_id)
    feature_set_b_filtered = feature_set_b[has_label].sort_values(
        by=['SubjectId'], ascending=True, kind='mergesort'
    )

    y = feature_mortality_filtered['Death'].values
    x = feature_set_b_filtered['value'].values.reshape(-1, number_of_windows)

    if len(y) != x.shape[0]:
        raise ValueError(
            f"Label/series mismatch: {len(y)} mortality rows but {x.shape[0]} "
            f"series of {number_of_windows} windows; check for duplicated or "
            f"incomplete subjects."
        )

    # Scale the features
    scaler = StandardScaler()
    x_scaled = scaler.fit_transform(x)

    # Split the series by outcome
    x_0 = x_scaled[y == 0]
    x_1 = x_scaled[y == 1]

    # With an empty class the centroid and the averages below are undefined;
    # return NaN directly instead of relying on empty-mean runtime warnings.
    if len(x_0) == 0 or len(x_1) == 0:
        return np.nan

    cent_neg = np.mean(x_0, axis=0)
    cent_pos = np.mean(x_1, axis=0)

    # NOTE(review): the previous version also computed the centroid-to-centroid
    # DTW distance but never used it. If nrmSepDA is meant to be normalised by
    # that distance, the division is missing here - confirm against the
    # method definition before relying on the "normalized" name.
    pos_sep_d = _mean_dtw_to_centroid(cent_neg, x_1)
    neg_sep_d = _mean_dtw_to_centroid(cent_pos, x_0)

    nrm_sep_da = (pos_sep_d + neg_sep_d) / 2
    return nrm_sep_da


def _mean_dtw_to_centroid(centroid, rows):
    """Average fastdtw distance from ``centroid`` to every series in ``rows``."""
    # Column vectors make each DTW step compare 1-element vectors. scipy's
    # euclidean is defined for 1-D inputs, and some SciPy versions raise
    # "Input vector should be 1-D" on the bare scalars a flat series yields.
    # The distance itself (|a - b| per step) is unchanged.
    reference = centroid.reshape(-1, 1)
    return np.mean([fastdtw(reference, row.reshape(-1, 1), dist=euclidean)[0] for row in rows])


In [None]:
# Window counts to evaluate; each one has its own 'MIMIC/DATA_<n>.csv' file.
WINDOW_COUNTS = [1, 2, 3, 6, 12, 45, 90]


def _gradient_or_nan(values):
    """np.gradient needs at least two points; shorter groups get NaN instead of
    raising, so a column seen for one window count cannot abort the save."""
    if len(values) < 2:
        return np.full(len(values), np.nan)
    return np.gradient(values)


results = []
feature_mortality = pd.read_csv('FeatureMortality.csv') # Adjust the file path as needed

for number_of_windows in WINDOW_COUNTS:
    file_name = f'MIMIC/DATA_{number_of_windows}.csv'
    feature_set = (
        pd.read_csv(file_name)
        # Presumably the index column written by an earlier to_csv; tolerate
        # files that were exported without it.
        .drop(columns="Unnamed: 0", errors="ignore")
        # Stable sort: rows of one subject keep their window order, which the
        # later reshape into (subjects, windows) depends on. The default
        # quicksort gives no such guarantee.
        .sort_values(by=['SubjectId'], ascending=True, kind='mergesort')
    )

    for column in feature_set.columns:
        if column == 'SubjectId':
            continue

        feature_set_b, client_id = preprocess_features(feature_set, column, number_of_windows)
        nrm_sep_da = calculate_separation(feature_set_b, feature_mortality, number_of_windows)

        results.append({
            'Window Size': number_of_windows,
            'Column': column,
            'nrmSepDA': nrm_sep_da
        })

# Calculate gradients and save results
results_df = pd.DataFrame(results)
# Rows of each column are in WINDOW_COUNTS order because of the loop above.
# NOTE(review): np.gradient is called with its default unit spacing, i.e. the
# gradient is per step in WINDOW_COUNTS, not per unit of window size (the
# counts are unevenly spaced) - confirm this is the intended quantity.
results_df['Gradient'] = results_df.groupby('Column')['nrmSepDA'].transform(_gradient_or_nan)
results_df.to_csv('Gradient_Results.csv', index=False)

print("Processing and saving complete.")