In [1]:
import pytz
import os


DEFAULT_TZ = pytz.FixedOffset(540)  # GMT+09:00; Asia/Seoul

PATH_DATA = 'data/D'
PATH_ESM = os.path.join(PATH_DATA, 'EsmResponse.csv')
PATH_PARTICIPANT = os.path.join(PATH_DATA, 'UserInfo.csv')
PATH_SENSOR = os.path.join(PATH_DATA, 'Sensor')

PATH_INTERMEDIATE = os.path.join('/home/uzair/Stress/StressDetection_Collaboration/Data_Processing_D1/data/intermediate')
RANDOM_STATE = 42

In [2]:
import pandas as pd
import numpy as np
import scipy.stats as st
import cloudpickle
import ray
from datetime import datetime
from contextlib import contextmanager
import warnings
import time


def load(path: str):
    with open(path, mode='rb') as f:
        return cloudpickle.load(f)

    
def dump(obj, path: str):
    with open(path, mode='wb') as f:
        cloudpickle.dump(obj, f)
        
    
def log(msg: any):
    print('[{}] {}'.format(datetime.now().strftime('%y-%m-%d %H:%M:%S'), msg))


def summary(x):
    x = np.asarray(x)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        n = len(x)
        # Here, uppercase np.dtype.kind corresponds to non-numeric data.
        # Also, we view the boolean data as dichotomous categorical data.
        if x.dtype.kind.isupper() or x.dtype.kind == 'b': 
            cnt = pd.Series(x).value_counts(dropna=False)
            card = len(cnt)
            cnt = cnt[:20]                
            cnt_str = ', '.join([f'{u}:{c}' for u, c in zip(cnt.index, cnt)])
            if card > 30:
                cnt_str = f'{cnt_str}, ...'
            return {
                'n': n,
                'cardinality': card,
                'value_count': cnt_str
            }
        else: 
            x_nan = x[np.isnan(x)]
            x_norm = x[~np.isnan(x)]
            
            tot = np.sum(x_norm)
            m = np.mean(x_norm)
            me = np.median(x_norm)
            s = np.std(x_norm, ddof=1)
            l, u = np.min(x_norm), np.max(x)
            conf_l, conf_u = st.t.interval(0.95, len(x_norm) - 1, loc=m, scale=st.sem(x_norm))
            n_nan = len(x_nan)
            
            return {
                'n': n,
                'sum': tot,
                'mean': m,
                'SD': s,
                'med': me,
                'range': (l, u),
                'conf.': (conf_l, conf_u),
                'nan_count': n_nan
            }

In [3]:
p = os.path.join(PATH_INTERMEDIATE, 'stress-fixed.pkl')
X, y, groups, t, datetimes = load(p)

In [4]:
PARTICIPANTS = pd.read_csv(os.path.join(PATH_INTERMEDIATE, 'PARTICIPANT_INFO.csv'),index_col = 'pcode')
PINFO = PARTICIPANTS.assign(
    BFI_OPN=lambda x: x['openness'],
    BFI_CON=lambda x: x['conscientiousness'],
    BFI_NEU=lambda x: x['neuroticism'],
    BFI_EXT=lambda x: x['extraversion'],
    BFI_AGR=lambda x: x['agreeableness'],
)[[
    'BFI_OPN', 'BFI_CON', 'BFI_NEU', 'BFI_EXT', 'BFI_AGR'
]]
PINFO = pd.get_dummies(PINFO, prefix_sep='=', dtype=bool)

In [5]:
LABELS_PROC = pd.read_csv(os.path.join(PATH_INTERMEDIATE, 'LABELS_PROC.csv'), index_col=['pcode','timestamp'],parse_dates=True)

In [6]:
_df =LABELS_PROC
_df.reset_index(level='timestamp', inplace=True)
print('First timestamp:', _df['timestamp'].min())
print('Last timestamp:', _df['timestamp'].max())

First timestamp: 2019-04-30 10:03:28+09:00
Last timestamp: 2019-05-22 22:02:03+09:00


In [7]:
time_ranges = _df.groupby('pcode')['timestamp'].agg(['min', 'max'])

In [8]:
list_pid = set(LABELS_PROC.index.get_level_values('pcode').values)

In [9]:
PINFO_valid = PINFO.loc[PINFO.index.isin(list_pid)]

In [10]:
#Divide the features into different categories
feat_current = X.loc[:,[('#VAL' in str(x)) or ('ESM#LastLabel' in str(x)) for x in X.keys()]]  
feat_dsc = X.loc[:,[('#DSC' in str(x))  for x in X.keys()]]  
feat_yesterday = X.loc[:,[('Yesterday' in str(x))  for x in X.keys()]]  
feat_today = X.loc[:,[('Today' in str(x))  for x in X.keys()]]  
feat_sleep = X.loc[:,[('Sleep' in str(x))  for x in X.keys()]]  
feat_time = X.loc[:,[('Time' in str(x))  for x in X.keys()]]  
feat_pif = X.loc[:,[('PIF' in str(x))  for x in X.keys()]]  
feat_ImmediatePast = X.loc[:,[('ImmediatePast_15' in str(x))  for x in X.keys()]]
#Divide the time window features into sensor/past stress label
feat_current_sensor = X.loc[:,[('#VAL' in str(x))  for x in X.keys()]]  
feat_current_ESM = X.loc[:,[('ESM#LastLabel' in str(x)) for x in X.keys()]]  
feat_ImmediatePast_sensor = feat_ImmediatePast.loc[:,[('ESM' not in str(x)) for x in feat_ImmediatePast.keys()]]  
feat_ImmediatePast_ESM = feat_ImmediatePast.loc[:,[('ESM'  in str(x)) for x in feat_ImmediatePast.keys()]]  
feat_today_sensor = feat_today.loc[:,[('ESM' not in str(x))  for x in feat_today.keys()]]  
feat_today_ESM = feat_today.loc[:,[('ESM'  in str(x)) for x in feat_today.keys()]]  
feat_yesterday_sensor = feat_yesterday.loc[:,[('ESM' not in str(x)) for x in feat_yesterday.keys()]]  
feat_yesterday_ESM = feat_yesterday.loc[:,[('ESM'  in str(x)) for x in feat_yesterday.keys()]]
feat_baseline = pd.concat([ feat_time,feat_dsc,feat_current_sensor, feat_ImmediatePast_sensor],axis=1)
feat_final = pd.concat([feat_baseline  ], axis=1)
X = feat_final
cats = X.columns[X.dtypes == bool]

In [11]:
similar_user = pd.read_csv(os.path.join(PATH_INTERMEDIATE,  'similar_user_3.csv'))

In [12]:
import pandas as pd

# Convert groups to a pandas DataFrame
groups_df = pd.DataFrame(groups, columns=['pcode'])

# Ensure the data types of the 'pcode' and 'pcode' columns are the same
groups_df['pcode'] = groups_df['pcode'].astype(str)
similar_user['pcode'] = similar_user['pcode'].astype(str)

# Create a new Series that maps Pcode to cluster label
cluster_map = similar_user.set_index('pcode')['cluster']

# Create the 'cluster' column in the 'groups' DataFrame
groups_df['cluster'] = groups_df['pcode'].map(cluster_map)

In [13]:
cluster_map.head()

pcode
P01    2
P02    0
P03    2
P05    0
P06    1
Name: cluster, dtype: int64

In [14]:
# Create a dictionary to store data structures for each cluster
clusters = {}

# Loop through each unique cluster label in the Series
for cluster_label in cluster_map.unique():
    clusters[cluster_label] = cluster_map[cluster_map == cluster_label]

In [15]:
# XGBoost parameters
param = {
    "learning_rate": 0.01,
    "seed": RANDOM_STATE,
    "objective": 'binary:logistic',
    'eval_metric': 'auc',
    'verbosity': 0,
}

In [16]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from imblearn.over_sampling import SMOTENC, SMOTE
from tqdm import tqdm
from sklearn.model_selection import StratifiedGroupKFold, GroupKFold, KFold, TimeSeriesSplit, LeavePGroupsOut, train_test_split, GroupShuffleSplit
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder

In [17]:
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.base import clone

normalize =True
# Feature selection method
selector = SelectFromModel(
        estimator=LogisticRegression(penalty='l1', solver='liblinear', C=1, random_state=RANDOM_STATE, max_iter=4000),
        threshold=0.005
#         estimator=LinearSVC(
#         penalty='l1',
#         loss='squared_hinge',
#         dual=False,
#         tol=1e-3,
#         C=1e-2,
#         max_iter=5000,
#         random_state=RANDOM_STATE
#     )  
    )
select = [clone(selector)]
oversample = True

In [18]:
C_cat = np.asarray(sorted(cats))
C_num = np.asarray(sorted(X.columns[~X.columns.isin(C_cat)]))

In [19]:
def process_dataframe(X):
    """
    Process the input DataFrame 'X':
    1. Set column names as a range from 0 to the number of columns.
    2. Replace boolean values with 1 for True and 0 for False.

    Parameters:
        X (pandas.DataFrame): The input DataFrame.

    Returns:
        pandas.DataFrame: The processed DataFrame.
    """
    # Set column names as a range from 0 to the number of columns
    X.columns = range(X.shape[1])

In [20]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, recall_score, precision_score

# Store the results
results = {}

# Iterate through each cluster
for cluster_label, cluster_data in clusters.items():
    results[cluster_label] = {'accuracy': [], 'auc': [], 'f1': [], 'recall': [], 'F1 Positive': [], 'Precision': []}
    
    # Users in current cluster
    users_in_cluster = cluster_data.index.tolist()
    
    # Create a mask for data instances belonging to this cluster
    cluster_data_mask = groups_df['pcode'].isin(users_in_cluster)
    
    # Iterate through each user in the cluster for LOUO cross-validation
    for user in users_in_cluster:
        
        # Find the indices in groups_df for this user
        user_indices = groups_df[groups_df['pcode'] == user].index.tolist()
        
        # Mask for excluding the current user's data
        train_mask = (cluster_data_mask) & (~groups_df.index.isin(user_indices))
        
        # Split the data for LOUO cross-validation
        X_train = X[train_mask]
        y_train = y[train_mask.values]
        
        X_test = X.loc[user_indices]
        y_test = y[user_indices]
        
        X_train, X_eval, y_train, y_eval = train_test_split(X_train, y_train, test_size=0.2, random_state=RANDOM_STATE,  stratify=y_train)
        
        if normalize:
            # Normalize numeric features
            scaler = StandardScaler().fit(X_train[C_num])
            X_train[C_num] = scaler.transform(X_train[C_num].copy())
            X_eval[C_num] = scaler.transform(X_eval[C_num].copy())
            X_test[C_num] = scaler.transform(X_test[C_num].copy())

        if select:
            if isinstance(select, SelectFromModel):
                select = [select]
            for i, s in enumerate(select):
                # Fit feature selector only on training data
                s.fit(X_train, y_train)
                selected_features = X_train.columns[s.get_support()].tolist()

                # Apply feature selection to train, eval, and test
                X_train = X_train[selected_features].copy()
                X_eval = X_eval[selected_features].copy()
                X_test = X_test[selected_features].copy()

        if oversample:
            # Determine categorical features for SMOTENC
            if len(C_cat):
                M = np.isin(X_train.columns, C_cat)
                sampler = SMOTENC(categorical_features=M, random_state=RANDOM_STATE)
            else:
                sampler = SMOTE(random_state = RANDOM_STATE)
            # Only oversample training data
            X_train, y_train = sampler.fit_resample(X_train, y_train)
        
        # Train the XGBoost model
        dtrain = xgb.DMatrix(X_train, label=y_train)
        deval = xgb.DMatrix(X_eval, label=y_eval)
        dtest = xgb.DMatrix(X_test, label=y_test)
        
        evallist = [(dtrain, 'train'), (deval, 'eval')]
        bst = xgb.train(param, dtrain, early_stopping_rounds=10, evals=evallist)
        
        y_real = dtest.get_label()  # Use the labels directly
        y_score = bst.predict(dtest,ntree_limit=bst.best_ntree_limit)
        
        y_pred = [1 if score >= 0.5 else 0 for score in y_score]
        
        # Calculate the metrics
        accuracy = accuracy_score(y_real, y_pred)
        auc = roc_auc_score(y_true=y_real, y_score=y_score, average=None)
        f1 = f1_score(y_true=y_real, y_pred=y_pred, pos_label=1, average='macro', zero_division=0)
        recall = recall_score(y_true=y_real, y_pred=y_pred, pos_label=1, average='macro', zero_division=0)
        f1p = f1_score(y_true=y_real, y_pred=y_pred, pos_label=1, average='binary', zero_division=0)
        precision = precision_score(y_true=y_real, y_pred=y_pred, pos_label=1, average='macro', zero_division=0)
        
        # Store the metrics
        results[cluster_label]['accuracy'].append(accuracy)
        results[cluster_label]['auc'].append(auc)
        results[cluster_label]['f1'].append(f1)
        results[cluster_label]['recall'].append(recall)
        results[cluster_label]['F1 Positive'].append(f1p)
        results[cluster_label]['Precision'].append(precision)

# results dictionary will have the metrics for each user in each cluster

[0]	train-auc:0.92807	eval-auc:0.60777
[1]	train-auc:0.93969	eval-auc:0.61953
[2]	train-auc:0.94410	eval-auc:0.61953
[3]	train-auc:0.94504	eval-auc:0.61305
[4]	train-auc:0.94429	eval-auc:0.60566
[5]	train-auc:0.94554	eval-auc:0.61463
[6]	train-auc:0.94634	eval-auc:0.61780
[7]	train-auc:0.94733	eval-auc:0.62149
[8]	train-auc:0.94776	eval-auc:0.62044
[9]	train-auc:0.94754	eval-auc:0.62059




[0]	train-auc:0.89803	eval-auc:0.64808
[1]	train-auc:0.89888	eval-auc:0.64631
[2]	train-auc:0.90081	eval-auc:0.64123
[3]	train-auc:0.90026	eval-auc:0.64385
[4]	train-auc:0.90217	eval-auc:0.64192
[5]	train-auc:0.90210	eval-auc:0.64192
[6]	train-auc:0.90292	eval-auc:0.63992
[7]	train-auc:0.90330	eval-auc:0.63954
[8]	train-auc:0.93668	eval-auc:0.64931
[9]	train-auc:0.93851	eval-auc:0.64823




[0]	train-auc:0.91299	eval-auc:0.59994
[1]	train-auc:0.92058	eval-auc:0.61575
[2]	train-auc:0.91900	eval-auc:0.62076
[3]	train-auc:0.91908	eval-auc:0.62244
[4]	train-auc:0.92329	eval-auc:0.62145
[5]	train-auc:0.92361	eval-auc:0.62114
[6]	train-auc:0.92883	eval-auc:0.62874
[7]	train-auc:0.93306	eval-auc:0.62973
[8]	train-auc:0.93522	eval-auc:0.62958
[9]	train-auc:0.93570	eval-auc:0.62623




[0]	train-auc:0.90348	eval-auc:0.61314
[1]	train-auc:0.90334	eval-auc:0.61538
[2]	train-auc:0.90286	eval-auc:0.62067
[3]	train-auc:0.94539	eval-auc:0.59567
[4]	train-auc:0.95815	eval-auc:0.60721
[5]	train-auc:0.96165	eval-auc:0.60489
[6]	train-auc:0.96605	eval-auc:0.58934
[7]	train-auc:0.96728	eval-auc:0.59303
[8]	train-auc:0.96735	eval-auc:0.59014
[9]	train-auc:0.97022	eval-auc:0.58542




[0]	train-auc:0.89874	eval-auc:0.61584
[1]	train-auc:0.89891	eval-auc:0.62406
[2]	train-auc:0.89863	eval-auc:0.61652
[3]	train-auc:0.89902	eval-auc:0.61471
[4]	train-auc:0.89923	eval-auc:0.60935
[5]	train-auc:0.89897	eval-auc:0.60837
[6]	train-auc:0.92375	eval-auc:0.61531
[7]	train-auc:0.92276	eval-auc:0.61380
[8]	train-auc:0.93076	eval-auc:0.62232
[9]	train-auc:0.93054	eval-auc:0.62383




[0]	train-auc:0.91493	eval-auc:0.60747
[1]	train-auc:0.96868	eval-auc:0.60362
[2]	train-auc:0.96478	eval-auc:0.59275
[3]	train-auc:0.96604	eval-auc:0.58762
[4]	train-auc:0.97088	eval-auc:0.59125
[5]	train-auc:0.97160	eval-auc:0.59125
[6]	train-auc:0.97594	eval-auc:0.60204
[7]	train-auc:0.97527	eval-auc:0.60596
[8]	train-auc:0.97684	eval-auc:0.60385
[9]	train-auc:0.97671	eval-auc:0.60762




[0]	train-auc:0.93249	eval-auc:0.61213
[1]	train-auc:0.94281	eval-auc:0.62076
[2]	train-auc:0.94100	eval-auc:0.62591
[3]	train-auc:0.94239	eval-auc:0.63287
[4]	train-auc:0.94400	eval-auc:0.62985
[5]	train-auc:0.94425	eval-auc:0.63287
[6]	train-auc:0.94562	eval-auc:0.63098
[7]	train-auc:0.95621	eval-auc:0.63181
[8]	train-auc:0.95636	eval-auc:0.63583
[9]	train-auc:0.95802	eval-auc:0.63250




[0]	train-auc:0.92025	eval-auc:0.62340
[1]	train-auc:0.92207	eval-auc:0.64048
[2]	train-auc:0.92472	eval-auc:0.64151
[3]	train-auc:0.93613	eval-auc:0.63558
[4]	train-auc:0.93707	eval-auc:0.63629
[5]	train-auc:0.93829	eval-auc:0.63534
[6]	train-auc:0.93863	eval-auc:0.63139
[7]	train-auc:0.94516	eval-auc:0.63392
[8]	train-auc:0.94631	eval-auc:0.63297
[9]	train-auc:0.94614	eval-auc:0.63218




[0]	train-auc:0.87642	eval-auc:0.61216
[1]	train-auc:0.87581	eval-auc:0.60239
[2]	train-auc:0.89967	eval-auc:0.60239
[3]	train-auc:0.91033	eval-auc:0.60861
[4]	train-auc:0.91362	eval-auc:0.60893
[5]	train-auc:0.91005	eval-auc:0.61232
[6]	train-auc:0.91610	eval-auc:0.61345
[7]	train-auc:0.91785	eval-auc:0.61458
[8]	train-auc:0.95144	eval-auc:0.64236
[9]	train-auc:0.95208	eval-auc:0.64260




[0]	train-auc:0.91462	eval-auc:0.60490
[1]	train-auc:0.91852	eval-auc:0.58983
[2]	train-auc:0.91840	eval-auc:0.60150
[3]	train-auc:0.91665	eval-auc:0.60065
[4]	train-auc:0.91649	eval-auc:0.59709
[5]	train-auc:0.92376	eval-auc:0.59918
[6]	train-auc:0.92368	eval-auc:0.60165
[7]	train-auc:0.92704	eval-auc:0.59833
[8]	train-auc:0.92793	eval-auc:0.60529
[9]	train-auc:0.92938	eval-auc:0.60575




[0]	train-auc:0.93152	eval-auc:0.62259
[1]	train-auc:0.93595	eval-auc:0.63171
[2]	train-auc:0.93508	eval-auc:0.63406
[3]	train-auc:0.93821	eval-auc:0.63406
[4]	train-auc:0.93720	eval-auc:0.63421
[5]	train-auc:0.93716	eval-auc:0.63604
[6]	train-auc:0.93948	eval-auc:0.63733
[7]	train-auc:0.93945	eval-auc:0.63642
[8]	train-auc:0.93908	eval-auc:0.64098
[9]	train-auc:0.94159	eval-auc:0.63763




[0]	train-auc:0.90795	eval-auc:0.58162
[1]	train-auc:0.93696	eval-auc:0.57570
[2]	train-auc:0.93543	eval-auc:0.59257
[3]	train-auc:0.94169	eval-auc:0.60055
[4]	train-auc:0.93991	eval-auc:0.60017
[5]	train-auc:0.95311	eval-auc:0.60663
[6]	train-auc:0.95428	eval-auc:0.59698
[7]	train-auc:0.95721	eval-auc:0.60534
[8]	train-auc:0.95998	eval-auc:0.61848
[9]	train-auc:0.95879	eval-auc:0.62304




[0]	train-auc:0.91850	eval-auc:0.57447
[1]	train-auc:0.91985	eval-auc:0.58076
[2]	train-auc:0.93062	eval-auc:0.55936
[3]	train-auc:0.93033	eval-auc:0.56861
[4]	train-auc:0.93617	eval-auc:0.58908
[5]	train-auc:0.94699	eval-auc:0.58127
[6]	train-auc:0.94899	eval-auc:0.59290
[7]	train-auc:0.95095	eval-auc:0.59265
[8]	train-auc:0.95180	eval-auc:0.59893
[9]	train-auc:0.95386	eval-auc:0.60470




[0]	train-auc:0.92264	eval-auc:0.62146
[1]	train-auc:0.93700	eval-auc:0.64977
[2]	train-auc:0.95436	eval-auc:0.66215
[3]	train-auc:0.95473	eval-auc:0.65462
[4]	train-auc:0.96375	eval-auc:0.66585
[5]	train-auc:0.96252	eval-auc:0.66438
[6]	train-auc:0.96465	eval-auc:0.66923
[7]	train-auc:0.96249	eval-auc:0.65985
[8]	train-auc:0.96448	eval-auc:0.67054
[9]	train-auc:0.96369	eval-auc:0.66415




[0]	train-auc:0.90740	eval-auc:0.59142
[1]	train-auc:0.90767	eval-auc:0.58685
[2]	train-auc:0.90845	eval-auc:0.58535
[3]	train-auc:0.91065	eval-auc:0.58953
[4]	train-auc:0.92609	eval-auc:0.60268
[5]	train-auc:0.93165	eval-auc:0.59748
[6]	train-auc:0.93089	eval-auc:0.59362
[7]	train-auc:0.93538	eval-auc:0.60386
[8]	train-auc:0.93728	eval-auc:0.60748
[9]	train-auc:0.93856	eval-auc:0.60488




[0]	train-auc:0.95509	eval-auc:0.60049
[1]	train-auc:0.95702	eval-auc:0.60082
[2]	train-auc:0.95663	eval-auc:0.60065
[3]	train-auc:0.95745	eval-auc:0.60996
[4]	train-auc:0.96101	eval-auc:0.60580
[5]	train-auc:0.96074	eval-auc:0.61429
[6]	train-auc:0.96165	eval-auc:0.61159
[7]	train-auc:0.96268	eval-auc:0.60988
[8]	train-auc:0.96575	eval-auc:0.61306
[9]	train-auc:0.96723	eval-auc:0.61502




[0]	train-auc:0.89260	eval-auc:0.61790
[1]	train-auc:0.89187	eval-auc:0.60968
[2]	train-auc:0.89363	eval-auc:0.61250
[3]	train-auc:0.91270	eval-auc:0.60782
[4]	train-auc:0.91264	eval-auc:0.60629
[5]	train-auc:0.92310	eval-auc:0.61629
[6]	train-auc:0.92440	eval-auc:0.60855
[7]	train-auc:0.93392	eval-auc:0.61089
[8]	train-auc:0.93628	eval-auc:0.61169
[9]	train-auc:0.93590	eval-auc:0.61315




[0]	train-auc:0.90669	eval-auc:0.57064
[1]	train-auc:0.91096	eval-auc:0.56931
[2]	train-auc:0.91084	eval-auc:0.56829
[3]	train-auc:0.90728	eval-auc:0.56672
[4]	train-auc:0.90877	eval-auc:0.57645
[5]	train-auc:0.93834	eval-auc:0.58061
[6]	train-auc:0.93718	eval-auc:0.57606
[7]	train-auc:0.94040	eval-auc:0.58093
[8]	train-auc:0.94497	eval-auc:0.58218
[9]	train-auc:0.94707	eval-auc:0.58666




[0]	train-auc:0.92803	eval-auc:0.62865
[1]	train-auc:0.93912	eval-auc:0.63578
[2]	train-auc:0.93842	eval-auc:0.63495
[3]	train-auc:0.93949	eval-auc:0.62798
[4]	train-auc:0.96373	eval-auc:0.61596
[5]	train-auc:0.97215	eval-auc:0.60942
[6]	train-auc:0.97534	eval-auc:0.60618
[7]	train-auc:0.97963	eval-auc:0.61845
[8]	train-auc:0.97993	eval-auc:0.60958
[9]	train-auc:0.98398	eval-auc:0.60627




[0]	train-auc:0.89557	eval-auc:0.57833
[1]	train-auc:0.89674	eval-auc:0.59082
[2]	train-auc:0.89645	eval-auc:0.59252
[3]	train-auc:0.89952	eval-auc:0.58386
[4]	train-auc:0.94485	eval-auc:0.58360
[5]	train-auc:0.95135	eval-auc:0.59346
[6]	train-auc:0.95610	eval-auc:0.59618
[7]	train-auc:0.95529	eval-auc:0.59652
[8]	train-auc:0.95705	eval-auc:0.59839
[9]	train-auc:0.96030	eval-auc:0.60263




[0]	train-auc:0.90136	eval-auc:0.61499
[1]	train-auc:0.93155	eval-auc:0.63543
[2]	train-auc:0.93167	eval-auc:0.65404
[3]	train-auc:0.95289	eval-auc:0.67024
[4]	train-auc:0.95399	eval-auc:0.68062
[5]	train-auc:0.96210	eval-auc:0.68062
[6]	train-auc:0.95921	eval-auc:0.68445
[7]	train-auc:0.96347	eval-auc:0.68195
[8]	train-auc:0.96391	eval-auc:0.68445
[9]	train-auc:0.96548	eval-auc:0.68860




[0]	train-auc:0.91225	eval-auc:0.52634
[1]	train-auc:0.92996	eval-auc:0.56517
[2]	train-auc:0.92771	eval-auc:0.54877
[3]	train-auc:0.92974	eval-auc:0.55760
[4]	train-auc:0.93460	eval-auc:0.55913
[5]	train-auc:0.93463	eval-auc:0.55913
[6]	train-auc:0.93583	eval-auc:0.55455
[7]	train-auc:0.93666	eval-auc:0.56389
[8]	train-auc:0.93801	eval-auc:0.56015
[9]	train-auc:0.93881	eval-auc:0.56194




[0]	train-auc:0.90637	eval-auc:0.68175
[1]	train-auc:0.93179	eval-auc:0.68456
[2]	train-auc:0.93178	eval-auc:0.67702
[3]	train-auc:0.94268	eval-auc:0.68456
[4]	train-auc:0.94296	eval-auc:0.68193
[5]	train-auc:0.95062	eval-auc:0.69061
[6]	train-auc:0.94915	eval-auc:0.68658
[7]	train-auc:0.95226	eval-auc:0.69228
[8]	train-auc:0.96067	eval-auc:0.70026
[9]	train-auc:0.96647	eval-auc:0.70114




[0]	train-auc:0.91308	eval-auc:0.62469
[1]	train-auc:0.93939	eval-auc:0.63929
[2]	train-auc:0.93815	eval-auc:0.63871
[3]	train-auc:0.95604	eval-auc:0.64601
[4]	train-auc:0.96827	eval-auc:0.65093
[5]	train-auc:0.96586	eval-auc:0.64486
[6]	train-auc:0.97060	eval-auc:0.65093
[7]	train-auc:0.97161	eval-auc:0.64748
[8]	train-auc:0.97387	eval-auc:0.64871
[9]	train-auc:0.97479	eval-auc:0.64888




[0]	train-auc:0.91843	eval-auc:0.56831
[1]	train-auc:0.92176	eval-auc:0.56433
[2]	train-auc:0.92063	eval-auc:0.57022
[3]	train-auc:0.92826	eval-auc:0.56952
[4]	train-auc:0.92744	eval-auc:0.56848
[5]	train-auc:0.93323	eval-auc:0.57117
[6]	train-auc:0.93368	eval-auc:0.57238
[7]	train-auc:0.93669	eval-auc:0.57247
[8]	train-auc:0.93752	eval-auc:0.56537
[9]	train-auc:0.93994	eval-auc:0.56926




[0]	train-auc:0.93134	eval-auc:0.68541
[1]	train-auc:0.93977	eval-auc:0.69657
[2]	train-auc:0.96775	eval-auc:0.70510
[3]	train-auc:0.96499	eval-auc:0.69630
[4]	train-auc:0.96934	eval-auc:0.69693
[5]	train-auc:0.98201	eval-auc:0.71163
[6]	train-auc:0.98298	eval-auc:0.70056
[7]	train-auc:0.98684	eval-auc:0.71145
[8]	train-auc:0.98498	eval-auc:0.71798
[9]	train-auc:0.98456	eval-auc:0.71290




[0]	train-auc:0.90578	eval-auc:0.62475
[1]	train-auc:0.91558	eval-auc:0.60921
[2]	train-auc:0.91620	eval-auc:0.60590
[3]	train-auc:0.94782	eval-auc:0.62087
[4]	train-auc:0.94624	eval-auc:0.62103
[5]	train-auc:0.95243	eval-auc:0.62533
[6]	train-auc:0.95232	eval-auc:0.62401
[7]	train-auc:0.95501	eval-auc:0.62525
[8]	train-auc:0.95408	eval-auc:0.62748
[9]	train-auc:0.95597	eval-auc:0.62550




[0]	train-auc:0.92146	eval-auc:0.51687
[1]	train-auc:0.94109	eval-auc:0.52839
[2]	train-auc:0.95650	eval-auc:0.56071
[3]	train-auc:0.95898	eval-auc:0.57000
[4]	train-auc:0.95850	eval-auc:0.57536
[5]	train-auc:0.97135	eval-auc:0.59348
[6]	train-auc:0.97498	eval-auc:0.59473
[7]	train-auc:0.97823	eval-auc:0.59714
[8]	train-auc:0.97788	eval-auc:0.59286
[9]	train-auc:0.97933	eval-auc:0.59795




[0]	train-auc:0.91676	eval-auc:0.62331
[1]	train-auc:0.94045	eval-auc:0.62652
[2]	train-auc:0.93723	eval-auc:0.62939
[3]	train-auc:0.94045	eval-auc:0.62525
[4]	train-auc:0.93759	eval-auc:0.62053
[5]	train-auc:0.94188	eval-auc:0.65655
[6]	train-auc:0.94608	eval-auc:0.66135
[7]	train-auc:0.94879	eval-auc:0.65857
[8]	train-auc:0.94862	eval-auc:0.66211
[9]	train-auc:0.95473	eval-auc:0.64921




[0]	train-auc:0.91258	eval-auc:0.61670
[1]	train-auc:0.91565	eval-auc:0.60655
[2]	train-auc:0.94749	eval-auc:0.61653
[3]	train-auc:0.94455	eval-auc:0.63304
[4]	train-auc:0.96949	eval-auc:0.64474
[5]	train-auc:0.96633	eval-auc:0.63880
[6]	train-auc:0.97558	eval-auc:0.63605
[7]	train-auc:0.97740	eval-auc:0.62934
[8]	train-auc:0.98068	eval-auc:0.64585
[9]	train-auc:0.97951	eval-auc:0.64198




[0]	train-auc:0.91884	eval-auc:0.55655
[1]	train-auc:0.92515	eval-auc:0.56176
[2]	train-auc:0.92427	eval-auc:0.56019
[3]	train-auc:0.92973	eval-auc:0.55820
[4]	train-auc:0.93856	eval-auc:0.56159
[5]	train-auc:0.93946	eval-auc:0.55952
[6]	train-auc:0.94020	eval-auc:0.56217
[7]	train-auc:0.94142	eval-auc:0.56523
[8]	train-auc:0.94140	eval-auc:0.57118
[9]	train-auc:0.94114	eval-auc:0.57102




[0]	train-auc:0.91321	eval-auc:0.57285
[1]	train-auc:0.95434	eval-auc:0.56792
[2]	train-auc:0.95015	eval-auc:0.57710
[3]	train-auc:0.97253	eval-auc:0.55966
[4]	train-auc:0.97685	eval-auc:0.55215
[5]	train-auc:0.98474	eval-auc:0.56893
[6]	train-auc:0.98720	eval-auc:0.58428
[7]	train-auc:0.98859	eval-auc:0.58595
[8]	train-auc:0.98884	eval-auc:0.59563
[9]	train-auc:0.98910	eval-auc:0.60280




[0]	train-auc:0.92661	eval-auc:0.51145
[1]	train-auc:0.94525	eval-auc:0.52089
[2]	train-auc:0.94775	eval-auc:0.51947
[3]	train-auc:0.94770	eval-auc:0.51696
[4]	train-auc:0.94717	eval-auc:0.51445
[5]	train-auc:0.95668	eval-auc:0.52439
[6]	train-auc:0.95617	eval-auc:0.52506
[7]	train-auc:0.95861	eval-auc:0.52974
[8]	train-auc:0.96161	eval-auc:0.52832
[9]	train-auc:0.96268	eval-auc:0.53241




[0]	train-auc:0.91519	eval-auc:0.64567
[1]	train-auc:0.91522	eval-auc:0.63847
[2]	train-auc:0.91540	eval-auc:0.62872
[3]	train-auc:0.91622	eval-auc:0.62568
[4]	train-auc:0.92154	eval-auc:0.63072
[5]	train-auc:0.92254	eval-auc:0.62944
[6]	train-auc:0.92698	eval-auc:0.63080
[7]	train-auc:0.92829	eval-auc:0.63032
[8]	train-auc:0.92756	eval-auc:0.62976
[9]	train-auc:0.93129	eval-auc:0.63200




[0]	train-auc:0.90363	eval-auc:0.66633
[1]	train-auc:0.95583	eval-auc:0.69109
[2]	train-auc:0.95793	eval-auc:0.69025
[3]	train-auc:0.96105	eval-auc:0.68399
[4]	train-auc:0.95881	eval-auc:0.68704
[5]	train-auc:0.95814	eval-auc:0.69008
[6]	train-auc:0.96225	eval-auc:0.69304
[7]	train-auc:0.96329	eval-auc:0.69084
[8]	train-auc:0.96762	eval-auc:0.70960
[9]	train-auc:0.96944	eval-auc:0.70850




[0]	train-auc:0.92603	eval-auc:0.55911
[1]	train-auc:0.93154	eval-auc:0.56430
[2]	train-auc:0.93923	eval-auc:0.56266
[3]	train-auc:0.93849	eval-auc:0.56029
[4]	train-auc:0.94391	eval-auc:0.56585
[5]	train-auc:0.94323	eval-auc:0.56648
[6]	train-auc:0.94375	eval-auc:0.56794
[7]	train-auc:0.94361	eval-auc:0.57031
[8]	train-auc:0.94359	eval-auc:0.56794
[9]	train-auc:0.95032	eval-auc:0.56302




[0]	train-auc:0.91837	eval-auc:0.60773
[1]	train-auc:0.92230	eval-auc:0.60941
[2]	train-auc:0.93886	eval-auc:0.61773
[3]	train-auc:0.94177	eval-auc:0.61176
[4]	train-auc:0.94259	eval-auc:0.61597
[5]	train-auc:0.95143	eval-auc:0.62252
[6]	train-auc:0.95335	eval-auc:0.62647
[7]	train-auc:0.96135	eval-auc:0.62849
[8]	train-auc:0.96308	eval-auc:0.63605
[9]	train-auc:0.96437	eval-auc:0.62756




[0]	train-auc:0.89004	eval-auc:0.65057
[1]	train-auc:0.88960	eval-auc:0.63466
[2]	train-auc:0.88955	eval-auc:0.63835
[3]	train-auc:0.90941	eval-auc:0.65142
[4]	train-auc:0.92576	eval-auc:0.62112
[5]	train-auc:0.92761	eval-auc:0.62955
[6]	train-auc:0.93345	eval-auc:0.61856
[7]	train-auc:0.93334	eval-auc:0.62027
[8]	train-auc:0.93708	eval-auc:0.62169
[9]	train-auc:0.93429	eval-auc:0.62131




[0]	train-auc:0.90556	eval-auc:0.57577
[1]	train-auc:0.90522	eval-auc:0.56627
[2]	train-auc:0.90415	eval-auc:0.56609
[3]	train-auc:0.90654	eval-auc:0.57362
[4]	train-auc:0.90534	eval-auc:0.56618
[5]	train-auc:0.90430	eval-auc:0.56824
[6]	train-auc:0.90743	eval-auc:0.57039
[7]	train-auc:0.91906	eval-auc:0.57676
[8]	train-auc:0.92095	eval-auc:0.57891
[9]	train-auc:0.92396	eval-auc:0.58025




[0]	train-auc:0.88583	eval-auc:0.60697
[1]	train-auc:0.88558	eval-auc:0.62095
[2]	train-auc:0.91773	eval-auc:0.63064
[3]	train-auc:0.92108	eval-auc:0.64805
[4]	train-auc:0.93138	eval-auc:0.64659
[5]	train-auc:0.93203	eval-auc:0.64522
[6]	train-auc:0.93239	eval-auc:0.64985
[7]	train-auc:0.94987	eval-auc:0.66838
[8]	train-auc:0.95480	eval-auc:0.67422
[9]	train-auc:0.96612	eval-auc:0.67482




[0]	train-auc:0.86780	eval-auc:0.59701
[1]	train-auc:0.87110	eval-auc:0.58835
[2]	train-auc:0.87242	eval-auc:0.58835
[3]	train-auc:0.89565	eval-auc:0.57180
[4]	train-auc:0.93961	eval-auc:0.61408
[5]	train-auc:0.93470	eval-auc:0.61117
[6]	train-auc:0.95146	eval-auc:0.62884
[7]	train-auc:0.95348	eval-auc:0.61932
[8]	train-auc:0.96097	eval-auc:0.63158
[9]	train-auc:0.95817	eval-auc:0.62112




[0]	train-auc:0.92130	eval-auc:0.60521
[1]	train-auc:0.92122	eval-auc:0.61466
[2]	train-auc:0.92082	eval-auc:0.61522
[3]	train-auc:0.92070	eval-auc:0.61448
[4]	train-auc:0.93368	eval-auc:0.62291
[5]	train-auc:0.94083	eval-auc:0.62959
[6]	train-auc:0.94418	eval-auc:0.63061
[7]	train-auc:0.94630	eval-auc:0.62838
[8]	train-auc:0.95020	eval-auc:0.62152
[9]	train-auc:0.95482	eval-auc:0.62709




[0]	train-auc:0.92522	eval-auc:0.60023
[1]	train-auc:0.92925	eval-auc:0.61307
[2]	train-auc:0.93854	eval-auc:0.61807
[3]	train-auc:0.95804	eval-auc:0.63822
[4]	train-auc:0.96562	eval-auc:0.66025
[5]	train-auc:0.96612	eval-auc:0.67558
[6]	train-auc:0.96606	eval-auc:0.67434
[7]	train-auc:0.96603	eval-auc:0.67853
[8]	train-auc:0.96584	eval-auc:0.68272
[9]	train-auc:0.96517	eval-auc:0.68486




[0]	train-auc:0.89546	eval-auc:0.52738
[1]	train-auc:0.94812	eval-auc:0.49380
[2]	train-auc:0.94384	eval-auc:0.49776
[3]	train-auc:0.94103	eval-auc:0.49742
[4]	train-auc:0.94642	eval-auc:0.49957
[5]	train-auc:0.94416	eval-auc:0.50439
[6]	train-auc:0.94749	eval-auc:0.50887
[7]	train-auc:0.94566	eval-auc:0.50835
[8]	train-auc:0.94879	eval-auc:0.50818
[9]	train-auc:0.94851	eval-auc:0.51205




[0]	train-auc:0.91437	eval-auc:0.62802
[1]	train-auc:0.92177	eval-auc:0.63562
[2]	train-auc:0.92087	eval-auc:0.64954
[3]	train-auc:0.92235	eval-auc:0.64716
[4]	train-auc:0.92506	eval-auc:0.64258
[5]	train-auc:0.92624	eval-auc:0.64148
[6]	train-auc:0.92642	eval-auc:0.63645
[7]	train-auc:0.92736	eval-auc:0.63297
[8]	train-auc:0.93772	eval-auc:0.66264
[9]	train-auc:0.95145	eval-auc:0.66163




[0]	train-auc:0.92199	eval-auc:0.64739
[1]	train-auc:0.92504	eval-auc:0.63202
[2]	train-auc:0.92660	eval-auc:0.61469
[3]	train-auc:0.92885	eval-auc:0.61594
[4]	train-auc:0.92541	eval-auc:0.61247
[5]	train-auc:0.92562	eval-auc:0.62020
[6]	train-auc:0.92522	eval-auc:0.61469
[7]	train-auc:0.92589	eval-auc:0.62056
[8]	train-auc:0.92666	eval-auc:0.61549
[9]	train-auc:0.92669	eval-auc:0.61532




[0]	train-auc:0.92999	eval-auc:0.63649
[1]	train-auc:0.93685	eval-auc:0.62397
[2]	train-auc:0.93794	eval-auc:0.62853
[3]	train-auc:0.94821	eval-auc:0.66038
[4]	train-auc:0.94480	eval-auc:0.65501
[5]	train-auc:0.95334	eval-auc:0.65832
[6]	train-auc:0.95348	eval-auc:0.66225
[7]	train-auc:0.95577	eval-auc:0.65653
[8]	train-auc:0.95333	eval-auc:0.66404
[9]	train-auc:0.95557	eval-auc:0.65921




In [21]:
# Store average results for each cluster
average_results_per_cluster = {}

for cluster_label, metrics in results.items():
    average_results_per_cluster[cluster_label] = {
        'avg_accuracy': sum(metrics['accuracy']) / len(metrics['accuracy']),
        'avg_auc': sum(metrics['auc']) / len(metrics['auc']),
        'avg_f1': sum(metrics['f1']) / len(metrics['f1']),
        'avg_recall': sum(metrics['recall']) / len(metrics['recall']),
        'avg_fp1':sum(metrics['F1 Positive']) / len(metrics['F1 Positive']),
        'avg_pre':sum(metrics['Precision']) / len(metrics['Precision'])
    }

# Calculate overall averages
all_accuracies = [avg['avg_accuracy'] for cluster, avg in average_results_per_cluster.items()]
all_aucs = [avg['avg_auc'] for cluster, avg in average_results_per_cluster.items()]
all_f1s = [avg['avg_f1'] for cluster, avg in average_results_per_cluster.items()]
all_recalls = [avg['avg_recall'] for cluster, avg in average_results_per_cluster.items()]
all_fp1 = [avg['avg_fp1'] for cluster, avg in average_results_per_cluster.items()]
all_pre = [avg['avg_pre'] for cluster, avg in average_results_per_cluster.items()]

print("\n")

overall_avg_results = {
    'overall_avg_accuracy': sum(all_accuracies) / len(all_accuracies),
    'overall_avg_auc': sum(all_aucs) / len(all_aucs),
    'overall_avg_f1': sum(all_f1s) / len(all_f1s),
    'overall_avg_recall': sum(all_recalls) / len(all_recalls),
    'overall_avg_f1p' : sum(all_fp1) / len(all_fp1),
    'overall_avg_pre' : sum(all_pre) / len(all_pre)
}

print(average_results_per_cluster)
print("\n")
print(overall_avg_results)



{2: {'avg_accuracy': 0.5376149983176912, 'avg_auc': 0.4920580409184682, 'avg_f1': 0.4449902139968375, 'avg_recall': 0.47407937083956386, 'avg_fp1': 0.28406282380072856, 'avg_pre': 0.4832865550219083}, 0: {'avg_accuracy': 0.5133018708256417, 'avg_auc': 0.5225880985292437, 'avg_f1': 0.4702400151212414, 'avg_recall': 0.5052543796483762, 'avg_fp1': 0.3514465558802181, 'avg_pre': 0.5043785445651633}, 1: {'avg_accuracy': 0.5116192355054808, 'avg_auc': 0.5292789876602001, 'avg_f1': 0.4572959452140577, 'avg_recall': 0.5211456474120524, 'avg_fp1': 0.38153066769925287, 'avg_pre': 0.5189874723783882}}


{'overall_avg_accuracy': 0.5208453682162713, 'overall_avg_auc': 0.5146417090359706, 'overall_avg_f1': 0.45750872477737886, 'overall_avg_recall': 0.5001597992999974, 'overall_avg_f1p': 0.33901334912673314, 'overall_avg_pre': 0.5022175239884866}
