# Import Prerequisite Libraries

In [28]:
from Funcs.Setup import *
import numpy as np
import pandas as pd

# import statsmodels.api as sm
from sklearn.feature_selection import VarianceThreshold
# from ISLP import confusion_table
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score, LeaveOneOut
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier 
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV, KFold, StratifiedGroupKFold, StratifiedKFold
from sklearn.feature_selection import SelectFromModel
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, roc_auc_score, f1_score)
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from datetime import timedelta as td
from sklearn.model_selection import GroupKFold
from imblearn.over_sampling import SMOTENC, SMOTE
from sklearn.impute import KNNImputer
import warnings
from tqdm import tqdm
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LassoCV
import shap
import matplotlib.pyplot as plt
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK
import optuna
from collections import Counter
from sklearn.metrics import balanced_accuracy_score
warnings.filterwarnings("ignore")


# Load Dataset

In [29]:
RANDOM_STATE = 42

In [30]:
def user_demographic(demo):
    demo = pd.get_dummies(demo, columns=['Gender'])
    selected_columns = ['UID', 'Extroversion', 'Agreeableness', 'Conscientiousness', 'Neuroticism', 'Openness', 'PHQ-9', 'GAD-7', 'PSS', 'GHQ', 'Gender_F', 'Gender_M']
    demo = demo[selected_columns]
    demo = demo.rename(columns={'UID': 'uid'})  # rename UID to uid
    return demo

In [31]:
def get_part_of_day(hour):
    if 5 <= hour < 12:
        return 1 #Morning
    elif 12 <= hour < 17:
        return 2 #Afternoon
    elif 17 <= hour < 21:
        return 3 #Evening
    else:
        return 4 #Night

In [None]:
def load_aqara_data_updated(file_name, hour, yesterday):
    aqara_data = pd.read_csv(file_name, index_col=None)
            
    suffix = f'_{hour*60}min'

    filtered_columns = [col for col in aqara_data.columns if col.endswith(suffix)]
     
    # Select the filtered columns along with any essential columns like 'uid', 'startTime'
    essential_columns = ['uid', 'startTime']
    aqara_features = aqara_data[essential_columns + filtered_columns]

    if yesterday == True:
        # 각 행에서 '[None]' 인 열이 하나라도 있으면, 그 행 제거
        aqara_features = aqara_features.replace(np.nan, 0)
        aqara_features = aqara_features.replace('[None]', np.nan)

    # Identify aqara columns only (after filtering by hour)
    aqara_columns_only = [col for col in aqara_features.columns if col.startswith('aqara_')]

    for col in aqara_columns_only:
        # Convert the column to numeric, setting errors='coerce' will turn invalid parsing into NaN
        aqara_features[col] = pd.to_numeric(aqara_features[col], errors='coerce')
        
        # Calculate the mean per uid
        mean_per_uid = aqara_features.groupby('uid')[col].transform('mean')
        
        # Calculate variance and comparison
        aqara_features[f'{col}_deviation'] = aqara_features[col] - mean_per_uid
        aqara_features[f'{col}_comparison'] = (aqara_features[col] > mean_per_uid).astype(int) - (aqara_features[col] < mean_per_uid).astype(int)

    # Calculate routines based on filtered columns
    if yesterday == False:
        fridge_columns = [col for col in aqara_columns_only if 'fridge_ImmediatePast' in col]
        microwave_columns = [col for col in aqara_columns_only if 'microwave_ImmediatePast' in col]

        cleaner_columns = [col for col in aqara_columns_only if 'cleaner_ImmediatePast' in col]
        washer_columns = [col for col in aqara_columns_only if 'washer_ImmediatePast' in col]
    
        if fridge_columns and microwave_columns:
            aqara_features[f'aqara_eating_routine_immediatePast_{hour*60}min'] = (
                (aqara_features[fridge_columns[0]] >= 1) | 
                (aqara_features[microwave_columns[0]] >= 1)
            ).astype(int)
            
        if cleaner_columns and washer_columns:
            aqara_features[f'aqara_chores_routine_immediatePast_{hour*60}min'] = (
                (aqara_features[cleaner_columns[0]] >= 1) | 
                (aqara_features[washer_columns[0]] >= 1)
            ).astype(int)

    else:
        fridge_columns = [col for col in aqara_columns_only if 'fridge_yesterday' in col and 'mean' in col]
        microwave_columns = [col for col in aqara_columns_only if 'microwave_yesterday' in col and 'mean' in col]

        cleaner_columns = [col for col in aqara_columns_only if 'cleaner_yesterday' in col and 'mean' in col]
        washer_columns = [col for col in aqara_columns_only if 'washer_yesterday' in col and 'mean' in col]

        # 공통 시간대 접미사를 찾고 새로운 열을 생성
        for fridge_col in fridge_columns:
            for microwave_col in microwave_columns:
                # 공통 시간대 접미사를 확인
                if fridge_col.split('fridge_yesterday_')[1] == microwave_col.split('microwave_yesterday_')[1]:
                    common_suffix = fridge_col.split('fridge_yesterday_')[1]  # 공통 접미사 추출
                    new_col_name = f'aqara_eating_routine_yesterday_{common_suffix}'

                    # 새로운 열을 생성하고 값을 설정
                    aqara_features[new_col_name] = (
                        (aqara_features[fridge_col] >= 1) | 
                        (aqara_features[microwave_col] >= 1)
                    ).astype(int)

        # 공통 시간대 접미사를 찾고 새로운 열을 생성 (chores routine)
        for cleaner_col in cleaner_columns:
            for washer_col in washer_columns:
                # 공통 시간대 접미사를 확인
                if cleaner_col.split('cleaner_yesterday_')[1] == washer_col.split('washer_yesterday_')[1]:
                    common_suffix = cleaner_col.split('cleaner_yesterday_')[1]  # 공통 접미사 추출
                    new_col_name = f'aqara_chores_routine_yesterday_{common_suffix}'

                    # 새로운 열을 생성하고 값을 설정
                    aqara_features[new_col_name] = (
                        (aqara_features[cleaner_col] >= 1) | 
                        (aqara_features[washer_col] >= 1)
                    ).astype(int)


    return aqara_features


In [None]:
def load_smartphone_data(file_name):
    smartphone = load(file_name)
    smartphone_feature, _, uids, dates = smartphone

    smartphone_feature = smartphone_feature.apply(lambda x: x.astype(int) if x.dtype == 'bool' else x.astype('float64'), axis=0)

    df_smartphone = pd.concat([smartphone_feature, pd.DataFrame(uids.tolist(), columns=['uid']), pd.DataFrame(dates.tolist(), columns=['timestamp'])], axis=1)
    df_smartphone['uid'] = df_smartphone['uid'].str.replace('P', '').astype('int64')
    df_smartphone['hour'] = df_smartphone['timestamp'].dt.hour
    df_smartphone['part_of_day'] = df_smartphone['hour'].apply(get_part_of_day)
    df_smartphone['timestamp'] = df_smartphone['timestamp'].apply(to_unix_timestamp)
        

    # df_smartphone 의 LOC_LABEL#RLV_SUP=eating#ImmediatePast_60,LOC_LABEL#RLV_SUP=home#ImmediatePast_60,LOC_LABEL#RLV_SUP=work#ImmediatePast_60,LOC_LABEL#RLV_SUP=social#ImmediatePast_60,LOC_LABEL#RLV_SUP=others#ImmediatePast_60 열 중 LOC_LABEL#RLV_SUP=eating#ImmediatePast_60 이 가장 큰 값을 가지는 경우 1, 아니면 0
    df_smartphone['eating_routine_immediatePast_60'] = (
        df_smartphone['LOC_LABEL#RLV_SUP=eating#ImmediatePast_60'] == df_smartphone[
            [
                'LOC_LABEL#RLV_SUP=eating#ImmediatePast_60', 
                'LOC_LABEL#RLV_SUP=home#ImmediatePast_60', 
                'LOC_LABEL#RLV_SUP=work#ImmediatePast_60', 
                'LOC_LABEL#RLV_SUP=social#ImmediatePast_60', 
                'LOC_LABEL#RLV_SUP=others#ImmediatePast_60'
            ]
        ].max(axis=1)
        ).astype(int)

    df_smartphone['eating_routine_YesterdayDawn'] = (
        df_smartphone['LOC_LABEL#RLV_SUP=eating#YesterdayDawn'] == df_smartphone[
            [
                'LOC_LABEL#RLV_SUP=eating#YesterdayDawn', 
                'LOC_LABEL#RLV_SUP=home#YesterdayDawn', 
                'LOC_LABEL#RLV_SUP=work#YesterdayDawn', 
                'LOC_LABEL#RLV_SUP=social#YesterdayDawn', 
                'LOC_LABEL#RLV_SUP=others#YesterdayDawn'
            ]
        ].max(axis=1)
        ).astype(int)

    df_smartphone['eating_routine_YesterdayMorning'] = (
        df_smartphone['LOC_LABEL#RLV_SUP=eating#YesterdayMorning'] == df_smartphone[
            [
                'LOC_LABEL#RLV_SUP=eating#YesterdayMorning', 
                'LOC_LABEL#RLV_SUP=home#YesterdayMorning', 
                'LOC_LABEL#RLV_SUP=work#YesterdayMorning', 
                'LOC_LABEL#RLV_SUP=social#YesterdayMorning', 
                'LOC_LABEL#RLV_SUP=others#YesterdayMorning'
            ]
        ].max(axis=1)
        ).astype(int)

    df_smartphone['eating_routine_YesterdayAfternoon'] = (
        df_smartphone['LOC_LABEL#RLV_SUP=eating#YesterdayAfternoon'] == df_smartphone[
            [
                'LOC_LABEL#RLV_SUP=eating#YesterdayAfternoon', 
                'LOC_LABEL#RLV_SUP=home#YesterdayAfternoon', 
                'LOC_LABEL#RLV_SUP=work#YesterdayAfternoon', 
                'LOC_LABEL#RLV_SUP=social#YesterdayAfternoon', 
                'LOC_LABEL#RLV_SUP=others#YesterdayAfternoon'
            ]
        ].max(axis=1)
        ).astype(int)
    

    df_smartphone['eating_routine_YesterdayEvening'] = (
        df_smartphone['LOC_LABEL#RLV_SUP=eating#YesterdayEvening'] == df_smartphone[
            [
                'LOC_LABEL#RLV_SUP=eating#YesterdayEvening', 
                'LOC_LABEL#RLV_SUP=home#YesterdayEvening', 
                'LOC_LABEL#RLV_SUP=work#YesterdayEvening', 
                'LOC_LABEL#RLV_SUP=social#YesterdayEvening', 
                'LOC_LABEL#RLV_SUP=others#YesterdayEvening'
            ]
        ].max(axis=1)
        ).astype(int)
    
    df_smartphone['eating_routine_YesterdayNight'] = (
        df_smartphone['LOC_LABEL#RLV_SUP=eating#YesterdayNight'] == df_smartphone[
            [
                'LOC_LABEL#RLV_SUP=eating#YesterdayNight', 
                'LOC_LABEL#RLV_SUP=home#YesterdayNight', 
                'LOC_LABEL#RLV_SUP=work#YesterdayNight', 
                'LOC_LABEL#RLV_SUP=social#YesterdayNight', 
                'LOC_LABEL#RLV_SUP=others#YesterdayNight'
            ]
        ].max(axis=1)
        ).astype(int)
    
    # Today Epoch feature removed
    df_smartphone = df_smartphone[df_smartphone.columns.drop(list(df_smartphone.filter(regex='_Today')))]

    return df_smartphone

In [34]:
def load_label_data(file_name):

    label = pd.read_csv(file_name, index_col=None)
    label['phq4_result'] = label['phq2_result'] + label['gad2_result']
    df_label = binarize_by_range(label)

    return label

In [35]:
df_label = load_label_data('FEATURES/label_2023.csv')
df_audio = pd.read_pickle('FEATURES/librosa_modified.pkl')
df_speech = pd.read_csv('FEATURES/speech_data_2023.csv', index_col=None) 
df_bluSensor = pd.read_csv('FEATURES/bluSensor_features_15min.csv', index_col=None)
df_fitbit = pd.read_csv('FEATURES/fitbit_features_24h.csv', index_col=None)
# df_aqara = load_aqara_data('FEATURES/aqara_features_12h_24h_without_env.csv')
df_aqara_before = load_aqara_data_updated('FEATURES/aqara_before_1h_3h_6h_12h_and.csv', 1, yesterday=False)
df_aqara_yesterday = load_aqara_data_updated('FEATURES/aqara_yesterday_12h.csv', 12, yesterday=True)

df_withings = pd.read_csv('FEATURES/withings_features_24h.csv', index_col=None)
df_pre_survey = pd.read_csv('FEATURES/user_demographics_pre_test_2023.csv', index_col=None)
df_smartphone = load_smartphone_data('FEATURES/smartphone_features_60min_yesterday_today_impute_median.pkl')
df_demo = user_demographic(df_pre_survey)

df_label.rename(columns={'startTime': 'timestamp'}, inplace=True)
df_audio.rename(columns={'startTime': 'timestamp'}, inplace=True)
df_aqara_before.rename(columns={'startTime': 'timestamp'}, inplace=True)
df_aqara_yesterday.rename(columns={'startTime': 'timestamp'}, inplace=True)
df_withings.rename(columns={'startTime': 'timestamp'}, inplace=True)
df_fitbit.rename(columns={'startTime': 'timestamp'}, inplace=True)
df_demo.rename(columns={'startTime': 'timestamp'}, inplace=True)
df_speech.rename(columns={'startTime': 'timestamp'}, inplace=True)
df_bluSensor.rename(columns={'startTime': 'timestamp'}, inplace=True)




In [36]:
len(df_label)

1619

# Use Data Combination

In [None]:
data_sources = [
    (df_smartphone, ['uid', 'timestamp'], None, None),   # Smartphone 데이터
    (df_audio, ['uid', 'timestamp'], lambda df: df['duration'] > 0, None), # Audio 데이터, 조건 적용
    (df_aqara_before, ['uid', 'timestamp'], None, 0),       # aqara 센서 데이터
    (df_aqara_yesterday, ['uid', 'timestamp'], None, 0),       # aqara 센서 데이터
    (df_withings, ['uid', 'timestamp'], None, 0),    # withings 센서 데이터
    (df_fitbit, ['uid', 'timestamp'], None, 0),   # Fitbit 데이터
    # (df_demo, ['uid'], None, None),                  # 데모그래픽 데이터
    (df_speech, ['uid', 'timestamp'], None, None),
    (df_bluSensor, ['uid', 'timestamp'], None, None), # 블루센서 데이터 
]

In [38]:
def data_sources_combinations(base_df, data_sources):
    merged_df = base_df.copy()
    for source_df, on_columns, filter_condition, fill_na in data_sources:
        merged_df = pd.merge(merged_df, source_df, on=on_columns, how='left')
       
        if filter_condition is not None:
            merged_df = merged_df.loc[filter_condition(merged_df)]

        if fill_na is not None:
            merged_df.fillna(fill_na, inplace=True)

    return merged_df

In [39]:
# Drop the user with skewed label distribution
# df_label = df_label.groupby('uid').filter(lambda x: (x[label].nunique() > 1))
base_df = df_label.copy()

merged_df = data_sources_combinations(base_df, data_sources)
merged_df.columns = merged_df.columns.map(str)

unnamed_columns = [col for col in merged_df.columns if 'Unnamed' in col]
merged_df = merged_df.drop(columns=unnamed_columns)
merged_df = merged_df.set_index('timestamp').sort_index()

# print(merged_df.shape)
# rows_with_nan = merged_df[merged_df.isna().any(axis=1)]
# print(rows_with_nan.shape)

In [40]:
# Unnamed가 포함된 컬럼 이름이 있는지 확인하는 코드
merged_df.columns = merged_df.columns.map(str)
unnamed_columns = [col for col in merged_df.columns if 'Unnamed' in col]

if unnamed_columns:
    print(f"Unnamed가 포함된 컬럼 이름: {unnamed_columns}")
else:
    print("Unnamed가 포함된 컬럼 이름이 없습니다.")

Unnamed가 포함된 컬럼 이름이 없습니다.


In [None]:
### ESM에서 음성 데이터까지 완벽하게 있는 데이터만 필터링 
# merged_df: 음성 제외 원하는 source merged_df
merged_df = pd.merge(merged_df, df_audio[df_audio['duration'] > 0], on=['uid', 'timestamp'], how='inner')
all_merged_dropna_df = merged_df.dropna()
merged_df = merged_df.iloc[:, :-182]

In [44]:
# categorical data 컬럼 개수 출력
categorical_data = merged_df.select_dtypes(include=['object'])
print(f"Categorical data 컬럼 개수: {len(categorical_data.columns)}")

# numerical data 컬럼 개수 출력
numerical_data = merged_df.select_dtypes(include=['float64', 'int'])
print(f"Numerical data 컬럼 개수: {len(numerical_data.columns)}")

Categorical data 컬럼 개수: 0
Numerical data 컬럼 개수: 2925


In [45]:
# Missing value 개수 확인
missing_values = merged_df.isnull().sum().sum()
print(f"Missing values: {missing_values}")

Missing values: 30349


In [46]:
len(merged_df)

1157

# Classification Models

In [47]:
def check_single_class_groups(df, label):
    single_class_groups = []
    grouped = df.groupby('uid')

    for group_id, group in grouped:
        if len(group[label].unique()) == 1:
            single_class_groups.append(group_id)
    
    return single_class_groups



In [48]:
def data_splitting(df, label):

    to_drop = ['phq4_result', 'phq4_result_binary', 'phq2_result', 'gad2_result', 'stress_result', 'posNeg_result', 'arousal_result', 'phq2_result_binary', 'gad2_result_binary', 'stress_result_binary', 'posNeg_result_binary', 'arousal_result_binary']

    # define X and y dataset
    X = df.drop(columns=to_drop + ['uid'])
    y = df[label].values
    UIDS = df['uid'].values

    
    return X, y, UIDS

In [49]:
def remove_user_with_skewed_label(data, label):
    # 'phq_result'가 0인 비율 계산
    label_ratios = data[data[label] == 0].groupby('uid').size() / data.groupby('uid').size()
    label_ratios = label_ratios.fillna(0)

    users_to_remove = label_ratios[(label_ratios < 0.1) | (label_ratios > 0.9)].index
    # print(label_ratios)  

    return users_to_remove

In [50]:
def select_features_lasso(data, target):
    """
    Performs feature selection using LASSO regression.
    
    Args:
        data (pd.DataFrame): The input dataset with features.
        target (pd.Series): The target variable.
        
    Returns:
        pd.DataFrame: The dataset with features selected by LASSO.
    """
    lasso = LassoCV(cv=5, random_state=42, alphas=[0.001, 0.01, 0.1, 1, 10]).fit(data, target)
    selected_features = data.columns[lasso.coef_ != 0]
    return selected_features


In [51]:
# 소수 클래스의 크기를 가져오는 함수
def get_minority_class_size(y):
    counter = Counter(y)
    return min(counter.values())

In [None]:
def train_ml(data, label, model_instance, zero, high_pariwise, lasso, scaling, cv_method='loso', oversample=False, sampling_method=None, correlation_threshold=None):
    """
    Classify data using a given pre-instantiated model with a Leave-One-Subject-Out (LOSO) cross-validation approach
    on the complete data of 5 weeks.

    Args:
    - data: The dataset to classify, expected to span exactly 5 weeks.
    - model_instance: A pre-instantiated model ready for training.
    - zero: Indicates whether to remove features with zero variance.
    - high_pariwise: Indicates whether to remove highly correlated features.
    - oversample: Indicates whether to apply oversampling to the training data.
    - sampling_method: The strategy to use for oversampling if enabled.
    - correlation_threshold: Threshold for removing highly correlated features.
    """

    #interpolate to fill missing values as much as possible
    # data = data.groupby('uid').apply(lambda group: group.interpolate())

    X, y, groups = data_splitting(data, label)
    num_users = np.unique(groups).size
    y = pd.Series(y).astype(float)

 
    # Select cross-validation method
    if cv_method == 'loso':
        cv = LeaveOneGroupOut().split(X, y, groups=groups)
    elif cv_method == 'kfold':
        cv = KFold(n_splits=10, shuffle=False, random_state=RANDOM_STATE).split(X, y)
    elif cv_method == 'groupkfold':
        cv = StratifiedGroupKFold(n_splits=5, shuffle=False, random_state=RANDOM_STATE).split(X, y, groups)
    else:         
        raise ValueError("cv_method must be 'louo' or 'kfold'")

    res = []
    feature_importances_list = []

    for train_index, test_index in cv:
    # for train_index, test_index in tqdm(logo.split(X, y, groups), total=logo.get_n_splits(X, y, groups), desc="LOSO"):
        # print("X shape", X.shape)
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y[train_index], y[test_index]
    
        if cv_method != 'kfold':
            uid_test = groups[test_index]

        # Impute missing values using KNNImputer
        X_train.columns = X_train.columns.astype(str)
        numeric_cols = X_train.select_dtypes(include=['float64', 'int']).columns

        # Apply KNNImputer only to numeric columns 
        knn_imputer = KNNImputer(n_neighbors=3)
        X_train_imputed = knn_imputer.fit_transform(X_train[numeric_cols])
        X_test_imputed = knn_imputer.transform(X_test[numeric_cols])

        # Replace the numeric columns in the original df with imputed values
        X_train[numeric_cols] = X_train_imputed
        X_test[numeric_cols] = X_test_imputed
  
        if zero:
            # Remove any features that have the same value in all rows
            selector = VarianceThreshold(threshold=0)
            X_train_var_threshold = selector.fit_transform(X_train)
            selected_features = selector.get_support(indices=True)
            selected_columns = X_train.columns[selected_features]

            X_train = pd.DataFrame(X_train_var_threshold, columns=selected_columns)
            X_test = X_test.iloc[:, selected_features]


        if high_pariwise:
            # Identify and drop highly correlated features on the training set
            # X_train, to_drop = filter_high_correlation(X_train, threshold=correlation_threshold)
            to_drop = filter_high_correlation(X_train, threshold=correlation_threshold)
            X_train = X_train.drop(columns=to_drop)
            X_test = X_test.drop(columns=to_drop)

        if lasso:
            selected_features = select_features_lasso(X_train, y_train)
            X_train = X_train[selected_features]
            X_test = X_test[selected_features]
        
        if len(selected_features) == 0:
            # Handle case with no selected features
            raise ValueError("No features were selected by Lasso. Try using a lower alpha value or check data quality.")


        if scaling == True:
            # Apply standard scaling separately to the training and testing sets
            scaler = StandardScaler()
            X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
            X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)


        # Apply oversampling if enabled
        if oversample:
            smote = SMOTE(sampling_strategy=sampling_method, random_state=42)
            X_train, y_train = smote.fit_resample(X_train, y_train)

    
        print("X shape", X_train.shape)
        # Convert back to NumPy arrays for model training and evaluation
        X_train = X_train.values
        X_test = X_test.values

        # Train the provided model instance on the training set
        model_instance.fit(X_train, y_train)

        # Evaluate the model on the test set
        y_pred = model_instance.predict(X_test)
        y_pred_proba = model_instance.predict_proba(X_test)[:, 1]

        f1_weighted = f1_score(y_test, y_pred, average='weighted')
        f1_macro = f1_score(y_test, y_pred, average='macro')
        accuracy = accuracy_score(y_test, y_pred)
        auc = roc_auc_score(y_test, y_pred_proba)

        
        # Append results for this iteration
        if cv_method == 'loso':
            res.append((set(uid_test), auc, accuracy, f1_macro))
            print("uid: ", set(uid_test), "\t auc: ", auc, "\t f1_macro: ", f1_macro, "\t accuracy: ", accuracy)
        elif cv_method == 'kfold':
            res.append((auc, accuracy, f1_macro))
            print("\t auc: ", auc, "\t f1_macro: ", f1_macro, "\t accuracy: ", accuracy)
    
    return res

In [55]:
def personalization_fold(data, label, model_instance, zero, high_pariwise, lasso, oversample=False, sampling_method=None, correlation_threshold=None):
   
    res = []

    users = sorted(data['uid'].unique())
    print(users)


    # 각 사용자에 대해 partial personalization 수행
    for target_user in users:
        
        target_user_data = data[data['uid'] == target_user]
        target_user_data = target_user_data.sort_index()
        
        X, y, _ = data_splitting(target_user_data, label)
        
        # k-fold cross validation
        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    

        for train_index, test_index in cv.split(X, y):

            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y[train_index], y[test_index]


            if zero:
                selector = VarianceThreshold(threshold=0)
                X_train_var_threshold = selector.fit_transform(X_train)
                selected_features = selector.get_support(indices=True)
                selected_columns = X_train.columns[selected_features]

                X_train = pd.DataFrame(X_train_var_threshold, columns=selected_columns)
                X_test = X_test.iloc[:, selected_features]

            if high_pariwise:
                to_drop = filter_high_correlation(X_train, threshold=correlation_threshold)
                X_train = X_train.drop(columns=to_drop)
                X_test = X_test.drop(columns=to_drop)

            if lasso:
                selected_features = select_features_lasso(X_train, y_train)
                X_train = X_train[selected_features]
                X_test = X_test[selected_features]

            scaler = StandardScaler()
            X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
            # X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)
            X_test = pd.DataFrame(scaler.fit_transform(X_test), columns=X_test.columns)
            
            # 이웃 수 결정
            minority_class_size = get_minority_class_size(y_train)
            n_neighbors = max(1, min(5, minority_class_size - 1))  # 최소값을 1로 설정하여 0이 되지 않도록 함
            
            if oversample:
                # smote = SMOTE(sampling_strategy=sampling_method, random_state=42)
                # X_train, y_train = smote.fit_resample(X_train, y_train)
                
                if minority_class_size > 1:  # 소수 클래스 샘플이 1보다 많을 때만 SMOTE 적용
                    smote = SMOTE(sampling_strategy='auto', random_state=42, k_neighbors=n_neighbors)
                    X_train, y_train = smote.fit_resample(X_train, y_train)
                else:
                    X_train, y_train = X_train, y_train  # SMOTE를 적용할 수 없는 경우 원본 데이터 사용
  
            # 결과 확인
            # print("원래 훈련 세트 크기:", X_train.shape, y_train.shape)
            # print("SMOTE 적용 후 훈련 세트 크기:", X_train_resampled.shape, y_train_resampled.shape)

            X_train = X_train.values
            X_test = X_test.values

            model_instance.fit(X_train, y_train)

    
            y_pred = model_instance.predict(X_test)
            y_pred_proba = model_instance.predict_proba(X_test)[:, 1]

            f1_macro = f1_score(y_test, y_pred, average='macro')
            accuracy = accuracy_score(y_test, y_pred)
            balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
            auc = roc_auc_score(y_test, y_pred_proba)
           
            res.append((str(target_user), auc, accuracy, balanced_accuracy, f1_macro))
            print("uid: ", target_user, "\t auc: ", auc, "\t accuracy: ", accuracy,  "\t balanced_accuray", balanced_accuracy, "\t f1_macro: ", f1_macro, )
         
            # ROC AUC 점수 계산 (두 개의 클래스가 있는 경우에만)
            if len(np.unique(y_test)) <= 1 and y_pred_proba is None:
                print("AUC 점수를 계산할 수 없습니다. y_test에 하나의 클래스만 존재합니다.")



    return res

In [None]:
# label_list = ['phq2_result_binary', 'gad2_result_binary', 'stress_result_binary', 'posNeg_result_binary', 'arousal_result_binary']
label_list = [ 'gad2_result_binary']

correlation_threshold_list = [0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 2.0]
zero_variance = True
high_pariwise = True
oversample = True
lasso = False
scaling = False
cv_method = 'loso'
shuffle = True
n_trials = 100
k = 5

for label in label_list:
    print(f"Processing label: {label}")
    
    # 모델 결과를 저장할 데이터프레임 초기화
    results = pd.DataFrame()
    
    for correlation_threshold in correlation_threshold_list:
        print(f"Threshold: {correlation_threshold}")

        df = merged_df.copy()
               
        # Identify groups with only one class label
        # single_class_groups = check_single_class_groups(df, label)
        user_with_skewed_label = remove_user_with_skewed_label(df, label)

        # user_with_skewed_label에 2, 14, 16 인덱스 추가
        user_with_skewed_label = list(user_with_skewed_label)
        index_list = []
        # index_list = [2, 14, 16]
        for idx in index_list:
            if idx not in user_with_skewed_label:
                user_with_skewed_label.append(idx)
        user_with_skewed_label = pd.Index(user_with_skewed_label)

        print(f"Users with skewed label: {user_with_skewed_label}")
        df = df[~df['uid'].isin(user_with_skewed_label)]
        # print(df.shape)
    
 
        
        MODELS = [
                      ('Decision Tree', DecisionTreeClassifier(random_state=42)),
                      ('Random Forest', RandomForestClassifier(random_state=42)),
                      ('AdaBoost', AdaBoostClassifier(random_state=42)),
                      ('XGBoost', XGBClassifier(random_state=42)),
                      ('LDA', LinearDiscriminantAnalysis()),
                      ('kNN', KNeighborsClassifier()),
                      ('SVM', SVC(probability=True, random_state=42)),
                    ]


        for model_name, model in tqdm(MODELS, desc="Processing Models"):

            print(f"Running {model_name}") 
            
            # Generalization
            res = train_ml(df, label, model, zero=zero_variance, high_pariwise=high_pariwise, lasso=lasso, scaling=scaling,
                cv_method = cv_method, 
                oversample=oversample,
                sampling_method='auto', correlation_threshold=correlation_threshold)

            
            if cv_method == 'loso':
                table = pd.DataFrame(res, columns=['uid', 'auc', 'accuracy', 'f1_macro'])
            elif cv_method == 'kfold':  
                table = pd.DataFrame(res, columns=['auc', 'accuracy', 'f1_macro', 'f1_weighted'])        

            # Personalization
            res = personalization_fold(df, label, model, zero=zero_variance, high_pariwise=high_pariwise, lasso=lasso,
                oversample=oversample,
                sampling_method='auto', correlation_threshold = correlation_threshold)
    
    
            table['model'] = model_name
            table['label'] = label
            table['correlation_threshold'] = correlation_threshold  

            print(table)


            average_metrics = table.mean(numeric_only=True)
            std_metrics = table.std(numeric_only=True)


           # Create a new row with both average and standard deviation
            new_row = {
                'uid': 'Average',
                'auc': average_metrics['auc'],
                'auc_std': std_metrics['auc'],
                'accuracy': average_metrics['accuracy'],
                'accuracy_std': std_metrics['accuracy'],
                # 'balanced_accuracy': average_metrics['balanced_accuracy'],
                # 'balanced_accuracy_std': std_metrics['balanced_accuracy'],
                'f1_macro': average_metrics['f1_macro'],
                'f1_macro_std': std_metrics['f1_macro'],
                'model': model_name,
                'label': label,
                'correlation_threshold': correlation_threshold
            }

            
            new_row_df = pd.DataFrame([new_row])

            # 기존 DataFrame에 행 추가
            results = pd.concat([results, table, new_row_df], ignore_index=True)


            print("Average Metrics:")
            print(average_metrics)
            print("-"*50)



        # 모든 모델의 결과를 CSV 파일로 저장
        results.to_csv("RESULT/" + label + "/LOSO/" + "all_data.csv", index=False)
        print("File saved successfully")

Processing label: phq2_result_binary
Threshold: 0.7
Users with skewed label: Index([4, 8, 10, 12, 13, 14, 15, 19, 20], dtype='int64')


Processing Models:   0%|          | 0/7 [00:00<?, ?it/s]

Running Decision Tree
X shape (590, 987)
uid:  {1} 	 auc:  0.4743589743589744 	 f1_macro:  0.45806451612903226 	 accuracy:  0.4642857142857143
X shape (576, 968)
uid:  {2} 	 auc:  0.5397727272727273 	 f1_macro:  0.4857142857142857 	 accuracy:  0.5
X shape (602, 964)
uid:  {3} 	 auc:  0.45691609977324266 	 f1_macro:  0.4012654012654012 	 accuracy:  0.46551724137931033
X shape (580, 963)
uid:  {5} 	 auc:  0.4916666666666667 	 f1_macro:  0.4909862142099682 	 accuracy:  0.53125
X shape (580, 956)
uid:  {6} 	 auc:  0.43333333333333335 	 f1_macro:  0.35714285714285715 	 accuracy:  0.3888888888888889
X shape (538, 962)
uid:  {7} 	 auc:  0.42407407407407405 	 f1_macro:  0.4110305958132045 	 accuracy:  0.5064935064935064
X shape (552, 964)
uid:  {9} 	 auc:  0.5630952380952381 	 f1_macro:  0.5262187088274045 	 accuracy:  0.532258064516129
X shape (528, 954)
uid:  {11} 	 auc:  0.5678228021978022 	 f1_macro:  0.5416149068322982 	 accuracy:  0.5853658536585366
X shape (604, 972)
uid:  {16} 	 auc:  

Processing Models:  14%|█▍        | 1/7 [01:14<07:25, 74.19s/it]

X shape (604, 977)
uid:  {18} 	 auc:  0.41666666666666663 	 f1_macro:  0.4097222222222222 	 accuracy:  0.4117647058823529
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.474359  0.464286  0.458065  Decision Tree  phq2_result_binary   
1    {2}  0.539773  0.500000  0.485714  Decision Tree  phq2_result_binary   
2    {3}  0.456916  0.465517  0.401265  Decision Tree  phq2_result_binary   
3    {5}  0.491667  0.531250  0.490986  Decision Tree  phq2_result_binary   
4    {6}  0.433333  0.388889  0.357143  Decision Tree  phq2_result_binary   
5    {7}  0.424074  0.506494  0.411031  Decision Tree  phq2_result_binary   
6    {9}  0.563095  0.532258  0.526219  Decision Tree  phq2_result_binary   
7   {11}  0.567823  0.585366  0.541615  Decision Tree  phq2_result_binary   
8   {16}  0.715278  0.653846  0.649175  Decision Tree  phq2_result_binary   
9   {17}  0.614706  0.675214  0.542593  Decision Tree  phq2_result_binary   
10  {18}  0.416667  0.411765  0

Processing Models:  29%|██▊       | 2/7 [02:33<06:26, 77.27s/it]

uid:  {18} 	 auc:  0.2361111111111111 	 f1_macro:  0.43333333333333335 	 accuracy:  0.5294117647058824
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.507692  0.571429  0.475000  Random Forest  phq2_result_binary   
1    {2}  0.801136  0.733333  0.712919  Random Forest  phq2_result_binary   
2    {3}  0.165533  0.258621  0.230959  Random Forest  phq2_result_binary   
3    {5}  0.531250  0.687500  0.583333  Random Forest  phq2_result_binary   
4    {6}  0.780556  0.583333  0.540426  Random Forest  phq2_result_binary   
5    {7}  0.604074  0.610390  0.475953  Random Forest  phq2_result_binary   
6    {9}  0.623810  0.677419  0.578804  Random Forest  phq2_result_binary   
7   {11}  0.571257  0.601626  0.522765  Random Forest  phq2_result_binary   
8   {16}  0.645833  0.307692  0.235294  Random Forest  phq2_result_binary   
9   {17}  0.602941  0.461538  0.418002  Random Forest  phq2_result_binary   
10  {18}  0.236111  0.529412  0.433333  Random For

Processing Models:  43%|████▎     | 3/7 [04:01<05:27, 81.91s/it]

uid:  {18} 	 auc:  0.375 	 f1_macro:  0.43956043956043955 	 accuracy:  0.47058823529411764
     uid       auc  accuracy  f1_macro     model               label  \
0    {1}  0.712821  0.535714  0.348837  AdaBoost  phq2_result_binary   
1    {2}  0.420455  0.500000  0.450549  AdaBoost  phq2_result_binary   
2    {3}  0.410431  0.344828  0.324755  AdaBoost  phq2_result_binary   
3    {5}  0.575000  0.562500  0.515152  AdaBoost  phq2_result_binary   
4    {6}  0.483333  0.500000  0.410909  AdaBoost  phq2_result_binary   
5    {7}  0.588148  0.610390  0.572222  AdaBoost  phq2_result_binary   
6    {9}  0.451190  0.435484  0.417137  AdaBoost  phq2_result_binary   
7   {11}  0.618647  0.536585  0.528609  AdaBoost  phq2_result_binary   
8   {16}  0.604167  0.538462  0.535714  AdaBoost  phq2_result_binary   
9   {17}  0.577124  0.487179  0.442857  AdaBoost  phq2_result_binary   
10  {18}  0.375000  0.470588  0.439560  AdaBoost  phq2_result_binary   

    correlation_threshold  
0               

Processing Models:  57%|█████▋    | 4/7 [05:27<04:11, 83.78s/it]

uid:  {18} 	 auc:  0.3472222222222222 	 f1_macro:  0.39285714285714285 	 accuracy:  0.4117647058823529
     uid       auc  accuracy  f1_macro    model               label  \
0    {1}  0.456410  0.428571  0.300000  XGBoost  phq2_result_binary   
1    {2}  0.704545  0.566667  0.566185  XGBoost  phq2_result_binary   
2    {3}  0.326531  0.362069  0.302114  XGBoost  phq2_result_binary   
3    {5}  0.633333  0.593750  0.539313  XGBoost  phq2_result_binary   
4    {6}  0.444444  0.472222  0.417872  XGBoost  phq2_result_binary   
5    {7}  0.570370  0.649351  0.536455  XGBoost  phq2_result_binary   
6    {9}  0.432143  0.451613  0.436966  XGBoost  phq2_result_binary   
7   {11}  0.632898  0.658537  0.587644  XGBoost  phq2_result_binary   
8   {16}  0.840278  0.615385  0.615385  XGBoost  phq2_result_binary   
9   {17}  0.672549  0.547009  0.480000  XGBoost  phq2_result_binary   
10  {18}  0.347222  0.411765  0.392857  XGBoost  phq2_result_binary   

    correlation_threshold  
0               

Processing Models:  71%|███████▏  | 5/7 [06:41<02:40, 80.26s/it]

X shape (604, 977)
uid:  {18} 	 auc:  0.4930555555555556 	 f1_macro:  0.5277777777777778 	 accuracy:  0.5294117647058824
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.405128  0.464286  0.317073   LDA  phq2_result_binary   
1    {2}  0.525568  0.633333  0.459902   LDA  phq2_result_binary   
2    {3}  0.653061  0.758621  0.632246   LDA  phq2_result_binary   
3    {5}  0.522917  0.562500  0.533333   LDA  phq2_result_binary   
4    {6}  0.602778  0.638889  0.535253   LDA  phq2_result_binary   
5    {7}  0.412963  0.441558  0.422466   LDA  phq2_result_binary   
6    {9}  0.681548  0.645161  0.626096   LDA  phq2_result_binary   
7   {11}  0.507898  0.495935  0.474214   LDA  phq2_result_binary   
8   {16}  0.586806  0.576923  0.560676   LDA  phq2_result_binary   
9   {17}  0.391176  0.384615  0.331429   LDA  phq2_result_binary   
10  {18}  0.493056  0.529412  0.527778   LDA  phq2_result_binary   

    correlation_threshold  
0                     0.7  
1     

Processing Models:  86%|████████▌ | 6/7 [07:55<01:18, 78.19s/it]

X shape (604, 977)
uid:  {18} 	 auc:  0.5208333333333334 	 f1_macro:  0.5882352941176471 	 accuracy:  0.5882352941176471
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.651282  0.678571  0.678161   kNN  phq2_result_binary   
1    {2}  0.536932  0.533333  0.497608   kNN  phq2_result_binary   
2    {3}  0.843537  0.758621  0.697917   kNN  phq2_result_binary   
3    {5}  0.550000  0.593750  0.539313   kNN  phq2_result_binary   
4    {6}  0.791667  0.361111  0.356643   kNN  phq2_result_binary   
5    {7}  0.551111  0.545455  0.540494   kNN  phq2_result_binary   
6    {9}  0.569048  0.564516  0.550363   kNN  phq2_result_binary   
7   {11}  0.666380  0.682927  0.636398   kNN  phq2_result_binary   
8   {16}  0.527778  0.538462  0.527273   kNN  phq2_result_binary   
9   {17}  0.555556  0.555556  0.470035   kNN  phq2_result_binary   
10  {18}  0.520833  0.588235  0.588235   kNN  phq2_result_binary   

    correlation_threshold  
0                     0.7  
1     

Processing Models: 100%|██████████| 7/7 [09:12<00:00, 78.94s/it]


uid:  {18} 	 auc:  0.5 	 f1_macro:  0.39285714285714285 	 accuracy:  0.4117647058823529
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.500000  0.428571  0.401070   SVM  phq2_result_binary   
1    {2}  0.500000  0.600000  0.440994   SVM  phq2_result_binary   
2    {3}  0.131519  0.775862  0.551990   SVM  phq2_result_binary   
3    {5}  0.500000  0.562500  0.515152   SVM  phq2_result_binary   
4    {6}  0.500000  0.472222  0.461841   SVM  phq2_result_binary   
5    {7}  0.500000  0.558442  0.515185   SVM  phq2_result_binary   
6    {9}  0.500000  0.532258  0.526219   SVM  phq2_result_binary   
7   {11}  0.500000  0.666667  0.635437   SVM  phq2_result_binary   
8   {16}  0.500000  0.576923  0.576296   SVM  phq2_result_binary   
9   {17}  0.500000  0.726496  0.633229   SVM  phq2_result_binary   
10  {18}  0.500000  0.411765  0.392857   SVM  phq2_result_binary   

    correlation_threshold  
0                     0.7  
1                     0.7  
2          

Processing Models:   0%|          | 0/7 [00:00<?, ?it/s]

Running Decision Tree
X shape (590, 1135)
uid:  {1} 	 auc:  0.34871794871794876 	 f1_macro:  0.34375 	 accuracy:  0.35714285714285715
X shape (576, 1122)
uid:  {2} 	 auc:  0.6363636363636364 	 f1_macro:  0.6306429548563611 	 accuracy:  0.7
X shape (602, 1127)
uid:  {3} 	 auc:  0.4931972789115646 	 f1_macro:  0.47126436781609193 	 accuracy:  0.603448275862069
X shape (580, 1138)
uid:  {5} 	 auc:  0.5750000000000001 	 f1_macro:  0.5733333333333333 	 accuracy:  0.59375
X shape (580, 1112)
uid:  {6} 	 auc:  0.36666666666666664 	 f1_macro:  0.26875 	 accuracy:  0.2777777777777778
X shape (538, 1120)
uid:  {7} 	 auc:  0.4881481481481481 	 f1_macro:  0.48695983247668 	 accuracy:  0.5454545454545454
X shape (552, 1119)
uid:  {9} 	 auc:  0.4630952380952381 	 f1_macro:  0.45044319097502017 	 accuracy:  0.46774193548387094
X shape (528, 1104)
uid:  {11} 	 auc:  0.49690934065934067 	 f1_macro:  0.49513078470824945 	 accuracy:  0.5853658536585366
X shape (604, 1124)
uid:  {16} 	 auc:  0.55555555555

Processing Models:  14%|█▍        | 1/7 [01:15<07:33, 75.56s/it]

Running Random Forest
X shape (590, 1135)
uid:  {1} 	 auc:  0.6538461538461539 	 f1_macro:  0.5906432748538011 	 accuracy:  0.6428571428571429
X shape (576, 1122)
uid:  {2} 	 auc:  0.6278409090909091 	 f1_macro:  0.7 	 accuracy:  0.7333333333333333
X shape (602, 1127)
uid:  {3} 	 auc:  0.40476190476190477 	 f1_macro:  0.3283255086071987 	 accuracy:  0.3620689655172414
X shape (580, 1138)
uid:  {5} 	 auc:  0.2791666666666667 	 f1_macro:  0.3469387755102041 	 accuracy:  0.53125
X shape (580, 1112)
uid:  {6} 	 auc:  0.4888888888888889 	 f1_macro:  0.40519276160503537 	 accuracy:  0.4166666666666667
X shape (538, 1120)
uid:  {7} 	 auc:  0.4874074074074074 	 f1_macro:  0.33620689655172414 	 accuracy:  0.5064935064935064
X shape (552, 1119)
uid:  {9} 	 auc:  0.5678571428571428 	 f1_macro:  0.5115546218487395 	 accuracy:  0.5161290322580645
X shape (528, 1104)
uid:  {11} 	 auc:  0.7041552197802199 	 f1_macro:  0.6269157088122606 	 accuracy:  0.6910569105691057
X shape (604, 1124)
uid:  {16} 	

Processing Models:  29%|██▊       | 2/7 [02:34<06:29, 77.83s/it]

uid:  {18} 	 auc:  0.6805555555555556 	 f1_macro:  0.43333333333333335 	 accuracy:  0.5294117647058824
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.653846  0.642857  0.590643  Random Forest  phq2_result_binary   
1    {2}  0.627841  0.733333  0.700000  Random Forest  phq2_result_binary   
2    {3}  0.404762  0.362069  0.328326  Random Forest  phq2_result_binary   
3    {5}  0.279167  0.531250  0.346939  Random Forest  phq2_result_binary   
4    {6}  0.488889  0.416667  0.405193  Random Forest  phq2_result_binary   
5    {7}  0.487407  0.506494  0.336207  Random Forest  phq2_result_binary   
6    {9}  0.567857  0.516129  0.511555  Random Forest  phq2_result_binary   
7   {11}  0.704155  0.691057  0.626916  Random Forest  phq2_result_binary   
8   {16}  0.687500  0.307692  0.235294  Random Forest  phq2_result_binary   
9   {17}  0.640523  0.547009  0.472390  Random Forest  phq2_result_binary   
10  {18}  0.680556  0.529412  0.433333  Random For

Processing Models:  43%|████▎     | 3/7 [04:05<05:33, 83.42s/it]

uid:  {18} 	 auc:  0.48611111111111116 	 f1_macro:  0.5641025641025641 	 accuracy:  0.5882352941176471
     uid       auc  accuracy  f1_macro     model               label  \
0    {1}  0.584615  0.535714  0.520422  AdaBoost  phq2_result_binary   
1    {2}  0.625000  0.633333  0.548564  AdaBoost  phq2_result_binary   
2    {3}  0.628118  0.206897  0.205952  AdaBoost  phq2_result_binary   
3    {5}  0.514583  0.562500  0.533333  AdaBoost  phq2_result_binary   
4    {6}  0.405556  0.416667  0.377778  AdaBoost  phq2_result_binary   
5    {7}  0.554815  0.649351  0.576147  AdaBoost  phq2_result_binary   
6    {9}  0.492857  0.532258  0.517056  AdaBoost  phq2_result_binary   
7   {11}  0.561470  0.536585  0.524000  AdaBoost  phq2_result_binary   
8   {16}  0.715278  0.500000  0.499259  AdaBoost  phq2_result_binary   
9   {17}  0.492810  0.410256  0.379620  AdaBoost  phq2_result_binary   
10  {18}  0.486111  0.588235  0.564103  AdaBoost  phq2_result_binary   

    correlation_threshold  
0   

Processing Models:  57%|█████▋    | 4/7 [05:33<04:15, 85.31s/it]

uid:  {18} 	 auc:  0.3472222222222222 	 f1_macro:  0.35294117647058826 	 accuracy:  0.35294117647058826
     uid       auc  accuracy  f1_macro    model               label  \
0    {1}  0.476923  0.392857  0.322902  XGBoost  phq2_result_binary   
1    {2}  0.761364  0.733333  0.682540  XGBoost  phq2_result_binary   
2    {3}  0.403628  0.465517  0.401265  XGBoost  phq2_result_binary   
3    {5}  0.425000  0.562500  0.490909  XGBoost  phq2_result_binary   
4    {6}  0.416667  0.416667  0.329193  XGBoost  phq2_result_binary   
5    {7}  0.433333  0.610390  0.408299  XGBoost  phq2_result_binary   
6    {9}  0.466667  0.467742  0.460870  XGBoost  phq2_result_binary   
7   {11}  0.517514  0.560976  0.499849  XGBoost  phq2_result_binary   
8   {16}  0.826389  0.576923  0.576296  XGBoost  phq2_result_binary   
9   {17}  0.789542  0.598291  0.532119  XGBoost  phq2_result_binary   
10  {18}  0.347222  0.352941  0.352941  XGBoost  phq2_result_binary   

    correlation_threshold  
0              

Processing Models:  71%|███████▏  | 5/7 [06:47<02:42, 81.28s/it]

X shape (604, 1131)
uid:  {18} 	 auc:  0.29861111111111105 	 f1_macro:  0.27142857142857146 	 accuracy:  0.29411764705882354
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.415385  0.464286  0.402560   LDA  phq2_result_binary   
1    {2}  0.590909  0.633333  0.576380   LDA  phq2_result_binary   
2    {3}  0.469388  0.793103  0.442308   LDA  phq2_result_binary   
3    {5}  0.558333  0.531250  0.530792   LDA  phq2_result_binary   
4    {6}  0.616667  0.722222  0.556650   LDA  phq2_result_binary   
5    {7}  0.383704  0.428571  0.411806   LDA  phq2_result_binary   
6    {9}  0.658929  0.645161  0.626096   LDA  phq2_result_binary   
7   {11}  0.437157  0.682927  0.469652   LDA  phq2_result_binary   
8   {16}  0.513889  0.538462  0.490196   LDA  phq2_result_binary   
9   {17}  0.411111  0.307692  0.290909   LDA  phq2_result_binary   
10  {18}  0.298611  0.294118  0.271429   LDA  phq2_result_binary   

    correlation_threshold  
0                    0.75  
1 

Processing Models:  86%|████████▌ | 6/7 [08:01<01:18, 79.00s/it]

X shape (604, 1131)
uid:  {18} 	 auc:  0.5208333333333334 	 f1_macro:  0.5882352941176471 	 accuracy:  0.5882352941176471
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.651282  0.678571  0.678161   kNN  phq2_result_binary   
1    {2}  0.536932  0.533333  0.497608   kNN  phq2_result_binary   
2    {3}  0.843537  0.758621  0.697917   kNN  phq2_result_binary   
3    {5}  0.550000  0.593750  0.539313   kNN  phq2_result_binary   
4    {6}  0.791667  0.361111  0.356643   kNN  phq2_result_binary   
5    {7}  0.551111  0.545455  0.540494   kNN  phq2_result_binary   
6    {9}  0.573810  0.564516  0.550363   kNN  phq2_result_binary   
7   {11}  0.666380  0.682927  0.636398   kNN  phq2_result_binary   
8   {16}  0.527778  0.538462  0.527273   kNN  phq2_result_binary   
9   {17}  0.555556  0.555556  0.470035   kNN  phq2_result_binary   
10  {18}  0.520833  0.588235  0.588235   kNN  phq2_result_binary   

    correlation_threshold  
0                    0.75  
1    

Processing Models: 100%|██████████| 7/7 [09:20<00:00, 80.08s/it]


uid:  {18} 	 auc:  0.5 	 f1_macro:  0.39285714285714285 	 accuracy:  0.4117647058823529
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.500000  0.428571  0.401070   SVM  phq2_result_binary   
1    {2}  0.500000  0.600000  0.440994   SVM  phq2_result_binary   
2    {3}  0.131519  0.775862  0.551990   SVM  phq2_result_binary   
3    {5}  0.500000  0.562500  0.515152   SVM  phq2_result_binary   
4    {6}  0.500000  0.472222  0.461841   SVM  phq2_result_binary   
5    {7}  0.500000  0.558442  0.515185   SVM  phq2_result_binary   
6    {9}  0.500000  0.532258  0.526219   SVM  phq2_result_binary   
7   {11}  0.500000  0.666667  0.635437   SVM  phq2_result_binary   
8   {16}  0.500000  0.576923  0.576296   SVM  phq2_result_binary   
9   {17}  0.500000  0.726496  0.633229   SVM  phq2_result_binary   
10  {18}  0.500000  0.411765  0.392857   SVM  phq2_result_binary   

    correlation_threshold  
0                    0.75  
1                    0.75  
2          

Processing Models:   0%|          | 0/7 [00:00<?, ?it/s]

Running Decision Tree
X shape (590, 1306)
uid:  {1} 	 auc:  0.5358974358974358 	 f1_macro:  0.5351213282247764 	 accuracy:  0.5357142857142857
X shape (576, 1289)
uid:  {2} 	 auc:  0.5170454545454546 	 f1_macro:  0.4990723562152134 	 accuracy:  0.7
X shape (602, 1284)
uid:  {3} 	 auc:  0.6337868480725624 	 f1_macro:  0.49073170731707316 	 accuracy:  0.5344827586206896
X shape (580, 1292)
uid:  {5} 	 auc:  0.425 	 f1_macro:  0.4231177094379639 	 accuracy:  0.46875
X shape (580, 1281)
uid:  {6} 	 auc:  0.28333333333333327 	 f1_macro:  0.23524783634933125 	 accuracy:  0.25
X shape (538, 1301)
uid:  {7} 	 auc:  0.46370370370370373 	 f1_macro:  0.4600280504908836 	 accuracy:  0.4805194805194805
X shape (552, 1296)
uid:  {9} 	 auc:  0.5238095238095238 	 f1_macro:  0.5107482993197279 	 accuracy:  0.532258064516129
X shape (528, 1278)
uid:  {11} 	 auc:  0.40401785714285715 	 f1_macro:  0.40053167922020383 	 accuracy:  0.5528455284552846
X shape (604, 1283)
uid:  {16} 	 auc:  0.5902777777777778

Processing Models:  14%|█▍        | 1/7 [01:15<07:34, 75.74s/it]

Average Metrics:
auc                      0.484781
accuracy                 0.506926
f1_macro                 0.450136
correlation_threshold    0.800000
dtype: float64
--------------------------------------------------
Running Random Forest
X shape (590, 1306)
uid:  {1} 	 auc:  0.5384615384615384 	 f1_macro:  0.40256045519203415 	 accuracy:  0.4642857142857143
X shape (576, 1289)
uid:  {2} 	 auc:  0.6761363636363636 	 f1_macro:  0.576379974326059 	 accuracy:  0.6333333333333333
X shape (602, 1284)
uid:  {3} 	 auc:  0.4489795918367347 	 f1_macro:  0.14884696016771487 	 accuracy:  0.15517241379310345
X shape (580, 1292)
uid:  {5} 	 auc:  0.4270833333333333 	 f1_macro:  0.45893719806763283 	 accuracy:  0.5625
X shape (580, 1281)
uid:  {6} 	 auc:  0.35277777777777775 	 f1_macro:  0.42857142857142855 	 accuracy:  0.5277777777777778
X shape (538, 1301)
uid:  {7} 	 auc:  0.41185185185185186 	 f1_macro:  0.41996233521657245 	 accuracy:  0.5844155844155844
X shape (552, 1296)
uid:  {9} 	 auc:  

Processing Models:  29%|██▊       | 2/7 [02:34<06:27, 77.47s/it]

uid:  {18} 	 auc:  0.3333333333333333 	 f1_macro:  0.43956043956043955 	 accuracy:  0.47058823529411764
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.538462  0.464286  0.402560  Random Forest  phq2_result_binary   
1    {2}  0.676136  0.633333  0.576380  Random Forest  phq2_result_binary   
2    {3}  0.448980  0.155172  0.148847  Random Forest  phq2_result_binary   
3    {5}  0.427083  0.562500  0.458937  Random Forest  phq2_result_binary   
4    {6}  0.352778  0.527778  0.428571  Random Forest  phq2_result_binary   
5    {7}  0.411852  0.584416  0.419962  Random Forest  phq2_result_binary   
6    {9}  0.552976  0.500000  0.448177  Random Forest  phq2_result_binary   
7   {11}  0.727679  0.739837  0.654979  Random Forest  phq2_result_binary   
8   {16}  0.548611  0.461538  0.448485  Random Forest  phq2_result_binary   
9   {17}  0.717974  0.495726  0.454955  Random Forest  phq2_result_binary   
10  {18}  0.333333  0.470588  0.439560  Random Fo

Processing Models:  43%|████▎     | 3/7 [04:08<05:39, 84.87s/it]

uid:  {18} 	 auc:  0.3194444444444444 	 f1_macro:  0.29166666666666663 	 accuracy:  0.29411764705882354
     uid       auc  accuracy  f1_macro     model               label  \
0    {1}  0.551282  0.500000  0.455556  AdaBoost  phq2_result_binary   
1    {2}  0.460227  0.566667  0.466484  AdaBoost  phq2_result_binary   
2    {3}  0.519274  0.448276  0.402831  AdaBoost  phq2_result_binary   
3    {5}  0.320833  0.343750  0.287381  AdaBoost  phq2_result_binary   
4    {6}  0.355556  0.416667  0.356596  AdaBoost  phq2_result_binary   
5    {7}  0.533333  0.519481  0.476768  AdaBoost  phq2_result_binary   
6    {9}  0.496429  0.483871  0.475132  AdaBoost  phq2_result_binary   
7   {11}  0.550481  0.577236  0.544444  AdaBoost  phq2_result_binary   
8   {16}  0.618056  0.500000  0.499259  AdaBoost  phq2_result_binary   
9   {17}  0.630719  0.529915  0.467698  AdaBoost  phq2_result_binary   
10  {18}  0.319444  0.294118  0.291667  AdaBoost  phq2_result_binary   

    correlation_threshold  
0  

Processing Models:  57%|█████▋    | 4/7 [05:39<04:22, 87.47s/it]

uid:  {18} 	 auc:  0.2916666666666667 	 f1_macro:  0.39285714285714285 	 accuracy:  0.4117647058823529
     uid       auc  accuracy  f1_macro    model               label  \
0    {1}  0.235897  0.392857  0.322902  XGBoost  phq2_result_binary   
1    {2}  0.625000  0.566667  0.422222  XGBoost  phq2_result_binary   
2    {3}  0.555556  0.413793  0.387578  XGBoost  phq2_result_binary   
3    {5}  0.395833  0.468750  0.397564  XGBoost  phq2_result_binary   
4    {6}  0.544444  0.527778  0.479149  XGBoost  phq2_result_binary   
5    {7}  0.469630  0.597403  0.402204  XGBoost  phq2_result_binary   
6    {9}  0.436905  0.435484  0.423332  XGBoost  phq2_result_binary   
7   {11}  0.556319  0.577236  0.511905  XGBoost  phq2_result_binary   
8   {16}  0.833333  0.576923  0.576296  XGBoost  phq2_result_binary   
9   {17}  0.784314  0.598291  0.532119  XGBoost  phq2_result_binary   
10  {18}  0.291667  0.411765  0.392857  XGBoost  phq2_result_binary   

    correlation_threshold  
0               

Processing Models:  71%|███████▏  | 5/7 [06:54<02:45, 82.98s/it]

X shape (604, 1291)
uid:  {18} 	 auc:  0.3402777777777778 	 f1_macro:  0.343859649122807 	 accuracy:  0.35294117647058826
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.348718  0.357143  0.343750   LDA  phq2_result_binary   
1    {2}  0.545455  0.400000  0.397321   LDA  phq2_result_binary   
2    {3}  0.479592  0.810345  0.447619   LDA  phq2_result_binary   
3    {5}  0.481250  0.500000  0.498039   LDA  phq2_result_binary   
4    {6}  0.469444  0.666667  0.467980   LDA  phq2_result_binary   
5    {7}  0.410000  0.441558  0.416358   LDA  phq2_result_binary   
6    {9}  0.569048  0.580645  0.520238   LDA  phq2_result_binary   
7   {11}  0.410543  0.569106  0.409136   LDA  phq2_result_binary   
8   {16}  0.590278  0.500000  0.499259   LDA  phq2_result_binary   
9   {17}  0.389216  0.282051  0.266567   LDA  phq2_result_binary   
10  {18}  0.340278  0.352941  0.343860   LDA  phq2_result_binary   

    correlation_threshold  
0                     0.8  
1    

Processing Models:  86%|████████▌ | 6/7 [08:09<01:20, 80.33s/it]

X shape (604, 1291)
uid:  {18} 	 auc:  0.5208333333333334 	 f1_macro:  0.5882352941176471 	 accuracy:  0.5882352941176471
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.651282  0.678571  0.678161   kNN  phq2_result_binary   
1    {2}  0.536932  0.533333  0.497608   kNN  phq2_result_binary   
2    {3}  0.843537  0.758621  0.697917   kNN  phq2_result_binary   
3    {5}  0.550000  0.593750  0.539313   kNN  phq2_result_binary   
4    {6}  0.788889  0.361111  0.356643   kNN  phq2_result_binary   
5    {7}  0.551111  0.545455  0.540494   kNN  phq2_result_binary   
6    {9}  0.573810  0.564516  0.550363   kNN  phq2_result_binary   
7   {11}  0.668613  0.682927  0.636398   kNN  phq2_result_binary   
8   {16}  0.520833  0.538462  0.527273   kNN  phq2_result_binary   
9   {17}  0.555556  0.555556  0.470035   kNN  phq2_result_binary   
10  {18}  0.520833  0.588235  0.588235   kNN  phq2_result_binary   

    correlation_threshold  
0                     0.8  
1    

Processing Models: 100%|██████████| 7/7 [09:28<00:00, 81.25s/it]


uid:  {18} 	 auc:  0.5 	 f1_macro:  0.39285714285714285 	 accuracy:  0.4117647058823529
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.500000  0.428571  0.401070   SVM  phq2_result_binary   
1    {2}  0.500000  0.600000  0.440994   SVM  phq2_result_binary   
2    {3}  0.131519  0.775862  0.551990   SVM  phq2_result_binary   
3    {5}  0.500000  0.562500  0.515152   SVM  phq2_result_binary   
4    {6}  0.500000  0.472222  0.461841   SVM  phq2_result_binary   
5    {7}  0.500000  0.558442  0.515185   SVM  phq2_result_binary   
6    {9}  0.500000  0.532258  0.526219   SVM  phq2_result_binary   
7   {11}  0.500000  0.666667  0.635437   SVM  phq2_result_binary   
8   {16}  0.500000  0.576923  0.576296   SVM  phq2_result_binary   
9   {17}  0.500000  0.726496  0.633229   SVM  phq2_result_binary   
10  {18}  0.500000  0.411765  0.392857   SVM  phq2_result_binary   

    correlation_threshold  
0                     0.8  
1                     0.8  
2          

Processing Models:   0%|          | 0/7 [00:00<?, ?it/s]

Running Decision Tree
X shape (590, 1483)
uid:  {1} 	 auc:  0.5538461538461539 	 f1_macro:  0.5333333333333333 	 accuracy:  0.5714285714285714
X shape (576, 1476)
uid:  {2} 	 auc:  0.4318181818181818 	 f1_macro:  0.3877551020408163 	 accuracy:  0.6333333333333333
X shape (602, 1470)
uid:  {3} 	 auc:  0.6643990929705215 	 f1_macro:  0.5303643724696356 	 accuracy:  0.5862068965517241
X shape (580, 1475)
uid:  {5} 	 auc:  0.5083333333333333 	 f1_macro:  0.5076923076923077 	 accuracy:  0.53125
X shape (580, 1462)
uid:  {6} 	 auc:  0.5333333333333333 	 f1_macro:  0.33126934984520123 	 accuracy:  0.3333333333333333
X shape (538, 1456)
uid:  {7} 	 auc:  0.46814814814814815 	 f1_macro:  0.46777507939473195 	 accuracy:  0.5194805194805194
X shape (552, 1476)
uid:  {9} 	 auc:  0.4892857142857143 	 f1_macro:  0.4608695652173913 	 accuracy:  0.46774193548387094
X shape (528, 1464)
uid:  {11} 	 auc:  0.5252403846153846 	 f1_macro:  0.5255960729312763 	 accuracy:  0.6422764227642277
X shape (604, 14

Processing Models:  14%|█▍        | 1/7 [01:16<07:40, 76.76s/it]

uid:  {18} 	 auc:  0.2777777777777778 	 f1_macro:  0.22727272727272727 	 accuracy:  0.29411764705882354
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.553846  0.571429  0.533333  Decision Tree  phq2_result_binary   
1    {2}  0.431818  0.633333  0.387755  Decision Tree  phq2_result_binary   
2    {3}  0.664399  0.586207  0.530364  Decision Tree  phq2_result_binary   
3    {5}  0.508333  0.531250  0.507692  Decision Tree  phq2_result_binary   
4    {6}  0.533333  0.333333  0.331269  Decision Tree  phq2_result_binary   
5    {7}  0.468148  0.519481  0.467775  Decision Tree  phq2_result_binary   
6    {9}  0.489286  0.467742  0.460870  Decision Tree  phq2_result_binary   
7   {11}  0.525240  0.642276  0.525596  Decision Tree  phq2_result_binary   
8   {16}  0.548611  0.615385  0.548611  Decision Tree  phq2_result_binary   
9   {17}  0.767647  0.743590  0.638889  Decision Tree  phq2_result_binary   
10  {18}  0.277778  0.294118  0.227273  Decision 

Processing Models:  29%|██▊       | 2/7 [02:37<06:34, 78.91s/it]

uid:  {18} 	 auc:  0.4027777777777778 	 f1_macro:  0.3952569169960475 	 accuracy:  0.47058823529411764
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.484615  0.500000  0.333333  Random Forest  phq2_result_binary   
1    {2}  0.633523  0.666667  0.603175  Random Forest  phq2_result_binary   
2    {3}  0.361678  0.206897  0.198317  Random Forest  phq2_result_binary   
3    {5}  0.435417  0.593750  0.434014  Random Forest  phq2_result_binary   
4    {6}  0.538889  0.500000  0.410909  Random Forest  phq2_result_binary   
5    {7}  0.450000  0.636364  0.388889  Random Forest  phq2_result_binary   
6    {9}  0.502976  0.548387  0.516165  Random Forest  phq2_result_binary   
7   {11}  0.705701  0.682927  0.620160  Random Forest  phq2_result_binary   
8   {16}  0.600694  0.346154  0.295056  Random Forest  phq2_result_binary   
9   {17}  0.731699  0.418803  0.395441  Random Forest  phq2_result_binary   
10  {18}  0.402778  0.470588  0.395257  Random For

Processing Models:  43%|████▎     | 3/7 [04:12<05:46, 86.56s/it]

uid:  {18} 	 auc:  0.5 	 f1_macro:  0.39285714285714285 	 accuracy:  0.4117647058823529
     uid       auc  accuracy  f1_macro     model               label  \
0    {1}  0.617949  0.500000  0.500000  AdaBoost  phq2_result_binary   
1    {2}  0.477273  0.700000  0.558101  AdaBoost  phq2_result_binary   
2    {3}  0.444444  0.258621  0.247662  AdaBoost  phq2_result_binary   
3    {5}  0.479167  0.468750  0.397564  AdaBoost  phq2_result_binary   
4    {6}  0.400000  0.388889  0.371429  AdaBoost  phq2_result_binary   
5    {7}  0.585185  0.623377  0.555975  AdaBoost  phq2_result_binary   
6    {9}  0.496429  0.500000  0.489237  AdaBoost  phq2_result_binary   
7   {11}  0.454670  0.585366  0.455894  AdaBoost  phq2_result_binary   
8   {16}  0.541667  0.384615  0.380952  AdaBoost  phq2_result_binary   
9   {17}  0.511765  0.418803  0.380760  AdaBoost  phq2_result_binary   
10  {18}  0.500000  0.411765  0.392857  AdaBoost  phq2_result_binary   

    correlation_threshold  
0                  

Processing Models:  57%|█████▋    | 4/7 [05:46<04:27, 89.20s/it]

uid:  {18} 	 auc:  0.3472222222222222 	 f1_macro:  0.39285714285714285 	 accuracy:  0.4117647058823529
     uid       auc  accuracy  f1_macro    model               label  \
0    {1}  0.430769  0.500000  0.426901  XGBoost  phq2_result_binary   
1    {2}  0.693182  0.733333  0.583333  XGBoost  phq2_result_binary   
2    {3}  0.541950  0.431034  0.409806  XGBoost  phq2_result_binary   
3    {5}  0.358333  0.468750  0.423118  XGBoost  phq2_result_binary   
4    {6}  0.627778  0.500000  0.474026  XGBoost  phq2_result_binary   
5    {7}  0.513333  0.623377  0.414372  XGBoost  phq2_result_binary   
6    {9}  0.571429  0.532258  0.494233  XGBoost  phq2_result_binary   
7   {11}  0.594093  0.569106  0.512233  XGBoost  phq2_result_binary   
8   {16}  0.805556  0.576923  0.576296  XGBoost  phq2_result_binary   
9   {17}  0.727451  0.529915  0.480755  XGBoost  phq2_result_binary   
10  {18}  0.347222  0.411765  0.392857  XGBoost  phq2_result_binary   

    correlation_threshold  
0               

Processing Models:  71%|███████▏  | 5/7 [07:01<02:48, 84.20s/it]

X shape (604, 1472)
uid:  {18} 	 auc:  0.3194444444444444 	 f1_macro:  0.29166666666666663 	 accuracy:  0.29411764705882354
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.356410  0.392857  0.372859   LDA  phq2_result_binary   
1    {2}  0.525568  0.366667  0.360269   LDA  phq2_result_binary   
2    {3}  0.459184  0.775862  0.436893   LDA  phq2_result_binary   
3    {5}  0.477083  0.500000  0.466667   LDA  phq2_result_binary   
4    {6}  0.594444  0.583333  0.463754   LDA  phq2_result_binary   
5    {7}  0.619630  0.662338  0.629259   LDA  phq2_result_binary   
6    {9}  0.604762  0.580645  0.564324   LDA  phq2_result_binary   
7   {11}  0.481113  0.382114  0.381746   LDA  phq2_result_binary   
8   {16}  0.614583  0.615385  0.606061   LDA  phq2_result_binary   
9   {17}  0.475490  0.333333  0.315276   LDA  phq2_result_binary   
10  {18}  0.319444  0.294118  0.291667   LDA  phq2_result_binary   

    correlation_threshold  
0                    0.85  
1  

Processing Models:  86%|████████▌ | 6/7 [08:16<01:21, 81.15s/it]

X shape (604, 1472)
uid:  {18} 	 auc:  0.5208333333333334 	 f1_macro:  0.5882352941176471 	 accuracy:  0.5882352941176471
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.651282  0.678571  0.678161   kNN  phq2_result_binary   
1    {2}  0.536932  0.533333  0.497608   kNN  phq2_result_binary   
2    {3}  0.843537  0.758621  0.697917   kNN  phq2_result_binary   
3    {5}  0.550000  0.593750  0.539313   kNN  phq2_result_binary   
4    {6}  0.788889  0.361111  0.356643   kNN  phq2_result_binary   
5    {7}  0.551111  0.545455  0.540494   kNN  phq2_result_binary   
6    {9}  0.573810  0.564516  0.550363   kNN  phq2_result_binary   
7   {11}  0.665865  0.682927  0.636398   kNN  phq2_result_binary   
8   {16}  0.548611  0.576923  0.571214   kNN  phq2_result_binary   
9   {17}  0.555556  0.555556  0.470035   kNN  phq2_result_binary   
10  {18}  0.520833  0.588235  0.588235   kNN  phq2_result_binary   

    correlation_threshold  
0                    0.85  
1    

Processing Models: 100%|██████████| 7/7 [09:36<00:00, 82.35s/it]


uid:  {18} 	 auc:  0.5 	 f1_macro:  0.39285714285714285 	 accuracy:  0.4117647058823529
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.500000  0.428571  0.401070   SVM  phq2_result_binary   
1    {2}  0.500000  0.600000  0.440994   SVM  phq2_result_binary   
2    {3}  0.131519  0.775862  0.551990   SVM  phq2_result_binary   
3    {5}  0.500000  0.562500  0.515152   SVM  phq2_result_binary   
4    {6}  0.500000  0.472222  0.461841   SVM  phq2_result_binary   
5    {7}  0.500000  0.558442  0.515185   SVM  phq2_result_binary   
6    {9}  0.500000  0.532258  0.526219   SVM  phq2_result_binary   
7   {11}  0.500000  0.666667  0.635437   SVM  phq2_result_binary   
8   {16}  0.500000  0.576923  0.576296   SVM  phq2_result_binary   
9   {17}  0.500000  0.726496  0.633229   SVM  phq2_result_binary   
10  {18}  0.500000  0.411765  0.392857   SVM  phq2_result_binary   

    correlation_threshold  
0                    0.85  
1                    0.85  
2          

Processing Models:   0%|          | 0/7 [00:00<?, ?it/s]

Running Decision Tree
X shape (590, 1711)
uid:  {1} 	 auc:  0.5974358974358975 	 f1_macro:  0.5942028985507246 	 accuracy:  0.6071428571428571
X shape (576, 1713)
uid:  {2} 	 auc:  0.4318181818181818 	 f1_macro:  0.3877551020408163 	 accuracy:  0.6333333333333333
X shape (602, 1710)
uid:  {3} 	 auc:  0.6031746031746033 	 f1_macro:  0.4507575757575758 	 accuracy:  0.4827586206896552
X shape (580, 1706)
uid:  {5} 	 auc:  0.6083333333333333 	 f1_macro:  0.5901477832512315 	 accuracy:  0.59375
X shape (580, 1703)
uid:  {6} 	 auc:  0.6000000000000001 	 f1_macro:  0.4285714285714286 	 accuracy:  0.4444444444444444
X shape (538, 1694)
uid:  {7} 	 auc:  0.47370370370370374 	 f1_macro:  0.47065044949762036 	 accuracy:  0.4935064935064935
X shape (552, 1702)
uid:  {9} 	 auc:  0.47380952380952385 	 f1_macro:  0.4689140646587455 	 accuracy:  0.5
X shape (528, 1713)
uid:  {11} 	 auc:  0.4613667582417582 	 f1_macro:  0.4454304730551778 	 accuracy:  0.4878048780487805
X shape (604, 1708)
uid:  {16} 	

Processing Models:  14%|█▍        | 1/7 [01:17<07:46, 77.74s/it]

uid:  {18} 	 auc:  0.29166666666666663 	 f1_macro:  0.29166666666666663 	 accuracy:  0.29411764705882354
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.597436  0.607143  0.594203  Decision Tree  phq2_result_binary   
1    {2}  0.431818  0.633333  0.387755  Decision Tree  phq2_result_binary   
2    {3}  0.603175  0.482759  0.450758  Decision Tree  phq2_result_binary   
3    {5}  0.608333  0.593750  0.590148  Decision Tree  phq2_result_binary   
4    {6}  0.600000  0.444444  0.428571  Decision Tree  phq2_result_binary   
5    {7}  0.473704  0.493506  0.470650  Decision Tree  phq2_result_binary   
6    {9}  0.473810  0.500000  0.468914  Decision Tree  phq2_result_binary   
7   {11}  0.461367  0.487805  0.445430  Decision Tree  phq2_result_binary   
8   {16}  0.465278  0.500000  0.460925  Decision Tree  phq2_result_binary   
9   {17}  0.662745  0.461538  0.433566  Decision Tree  phq2_result_binary   
10  {18}  0.291667  0.294118  0.291667  Decision

Processing Models:  29%|██▊       | 2/7 [02:39<06:39, 79.99s/it]

uid:  {18} 	 auc:  0.2361111111111111 	 f1_macro:  0.2916666666666667 	 accuracy:  0.4117647058823529
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.556410  0.535714  0.348837  Random Forest  phq2_result_binary   
1    {2}  0.656250  0.733333  0.627329  Random Forest  phq2_result_binary   
2    {3}  0.235828  0.344828  0.304293  Random Forest  phq2_result_binary   
3    {5}  0.562500  0.593750  0.372549  Random Forest  phq2_result_binary   
4    {6}  0.538889  0.500000  0.458194  Random Forest  phq2_result_binary   
5    {7}  0.483333  0.571429  0.389916  Random Forest  phq2_result_binary   
6    {9}  0.632738  0.629032  0.557005  Random Forest  phq2_result_binary   
7   {11}  0.670845  0.682927  0.574922  Random Forest  phq2_result_binary   
8   {16}  0.513889  0.384615  0.350000  Random Forest  phq2_result_binary   
9   {17}  0.638235  0.487179  0.436778  Random Forest  phq2_result_binary   
10  {18}  0.236111  0.411765  0.291667  Random Fore

Processing Models:  43%|████▎     | 3/7 [04:18<05:54, 88.75s/it]

uid:  {18} 	 auc:  0.6805555555555556 	 f1_macro:  0.5824561403508772 	 accuracy:  0.5882352941176471
     uid       auc  accuracy  f1_macro     model               label  \
0    {1}  0.543590  0.428571  0.416667  AdaBoost  phq2_result_binary   
1    {2}  0.556818  0.533333  0.497608  AdaBoost  phq2_result_binary   
2    {3}  0.401361  0.844828  0.457944  AdaBoost  phq2_result_binary   
3    {5}  0.456250  0.562500  0.515152  AdaBoost  phq2_result_binary   
4    {6}  0.561111  0.388889  0.371429  AdaBoost  phq2_result_binary   
5    {7}  0.581111  0.545455  0.476190  AdaBoost  phq2_result_binary   
6    {9}  0.558333  0.548387  0.506818  AdaBoost  phq2_result_binary   
7   {11}  0.502060  0.658537  0.537097  AdaBoost  phq2_result_binary   
8   {16}  0.687500  0.576923  0.576296  AdaBoost  phq2_result_binary   
9   {17}  0.518954  0.547009  0.445795  AdaBoost  phq2_result_binary   
10  {18}  0.680556  0.588235  0.582456  AdaBoost  phq2_result_binary   

    correlation_threshold  
0    

Processing Models:  57%|█████▋    | 4/7 [05:55<04:36, 92.17s/it]

uid:  {18} 	 auc:  0.25 	 f1_macro:  0.29166666666666663 	 accuracy:  0.29411764705882354
     uid       auc  accuracy  f1_macro    model               label  \
0    {1}  0.348718  0.428571  0.345029  XGBoost  phq2_result_binary   
1    {2}  0.681818  0.700000  0.558101  XGBoost  phq2_result_binary   
2    {3}  0.575964  0.724138  0.473923  XGBoost  phq2_result_binary   
3    {5}  0.291667  0.500000  0.418182  XGBoost  phq2_result_binary   
4    {6}  0.544444  0.611111  0.541818  XGBoost  phq2_result_binary   
5    {7}  0.471111  0.610390  0.456215  XGBoost  phq2_result_binary   
6    {9}  0.588095  0.612903  0.577273  XGBoost  phq2_result_binary   
7   {11}  0.592033  0.552846  0.505663  XGBoost  phq2_result_binary   
8   {16}  0.694444  0.615385  0.615385  XGBoost  phq2_result_binary   
9   {17}  0.732026  0.555556  0.486149  XGBoost  phq2_result_binary   
10  {18}  0.250000  0.294118  0.291667  XGBoost  phq2_result_binary   

    correlation_threshold  
0                     0.9  
1

Processing Models:  71%|███████▏  | 5/7 [07:12<02:53, 86.53s/it]

X shape (604, 1707)
uid:  {18} 	 auc:  0.5833333333333333 	 f1_macro:  0.5824561403508772 	 accuracy:  0.5882352941176471
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.397436  0.392857  0.322902   LDA  phq2_result_binary   
1    {2}  0.610795  0.700000  0.558101   LDA  phq2_result_binary   
2    {3}  0.448980  0.758621  0.431373   LDA  phq2_result_binary   
3    {5}  0.362500  0.437500  0.376623   LDA  phq2_result_binary   
4    {6}  0.388889  0.583333  0.368421   LDA  phq2_result_binary   
5    {7}  0.569630  0.493506  0.493506   LDA  phq2_result_binary   
6    {9}  0.576190  0.532258  0.529196   LDA  phq2_result_binary   
7   {11}  0.383242  0.447154  0.385362   LDA  phq2_result_binary   
8   {16}  0.583333  0.423077  0.400922   LDA  phq2_result_binary   
9   {17}  0.635294  0.760684  0.593750   LDA  phq2_result_binary   
10  {18}  0.583333  0.588235  0.582456   LDA  phq2_result_binary   

    correlation_threshold  
0                     0.9  
1    

Processing Models:  86%|████████▌ | 6/7 [08:28<01:22, 82.91s/it]

X shape (604, 1707)
uid:  {18} 	 auc:  0.5208333333333334 	 f1_macro:  0.5882352941176471 	 accuracy:  0.5882352941176471
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.651282  0.678571  0.678161   kNN  phq2_result_binary   
1    {2}  0.536932  0.533333  0.497608   kNN  phq2_result_binary   
2    {3}  0.843537  0.758621  0.697917   kNN  phq2_result_binary   
3    {5}  0.550000  0.593750  0.539313   kNN  phq2_result_binary   
4    {6}  0.788889  0.361111  0.356643   kNN  phq2_result_binary   
5    {7}  0.548519  0.545455  0.540494   kNN  phq2_result_binary   
6    {9}  0.573810  0.564516  0.550363   kNN  phq2_result_binary   
7   {11}  0.664492  0.682927  0.636398   kNN  phq2_result_binary   
8   {16}  0.548611  0.576923  0.571214   kNN  phq2_result_binary   
9   {17}  0.555556  0.555556  0.470035   kNN  phq2_result_binary   
10  {18}  0.520833  0.588235  0.588235   kNN  phq2_result_binary   

    correlation_threshold  
0                     0.9  
1    

Processing Models: 100%|██████████| 7/7 [09:50<00:00, 84.33s/it]


uid:  {18} 	 auc:  0.5 	 f1_macro:  0.39285714285714285 	 accuracy:  0.4117647058823529
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.500000  0.428571  0.401070   SVM  phq2_result_binary   
1    {2}  0.500000  0.600000  0.440994   SVM  phq2_result_binary   
2    {3}  0.131519  0.775862  0.551990   SVM  phq2_result_binary   
3    {5}  0.500000  0.562500  0.515152   SVM  phq2_result_binary   
4    {6}  0.500000  0.472222  0.461841   SVM  phq2_result_binary   
5    {7}  0.500000  0.558442  0.515185   SVM  phq2_result_binary   
6    {9}  0.500000  0.532258  0.526219   SVM  phq2_result_binary   
7   {11}  0.500000  0.666667  0.635437   SVM  phq2_result_binary   
8   {16}  0.500000  0.576923  0.576296   SVM  phq2_result_binary   
9   {17}  0.500000  0.726496  0.633229   SVM  phq2_result_binary   
10  {18}  0.500000  0.411765  0.392857   SVM  phq2_result_binary   

    correlation_threshold  
0                     0.9  
1                     0.9  
2          

Processing Models:   0%|          | 0/7 [00:00<?, ?it/s]

Running Decision Tree
X shape (590, 1956)
uid:  {1} 	 auc:  0.5589743589743591 	 f1_macro:  0.5508021390374331 	 accuracy:  0.5714285714285714
X shape (576, 1948)
uid:  {2} 	 auc:  0.4772727272727273 	 f1_macro:  0.4117647058823529 	 accuracy:  0.7
X shape (602, 1946)
uid:  {3} 	 auc:  0.5873015873015872 	 f1_macro:  0.37259615384615385 	 accuracy:  0.3793103448275862
X shape (580, 1947)
uid:  {5} 	 auc:  0.525 	 f1_macro:  0.5195195195195195 	 accuracy:  0.53125
X shape (580, 1937)
uid:  {6} 	 auc:  0.6833333333333333 	 f1_macro:  0.5404255319148936 	 accuracy:  0.5833333333333334
X shape (538, 1957)
uid:  {7} 	 auc:  0.4722222222222222 	 f1_macro:  0.4652777777777778 	 accuracy:  0.4805194805194805
X shape (552, 1946)
uid:  {9} 	 auc:  0.475 	 f1_macro:  0.46378378378378377 	 accuracy:  0.4838709677419355
X shape (528, 1959)
uid:  {11} 	 auc:  0.6129807692307693 	 f1_macro:  0.5558333333333333 	 accuracy:  0.5772357723577236
X shape (604, 1948)
uid:  {16} 	 auc:  0.7152777777777778 	

Processing Models:  14%|█▍        | 1/7 [01:19<07:56, 79.40s/it]

uid:  {18} 	 auc:  0.3472222222222222 	 f1_macro:  0.343859649122807 	 accuracy:  0.35294117647058826
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.558974  0.571429  0.550802  Decision Tree  phq2_result_binary   
1    {2}  0.477273  0.700000  0.411765  Decision Tree  phq2_result_binary   
2    {3}  0.587302  0.379310  0.372596  Decision Tree  phq2_result_binary   
3    {5}  0.525000  0.531250  0.519520  Decision Tree  phq2_result_binary   
4    {6}  0.683333  0.583333  0.540426  Decision Tree  phq2_result_binary   
5    {7}  0.472222  0.480519  0.465278  Decision Tree  phq2_result_binary   
6    {9}  0.475000  0.483871  0.463784  Decision Tree  phq2_result_binary   
7   {11}  0.612981  0.577236  0.555833  Decision Tree  phq2_result_binary   
8   {16}  0.715278  0.653846  0.649175  Decision Tree  phq2_result_binary   
9   {17}  0.642157  0.623932  0.526838  Decision Tree  phq2_result_binary   
10  {18}  0.347222  0.352941  0.343860  Decision Tr

Processing Models:  29%|██▊       | 2/7 [02:42<06:47, 81.46s/it]

uid:  {18} 	 auc:  0.3125 	 f1_macro:  0.43956043956043955 	 accuracy:  0.47058823529411764
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.433333  0.500000  0.387500  Random Forest  phq2_result_binary   
1    {2}  0.678977  0.733333  0.583333  Random Forest  phq2_result_binary   
2    {3}  0.257370  0.241379  0.226667  Random Forest  phq2_result_binary   
3    {5}  0.456250  0.531250  0.346939  Random Forest  phq2_result_binary   
4    {6}  0.722222  0.527778  0.509222  Random Forest  phq2_result_binary   
5    {7}  0.574444  0.636364  0.510889  Random Forest  phq2_result_binary   
6    {9}  0.599405  0.612903  0.592105  Random Forest  phq2_result_binary   
7   {11}  0.569883  0.569106  0.512233  Random Forest  phq2_result_binary   
8   {16}  0.659722  0.384615  0.350000  Random Forest  phq2_result_binary   
9   {17}  0.673203  0.367521  0.353881  Random Forest  phq2_result_binary   
10  {18}  0.312500  0.470588  0.439560  Random Forest  phq2_r

Processing Models:  43%|████▎     | 3/7 [04:25<06:06, 91.56s/it]

uid:  {18} 	 auc:  0.3611111111111111 	 f1_macro:  0.35294117647058826 	 accuracy:  0.35294117647058826
     uid       auc  accuracy  f1_macro     model               label  \
0    {1}  0.620513  0.535714  0.535121  AdaBoost  phq2_result_binary   
1    {2}  0.414773  0.433333  0.377289  AdaBoost  phq2_result_binary   
2    {3}  0.553288  0.758621  0.577083  AdaBoost  phq2_result_binary   
3    {5}  0.495833  0.593750  0.558855  AdaBoost  phq2_result_binary   
4    {6}  0.433333  0.333333  0.314286  AdaBoost  phq2_result_binary   
5    {7}  0.475185  0.558442  0.383710  AdaBoost  phq2_result_binary   
6    {9}  0.553571  0.661290  0.595527  AdaBoost  phq2_result_binary   
7   {11}  0.595467  0.585366  0.530640  AdaBoost  phq2_result_binary   
8   {16}  0.659722  0.576923  0.571214  AdaBoost  phq2_result_binary   
9   {17}  0.628758  0.427350  0.402196  AdaBoost  phq2_result_binary   
10  {18}  0.361111  0.352941  0.352941  AdaBoost  phq2_result_binary   

    correlation_threshold  
0  

Processing Models:  57%|█████▋    | 4/7 [06:06<04:45, 95.23s/it]

uid:  {18} 	 auc:  0.2777777777777778 	 f1_macro:  0.343859649122807 	 accuracy:  0.35294117647058826
     uid       auc  accuracy  f1_macro    model               label  \
0    {1}  0.358974  0.392857  0.282051  XGBoost  phq2_result_binary   
1    {2}  0.664773  0.700000  0.630643  XGBoost  phq2_result_binary   
2    {3}  0.399093  0.586207  0.369565  XGBoost  phq2_result_binary   
3    {5}  0.354167  0.500000  0.418182  XGBoost  phq2_result_binary   
4    {6}  0.727778  0.500000  0.474026  XGBoost  phq2_result_binary   
5    {7}  0.462963  0.636364  0.420430  XGBoost  phq2_result_binary   
6    {9}  0.490476  0.467742  0.424473  XGBoost  phq2_result_binary   
7   {11}  0.548077  0.552846  0.472515  XGBoost  phq2_result_binary   
8   {16}  0.729167  0.461538  0.458333  XGBoost  phq2_result_binary   
9   {17}  0.590850  0.307692  0.294499  XGBoost  phq2_result_binary   
10  {18}  0.277778  0.352941  0.343860  XGBoost  phq2_result_binary   

    correlation_threshold  
0                

Processing Models:  71%|███████▏  | 5/7 [07:24<02:57, 88.79s/it]

X shape (604, 1944)
uid:  {18} 	 auc:  0.8263888888888888 	 f1_macro:  0.8235294117647058 	 accuracy:  0.8235294117647058
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.471795  0.500000  0.387500   LDA  phq2_result_binary   
1    {2}  0.613636  0.433333  0.427609   LDA  phq2_result_binary   
2    {3}  0.208617  0.275862  0.231061   LDA  phq2_result_binary   
3    {5}  0.525000  0.531250  0.519520   LDA  phq2_result_binary   
4    {6}  0.300000  0.500000  0.333333   LDA  phq2_result_binary   
5    {7}  0.420370  0.454545  0.419181   LDA  phq2_result_binary   
6    {9}  0.442857  0.467742  0.443265   LDA  phq2_result_binary   
7   {11}  0.405220  0.333333  0.332230   LDA  phq2_result_binary   
8   {16}  0.576389  0.461538  0.458333   LDA  phq2_result_binary   
9   {17}  0.648039  0.683761  0.560284   LDA  phq2_result_binary   
10  {18}  0.826389  0.823529  0.823529   LDA  phq2_result_binary   

    correlation_threshold  
0                    0.95  
1    

Processing Models:  86%|████████▌ | 6/7 [08:40<01:24, 84.69s/it]

X shape (604, 1944)
uid:  {18} 	 auc:  0.5208333333333334 	 f1_macro:  0.5882352941176471 	 accuracy:  0.5882352941176471
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.651282  0.678571  0.678161   kNN  phq2_result_binary   
1    {2}  0.536932  0.533333  0.497608   kNN  phq2_result_binary   
2    {3}  0.843537  0.758621  0.697917   kNN  phq2_result_binary   
3    {5}  0.562500  0.593750  0.539313   kNN  phq2_result_binary   
4    {6}  0.786111  0.361111  0.356643   kNN  phq2_result_binary   
5    {7}  0.548519  0.545455  0.540494   kNN  phq2_result_binary   
6    {9}  0.563095  0.548387  0.536325   kNN  phq2_result_binary   
7   {11}  0.664492  0.682927  0.636398   kNN  phq2_result_binary   
8   {16}  0.548611  0.576923  0.571214   kNN  phq2_result_binary   
9   {17}  0.555556  0.555556  0.470035   kNN  phq2_result_binary   
10  {18}  0.520833  0.588235  0.588235   kNN  phq2_result_binary   

    correlation_threshold  
0                    0.95  
1    

Processing Models: 100%|██████████| 7/7 [10:04<00:00, 86.29s/it]


uid:  {18} 	 auc:  0.5 	 f1_macro:  0.39285714285714285 	 accuracy:  0.4117647058823529
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.500000  0.428571  0.401070   SVM  phq2_result_binary   
1    {2}  0.500000  0.600000  0.440994   SVM  phq2_result_binary   
2    {3}  0.131519  0.775862  0.551990   SVM  phq2_result_binary   
3    {5}  0.500000  0.562500  0.515152   SVM  phq2_result_binary   
4    {6}  0.500000  0.472222  0.461841   SVM  phq2_result_binary   
5    {7}  0.500000  0.558442  0.515185   SVM  phq2_result_binary   
6    {9}  0.500000  0.532258  0.526219   SVM  phq2_result_binary   
7   {11}  0.500000  0.666667  0.635437   SVM  phq2_result_binary   
8   {16}  0.500000  0.576923  0.576296   SVM  phq2_result_binary   
9   {17}  0.500000  0.726496  0.633229   SVM  phq2_result_binary   
10  {18}  0.500000  0.411765  0.392857   SVM  phq2_result_binary   

    correlation_threshold  
0                    0.95  
1                    0.95  
2          

Processing Models:   0%|          | 0/7 [00:00<?, ?it/s]

Running Decision Tree
X shape (590, 2629)
uid:  {1} 	 auc:  0.5435897435897435 	 f1_macro:  0.475 	 accuracy:  0.5714285714285714
X shape (576, 2631)
uid:  {2} 	 auc:  0.5625 	 f1_macro:  0.542483660130719 	 accuracy:  0.7666666666666667
X shape (602, 2630)
uid:  {3} 	 auc:  0.3764172335600907 	 f1_macro:  0.37589670014347204 	 accuracy:  0.4827586206896552
X shape (580, 2630)
uid:  {5} 	 auc:  0.5499999999999999 	 f1_macro:  0.5465587044534412 	 accuracy:  0.5625
X shape (580, 2601)
uid:  {6} 	 auc:  0.6333333333333333 	 f1_macro:  0.474025974025974 	 accuracy:  0.5
X shape (538, 2630)
uid:  {7} 	 auc:  0.4992592592592593 	 f1_macro:  0.48481729284611425 	 accuracy:  0.4935064935064935
X shape (552, 2631)
uid:  {9} 	 auc:  0.5392857142857144 	 f1_macro:  0.4967268918565069 	 accuracy:  0.5
X shape (528, 2628)
uid:  {11} 	 auc:  0.5420673076923077 	 f1_macro:  0.524390243902439 	 accuracy:  0.5772357723577236
X shape (604, 2629)
uid:  {16} 	 auc:  0.6875 	 f1_macro:  0.6130952380952381

Processing Models:  14%|█▍        | 1/7 [01:22<08:14, 82.36s/it]

uid:  {18} 	 auc:  0.35416666666666663 	 f1_macro:  0.35294117647058826 	 accuracy:  0.35294117647058826
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.543590  0.571429  0.475000  Decision Tree  phq2_result_binary   
1    {2}  0.562500  0.766667  0.542484  Decision Tree  phq2_result_binary   
2    {3}  0.376417  0.482759  0.375897  Decision Tree  phq2_result_binary   
3    {5}  0.550000  0.562500  0.546559  Decision Tree  phq2_result_binary   
4    {6}  0.633333  0.500000  0.474026  Decision Tree  phq2_result_binary   
5    {7}  0.499259  0.493506  0.484817  Decision Tree  phq2_result_binary   
6    {9}  0.539286  0.500000  0.496727  Decision Tree  phq2_result_binary   
7   {11}  0.542067  0.577236  0.524390  Decision Tree  phq2_result_binary   
8   {16}  0.687500  0.615385  0.613095  Decision Tree  phq2_result_binary   
9   {17}  0.602941  0.555556  0.478395  Decision Tree  phq2_result_binary   
10  {18}  0.354167  0.352941  0.352941  Decision

Processing Models:  29%|██▊       | 2/7 [02:47<06:59, 83.97s/it]

uid:  {18} 	 auc:  0.3055555555555556 	 f1_macro:  0.4631578947368421 	 accuracy:  0.47058823529411764
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.441026  0.464286  0.402560  Random Forest  phq2_result_binary   
1    {2}  0.644886  0.733333  0.682540  Random Forest  phq2_result_binary   
2    {3}  0.319728  0.224138  0.212670  Random Forest  phq2_result_binary   
3    {5}  0.427083  0.593750  0.434014  Random Forest  phq2_result_binary   
4    {6}  0.547222  0.444444  0.428571  Random Forest  phq2_result_binary   
5    {7}  0.440741  0.610390  0.379032  Random Forest  phq2_result_binary   
6    {9}  0.535714  0.596774  0.518484  Random Forest  phq2_result_binary   
7   {11}  0.599073  0.569106  0.512233  Random Forest  phq2_result_binary   
8   {16}  0.611111  0.346154  0.295056  Random Forest  phq2_result_binary   
9   {17}  0.712418  0.427350  0.406361  Random Forest  phq2_result_binary   
10  {18}  0.305556  0.470588  0.463158  Random For

Processing Models:  43%|████▎     | 3/7 [04:41<06:30, 97.61s/it]

uid:  {18} 	 auc:  0.3611111111111111 	 f1_macro:  0.4631578947368421 	 accuracy:  0.47058823529411764
     uid       auc  accuracy  f1_macro     model               label  \
0    {1}  0.505128  0.500000  0.455556  AdaBoost  phq2_result_binary   
1    {2}  0.454545  0.566667  0.499358  AdaBoost  phq2_result_binary   
2    {3}  0.369615  0.724138  0.551257  AdaBoost  phq2_result_binary   
3    {5}  0.479167  0.468750  0.442051  AdaBoost  phq2_result_binary   
4    {6}  0.444444  0.333333  0.325000  AdaBoost  phq2_result_binary   
5    {7}  0.656667  0.675325  0.617220  AdaBoost  phq2_result_binary   
6    {9}  0.635714  0.580645  0.564324  AdaBoost  phq2_result_binary   
7   {11}  0.466346  0.512195  0.474359  AdaBoost  phq2_result_binary   
8   {16}  0.854167  0.615385  0.615385  AdaBoost  phq2_result_binary   
9   {17}  0.505882  0.461538  0.411873  AdaBoost  phq2_result_binary   
10  {18}  0.361111  0.470588  0.463158  AdaBoost  phq2_result_binary   

    correlation_threshold  
0   

Processing Models:  57%|█████▋    | 4/7 [06:32<05:09, 103.09s/it]

uid:  {18} 	 auc:  0.2777777777777778 	 f1_macro:  0.43956043956043955 	 accuracy:  0.47058823529411764
     uid       auc  accuracy  f1_macro    model               label  \
0    {1}  0.435897  0.321429  0.299078  XGBoost  phq2_result_binary   
1    {2}  0.607955  0.666667  0.573864  XGBoost  phq2_result_binary   
2    {3}  0.680272  0.672414  0.563218  XGBoost  phq2_result_binary   
3    {5}  0.320833  0.500000  0.381643  XGBoost  phq2_result_binary   
4    {6}  0.638889  0.527778  0.496296  XGBoost  phq2_result_binary   
5    {7}  0.493333  0.636364  0.447746  XGBoost  phq2_result_binary   
6    {9}  0.540476  0.500000  0.489237  XGBoost  phq2_result_binary   
7   {11}  0.544643  0.617886  0.534728  XGBoost  phq2_result_binary   
8   {16}  0.638889  0.500000  0.499259  XGBoost  phq2_result_binary   
9   {17}  0.595425  0.401709  0.368056  XGBoost  phq2_result_binary   
10  {18}  0.277778  0.470588  0.439560  XGBoost  phq2_result_binary   

    correlation_threshold  
0              

Processing Models:  71%|███████▏  | 5/7 [07:52<03:09, 94.63s/it] 

uid:  {18} 	 auc:  0.5694444444444444 	 f1_macro:  0.5296442687747036 	 accuracy:  0.5882352941176471
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.500000  0.464286  0.317073   LDA  phq2_result_binary   
1    {2}  0.460227  0.266667  0.236111   LDA  phq2_result_binary   
2    {3}  0.238095  0.172414  0.168459   LDA  phq2_result_binary   
3    {5}  0.533333  0.562500  0.533333   LDA  phq2_result_binary   
4    {6}  0.450000  0.416667  0.377778   LDA  phq2_result_binary   
5    {7}  0.514074  0.506494  0.499658   LDA  phq2_result_binary   
6    {9}  0.426190  0.435484  0.417137   LDA  phq2_result_binary   
7   {11}  0.440247  0.349593  0.349206   LDA  phq2_result_binary   
8   {16}  0.493056  0.346154  0.321045   LDA  phq2_result_binary   
9   {17}  0.646732  0.726496  0.580645   LDA  phq2_result_binary   
10  {18}  0.569444  0.588235  0.529644   LDA  phq2_result_binary   

    correlation_threshold  
0                     2.0  
1                     2.0

Processing Models:  86%|████████▌ | 6/7 [09:10<01:29, 89.07s/it]

X shape (604, 2630)
uid:  {18} 	 auc:  0.4652777777777778 	 f1_macro:  0.5277777777777778 	 accuracy:  0.5294117647058824
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.651282  0.678571  0.678161   kNN  phq2_result_binary   
1    {2}  0.536932  0.533333  0.497608   kNN  phq2_result_binary   
2    {3}  0.859410  0.758621  0.697917   kNN  phq2_result_binary   
3    {5}  0.562500  0.593750  0.539313   kNN  phq2_result_binary   
4    {6}  0.788889  0.361111  0.356643   kNN  phq2_result_binary   
5    {7}  0.551111  0.545455  0.540494   kNN  phq2_result_binary   
6    {9}  0.564286  0.548387  0.536325   kNN  phq2_result_binary   
7   {11}  0.674966  0.682927  0.636398   kNN  phq2_result_binary   
8   {16}  0.548611  0.576923  0.571214   kNN  phq2_result_binary   
9   {17}  0.555229  0.555556  0.470035   kNN  phq2_result_binary   
10  {18}  0.465278  0.529412  0.527778   kNN  phq2_result_binary   

    correlation_threshold  
0                     2.0  
1    

Processing Models: 100%|██████████| 7/7 [10:39<00:00, 91.33s/it]


uid:  {18} 	 auc:  0.5 	 f1_macro:  0.39285714285714285 	 accuracy:  0.4117647058823529
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.500000  0.428571  0.401070   SVM  phq2_result_binary   
1    {2}  0.500000  0.600000  0.440994   SVM  phq2_result_binary   
2    {3}  0.131519  0.775862  0.551990   SVM  phq2_result_binary   
3    {5}  0.500000  0.562500  0.515152   SVM  phq2_result_binary   
4    {6}  0.500000  0.472222  0.461841   SVM  phq2_result_binary   
5    {7}  0.500000  0.558442  0.515185   SVM  phq2_result_binary   
6    {9}  0.500000  0.532258  0.526219   SVM  phq2_result_binary   
7   {11}  0.500000  0.666667  0.635437   SVM  phq2_result_binary   
8   {16}  0.500000  0.576923  0.576296   SVM  phq2_result_binary   
9   {17}  0.500000  0.726496  0.633229   SVM  phq2_result_binary   
10  {18}  0.500000  0.411765  0.392857   SVM  phq2_result_binary   

    correlation_threshold  
0                     2.0  
1                     2.0  
2          

Processing Models:   0%|          | 0/7 [00:00<?, ?it/s]

Running Decision Tree
X shape (1012, 1023)
uid:  {1} 	 auc:  0.5714285714285714 	 f1_macro:  0.5692307692307692 	 accuracy:  0.5714285714285714
X shape (1000, 1008)
uid:  {2} 	 auc:  0.47500000000000003 	 f1_macro:  0.475 	 accuracy:  0.5333333333333333
X shape (1024, 1016)
uid:  {3} 	 auc:  0.385 	 f1_macro:  0.37589670014347204 	 accuracy:  0.4827586206896552
X shape (796, 1002)
uid:  {4} 	 auc:  0.4668567355666429 	 f1_macro:  0.4683333333333333 	 accuracy:  0.696551724137931
X shape (1006, 1018)
uid:  {5} 	 auc:  0.5098039215686274 	 f1_macro:  0.4920634920634921 	 accuracy:  0.5
X shape (976, 1010)
uid:  {6} 	 auc:  0.4375 	 f1_macro:  0.42245989304812837 	 accuracy:  0.5833333333333334
X shape (940, 1013)
uid:  {7} 	 auc:  0.4907407407407407 	 f1_macro:  0.4808988764044944 	 accuracy:  0.4935064935064935
X shape (902, 1028)
uid:  {8} 	 auc:  0.38405797101449274 	 f1_macro:  0.4076923076923077 	 accuracy:  0.6883116883116883
X shape (982, 1014)
uid:  {9} 	 auc:  0.5726227795193313

Processing Models:  14%|█▍        | 1/7 [02:16<13:36, 136.12s/it]

uid:  {18} 	 auc:  0.7357142857142858 	 f1_macro:  0.7424242424242424 	 accuracy:  0.7647058823529411
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.571429  0.571429  0.569231  Decision Tree  gad2_result_binary   
1    {2}  0.475000  0.533333  0.475000  Decision Tree  gad2_result_binary   
2    {3}  0.385000  0.482759  0.375897  Decision Tree  gad2_result_binary   
3    {4}  0.466857  0.696552  0.468333  Decision Tree  gad2_result_binary   
4    {5}  0.509804  0.500000  0.492063  Decision Tree  gad2_result_binary   
5    {6}  0.437500  0.583333  0.422460  Decision Tree  gad2_result_binary   
6    {7}  0.490741  0.493506  0.480899  Decision Tree  gad2_result_binary   
7    {8}  0.384058  0.688312  0.407692  Decision Tree  gad2_result_binary   
8    {9}  0.572623  0.580645  0.569444  Decision Tree  gad2_result_binary   
9   {10}  0.535948  0.622642  0.535088  Decision Tree  gad2_result_binary   
10  {11}  0.524419  0.577236  0.524390  Decision Tr

Processing Models:  29%|██▊       | 2/7 [04:37<11:36, 139.21s/it]

uid:  {18} 	 auc:  0.3928571428571428 	 f1_macro:  0.315018315018315 	 accuracy:  0.35294117647058826
     uid       auc  accuracy  f1_macro          model               label  \
0    {1}  0.216837  0.321429  0.299078  Random Forest  gad2_result_binary   
1    {2}  0.595000  0.500000  0.485714  Random Forest  gad2_result_binary   
2    {3}  0.520000  0.362069  0.352640  Random Forest  gad2_result_binary   
3    {4}  0.444939  0.772414  0.549477  Random Forest  gad2_result_binary   
4    {5}  0.590196  0.562500  0.458937  Random Forest  gad2_result_binary   
5    {6}  0.316406  0.444444  0.307692  Random Forest  gad2_result_binary   
6    {7}  0.499259  0.493506  0.416327  Random Forest  gad2_result_binary   
7    {8}  0.656703  0.844156  0.457746  Random Forest  gad2_result_binary   
8    {9}  0.657262  0.629032  0.626604  Random Forest  gad2_result_binary   
9   {10}  0.448529  0.433962  0.428879  Random Forest  gad2_result_binary   
10  {11}  0.720203  0.617886  0.617482  Random Fore

Processing Models:  43%|████▎     | 3/7 [07:20<09:59, 149.89s/it]

uid:  {18} 	 auc:  0.6857142857142857 	 f1_macro:  0.47058823529411764 	 accuracy:  0.47058823529411764
     uid       auc  accuracy  f1_macro     model               label  \
0    {1}  0.352041  0.500000  0.387500  AdaBoost  gad2_result_binary   
1    {2}  0.575000  0.466667  0.464286  AdaBoost  gad2_result_binary   
2    {3}  0.145000  0.155172  0.154921  AdaBoost  gad2_result_binary   
3    {4}  0.432288  0.634483  0.449853  AdaBoost  gad2_result_binary   
4    {5}  0.784314  0.656250  0.626723  AdaBoost  gad2_result_binary   
5    {6}  0.234375  0.305556  0.259259  AdaBoost  gad2_result_binary   
6    {7}  0.497778  0.480519  0.473324  AdaBoost  gad2_result_binary   
7    {8}  0.124094  0.818182  0.450000  AdaBoost  gad2_result_binary   
8    {9}  0.597179  0.532258  0.517056  AdaBoost  gad2_result_binary   
9   {10}  0.672386  0.603774  0.478200  AdaBoost  gad2_result_binary   
10  {11}  0.563808  0.634146  0.568421  AdaBoost  gad2_result_binary   
11  {16}  0.763636  0.615385  0.

Processing Models:  57%|█████▋    | 4/7 [09:53<07:33, 151.11s/it]

uid:  {18} 	 auc:  0.6285714285714286 	 f1_macro:  0.5096153846153846 	 accuracy:  0.6470588235294118
     uid       auc  accuracy  f1_macro    model               label  \
0    {1}  0.336735  0.500000  0.455556  XGBoost  gad2_result_binary   
1    {2}  0.425000  0.500000  0.494949  XGBoost  gad2_result_binary   
2    {3}  0.420000  0.206897  0.206897  XGBoost  gad2_result_binary   
3    {4}  0.419815  0.710345  0.437361  XGBoost  gad2_result_binary   
4    {5}  0.670588  0.625000  0.623529  XGBoost  gad2_result_binary   
5    {6}  0.304688  0.444444  0.345455  XGBoost  gad2_result_binary   
6    {7}  0.424815  0.597403  0.484783  XGBoost  gad2_result_binary   
7    {8}  0.416667  0.779221  0.437956  XGBoost  gad2_result_binary   
8    {9}  0.578892  0.467742  0.443265  XGBoost  gad2_result_binary   
9   {10}  0.624183  0.735849  0.660256  XGBoost  gad2_result_binary   
10  {11}  0.704360  0.626016  0.573946  XGBoost  gad2_result_binary   
11  {16}  0.806061  0.576923  0.571214  XGBoos

Processing Models:  71%|███████▏  | 5/7 [12:05<04:48, 144.44s/it]

X shape (1020, 1019)
uid:  {18} 	 auc:  0.7071428571428571 	 f1_macro:  0.5641025641025641 	 accuracy:  0.5882352941176471
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.497449  0.500000  0.333333   LDA  gad2_result_binary   
1    {2}  0.515000  0.533333  0.497608   LDA  gad2_result_binary   
2    {3}  0.280000  0.586207  0.369565   LDA  gad2_result_binary   
3    {4}  0.329829  0.475862  0.380063   LDA  gad2_result_binary   
4    {5}  0.680392  0.625000  0.611336   LDA  gad2_result_binary   
5    {6}  0.437500  0.583333  0.422460   LDA  gad2_result_binary   
6    {7}  0.516296  0.454545  0.454453   LDA  gad2_result_binary   
7    {8}  0.433877  0.246753  0.236320   LDA  gad2_result_binary   
8    {9}  0.568443  0.580645  0.580208   LDA  gad2_result_binary   
9   {10}  0.650327  0.584906  0.583571   LDA  gad2_result_binary   
10  {11}  0.562500  0.487805  0.486583   LDA  gad2_result_binary   
11  {16}  0.621212  0.653846  0.626794   LDA  gad2_result_bin

Processing Models:  86%|████████▌ | 6/7 [14:16<02:19, 139.86s/it]

X shape (1020, 1019)
uid:  {18} 	 auc:  0.48571428571428565 	 f1_macro:  0.5296442687747036 	 accuracy:  0.5882352941176471
     uid       auc  accuracy  f1_macro model               label  \
0    {1}  0.571429  0.607143  0.606641   kNN  gad2_result_binary   
1    {2}  0.425000  0.533333  0.444444   kNN  gad2_result_binary   
2    {3}  0.667500  0.637931  0.517241   kNN  gad2_result_binary   
3    {4}  0.553635  0.544828  0.474638   kNN  gad2_result_binary   
4    {5}  0.562745  0.625000  0.611336   kNN  gad2_result_binary   
5    {6}  0.222656  0.361111  0.295319   kNN  gad2_result_binary   
6    {7}  0.578519  0.584416  0.572222   kNN  gad2_result_binary   
7    {8}  0.501812  0.545455  0.450561   kNN  gad2_result_binary   
8    {9}  0.622780  0.580645  0.578892   kNN  gad2_result_binary   
9   {10}  0.700980  0.660377  0.645089   kNN  gad2_result_binary   
10  {11}  0.663808  0.650407  0.617653   kNN  gad2_result_binary   
11  {16}  0.563636  0.576923  0.576296   kNN  gad2_result_bi