# Settings

## Constants

In [1]:
import os

PATH_DATA = './Dataset'
sensor_data = ['UserInfo.csv', 'Service.csv', 'ContextualFactor.csv', 'Interruptibility.csv']

# Utility Functions

In [2]:
import pandas as pd
import numpy as np
from collections import defaultdict
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import LeaveOneGroupOut
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.svm import SVC
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, f1_score


# Load the Dataset into Dataframe

In [3]:
dataframes = {
    filename: pd.read_csv(os.path.join(PATH_DATA, filename)).reset_index(drop=True)
    for filename in sensor_data
}
dfService = dataframes['Service.csv']
dfContextualFactor = dataframes['ContextualFactor.csv']
dfUserInfo = dataframes['UserInfo.csv']
dfInterruptibility = dataframes['Interruptibility.csv']

# Preprocessing

In [None]:
# Select specific columns from dfContextualFactor, dfService, and dfUserInfo DataFrames
# Combine relevant columns to create a unified dataset
dfContextualFactor_selected_columns=dfContextualFactor[['uid','sid','activity1','activity2','activity3','userLocation','userPosition',]]
dfService_selected_columns=dfService[['weekOfExperiment','dayOfWeek','startTime', 'activityInquiry','availabilityInquiry','speechShadowing_1','speechShadowing_2','speechShadowing_3','speechShadowing_4','speechShadowing_5','continue-to-nextInquiry_1','continue-to-nextInquiry_2','continue-to-nextInquiry_3','continue-to-nextInquiry_4','endTime','endType']]
dfInterruptibility_selected_columns=dfInterruptibility[['SHORT_INTERACTION_interruptibility', 'LONG_INTERACTION_interruptibility']]

## Create binary columns for interaction types
## SHORT_INTERACTION: True if availabilityInquiry is not NaN
# dfCombinedAll['SHORT_INTERACTION_interruptibility'] = dfCombinedAll['availabilityInquiry'].notna()
## LONG_INTERACTION: True if continue-to-nextInquiry_1 is not NaN
# dfCombinedAll['LONG_INTERACTION_interruptibility'] = dfCombinedAll['continue-to-nextInquiry_1'].notna()

# ! For those who want to redefine LONG_INTERACTION with thresholds longer than 3 minutes,
# you can use the following columns:
# 5 minutes  => use continue-to-nextInquiry_2
# 7 minutes  => use continue-to-nextInquiry_3
# 9 minutes  => use continue-to-nextInquiry_4


dfCombinedAll=pd.concat([dfContextualFactor_selected_columns, dfService_selected_columns,dfInterruptibility_selected_columns], axis=1)

In [None]:
# Concatenate and merge the all columns from dfUserInfo (dfUserInfo.csv)
dfUserInfo_selected_columns = dfUserInfo[['uid', 'homeType', 'speakerLocation', 'speakerPosition']]
dfCombinedAll = pd.merge(dfCombinedAll, dfUserInfo_selected_columns, on='uid', how='left')

## Position Processing

In [None]:
# Define function to calculate proximity between user and speaker
def calculate_proximity(row):
    # Return 0 if user and speaker are in different rooms
    if row['userLocation'] != row['speakerLocation']:
        return 0
    # If in the same room
    elif row['userPosition'] == row['speakerPosition']:
        return 2  # Same position
    else:
        return 1  # Different positions (including missing position)

# Apply proximity calculation to create a new 'proximity' column
dfCombinedAll['proximity'] = dfCombinedAll.apply(calculate_proximity, axis=1)

print(dfCombinedAll[['userLocation', 'userPosition', 'speakerLocation', 'speakerPosition', 'proximity']].head(10))


      userRoom userPosition speakerRoom speakerPosition  proximity
0     Bed Room          Bed    Bed Room            Desk          1
1    Rest Room          NaN    Bed Room            Desk          0
2  Living Room          NaN    Bed Room            Desk          0
3  Living Room          NaN    Bed Room            Desk          0
4     Bed Room          Bed    Bed Room            Desk          1
5     Bed Room          Bed    Bed Room            Desk          1
6     Bed Room          Bed    Bed Room            Desk          1
7     Bed Room         Desk    Bed Room            Desk          2
8     Bed Room          Bed    Bed Room            Desk          1
9     Bed Room          Bed    Bed Room            Desk          1


## Activity and Time Processing

In [7]:
# Process activity columns for one-hot encoding
activity_cols = ['activity1', 'activity2', 'activity3']
df_activity = dfContextualFactor[activity_cols].copy()

# Get unique activities across all activity columns, excluding NaN
all_unique_activities = pd.unique(df_activity.values.ravel())
all_unique_activities = [x for x in all_unique_activities if pd.notna(x)]

# Create a DataFrame for one-hot encoding of activities
dfActivity_one_hot_encoding = pd.DataFrame(0, index=df_activity.index, columns=['act_' + str(val) for val in all_unique_activities])

# Perform one-hot encoding for each activity column
for col in activity_cols:
    for val in all_unique_activities:
        dfActivity_one_hot_encoding['act_' + str(val)] |= (df_activity[col] == val).astype(int)

# Concatenate one-hot encoded activity columns to dfCombinedAll
dfActivity_one_hot_encoding
dfCombinedAll = pd.concat([dfCombinedAll, dfActivity_one_hot_encoding], axis=1)

In [8]:
# Convert startTime to datetime and extract total minutes since midnight
dfCombinedAll['startTime'] = pd.to_datetime(dfCombinedAll['startTime'], format='%H:%M:%S', errors='coerce')
dfCombinedAll['minute'] = dfCombinedAll['startTime'].dt.hour * 60 + dfCombinedAll['startTime'].dt.minute

# Map days of the week to numerical values (MON=0, TUE=1, ..., SUN=6)
day_map = {'MON': 0, 'TUE': 1, 'WED': 2, 'THU': 3, 'FRI': 4, 'SAT': 5, 'SUN': 6}
dfCombinedAll['dayOfWeek'] = dfCombinedAll['dayOfWeek'].map(day_map)

# Bin minutes into 30-minute intervals for temporal analysis
dfCombinedAll['minute_bin'] = (dfCombinedAll['minute'] // 30).astype(int)

In [9]:
dfCombinedAll.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2901 entries, 0 to 2900
Data columns (total 43 columns):
 #   Column                                    Non-Null Count  Dtype         
---  ------                                    --------------  -----         
 0   uid                                       2901 non-null   int64         
 1   sid                                       2901 non-null   int64         
 2   activity1                                 2901 non-null   object        
 3   activity2                                 112 non-null    object        
 4   activity3                                 3 non-null      object        
 5   userRoom                                  2901 non-null   object        
 6   userPosition                              2414 non-null   object        
 7   weekOfExperiment                          2901 non-null   int64         
 8   dayOfWeek                                 2901 non-null   int64         
 9   startTime                     

# Feature Extraction

In [None]:
# Select features for response prediction
dfFeatresForResponse = dfCombinedAll[['uid',
    'act_Taking a Nap / Sleeping','act_Hygiene','act_Eating','act_Using Media','act_Social Interaction',
    'act_Returning from Outside / Other Rooms','act_Studying / Working','act_Others','act_House Chores',
    'act_Self Caring','act_Visiting Outside / Other Rooms','act_Resting',
    'homeType','userLocation','userPosition','speakerLocation','speakerPosition',
    'minute_bin','dayOfWeek','SHORT_INTERACTION_interruptibility','LONG_INTERACTION_interruptibility']].copy()

# Encode categorical columns using LabelEncoder
categorical_columns = ['homeType', 'userLocation', 'userPosition', 'speakerLocation', 'speakerPosition', 'minute_bin']
label_encoders = defaultdict(LabelEncoder)

# Apply label encoding to each categorical column
for col in categorical_columns:
    dfFeatresForResponse[col] = label_encoders[col].fit_transform(dfFeatresForResponse[col])

# Create a copy of the encoded data for further processing
encoded_data = dfFeatresForResponse.copy()


encoded_data.info()
encoded_data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2901 entries, 0 to 2900
Data columns (total 22 columns):
 #   Column                                    Non-Null Count  Dtype
---  ------                                    --------------  -----
 0   uid                                       2901 non-null   int64
 1   act_Taking a Nap / Sleeping               2901 non-null   int64
 2   act_Hygiene                               2901 non-null   int64
 3   act_Eating                                2901 non-null   int64
 4   act_Using Media                           2901 non-null   int64
 5   act_Social Interaction                    2901 non-null   int64
 6   act_Returning from Outside / Other Rooms  2901 non-null   int64
 7   act_Studying / Working                    2901 non-null   int64
 8   act_Others                                2901 non-null   int64
 9   act_House Chores                          2901 non-null   int64
 10  act_Self Caring                           2901 non-null   in

Unnamed: 0,uid,act_Taking a Nap / Sleeping,act_Hygiene,act_Eating,act_Using Media,act_Social Interaction,act_Returning from Outside / Other Rooms,act_Studying / Working,act_Others,act_House Chores,...,act_Resting,settingType,userRoom,userPosition,speakerRoom,speakerPosition,minute_bin,dayOfWeek,SHORT_INTERACTION_availability,LONG_INTERACTION_availability
0,1,1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,1,12,0,True,False
1,1,0,1,0,0,0,0,0,0,0,...,0,0,7,3,0,1,15,0,False,False
2,1,0,0,1,0,0,0,0,0,0,...,0,0,5,3,0,1,12,1,False,False
3,1,0,0,1,0,0,0,0,0,0,...,0,0,5,3,0,1,13,1,False,False
4,1,1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,1,18,1,True,False


## Label: SHORT_INTERACTION

In [None]:
# Label distribution before balancing
print(encoded_data['SHORT_INTERACTION_interruptibility'].value_counts())

SHORT_INTERACTION_availability
True     2158
False     743
Name: count, dtype: int64


### Model Building and LOSO CV

In [None]:
# Prepare features (X) and target variables (y) for SHORT_INTERACTION
X = encoded_data.drop(columns=['SHORT_INTERACTION_interruptibility', 'LONG_INTERACTION_interruptibility', 'uid'], axis=1) # Drop target and unrelated columns
y = encoded_data['SHORT_INTERACTION_interruptibility'] # Target variable
groups = encoded_data['uid'] # Group by user ID for Leave-One-Group-Out CV

# Initialize Leave-One-Group-Out cross-validator
logo = LeaveOneGroupOut()

# Initialize SMOTE for oversampling to handle class imbalance
smote = SMOTE(random_state=42)

# Define models to evaluate
models1 = {
    'Random Forest': RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42),
    'XGBoost': XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42),
    'LightGBM': LGBMClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42),
    'CatBoost': CatBoostClassifier(iterations=100, depth=5, learning_rate=0.1, loss_function='Logloss', cat_features=[0], random_seed=42),
    'SVM': SVC(random_state=42),
    'Dummy': DummyClassifier(strategy="stratified", random_state=42)
}

results1_logo = {}

# Loop over each model
for model_name, model1 in models1.items():
    accuracies = []
    f1_scores = []

    # Loop over each fold in Leave-One-Group-Out cross-validation
    for i, (train_index, test_index) in enumerate(logo.split(X, y, groups)):
        # Split the data into training and testing sets for the current fold
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Apply SMOTE to the training data to balance class distribution
        X_train_oversampled, y_train_oversampled = smote.fit_resample(X_train, y_train)

        # Train the model on the oversampled training data
        model1.fit(X_train_oversampled, y_train_oversampled.ravel())

        # Predict the target on the test data
        y_pred = model1.predict(X_test)

        # Evaluate the prediction
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='macro')

        accuracies.append(accuracy)
        f1_scores.append(f1)
    # Compute average accuracy and F1-score across all folds
    avg_accuracy = sum(accuracies) / len(accuracies)
    avg_f1_score = sum(f1_scores) / len(f1_scores)

    results1_logo[model_name] = {
        'Average Accuracy': avg_accuracy,
        'Average F1-Score (macro)': avg_f1_score
    }


  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.r

[LightGBM] [Info] Number of positive: 2104, number of negative: 2104
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000308 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 4208, number of used features: 18
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 2120, number of negative: 2120
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000293 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 95
[LightGBM] [Info] Number of data points in the train set: 4240, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[Lig

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 2088, number of negative: 2088
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000267 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 4176, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 2128, number of negative: 2128
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000288 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 94
[LightGBM] [Info] Number of data points in the train set: 4256, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[Lig

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 2081, number of negative: 2081
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000284 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 95
[LightGBM] [Info] Number of data points in the train set: 4162, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 2102, number of negative: 2102
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000272 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 95
[LightGBM] [Info] Number of data points in the train set: 4204, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[Lig

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 2056, number of negative: 2056
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000300 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 95
[LightGBM] [Info] Number of data points in the train set: 4112, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 2039, number of negative: 2039
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000359 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 95
[LightGBM] [Info] Number of data points in the train set: 4078, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[Lig

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 2111, number of negative: 2111
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000303 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 4222, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 2006, number of negative: 2006
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000286 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 95
[LightGBM] [Info] Number of data points in the train set: 4012, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[Lig

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 2015, number of negative: 2015
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000296 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 4030, number of used features: 18
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 2107, number of negative: 2107
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000346 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 95
[LightGBM] [Info] Number of data points in the train set: 4214, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6534294	total: 140ms	remaining: 13.9s
1:	learn: 0.6226364	total: 143ms	remaining: 6.98s
2:	learn: 0.5924299	total: 144ms	remaining: 4.66s
3:	learn: 0.5712169	total: 146ms	remaining: 3.49s
4:	learn: 0.5494688	total: 147ms	remaining: 2.79s
5:	learn: 0.5314734	total: 148ms	remaining: 2.32s
6:	learn: 0.5132233	total: 149ms	remaining: 1.98s
7:	learn: 0.5011659	total: 150ms	remaining: 1.73s
8:	learn: 0.4863488	total: 151ms	remaining: 1.53s
9:	learn: 0.4788633	total: 152ms	remaining: 1.37s
10:	learn: 0.4671365	total: 154ms	remaining: 1.24s
11:	learn: 0.4569192	total: 155ms	remaining: 1.14s
12:	learn: 0.4485407	total: 156ms	remaining: 1.04s
13:	learn: 0.4410974	total: 157ms	remaining: 965ms
14:	learn: 0.4338743	total: 158ms	remaining: 897ms
15:	learn: 0.4285682	total: 159ms	remaining: 837ms
16:	learn: 0.4235378	total: 161ms	remaining: 784ms
17:	learn: 0.4193825	total: 162ms	remaining: 738ms
18:	learn: 0.4153372	total: 163ms	remaining: 697ms
19:	learn: 0.4119561	total: 165ms	remaini

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


47:	learn: 0.3733678	total: 44.4ms	remaining: 48.1ms
48:	learn: 0.3725615	total: 45.3ms	remaining: 47.2ms
49:	learn: 0.3715212	total: 46.3ms	remaining: 46.3ms
50:	learn: 0.3704157	total: 47.3ms	remaining: 45.4ms
51:	learn: 0.3697877	total: 48.1ms	remaining: 44.4ms
52:	learn: 0.3688987	total: 48.9ms	remaining: 43.4ms
53:	learn: 0.3683775	total: 49.8ms	remaining: 42.4ms
54:	learn: 0.3679278	total: 50.6ms	remaining: 41.4ms
55:	learn: 0.3669231	total: 51.3ms	remaining: 40.3ms
56:	learn: 0.3660818	total: 52.1ms	remaining: 39.3ms
57:	learn: 0.3653407	total: 52.9ms	remaining: 38.3ms
58:	learn: 0.3644819	total: 53.7ms	remaining: 37.3ms
59:	learn: 0.3637484	total: 54.4ms	remaining: 36.3ms
60:	learn: 0.3630701	total: 55.3ms	remaining: 35.4ms
61:	learn: 0.3621909	total: 56.2ms	remaining: 34.5ms
62:	learn: 0.3614099	total: 57ms	remaining: 33.5ms
63:	learn: 0.3605732	total: 57.8ms	remaining: 32.5ms
64:	learn: 0.3596290	total: 58.7ms	remaining: 31.6ms
65:	learn: 0.3585669	total: 59.5ms	remaining: 30

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6541271	total: 1ms	remaining: 99.1ms
1:	learn: 0.6225486	total: 1.95ms	remaining: 95.4ms
2:	learn: 0.5942773	total: 2.82ms	remaining: 91.1ms
3:	learn: 0.5702577	total: 3.81ms	remaining: 91.5ms
4:	learn: 0.5481456	total: 4.73ms	remaining: 89.8ms
5:	learn: 0.5305464	total: 5.51ms	remaining: 86.3ms
6:	learn: 0.5161991	total: 6.43ms	remaining: 85.4ms
7:	learn: 0.5031961	total: 7.37ms	remaining: 84.8ms
8:	learn: 0.4886982	total: 8.26ms	remaining: 83.5ms
9:	learn: 0.4785169	total: 9.14ms	remaining: 82.2ms
10:	learn: 0.4649798	total: 9.99ms	remaining: 80.8ms
11:	learn: 0.4544028	total: 10.9ms	remaining: 79.8ms
12:	learn: 0.4441123	total: 11.8ms	remaining: 78.7ms
13:	learn: 0.4362772	total: 12.7ms	remaining: 77.7ms
14:	learn: 0.4300315	total: 13.5ms	remaining: 76.7ms
15:	learn: 0.4225105	total: 14.4ms	remaining: 75.8ms
16:	learn: 0.4156728	total: 15.3ms	remaining: 74.7ms
17:	learn: 0.4103054	total: 16.2ms	remaining: 73.8ms
18:	learn: 0.4055727	total: 17.1ms	remaining: 72.8ms
19:	le

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6539688	total: 1.19ms	remaining: 118ms
1:	learn: 0.6201180	total: 2.28ms	remaining: 112ms
2:	learn: 0.5921067	total: 3.32ms	remaining: 108ms
3:	learn: 0.5642549	total: 4.28ms	remaining: 103ms
4:	learn: 0.5409878	total: 5.29ms	remaining: 101ms
5:	learn: 0.5257600	total: 6.29ms	remaining: 98.5ms
6:	learn: 0.5103737	total: 7.69ms	remaining: 102ms
7:	learn: 0.4988321	total: 8.96ms	remaining: 103ms
8:	learn: 0.4836065	total: 10.3ms	remaining: 104ms
9:	learn: 0.4718635	total: 11.4ms	remaining: 102ms
10:	learn: 0.4604390	total: 12.3ms	remaining: 99.8ms
11:	learn: 0.4530065	total: 13.3ms	remaining: 97.2ms
12:	learn: 0.4464572	total: 14.1ms	remaining: 94.3ms
13:	learn: 0.4391641	total: 14.9ms	remaining: 91.5ms
14:	learn: 0.4322036	total: 15.8ms	remaining: 89.5ms
15:	learn: 0.4276202	total: 16.7ms	remaining: 87.7ms
16:	learn: 0.4227952	total: 17.5ms	remaining: 85.7ms
17:	learn: 0.4191438	total: 18.4ms	remaining: 83.9ms
18:	learn: 0.4162484	total: 19.4ms	remaining: 82.8ms
19:	learn: 0

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


47:	learn: 0.3658804	total: 41ms	remaining: 44.4ms
48:	learn: 0.3649974	total: 42.1ms	remaining: 43.8ms
49:	learn: 0.3644381	total: 43ms	remaining: 43ms
50:	learn: 0.3637155	total: 43.9ms	remaining: 42.2ms
51:	learn: 0.3631126	total: 44.8ms	remaining: 41.3ms
52:	learn: 0.3619170	total: 45.7ms	remaining: 40.5ms
53:	learn: 0.3613749	total: 46.6ms	remaining: 39.7ms
54:	learn: 0.3597934	total: 47.5ms	remaining: 38.9ms
55:	learn: 0.3591309	total: 48.4ms	remaining: 38.1ms
56:	learn: 0.3582921	total: 49.4ms	remaining: 37.2ms
57:	learn: 0.3577606	total: 50.3ms	remaining: 36.4ms
58:	learn: 0.3573066	total: 51.1ms	remaining: 35.5ms
59:	learn: 0.3565848	total: 51.9ms	remaining: 34.6ms
60:	learn: 0.3557964	total: 52.7ms	remaining: 33.7ms
61:	learn: 0.3550575	total: 53.5ms	remaining: 32.8ms
62:	learn: 0.3546774	total: 54.4ms	remaining: 31.9ms
63:	learn: 0.3540003	total: 55.3ms	remaining: 31.1ms
64:	learn: 0.3535265	total: 56.1ms	remaining: 30.2ms
65:	learn: 0.3529900	total: 57ms	remaining: 29.3ms
6

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6511075	total: 993us	remaining: 98.4ms
1:	learn: 0.6164987	total: 1.85ms	remaining: 90.7ms
2:	learn: 0.5851852	total: 2.71ms	remaining: 87.5ms
3:	learn: 0.5618373	total: 3.54ms	remaining: 85ms
4:	learn: 0.5419235	total: 4.46ms	remaining: 84.8ms
5:	learn: 0.5235975	total: 5.37ms	remaining: 84.2ms
6:	learn: 0.5088547	total: 6.2ms	remaining: 82.4ms
7:	learn: 0.4914763	total: 7.08ms	remaining: 81.5ms
8:	learn: 0.4752153	total: 7.99ms	remaining: 80.8ms
9:	learn: 0.4616958	total: 8.76ms	remaining: 78.8ms
10:	learn: 0.4510291	total: 9.63ms	remaining: 77.9ms
11:	learn: 0.4418446	total: 10.4ms	remaining: 76.5ms
12:	learn: 0.4330143	total: 11.2ms	remaining: 75.2ms
13:	learn: 0.4247581	total: 12.1ms	remaining: 74.2ms
14:	learn: 0.4200676	total: 13ms	remaining: 73.6ms
15:	learn: 0.4156013	total: 13.8ms	remaining: 72.7ms
16:	learn: 0.4095088	total: 14.7ms	remaining: 71.8ms
17:	learn: 0.4038418	total: 15.7ms	remaining: 71.3ms
18:	learn: 0.3996332	total: 16.6ms	remaining: 70.6ms
19:	learn

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6552908	total: 1.04ms	remaining: 103ms
1:	learn: 0.6185993	total: 1.95ms	remaining: 95.7ms
2:	learn: 0.5893813	total: 2.79ms	remaining: 90.3ms
3:	learn: 0.5637781	total: 3.6ms	remaining: 86.5ms
4:	learn: 0.5442358	total: 4.51ms	remaining: 85.6ms
5:	learn: 0.5255362	total: 5.3ms	remaining: 83ms
6:	learn: 0.5110016	total: 6.09ms	remaining: 80.9ms
7:	learn: 0.5000416	total: 6.92ms	remaining: 79.6ms
8:	learn: 0.4883284	total: 7.78ms	remaining: 78.7ms
9:	learn: 0.4792264	total: 8.69ms	remaining: 78.2ms
10:	learn: 0.4706169	total: 9.58ms	remaining: 77.5ms
11:	learn: 0.4639315	total: 10.5ms	remaining: 76.7ms
12:	learn: 0.4555868	total: 11.4ms	remaining: 76.6ms
13:	learn: 0.4501081	total: 12.3ms	remaining: 75.8ms
14:	learn: 0.4418611	total: 13.2ms	remaining: 74.6ms
15:	learn: 0.4353924	total: 14ms	remaining: 73.7ms
16:	learn: 0.4297994	total: 14.9ms	remaining: 72.6ms
17:	learn: 0.4261484	total: 15.7ms	remaining: 71.7ms
18:	learn: 0.4214450	total: 16.5ms	remaining: 70.5ms
19:	learn:

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


69:	learn: 0.3581687	total: 60.6ms	remaining: 26ms
70:	learn: 0.3574525	total: 61.6ms	remaining: 25.2ms
71:	learn: 0.3568862	total: 62.6ms	remaining: 24.3ms
72:	learn: 0.3565251	total: 63.6ms	remaining: 23.5ms
73:	learn: 0.3559911	total: 64.5ms	remaining: 22.7ms
74:	learn: 0.3551645	total: 65.5ms	remaining: 21.8ms
75:	learn: 0.3544077	total: 66.5ms	remaining: 21ms
76:	learn: 0.3537917	total: 67.4ms	remaining: 20.1ms
77:	learn: 0.3533735	total: 68.2ms	remaining: 19.2ms
78:	learn: 0.3528225	total: 69.2ms	remaining: 18.4ms
79:	learn: 0.3523544	total: 70.1ms	remaining: 17.5ms
80:	learn: 0.3518280	total: 71.1ms	remaining: 16.7ms
81:	learn: 0.3514879	total: 72ms	remaining: 15.8ms
82:	learn: 0.3507016	total: 72.9ms	remaining: 14.9ms
83:	learn: 0.3499662	total: 73.8ms	remaining: 14.1ms
84:	learn: 0.3493295	total: 74.8ms	remaining: 13.2ms
85:	learn: 0.3488680	total: 75.7ms	remaining: 12.3ms
86:	learn: 0.3480611	total: 76.6ms	remaining: 11.4ms
87:	learn: 0.3476706	total: 77.6ms	remaining: 10.6ms

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6550404	total: 928us	remaining: 91.9ms
1:	learn: 0.6242660	total: 1.88ms	remaining: 92ms
2:	learn: 0.5982940	total: 2.71ms	remaining: 87.5ms
3:	learn: 0.5766797	total: 3.58ms	remaining: 85.9ms
4:	learn: 0.5568920	total: 4.47ms	remaining: 84.9ms
5:	learn: 0.5396493	total: 5.36ms	remaining: 84ms
6:	learn: 0.5252273	total: 6.23ms	remaining: 82.8ms
7:	learn: 0.5116472	total: 7.05ms	remaining: 81.1ms
8:	learn: 0.4997403	total: 7.94ms	remaining: 80.3ms
9:	learn: 0.4857595	total: 9.13ms	remaining: 82.2ms
10:	learn: 0.4762659	total: 10.1ms	remaining: 81.6ms
11:	learn: 0.4688467	total: 10.9ms	remaining: 80.2ms
12:	learn: 0.4584632	total: 11.8ms	remaining: 79.2ms
13:	learn: 0.4488825	total: 12.7ms	remaining: 77.9ms
14:	learn: 0.4408022	total: 13.6ms	remaining: 77ms
15:	learn: 0.4339225	total: 14.4ms	remaining: 75.8ms
16:	learn: 0.4301669	total: 15.3ms	remaining: 74.8ms
17:	learn: 0.4251437	total: 16.1ms	remaining: 73.5ms
18:	learn: 0.4198903	total: 17ms	remaining: 72.4ms
19:	learn: 0

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6527250	total: 1.69ms	remaining: 168ms
1:	learn: 0.6203350	total: 2.79ms	remaining: 137ms
2:	learn: 0.5939447	total: 3.94ms	remaining: 127ms
3:	learn: 0.5667611	total: 5.09ms	remaining: 122ms
4:	learn: 0.5465512	total: 6.18ms	remaining: 117ms
5:	learn: 0.5248722	total: 7.23ms	remaining: 113ms
6:	learn: 0.5104637	total: 8.25ms	remaining: 110ms
7:	learn: 0.4972715	total: 9.27ms	remaining: 107ms
8:	learn: 0.4840915	total: 10.2ms	remaining: 103ms
9:	learn: 0.4751251	total: 11.3ms	remaining: 102ms
10:	learn: 0.4666326	total: 12.5ms	remaining: 101ms
11:	learn: 0.4554945	total: 14.2ms	remaining: 104ms
12:	learn: 0.4456216	total: 15.3ms	remaining: 102ms
13:	learn: 0.4385662	total: 16.3ms	remaining: 100ms
14:	learn: 0.4315271	total: 17.3ms	remaining: 97.8ms
15:	learn: 0.4273326	total: 18.3ms	remaining: 96ms
16:	learn: 0.4232332	total: 19.3ms	remaining: 94.4ms
17:	learn: 0.4181474	total: 20.8ms	remaining: 94.6ms
18:	learn: 0.4146868	total: 21.9ms	remaining: 93.2ms
19:	learn: 0.411370

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


30:	learn: 0.3928978	total: 35.9ms	remaining: 79.9ms
31:	learn: 0.3915529	total: 37ms	remaining: 78.7ms
32:	learn: 0.3903921	total: 38.1ms	remaining: 77.4ms
33:	learn: 0.3890394	total: 39.7ms	remaining: 77ms
34:	learn: 0.3879436	total: 40.9ms	remaining: 76ms
35:	learn: 0.3866024	total: 42.2ms	remaining: 75ms
36:	learn: 0.3848278	total: 43.4ms	remaining: 73.9ms
37:	learn: 0.3833183	total: 44.6ms	remaining: 72.8ms
38:	learn: 0.3824152	total: 45.8ms	remaining: 71.6ms
39:	learn: 0.3819827	total: 47ms	remaining: 70.5ms
40:	learn: 0.3807202	total: 48.1ms	remaining: 69.2ms
41:	learn: 0.3795396	total: 49.2ms	remaining: 67.9ms
42:	learn: 0.3786551	total: 50.4ms	remaining: 66.8ms
43:	learn: 0.3777588	total: 51.4ms	remaining: 65.4ms
44:	learn: 0.3771747	total: 52.4ms	remaining: 64.1ms
45:	learn: 0.3763477	total: 53.4ms	remaining: 62.7ms
46:	learn: 0.3756775	total: 54.4ms	remaining: 61.4ms
47:	learn: 0.3746981	total: 55.4ms	remaining: 60ms
48:	learn: 0.3730373	total: 56.5ms	remaining: 58.8ms
49:	l

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


98:	learn: 0.3361241	total: 115ms	remaining: 1.17ms
99:	learn: 0.3355899	total: 117ms	remaining: 0us
0:	learn: 0.6563287	total: 1.5ms	remaining: 148ms
1:	learn: 0.6223003	total: 3.04ms	remaining: 149ms
2:	learn: 0.5961929	total: 4.22ms	remaining: 137ms
3:	learn: 0.5685119	total: 5.97ms	remaining: 143ms
4:	learn: 0.5506872	total: 7.05ms	remaining: 134ms
5:	learn: 0.5338392	total: 8.2ms	remaining: 128ms
6:	learn: 0.5210480	total: 9.69ms	remaining: 129ms
7:	learn: 0.5078624	total: 10.7ms	remaining: 123ms
8:	learn: 0.4973310	total: 11.8ms	remaining: 120ms
9:	learn: 0.4862239	total: 12.9ms	remaining: 116ms
10:	learn: 0.4779945	total: 14.5ms	remaining: 117ms
11:	learn: 0.4700756	total: 16.2ms	remaining: 119ms
12:	learn: 0.4604217	total: 17.9ms	remaining: 120ms
13:	learn: 0.4523843	total: 19.3ms	remaining: 119ms
14:	learn: 0.4474760	total: 20.6ms	remaining: 116ms
15:	learn: 0.4401434	total: 21.6ms	remaining: 113ms
16:	learn: 0.4344762	total: 22.6ms	remaining: 110ms
17:	learn: 0.4285958	total:

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6539924	total: 1.3ms	remaining: 128ms
1:	learn: 0.6247162	total: 2.38ms	remaining: 116ms
2:	learn: 0.5950981	total: 3.55ms	remaining: 115ms
3:	learn: 0.5743052	total: 4.72ms	remaining: 113ms
4:	learn: 0.5560487	total: 5.76ms	remaining: 109ms
5:	learn: 0.5401743	total: 6.8ms	remaining: 106ms
6:	learn: 0.5206125	total: 7.89ms	remaining: 105ms
7:	learn: 0.5042045	total: 8.98ms	remaining: 103ms
8:	learn: 0.4910786	total: 10.4ms	remaining: 106ms
9:	learn: 0.4796726	total: 11.8ms	remaining: 106ms
10:	learn: 0.4683951	total: 12.8ms	remaining: 104ms
11:	learn: 0.4590041	total: 14.1ms	remaining: 103ms
12:	learn: 0.4510206	total: 15.2ms	remaining: 102ms
13:	learn: 0.4427894	total: 16.3ms	remaining: 100ms
14:	learn: 0.4381560	total: 17.3ms	remaining: 98.3ms
15:	learn: 0.4324047	total: 18.6ms	remaining: 97.5ms
16:	learn: 0.4268654	total: 19.8ms	remaining: 96.4ms
17:	learn: 0.4219865	total: 20.8ms	remaining: 95ms
18:	learn: 0.4181830	total: 22.1ms	remaining: 94.3ms
19:	learn: 0.4126268	

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


34:	learn: 0.3826080	total: 51.4ms	remaining: 95.4ms
35:	learn: 0.3810853	total: 52.8ms	remaining: 93.8ms
36:	learn: 0.3793010	total: 54ms	remaining: 91.9ms
37:	learn: 0.3779164	total: 55.1ms	remaining: 89.9ms
38:	learn: 0.3767670	total: 56.3ms	remaining: 88.1ms
39:	learn: 0.3753033	total: 57.6ms	remaining: 86.4ms
40:	learn: 0.3737899	total: 58.8ms	remaining: 84.5ms
41:	learn: 0.3721286	total: 59.9ms	remaining: 82.8ms
42:	learn: 0.3713586	total: 61ms	remaining: 80.8ms
43:	learn: 0.3703218	total: 62.1ms	remaining: 79ms
44:	learn: 0.3695194	total: 63.4ms	remaining: 77.4ms
45:	learn: 0.3690117	total: 65.1ms	remaining: 76.4ms
46:	learn: 0.3681845	total: 66.7ms	remaining: 75.2ms
47:	learn: 0.3670547	total: 67.8ms	remaining: 73.5ms
48:	learn: 0.3655041	total: 69ms	remaining: 71.8ms
49:	learn: 0.3646003	total: 70.2ms	remaining: 70.2ms
50:	learn: 0.3633814	total: 71.5ms	remaining: 68.7ms
51:	learn: 0.3629180	total: 72.8ms	remaining: 67.2ms
52:	learn: 0.3618414	total: 74.1ms	remaining: 65.7ms
5

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.r

### K-Fold CV

In [13]:
from sklearn.model_selection import KFold

# Initialize 5-fold cross-validator
kfold = KFold(n_splits=5, shuffle=False)

results1_kfold = {}

# Loop over each model
for model_name, model1 in models1.items():
    accuracies = []
    f1_scores = []

    # Loop over each fold in 5-fold cross-validation
    for train_index, test_index in kfold.split(X, y):
        # Split the data into training and testing sets for the current fold
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Apply SMOTE to the training data to balance class distribution
        X_train_oversampled, y_train_oversampled = smote.fit_resample(X_train, y_train)

        # Train the model on the oversampled training data
        model1.fit(X_train_oversampled, y_train_oversampled.ravel())

        # Predict the target on the test data
        y_pred = model1.predict(X_test)

        # Evaluate the prediction
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='macro')

        accuracies.append(accuracy)
        f1_scores.append(f1)

    # Compute average accuracy and F1-score across all folds
    avg_accuracy = sum(accuracies) / len(accuracies)
    avg_f1_score = sum(f1_scores) / len(f1_scores)

    results1_kfold[model_name] = {
        'Average Accuracy': avg_accuracy,
        'Average F1-Score (macro)': avg_f1_score
    }


  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.r

[LightGBM] [Info] Number of positive: 1716, number of negative: 1716
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000378 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 91
[LightGBM] [Info] Number of data points in the train set: 3432, number of used features: 18
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1778, number of negative: 1778
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000424 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 3556, number of used features: 18
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[Lig

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 1721, number of negative: 1721
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000279 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 3442, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1728, number of negative: 1728
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000259 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 3456, number of used features: 18
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
0:	l

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6571793	total: 913us	remaining: 90.5ms
1:	learn: 0.6208501	total: 1.73ms	remaining: 84.8ms
2:	learn: 0.5952402	total: 2.47ms	remaining: 80ms
3:	learn: 0.5726471	total: 3.23ms	remaining: 77.6ms
4:	learn: 0.5540803	total: 3.96ms	remaining: 75.3ms
5:	learn: 0.5352691	total: 4.68ms	remaining: 73.4ms
6:	learn: 0.5166234	total: 5.41ms	remaining: 71.9ms
7:	learn: 0.5021342	total: 6.17ms	remaining: 70.9ms
8:	learn: 0.4899828	total: 6.9ms	remaining: 69.8ms
9:	learn: 0.4802835	total: 7.72ms	remaining: 69.5ms
10:	learn: 0.4697036	total: 8.43ms	remaining: 68.2ms
11:	learn: 0.4628898	total: 9.22ms	remaining: 67.6ms
12:	learn: 0.4559652	total: 9.98ms	remaining: 66.8ms
13:	learn: 0.4509534	total: 10.8ms	remaining: 66.4ms
14:	learn: 0.4443255	total: 11.6ms	remaining: 66ms
15:	learn: 0.4377172	total: 12.5ms	remaining: 65.6ms
16:	learn: 0.4337399	total: 13.2ms	remaining: 64.7ms
17:	learn: 0.4299591	total: 14.2ms	remaining: 64.8ms
18:	learn: 0.4275821	total: 15.1ms	remaining: 64.3ms
19:	learn

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


90:	learn: 0.3392440	total: 73.3ms	remaining: 7.25ms
91:	learn: 0.3388139	total: 74.3ms	remaining: 6.46ms
92:	learn: 0.3383148	total: 75.1ms	remaining: 5.65ms
93:	learn: 0.3374711	total: 75.9ms	remaining: 4.85ms
94:	learn: 0.3370359	total: 76.8ms	remaining: 4.04ms
95:	learn: 0.3361340	total: 77.5ms	remaining: 3.23ms
96:	learn: 0.3354600	total: 78.3ms	remaining: 2.42ms
97:	learn: 0.3351031	total: 79.1ms	remaining: 1.61ms
98:	learn: 0.3345523	total: 79.9ms	remaining: 807us
99:	learn: 0.3337097	total: 80.8ms	remaining: 0us
0:	learn: 0.6526784	total: 1ms	remaining: 99ms
1:	learn: 0.6212271	total: 1.83ms	remaining: 89.6ms
2:	learn: 0.5944227	total: 2.61ms	remaining: 84.3ms
3:	learn: 0.5705365	total: 3.33ms	remaining: 80.1ms
4:	learn: 0.5500772	total: 4.17ms	remaining: 79.2ms
5:	learn: 0.5328618	total: 4.95ms	remaining: 77.6ms
6:	learn: 0.5191033	total: 5.77ms	remaining: 76.7ms
7:	learn: 0.5075404	total: 6.58ms	remaining: 75.7ms
8:	learn: 0.4937290	total: 7.37ms	remaining: 74.5ms
9:	learn: 0

  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())
  model1.fit(X_train_oversampled, y_train_oversampled.ravel())


## Label: LONG_INTERACTION

In [None]:
# Label distribution before balancing
print(encoded_data['LONG_INTERACTION_interruptibility'].value_counts())

LONG_INTERACTION_availability
False    1468
True     1433
Name: count, dtype: int64


### Model building and LOSO CV

In [None]:
# Prepare features (X) and target variable (y) for predicting LONG_INTERACTION
X = encoded_data.drop(columns=['LONG_INTERACTION_interruptibility', 'SHORT_INTERACTION_interruptibility','uid'], axis=1)
y = encoded_data['LONG_INTERACTION_interruptibility']
groups = encoded_data['uid']

# Initialize Leave-One-Group-Out cross-validator
logo = LeaveOneGroupOut()

# Initialize SMOTE for oversampling to handle class imbalance
smote = SMOTE(random_state=42)

# Define models to evaluate
models2 = {
    'Random Forest': RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42),
    'XGBoost': XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42),
    'LightGBM': LGBMClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42),
    'CatBoost': CatBoostClassifier(iterations=100, depth=5, learning_rate=0.1, loss_function='Logloss', cat_features=[0], random_seed=42),
    'SVM': SVC(random_state=42),
    'Dummy': DummyClassifier(strategy="stratified", random_state=42)
}

results2_logo = {}

# Loop over each model
for model_name, model2 in models2.items():
    accuracies = []
    f1_scores = []

    # Loop over each fold in Leave-One-Group-Out cross-validation
    for i, (train_index, test_index) in enumerate(logo.split(X, y, groups)):
        # Split the data into training and testing sets for the current fold
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Apply SMOTE to the training data to balance class distribution
        X_train_oversampled, y_train_oversampled = smote.fit_resample(X_train, y_train)

        # Train the model on the oversampled training data
        model2.fit(X_train_oversampled, y_train_oversampled.ravel())

        # Predict the target on the test data
        y_pred = model2.predict(X_test)

        # Evaluate the prediction
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='macro')

        accuracies.append(accuracy)
        f1_scores.append(f1)

    # Compute average accuracy and F1-score across all folds
    avg_accuracy = sum(accuracies) / len(accuracies)
    avg_f1_score = sum(f1_scores) / len(f1_scores)

    results2_logo[model_name] = {
        'Average Accuracy': avg_accuracy,
        'Average F1-Score (macro)': avg_f1_score
    }

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.r

[LightGBM] [Info] Number of positive: 1425, number of negative: 1425
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000317 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 90
[LightGBM] [Info] Number of data points in the train set: 2850, number of used features: 18
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1444, number of negative: 1444
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000279 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2888, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[Lig

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 1365, number of negative: 1365
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000345 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2730, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1443, number of negative: 1443
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000317 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 90
[LightGBM] [Info] Number of data points in the train set: 2886, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 1417, number of negative: 1417
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000325 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2834, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1446, number of negative: 1446
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000361 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2892, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 1458, number of negative: 1458
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000360 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2916, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1452, number of negative: 1452
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000315 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2904, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 1401, number of negative: 1401
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000333 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 2802, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1402, number of negative: 1402
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000386 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2804, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 1406, number of negative: 1406
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000723 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 2812, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1408, number of negative: 1408
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000350 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2816, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 1407, number of negative: 1407
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000402 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 2814, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1456, number of negative: 1456
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000364 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2912, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[Lig

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 1417, number of negative: 1417
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000504 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2834, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1445, number of negative: 1445
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000385 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2890, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 1423, number of negative: 1423
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000372 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2846, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 1439, number of negative: 1439
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000401 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2878, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1417, number of negative: 1417
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000381 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 91
[LightGBM] [Info] Number of data points in the train set: 2834, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 1424, number of negative: 1424
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000398 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2848, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1398, number of negative: 1398
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000296 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[Lig

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 1410, number of negative: 1410
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000808 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 2820, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6728474	total: 1.89ms	remaining: 187ms
1:	learn: 0.6569232	total: 3.33ms	remaining: 163ms
2:	learn: 0.6434571	total: 4.85ms	remaining: 157ms
3:	learn: 0.6337466	total: 6.51ms	remaining: 156ms
4:	learn: 0.6217621	total: 8.03ms	remaining: 153ms
5:	learn: 0.6149952	total: 9.43ms	remaining: 148ms
6:	learn: 0.6061168	total: 11.3ms	remaining: 150ms
7:	learn: 0.5991345	total: 13ms	remaining: 149ms
8:	learn: 0.5924261	total: 14.4ms	remaining: 146ms
9:	learn: 0.5863084	total: 15.8ms	remaining: 142ms
10:	learn: 0.5798708	total: 17.2ms	remaining: 140ms
11:	learn: 0.5764239	total: 18.1ms	remaining: 133ms
12:	learn: 0.5734171	total: 19ms	remaining: 127ms
13:	learn: 0.5686116	total: 20ms	remaining: 123ms
14:	learn: 0.5656370	total: 21ms	remaining: 119ms
15:	learn: 0.5615597	total: 22ms	remaining: 116ms
16:	learn: 0.5580848	total: 23.7ms	remaining: 116ms
17:	learn: 0.5550347	total: 25.5ms	remaining: 116ms
18:	learn: 0.5529056	total: 27.1ms	remaining: 115ms
19:	learn: 0.5503124	total: 28.7

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6735247	total: 1.43ms	remaining: 142ms
1:	learn: 0.6560164	total: 2.66ms	remaining: 130ms
2:	learn: 0.6416915	total: 3.95ms	remaining: 128ms
3:	learn: 0.6296269	total: 5.11ms	remaining: 123ms
4:	learn: 0.6203130	total: 6.31ms	remaining: 120ms
5:	learn: 0.6100476	total: 7.51ms	remaining: 118ms
6:	learn: 0.6025061	total: 14.7ms	remaining: 195ms
7:	learn: 0.5951878	total: 16ms	remaining: 184ms
8:	learn: 0.5892003	total: 17.6ms	remaining: 178ms
9:	learn: 0.5821452	total: 19.6ms	remaining: 177ms
10:	learn: 0.5767876	total: 20.7ms	remaining: 167ms
11:	learn: 0.5725558	total: 21.8ms	remaining: 160ms
12:	learn: 0.5691174	total: 22.9ms	remaining: 153ms
13:	learn: 0.5661488	total: 23.9ms	remaining: 147ms
14:	learn: 0.5630470	total: 25.4ms	remaining: 144ms
15:	learn: 0.5597161	total: 27.8ms	remaining: 146ms
16:	learn: 0.5562396	total: 29.2ms	remaining: 143ms
17:	learn: 0.5540058	total: 30.8ms	remaining: 141ms
18:	learn: 0.5530561	total: 31.7ms	remaining: 135ms
19:	learn: 0.5512618	tot

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6725124	total: 1.53ms	remaining: 151ms
1:	learn: 0.6593719	total: 3.09ms	remaining: 152ms
2:	learn: 0.6463467	total: 4.7ms	remaining: 152ms
3:	learn: 0.6352276	total: 6.46ms	remaining: 155ms
4:	learn: 0.6236075	total: 8ms	remaining: 152ms
5:	learn: 0.6132443	total: 9.41ms	remaining: 147ms
6:	learn: 0.6068984	total: 10.8ms	remaining: 144ms
7:	learn: 0.5997726	total: 12.3ms	remaining: 142ms
8:	learn: 0.5933327	total: 13.3ms	remaining: 134ms
9:	learn: 0.5880968	total: 14.8ms	remaining: 133ms
10:	learn: 0.5815261	total: 16.1ms	remaining: 130ms
11:	learn: 0.5755398	total: 17.1ms	remaining: 126ms
12:	learn: 0.5724889	total: 18.6ms	remaining: 125ms
13:	learn: 0.5683927	total: 19.8ms	remaining: 121ms
14:	learn: 0.5644668	total: 20.7ms	remaining: 118ms
15:	learn: 0.5613632	total: 21.7ms	remaining: 114ms
16:	learn: 0.5589125	total: 22.7ms	remaining: 111ms
17:	learn: 0.5561449	total: 23.7ms	remaining: 108ms
18:	learn: 0.5538605	total: 24.7ms	remaining: 105ms
19:	learn: 0.5515602	total

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6726471	total: 1.42ms	remaining: 140ms
1:	learn: 0.6549100	total: 3.07ms	remaining: 150ms
2:	learn: 0.6403111	total: 4.68ms	remaining: 151ms
3:	learn: 0.6281896	total: 6.06ms	remaining: 145ms
4:	learn: 0.6177638	total: 7.62ms	remaining: 145ms
5:	learn: 0.6084371	total: 9.24ms	remaining: 145ms
6:	learn: 0.6010205	total: 10.7ms	remaining: 143ms
7:	learn: 0.5946789	total: 12ms	remaining: 139ms
8:	learn: 0.5866462	total: 13.3ms	remaining: 134ms
9:	learn: 0.5813165	total: 14.5ms	remaining: 130ms
10:	learn: 0.5761641	total: 15.6ms	remaining: 126ms
11:	learn: 0.5722310	total: 16.9ms	remaining: 124ms
12:	learn: 0.5680694	total: 18ms	remaining: 121ms
13:	learn: 0.5629218	total: 19.2ms	remaining: 118ms
14:	learn: 0.5595088	total: 20.4ms	remaining: 115ms
15:	learn: 0.5555085	total: 21.6ms	remaining: 113ms
16:	learn: 0.5526894	total: 22.8ms	remaining: 111ms
17:	learn: 0.5499873	total: 23.9ms	remaining: 109ms
18:	learn: 0.5477593	total: 25.2ms	remaining: 107ms
19:	learn: 0.5457282	total

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6726801	total: 1.55ms	remaining: 154ms
1:	learn: 0.6547317	total: 3.59ms	remaining: 176ms
2:	learn: 0.6404877	total: 4.81ms	remaining: 156ms
3:	learn: 0.6280625	total: 6.03ms	remaining: 145ms
4:	learn: 0.6202614	total: 6.74ms	remaining: 128ms
5:	learn: 0.6121645	total: 7.96ms	remaining: 125ms
6:	learn: 0.6034897	total: 9.86ms	remaining: 131ms
7:	learn: 0.5976901	total: 11.3ms	remaining: 130ms
8:	learn: 0.5903029	total: 13.2ms	remaining: 133ms
9:	learn: 0.5838691	total: 14.9ms	remaining: 134ms
10:	learn: 0.5787111	total: 16.7ms	remaining: 135ms
11:	learn: 0.5748820	total: 18.3ms	remaining: 134ms
12:	learn: 0.5705860	total: 19.7ms	remaining: 132ms
13:	learn: 0.5669498	total: 21.4ms	remaining: 131ms
14:	learn: 0.5632034	total: 23.2ms	remaining: 131ms
15:	learn: 0.5599262	total: 24.8ms	remaining: 130ms
16:	learn: 0.5568026	total: 26.5ms	remaining: 129ms
17:	learn: 0.5538432	total: 27.8ms	remaining: 127ms
18:	learn: 0.5517841	total: 29.1ms	remaining: 124ms
19:	learn: 0.5497205	t

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6737428	total: 1.34ms	remaining: 133ms
1:	learn: 0.6553250	total: 2.86ms	remaining: 140ms
2:	learn: 0.6423243	total: 4.43ms	remaining: 143ms
3:	learn: 0.6322550	total: 5.9ms	remaining: 142ms
4:	learn: 0.6188645	total: 7.55ms	remaining: 143ms
5:	learn: 0.6110736	total: 9.05ms	remaining: 142ms
6:	learn: 0.6039902	total: 10.3ms	remaining: 137ms
7:	learn: 0.5964323	total: 11.3ms	remaining: 130ms
8:	learn: 0.5903408	total: 12.3ms	remaining: 124ms
9:	learn: 0.5836653	total: 13.9ms	remaining: 125ms
10:	learn: 0.5769899	total: 15.3ms	remaining: 124ms
11:	learn: 0.5730414	total: 16.8ms	remaining: 123ms
12:	learn: 0.5694737	total: 17.9ms	remaining: 120ms
13:	learn: 0.5658130	total: 19.2ms	remaining: 118ms
14:	learn: 0.5635333	total: 20.4ms	remaining: 116ms
15:	learn: 0.5599306	total: 22.1ms	remaining: 116ms
16:	learn: 0.5565455	total: 23.1ms	remaining: 113ms
17:	learn: 0.5532252	total: 24.1ms	remaining: 110ms
18:	learn: 0.5506283	total: 25.3ms	remaining: 108ms
19:	learn: 0.5488928	to

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6737706	total: 1.18ms	remaining: 117ms
1:	learn: 0.6603303	total: 1.93ms	remaining: 94.4ms
2:	learn: 0.6467684	total: 3.07ms	remaining: 99.2ms
3:	learn: 0.6356922	total: 4.36ms	remaining: 105ms
4:	learn: 0.6244514	total: 6.1ms	remaining: 116ms
5:	learn: 0.6156525	total: 7.68ms	remaining: 120ms
6:	learn: 0.6077385	total: 9.18ms	remaining: 122ms
7:	learn: 0.6004900	total: 10.9ms	remaining: 125ms
8:	learn: 0.5942929	total: 12.1ms	remaining: 123ms
9:	learn: 0.5892715	total: 13.9ms	remaining: 125ms
10:	learn: 0.5821713	total: 15.5ms	remaining: 125ms
11:	learn: 0.5784751	total: 16.6ms	remaining: 122ms
12:	learn: 0.5744053	total: 18ms	remaining: 121ms
13:	learn: 0.5698985	total: 19.8ms	remaining: 121ms
14:	learn: 0.5668630	total: 21.1ms	remaining: 119ms
15:	learn: 0.5636829	total: 22.8ms	remaining: 120ms
16:	learn: 0.5602172	total: 24ms	remaining: 117ms
17:	learn: 0.5569798	total: 25.1ms	remaining: 114ms
18:	learn: 0.5547667	total: 26.2ms	remaining: 112ms
19:	learn: 0.5522089	tota

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6746781	total: 1.18ms	remaining: 117ms
1:	learn: 0.6574521	total: 2.83ms	remaining: 139ms
2:	learn: 0.6422497	total: 4.32ms	remaining: 140ms
3:	learn: 0.6319662	total: 6ms	remaining: 144ms
4:	learn: 0.6224665	total: 7.11ms	remaining: 135ms
5:	learn: 0.6113831	total: 8.6ms	remaining: 135ms
6:	learn: 0.6040724	total: 10.1ms	remaining: 134ms
7:	learn: 0.5967782	total: 11.8ms	remaining: 136ms
8:	learn: 0.5908748	total: 14.2ms	remaining: 144ms
9:	learn: 0.5835709	total: 16.1ms	remaining: 145ms
10:	learn: 0.5776205	total: 18ms	remaining: 146ms
11:	learn: 0.5717681	total: 19.2ms	remaining: 141ms
12:	learn: 0.5685123	total: 20.8ms	remaining: 140ms
13:	learn: 0.5658185	total: 21.9ms	remaining: 134ms
14:	learn: 0.5621214	total: 23ms	remaining: 130ms
15:	learn: 0.5585388	total: 24.1ms	remaining: 127ms
16:	learn: 0.5557762	total: 25.1ms	remaining: 123ms
17:	learn: 0.5536294	total: 26.2ms	remaining: 119ms
18:	learn: 0.5526334	total: 26.8ms	remaining: 114ms
19:	learn: 0.5506996	total: 28

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6749735	total: 1.2ms	remaining: 119ms
1:	learn: 0.6588253	total: 2.21ms	remaining: 108ms
2:	learn: 0.6467520	total: 3.63ms	remaining: 117ms
3:	learn: 0.6341097	total: 4.7ms	remaining: 113ms
4:	learn: 0.6247940	total: 6.11ms	remaining: 116ms
5:	learn: 0.6138043	total: 7.09ms	remaining: 111ms
6:	learn: 0.6047067	total: 8.14ms	remaining: 108ms
7:	learn: 0.5975182	total: 9.11ms	remaining: 105ms
8:	learn: 0.5918862	total: 10.3ms	remaining: 105ms
9:	learn: 0.5848636	total: 11.8ms	remaining: 106ms
10:	learn: 0.5794412	total: 13.1ms	remaining: 106ms
11:	learn: 0.5738645	total: 14.3ms	remaining: 105ms
12:	learn: 0.5707164	total: 15.7ms	remaining: 105ms
13:	learn: 0.5681481	total: 17.2ms	remaining: 106ms
14:	learn: 0.5653206	total: 18.8ms	remaining: 107ms
15:	learn: 0.5620232	total: 20.8ms	remaining: 109ms
16:	learn: 0.5585403	total: 22.4ms	remaining: 110ms
17:	learn: 0.5563561	total: 23.7ms	remaining: 108ms
18:	learn: 0.5545521	total: 25.5ms	remaining: 109ms
19:	learn: 0.5526758	tot

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6742273	total: 1.13ms	remaining: 112ms
1:	learn: 0.6537273	total: 2.45ms	remaining: 120ms
2:	learn: 0.6371123	total: 3.63ms	remaining: 117ms
3:	learn: 0.6255034	total: 4.75ms	remaining: 114ms
4:	learn: 0.6138807	total: 5.81ms	remaining: 110ms
5:	learn: 0.6049889	total: 6.76ms	remaining: 106ms
6:	learn: 0.5942565	total: 7.71ms	remaining: 102ms
7:	learn: 0.5868694	total: 9.07ms	remaining: 104ms
8:	learn: 0.5803664	total: 10.6ms	remaining: 107ms
9:	learn: 0.5751180	total: 12ms	remaining: 108ms
10:	learn: 0.5685418	total: 13.3ms	remaining: 107ms
11:	learn: 0.5626521	total: 14.2ms	remaining: 104ms
12:	learn: 0.5596744	total: 15.1ms	remaining: 101ms
13:	learn: 0.5559016	total: 16.2ms	remaining: 99.7ms
14:	learn: 0.5518676	total: 17.9ms	remaining: 102ms
15:	learn: 0.5491761	total: 19ms	remaining: 99.8ms
16:	learn: 0.5455019	total: 20.4ms	remaining: 99.6ms
17:	learn: 0.5431766	total: 21.7ms	remaining: 99ms
18:	learn: 0.5404428	total: 23ms	remaining: 98.3ms
19:	learn: 0.5384746	tota

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6746947	total: 1.32ms	remaining: 131ms
1:	learn: 0.6573923	total: 2.41ms	remaining: 118ms
2:	learn: 0.6422426	total: 3.33ms	remaining: 108ms
3:	learn: 0.6295911	total: 4.9ms	remaining: 118ms
4:	learn: 0.6194261	total: 6.55ms	remaining: 125ms
5:	learn: 0.6104675	total: 8.06ms	remaining: 126ms
6:	learn: 0.6020905	total: 9.54ms	remaining: 127ms
7:	learn: 0.5946437	total: 10.9ms	remaining: 126ms
8:	learn: 0.5885906	total: 12.4ms	remaining: 126ms
9:	learn: 0.5820377	total: 13.6ms	remaining: 123ms
10:	learn: 0.5759542	total: 15.2ms	remaining: 123ms
11:	learn: 0.5731522	total: 16.6ms	remaining: 122ms
12:	learn: 0.5679960	total: 18.1ms	remaining: 121ms
13:	learn: 0.5652933	total: 19.7ms	remaining: 121ms
14:	learn: 0.5612146	total: 21.7ms	remaining: 123ms
15:	learn: 0.5583449	total: 23ms	remaining: 121ms
16:	learn: 0.5556422	total: 24.6ms	remaining: 120ms
17:	learn: 0.5530919	total: 26.1ms	remaining: 119ms
18:	learn: 0.5493503	total: 27.6ms	remaining: 118ms
19:	learn: 0.5468347	tota

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6736783	total: 1.26ms	remaining: 125ms
1:	learn: 0.6570803	total: 2.52ms	remaining: 124ms
2:	learn: 0.6422855	total: 3.75ms	remaining: 121ms
3:	learn: 0.6322519	total: 5ms	remaining: 120ms
4:	learn: 0.6227406	total: 6.06ms	remaining: 115ms
5:	learn: 0.6121781	total: 7.14ms	remaining: 112ms
6:	learn: 0.6044788	total: 8.21ms	remaining: 109ms
7:	learn: 0.5960640	total: 9.22ms	remaining: 106ms
8:	learn: 0.5894243	total: 10.3ms	remaining: 104ms
9:	learn: 0.5847398	total: 11.4ms	remaining: 103ms
10:	learn: 0.5783807	total: 12.6ms	remaining: 102ms
11:	learn: 0.5728399	total: 13.7ms	remaining: 100ms
12:	learn: 0.5688737	total: 14.7ms	remaining: 98.5ms
13:	learn: 0.5653943	total: 16.2ms	remaining: 99.5ms
14:	learn: 0.5625049	total: 18ms	remaining: 102ms
15:	learn: 0.5594695	total: 19.7ms	remaining: 104ms
16:	learn: 0.5567108	total: 21.2ms	remaining: 104ms
17:	learn: 0.5533486	total: 22.7ms	remaining: 103ms
18:	learn: 0.5505880	total: 24ms	remaining: 102ms
19:	learn: 0.5488943	total:

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6748476	total: 1.53ms	remaining: 151ms
1:	learn: 0.6572455	total: 2.53ms	remaining: 124ms
2:	learn: 0.6435172	total: 3.82ms	remaining: 123ms
3:	learn: 0.6301061	total: 5.46ms	remaining: 131ms
4:	learn: 0.6173365	total: 7.25ms	remaining: 138ms
5:	learn: 0.6070298	total: 8.74ms	remaining: 137ms
6:	learn: 0.5987754	total: 9.93ms	remaining: 132ms
7:	learn: 0.5907567	total: 10.9ms	remaining: 125ms
8:	learn: 0.5844378	total: 11.8ms	remaining: 120ms
9:	learn: 0.5786278	total: 13.6ms	remaining: 122ms
10:	learn: 0.5724043	total: 15ms	remaining: 121ms
11:	learn: 0.5698764	total: 16.1ms	remaining: 118ms
12:	learn: 0.5648238	total: 17.2ms	remaining: 115ms
13:	learn: 0.5613392	total: 18.5ms	remaining: 113ms
14:	learn: 0.5584593	total: 19.8ms	remaining: 112ms
15:	learn: 0.5546959	total: 21ms	remaining: 110ms
16:	learn: 0.5522554	total: 35.5ms	remaining: 173ms
17:	learn: 0.5493932	total: 36.9ms	remaining: 168ms
18:	learn: 0.5464846	total: 37.8ms	remaining: 161ms
19:	learn: 0.5450596	total

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6755384	total: 1.11ms	remaining: 110ms
1:	learn: 0.6583390	total: 2.38ms	remaining: 117ms
2:	learn: 0.6435126	total: 3.73ms	remaining: 121ms
3:	learn: 0.6331943	total: 4.94ms	remaining: 119ms
4:	learn: 0.6214315	total: 6.29ms	remaining: 119ms
5:	learn: 0.6116167	total: 7.82ms	remaining: 122ms
6:	learn: 0.6036884	total: 9.39ms	remaining: 125ms
7:	learn: 0.5960313	total: 11ms	remaining: 126ms
8:	learn: 0.5897789	total: 12.2ms	remaining: 123ms
9:	learn: 0.5847535	total: 13.4ms	remaining: 121ms
10:	learn: 0.5787925	total: 14.7ms	remaining: 119ms
11:	learn: 0.5741101	total: 16.1ms	remaining: 118ms
12:	learn: 0.5698550	total: 17.5ms	remaining: 117ms
13:	learn: 0.5670281	total: 18.8ms	remaining: 116ms
14:	learn: 0.5639403	total: 20.1ms	remaining: 114ms
15:	learn: 0.5611994	total: 21.4ms	remaining: 112ms
16:	learn: 0.5581649	total: 23ms	remaining: 112ms
17:	learn: 0.5550112	total: 24.6ms	remaining: 112ms
18:	learn: 0.5524010	total: 25.9ms	remaining: 111ms
19:	learn: 0.5498369	total

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6702424	total: 1.18ms	remaining: 117ms
1:	learn: 0.6546306	total: 2.47ms	remaining: 121ms
2:	learn: 0.6407109	total: 3.83ms	remaining: 124ms
3:	learn: 0.6295166	total: 5.3ms	remaining: 127ms
4:	learn: 0.6198048	total: 6.85ms	remaining: 130ms
5:	learn: 0.6094100	total: 8.44ms	remaining: 132ms
6:	learn: 0.6022450	total: 10.6ms	remaining: 141ms
7:	learn: 0.5946779	total: 11.7ms	remaining: 135ms
8:	learn: 0.5883622	total: 12.6ms	remaining: 127ms
9:	learn: 0.5820668	total: 13.5ms	remaining: 121ms
10:	learn: 0.5769586	total: 14.8ms	remaining: 120ms
11:	learn: 0.5730042	total: 16.2ms	remaining: 119ms
12:	learn: 0.5690082	total: 17.3ms	remaining: 116ms
13:	learn: 0.5660587	total: 18.8ms	remaining: 116ms
14:	learn: 0.5631128	total: 19.8ms	remaining: 112ms
15:	learn: 0.5598100	total: 20.8ms	remaining: 109ms
16:	learn: 0.5562263	total: 23.1ms	remaining: 113ms
17:	learn: 0.5534413	total: 24.9ms	remaining: 114ms
18:	learn: 0.5522009	total: 26.3ms	remaining: 112ms
19:	learn: 0.5500229	to

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6738419	total: 1.14ms	remaining: 113ms
1:	learn: 0.6574743	total: 2.32ms	remaining: 114ms
2:	learn: 0.6441921	total: 3.55ms	remaining: 115ms
3:	learn: 0.6321186	total: 4.68ms	remaining: 112ms
4:	learn: 0.6206724	total: 5.63ms	remaining: 107ms
5:	learn: 0.6110484	total: 6.7ms	remaining: 105ms
6:	learn: 0.6028525	total: 7.94ms	remaining: 106ms
7:	learn: 0.5963835	total: 9.38ms	remaining: 108ms
8:	learn: 0.5898165	total: 10.5ms	remaining: 106ms
9:	learn: 0.5842625	total: 11.8ms	remaining: 106ms
10:	learn: 0.5778239	total: 13.2ms	remaining: 107ms
11:	learn: 0.5738061	total: 14.8ms	remaining: 109ms
12:	learn: 0.5681406	total: 16ms	remaining: 107ms
13:	learn: 0.5644253	total: 17.3ms	remaining: 107ms
14:	learn: 0.5611076	total: 18.5ms	remaining: 105ms
15:	learn: 0.5589316	total: 19.6ms	remaining: 103ms
16:	learn: 0.5565212	total: 20.8ms	remaining: 101ms
17:	learn: 0.5534225	total: 22.1ms	remaining: 101ms
18:	learn: 0.5512853	total: 23.3ms	remaining: 99.5ms
19:	learn: 0.5495765	tot

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6757173	total: 1.54ms	remaining: 153ms
1:	learn: 0.6558459	total: 2.61ms	remaining: 128ms
2:	learn: 0.6434758	total: 3.61ms	remaining: 117ms
3:	learn: 0.6325670	total: 4.61ms	remaining: 111ms
4:	learn: 0.6184659	total: 5.86ms	remaining: 111ms
5:	learn: 0.6102935	total: 7.39ms	remaining: 116ms
6:	learn: 0.6028184	total: 8.51ms	remaining: 113ms
7:	learn: 0.5954728	total: 9.87ms	remaining: 114ms
8:	learn: 0.5891020	total: 10.9ms	remaining: 110ms
9:	learn: 0.5816283	total: 11.8ms	remaining: 106ms
10:	learn: 0.5741992	total: 12.8ms	remaining: 103ms
11:	learn: 0.5687855	total: 13.9ms	remaining: 102ms
12:	learn: 0.5652814	total: 15.5ms	remaining: 104ms
13:	learn: 0.5607407	total: 16.9ms	remaining: 104ms
14:	learn: 0.5575863	total: 18.3ms	remaining: 104ms
15:	learn: 0.5549009	total: 19.6ms	remaining: 103ms
16:	learn: 0.5520903	total: 20.8ms	remaining: 102ms
17:	learn: 0.5491349	total: 22.2ms	remaining: 101ms
18:	learn: 0.5464677	total: 23.7ms	remaining: 101ms
19:	learn: 0.5444772	t

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6729490	total: 1.73ms	remaining: 172ms
1:	learn: 0.6551604	total: 2.95ms	remaining: 145ms
2:	learn: 0.6411170	total: 4ms	remaining: 129ms
3:	learn: 0.6274133	total: 4.96ms	remaining: 119ms
4:	learn: 0.6180173	total: 6.38ms	remaining: 121ms
5:	learn: 0.6096132	total: 8.14ms	remaining: 127ms
6:	learn: 0.6023417	total: 9.64ms	remaining: 128ms
7:	learn: 0.5950992	total: 11ms	remaining: 127ms
8:	learn: 0.5891832	total: 12.4ms	remaining: 126ms
9:	learn: 0.5834272	total: 13.9ms	remaining: 125ms
10:	learn: 0.5780739	total: 15.4ms	remaining: 125ms
11:	learn: 0.5725600	total: 16.5ms	remaining: 121ms
12:	learn: 0.5681409	total: 17.7ms	remaining: 118ms
13:	learn: 0.5654929	total: 18.9ms	remaining: 116ms
14:	learn: 0.5625996	total: 19.9ms	remaining: 113ms
15:	learn: 0.5597272	total: 21.2ms	remaining: 111ms
16:	learn: 0.5568509	total: 23ms	remaining: 112ms
17:	learn: 0.5541420	total: 24.1ms	remaining: 110ms
18:	learn: 0.5531527	total: 24.8ms	remaining: 106ms
19:	learn: 0.5515337	total: 2

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6710035	total: 1.41ms	remaining: 139ms
1:	learn: 0.6545732	total: 2.68ms	remaining: 131ms
2:	learn: 0.6422235	total: 3.81ms	remaining: 123ms
3:	learn: 0.6315317	total: 5.21ms	remaining: 125ms
4:	learn: 0.6228412	total: 6.47ms	remaining: 123ms
5:	learn: 0.6153912	total: 7.38ms	remaining: 116ms
6:	learn: 0.6070677	total: 8.63ms	remaining: 115ms
7:	learn: 0.6003154	total: 9.82ms	remaining: 113ms
8:	learn: 0.5946984	total: 10.9ms	remaining: 110ms
9:	learn: 0.5875919	total: 11.7ms	remaining: 105ms
10:	learn: 0.5821841	total: 12.9ms	remaining: 104ms
11:	learn: 0.5789460	total: 14ms	remaining: 102ms
12:	learn: 0.5746493	total: 15.2ms	remaining: 101ms
13:	learn: 0.5699093	total: 16.4ms	remaining: 101ms
14:	learn: 0.5657457	total: 17.9ms	remaining: 101ms
15:	learn: 0.5629685	total: 19.1ms	remaining: 100ms
16:	learn: 0.5602990	total: 20.7ms	remaining: 101ms
17:	learn: 0.5573862	total: 22.2ms	remaining: 101ms
18:	learn: 0.5550977	total: 23.5ms	remaining: 100ms
19:	learn: 0.5528083	tot

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6708933	total: 1.66ms	remaining: 164ms
1:	learn: 0.6554796	total: 2.98ms	remaining: 146ms
2:	learn: 0.6418966	total: 4.03ms	remaining: 130ms
3:	learn: 0.6289007	total: 4.96ms	remaining: 119ms
4:	learn: 0.6198123	total: 6.77ms	remaining: 129ms
5:	learn: 0.6123786	total: 8ms	remaining: 125ms
6:	learn: 0.6023939	total: 9.24ms	remaining: 123ms
7:	learn: 0.5956772	total: 10.5ms	remaining: 120ms
8:	learn: 0.5894870	total: 11.7ms	remaining: 118ms
9:	learn: 0.5834599	total: 13.1ms	remaining: 118ms
10:	learn: 0.5772831	total: 14.8ms	remaining: 120ms
11:	learn: 0.5732325	total: 16.3ms	remaining: 119ms
12:	learn: 0.5676417	total: 17.6ms	remaining: 118ms
13:	learn: 0.5638405	total: 19.2ms	remaining: 118ms
14:	learn: 0.5599196	total: 20.6ms	remaining: 117ms
15:	learn: 0.5568653	total: 21.9ms	remaining: 115ms
16:	learn: 0.5543577	total: 23.2ms	remaining: 113ms
17:	learn: 0.5519960	total: 24.5ms	remaining: 112ms
18:	learn: 0.5491424	total: 26.1ms	remaining: 111ms
19:	learn: 0.5464951	tota

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6726837	total: 1.42ms	remaining: 141ms
1:	learn: 0.6571817	total: 2.95ms	remaining: 144ms
2:	learn: 0.6429185	total: 4.13ms	remaining: 134ms
3:	learn: 0.6311416	total: 5.29ms	remaining: 127ms
4:	learn: 0.6188308	total: 6.59ms	remaining: 125ms
5:	learn: 0.6112065	total: 7.92ms	remaining: 124ms
6:	learn: 0.6036427	total: 9.8ms	remaining: 130ms
7:	learn: 0.5983207	total: 11.5ms	remaining: 132ms
8:	learn: 0.5913810	total: 12.6ms	remaining: 127ms
9:	learn: 0.5858775	total: 13.7ms	remaining: 123ms
10:	learn: 0.5797769	total: 14.8ms	remaining: 119ms
11:	learn: 0.5749432	total: 15.9ms	remaining: 117ms
12:	learn: 0.5695717	total: 17.1ms	remaining: 115ms
13:	learn: 0.5662091	total: 18.3ms	remaining: 112ms
14:	learn: 0.5628417	total: 19.4ms	remaining: 110ms
15:	learn: 0.5595445	total: 20.5ms	remaining: 108ms
16:	learn: 0.5570440	total: 21.7ms	remaining: 106ms
17:	learn: 0.5545793	total: 23ms	remaining: 105ms
18:	learn: 0.5523165	total: 24.4ms	remaining: 104ms
19:	learn: 0.5504768	tota

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6724731	total: 1.14ms	remaining: 113ms
1:	learn: 0.6572044	total: 2.51ms	remaining: 123ms
2:	learn: 0.6441163	total: 4.07ms	remaining: 131ms
3:	learn: 0.6329796	total: 5.5ms	remaining: 132ms
4:	learn: 0.6228248	total: 7.04ms	remaining: 134ms
5:	learn: 0.6164994	total: 8.24ms	remaining: 129ms
6:	learn: 0.6085593	total: 9.77ms	remaining: 130ms
7:	learn: 0.5995769	total: 10.9ms	remaining: 125ms
8:	learn: 0.5929013	total: 11.9ms	remaining: 120ms
9:	learn: 0.5867412	total: 13ms	remaining: 117ms
10:	learn: 0.5811787	total: 14.4ms	remaining: 116ms
11:	learn: 0.5770062	total: 15.9ms	remaining: 117ms
12:	learn: 0.5722840	total: 17.4ms	remaining: 116ms
13:	learn: 0.5688525	total: 18.5ms	remaining: 113ms
14:	learn: 0.5655146	total: 19.6ms	remaining: 111ms
15:	learn: 0.5616025	total: 20.6ms	remaining: 108ms
16:	learn: 0.5581219	total: 22.1ms	remaining: 108ms
17:	learn: 0.5557634	total: 22.9ms	remaining: 105ms
18:	learn: 0.5538586	total: 23.9ms	remaining: 102ms
19:	learn: 0.5514155	tota

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6739167	total: 1.47ms	remaining: 146ms
1:	learn: 0.6569857	total: 2.57ms	remaining: 126ms
2:	learn: 0.6447385	total: 3.56ms	remaining: 115ms
3:	learn: 0.6335758	total: 4.54ms	remaining: 109ms
4:	learn: 0.6221434	total: 5.85ms	remaining: 111ms
5:	learn: 0.6131105	total: 7.11ms	remaining: 111ms
6:	learn: 0.6030607	total: 8.89ms	remaining: 118ms
7:	learn: 0.5973908	total: 10.4ms	remaining: 120ms
8:	learn: 0.5898852	total: 11.7ms	remaining: 119ms
9:	learn: 0.5820247	total: 13ms	remaining: 117ms
10:	learn: 0.5766723	total: 14.2ms	remaining: 115ms
11:	learn: 0.5712420	total: 15.6ms	remaining: 115ms
12:	learn: 0.5669776	total: 17.1ms	remaining: 114ms
13:	learn: 0.5628364	total: 18.6ms	remaining: 114ms
14:	learn: 0.5592393	total: 19.8ms	remaining: 112ms
15:	learn: 0.5563815	total: 21.2ms	remaining: 111ms
16:	learn: 0.5537473	total: 22.5ms	remaining: 110ms
17:	learn: 0.5516063	total: 23.6ms	remaining: 108ms
18:	learn: 0.5492925	total: 24.7ms	remaining: 106ms
19:	learn: 0.5476052	tot

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6744690	total: 1.62ms	remaining: 160ms
1:	learn: 0.6610864	total: 3.14ms	remaining: 154ms
2:	learn: 0.6490820	total: 4.44ms	remaining: 144ms
3:	learn: 0.6370295	total: 5.37ms	remaining: 129ms
4:	learn: 0.6235226	total: 6.28ms	remaining: 119ms
5:	learn: 0.6131869	total: 7.74ms	remaining: 121ms
6:	learn: 0.6037413	total: 8.87ms	remaining: 118ms
7:	learn: 0.5955310	total: 9.97ms	remaining: 115ms
8:	learn: 0.5899280	total: 11.2ms	remaining: 113ms
9:	learn: 0.5838716	total: 12.3ms	remaining: 111ms
10:	learn: 0.5775154	total: 13.2ms	remaining: 107ms
11:	learn: 0.5731742	total: 14.2ms	remaining: 104ms
12:	learn: 0.5698711	total: 15.6ms	remaining: 104ms
13:	learn: 0.5671277	total: 17.1ms	remaining: 105ms
14:	learn: 0.5633822	total: 18.3ms	remaining: 104ms
15:	learn: 0.5607431	total: 19.3ms	remaining: 101ms
16:	learn: 0.5578904	total: 20.3ms	remaining: 99.1ms
17:	learn: 0.5544431	total: 21.6ms	remaining: 98.6ms
18:	learn: 0.5515674	total: 23.3ms	remaining: 99.3ms
19:	learn: 0.548829

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6733803	total: 1.56ms	remaining: 155ms
1:	learn: 0.6561263	total: 2.58ms	remaining: 127ms
2:	learn: 0.6434857	total: 3.53ms	remaining: 114ms
3:	learn: 0.6338190	total: 4.55ms	remaining: 109ms
4:	learn: 0.6228912	total: 6.26ms	remaining: 119ms
5:	learn: 0.6144509	total: 8.09ms	remaining: 127ms
6:	learn: 0.6059800	total: 9.58ms	remaining: 127ms
7:	learn: 0.5980244	total: 10.5ms	remaining: 121ms
8:	learn: 0.5922831	total: 11.6ms	remaining: 117ms
9:	learn: 0.5868879	total: 12.5ms	remaining: 113ms
10:	learn: 0.5807013	total: 13.6ms	remaining: 110ms
11:	learn: 0.5769272	total: 14.7ms	remaining: 108ms
12:	learn: 0.5726039	total: 15.6ms	remaining: 105ms
13:	learn: 0.5694566	total: 17.1ms	remaining: 105ms
14:	learn: 0.5666418	total: 18.4ms	remaining: 105ms
15:	learn: 0.5630791	total: 19.6ms	remaining: 103ms
16:	learn: 0.5605786	total: 20.5ms	remaining: 100ms
17:	learn: 0.5579614	total: 22.1ms	remaining: 101ms
18:	learn: 0.5559744	total: 23.3ms	remaining: 99.5ms
19:	learn: 0.5540651	

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6731870	total: 1.22ms	remaining: 121ms
1:	learn: 0.6599435	total: 2.59ms	remaining: 127ms
2:	learn: 0.6486434	total: 4.09ms	remaining: 132ms
3:	learn: 0.6348114	total: 5.44ms	remaining: 131ms
4:	learn: 0.6230028	total: 6.7ms	remaining: 127ms
5:	learn: 0.6146193	total: 7.92ms	remaining: 124ms
6:	learn: 0.6057363	total: 9.13ms	remaining: 121ms
7:	learn: 0.5975556	total: 10.5ms	remaining: 120ms
8:	learn: 0.5953176	total: 11ms	remaining: 111ms
9:	learn: 0.5894972	total: 12ms	remaining: 108ms
10:	learn: 0.5842878	total: 13ms	remaining: 105ms
11:	learn: 0.5789721	total: 14.1ms	remaining: 103ms
12:	learn: 0.5761572	total: 15ms	remaining: 101ms
13:	learn: 0.5717196	total: 16ms	remaining: 98.3ms
14:	learn: 0.5685181	total: 17.3ms	remaining: 98ms
15:	learn: 0.5657091	total: 18.7ms	remaining: 98.2ms
16:	learn: 0.5620395	total: 19.9ms	remaining: 97.3ms
17:	learn: 0.5588192	total: 20.9ms	remaining: 95ms
18:	learn: 0.5574453	total: 22ms	remaining: 93.6ms
19:	learn: 0.5555816	total: 23.1m

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.r

### K-Fold CV

In [16]:
from sklearn.model_selection import KFold

# Initialize 5-fold cross-validator
kfold = KFold(n_splits=5, shuffle=False)

results2_kfold = {}

# Loop over each model
for model_name, model2 in models2.items():
    accuracies = []
    f1_scores = []

    # Loop over each fold in 5-fold cross-validation
    for train_index, test_index in kfold.split(X, y):
        # Split the data into training and testing sets for the current fold
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Apply SMOTE to the training data to balance class distribution
        X_train_oversampled, y_train_oversampled = smote.fit_resample(X_train, y_train)

        # Train the model on the oversampled training data
        model2.fit(X_train_oversampled, y_train_oversampled.ravel())

        # Predict the target on the test data
        y_pred = model2.predict(X_test)

        # Evaluate the prediction
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='macro')

        accuracies.append(accuracy)
        f1_scores.append(f1)

    # Compute average accuracy and F1-score across all folds
    avg_accuracy = sum(accuracies) / len(accuracies)
    avg_f1_score = sum(f1_scores) / len(f1_scores)

    results2_kfold[model_name] = {
        'Average Accuracy': avg_accuracy,
        'Average F1-Score (macro)': avg_f1_score
    }

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.r

[LightGBM] [Info] Number of positive: 1188, number of negative: 1188
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000282 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 88
[LightGBM] [Info] Number of data points in the train set: 2376, number of used features: 18
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1173, number of negative: 1173
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000298 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 90
[LightGBM] [Info] Number of data points in the train set: 2346, number of used features: 18
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[Lig

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


[LightGBM] [Info] Number of positive: 1187, number of negative: 1187
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000300 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 91
[LightGBM] [Info] Number of data points in the train set: 2374, number of used features: 19
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 1201, number of negative: 1201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000281 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 90
[LightGBM] [Info] Number of data points in the train set: 2402, number of used features: 18
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


0:	learn: 0.6715702	total: 1.75ms	remaining: 174ms
1:	learn: 0.6538396	total: 3.31ms	remaining: 162ms
2:	learn: 0.6392834	total: 4.67ms	remaining: 151ms
3:	learn: 0.6272745	total: 5.95ms	remaining: 143ms
4:	learn: 0.6170082	total: 7.24ms	remaining: 138ms
5:	learn: 0.6092339	total: 8.04ms	remaining: 126ms
6:	learn: 0.6027320	total: 9.25ms	remaining: 123ms
7:	learn: 0.5971089	total: 10.8ms	remaining: 124ms
8:	learn: 0.5889696	total: 12.1ms	remaining: 122ms
9:	learn: 0.5837884	total: 13.1ms	remaining: 118ms
10:	learn: 0.5805317	total: 13.8ms	remaining: 112ms
11:	learn: 0.5736902	total: 15ms	remaining: 110ms
12:	learn: 0.5697774	total: 16.1ms	remaining: 108ms
13:	learn: 0.5662060	total: 17.3ms	remaining: 106ms
14:	learn: 0.5615712	total: 18.3ms	remaining: 104ms
15:	learn: 0.5567693	total: 19.3ms	remaining: 101ms
16:	learn: 0.5527578	total: 20.2ms	remaining: 98.8ms
17:	learn: 0.5486857	total: 21.2ms	remaining: 96.4ms
18:	learn: 0.5462700	total: 22.3ms	remaining: 95.2ms
19:	learn: 0.5441497	

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


19:	learn: 0.5371649	total: 20ms	remaining: 79.8ms
20:	learn: 0.5356128	total: 21.1ms	remaining: 79.4ms
21:	learn: 0.5343331	total: 22.1ms	remaining: 78.5ms
22:	learn: 0.5325453	total: 23.2ms	remaining: 77.6ms
23:	learn: 0.5302191	total: 24.2ms	remaining: 76.6ms
24:	learn: 0.5280395	total: 25.3ms	remaining: 75.9ms
25:	learn: 0.5258492	total: 26.3ms	remaining: 74.9ms
26:	learn: 0.5235427	total: 27.6ms	remaining: 74.7ms
27:	learn: 0.5219664	total: 29ms	remaining: 74.6ms
28:	learn: 0.5199665	total: 30ms	remaining: 73.3ms
29:	learn: 0.5190939	total: 30.9ms	remaining: 72ms
30:	learn: 0.5178102	total: 32.1ms	remaining: 71.5ms
31:	learn: 0.5165753	total: 33.4ms	remaining: 71ms
32:	learn: 0.5155173	total: 34.5ms	remaining: 70.1ms
33:	learn: 0.5146989	total: 35.7ms	remaining: 69.2ms
34:	learn: 0.5136173	total: 36.7ms	remaining: 68.1ms
35:	learn: 0.5130749	total: 37.7ms	remaining: 67ms
36:	learn: 0.5124388	total: 38.6ms	remaining: 65.7ms
37:	learn: 0.5118897	total: 39.6ms	remaining: 64.6ms
38:	l

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


94:	learn: 0.4805022	total: 87.2ms	remaining: 4.59ms
95:	learn: 0.4799500	total: 88.1ms	remaining: 3.67ms
96:	learn: 0.4795702	total: 89.2ms	remaining: 2.76ms
97:	learn: 0.4791694	total: 90.1ms	remaining: 1.84ms
98:	learn: 0.4786992	total: 91ms	remaining: 919us
99:	learn: 0.4783280	total: 91.9ms	remaining: 0us
0:	learn: 0.6741379	total: 1.05ms	remaining: 104ms
1:	learn: 0.6571544	total: 2.01ms	remaining: 98.6ms
2:	learn: 0.6448336	total: 2.81ms	remaining: 90.8ms
3:	learn: 0.6332969	total: 3.65ms	remaining: 87.6ms
4:	learn: 0.6251638	total: 4.44ms	remaining: 84.4ms
5:	learn: 0.6178365	total: 5.4ms	remaining: 84.6ms
6:	learn: 0.6104293	total: 6.32ms	remaining: 83.9ms
7:	learn: 0.6044367	total: 7.12ms	remaining: 81.9ms
8:	learn: 0.5991271	total: 7.96ms	remaining: 80.5ms
9:	learn: 0.5935131	total: 8.78ms	remaining: 79ms
10:	learn: 0.5887796	total: 9.61ms	remaining: 77.8ms
11:	learn: 0.5843930	total: 10.4ms	remaining: 76.6ms
12:	learn: 0.5785885	total: 11.3ms	remaining: 75.8ms
13:	learn: 0.

  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())
  model2.fit(X_train_oversampled, y_train_oversampled.ravel())


In [17]:
from tabulate import tabulate

# Models list
models = [
    'Random Forest', 'Gradient Boosting', 'XGBoost',
    'LightGBM', 'CatBoost', 'SVM', 'Dummy'
]

# Prepare table data
table_data = []
for model in models:
    row = [model]
    
    # Shell 1: SHORT_INTERACTION, Leave-One-Group-Out
    row.append(results1_logo.get(model, {}).get('Average Accuracy', 0.0))
    row.append(results1_logo.get(model, {}).get('Average F1-Score (macro)', 0.0))
    
    # Shell 3: LONG_INTERACTION, Leave-One-Group-Out
    row.append(results2_logo.get(model, {}).get('Average Accuracy', 0.0))
    row.append(results2_logo.get(model, {}).get('Average F1-Score (macro)', 0.0))

    # Shell 2: SHORT_INTERACTION, 5-fold
    row.append(results1_kfold.get(model, {}).get('Average Accuracy', 0.0))
    row.append(results1_kfold.get(model, {}).get('Average F1-Score (macro)', 0.0))
    
    # Shell 4: LONG_INTERACTION, 5-fold
    row.append(results2_kfold.get(model, {}).get('Average Accuracy', 0.0))
    row.append(results2_kfold.get(model, {}).get('Average F1-Score (macro)', 0.0))
    
    table_data.append(row)

# Define headers
headers = [
    'Model',
    'LOSO CV\nShort\nInteraction\nAccuracy', 'LOSO CV\nShort\nInteraction\nF1',
    'LOSO CV\nLong\nInteraction\nAccuracy', 'LOSO CV\nLong\nInteraction\nF1',
    '5-fold CV\nShort\nInteraction\nAccuracy', '5-fold CV\nShort\nInteraction\nF1',
    '5-fold CV\nLong\nInteraction\nAccuracy', '5-fold CV\nLong\nInteraction\nF1'
]

# Print the table
print("Machine learning model performance")
print(tabulate(table_data, headers=headers, tablefmt='fancy_grid', floatfmt='.4f', numalign="decimal"))


Machine learning model performance
╒═══════════════════╤═══════════════╤═══════════════╤═══════════════╤═══════════════╤═══════════════╤═══════════════╤═══════════════╤═══════════════╕
│ Model             │       LOSO CV │       LOSO CV │       LOSO CV │       LOSO CV │     5-fold CV │     5-fold CV │     5-fold CV │     5-fold CV │
│                   │         Short │         Short │          Long │          Long │         Short │         Short │          Long │          Long │
│                   │   Interaction │   Interaction │   Interaction │   Interaction │   Interaction │   Interaction │   Interaction │   Interaction │
│                   │      Accuracy │            F1 │      Accuracy │            F1 │      Accuracy │            F1 │      Accuracy │            F1 │
╞═══════════════════╪═══════════════╪═══════════════╪═══════════════╪═══════════════╪═══════════════╪═══════════════╪═══════════════╪═══════════════╡
│ Random Forest     │        0.8336 │        0.7385 │        0.68