In [1]:
import numpy as np
import pandas as pd
from sklearn.base import clone

import optuna
from scipy.optimize import minimize
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import *
from scipy.stats import linregress
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.pipeline import Pipeline

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import cohen_kappa_score

import re

from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

import warnings
# NOTE(review): this silences every warning for the rest of the notebook,
# including pandas/sklearn deprecation and copy warnings — consider narrowing
# the filter to specific categories.
warnings.filterwarnings('ignore')
# Do not truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None

from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor, RandomForestRegressor, GradientBoostingRegressor


# Global experiment settings shared by the cells below.
SEED = 42  # random seed handed to the CV splitter and to each model
n_splits = 5  # number of stratified cross-validation folds
n_repeats = 3  # NOTE(review): not referenced by any active code visible in this notebook

In [2]:
## Process file
def process_file(filename, dirname):
    """Summarise one time-series parquet file as a flat statistics vector.

    Reads ``<dirname>/<filename>/part-0.parquet``, discards the ``step``
    column and computes ``describe()`` on what is left.

    Returns a tuple ``(stats, series_id)`` where ``stats`` is a 1-D array
    holding the max, min, mean, 25%, 75% and std rows of the description
    laid out row after row, and ``series_id`` is the part of ``filename``
    that follows the first ``=``.
    """
    parquet_path = os.path.join(dirname, filename, 'part-0.parquet')
    series = pd.read_parquet(parquet_path).drop(columns='step')

    summary = series.describe()
    stat_rows = summary.loc[['max', 'min', 'mean', '25%', '75%', 'std']]

    series_id = filename.split('=')[1]
    return stat_rows.to_numpy().reshape(-1), series_id

def load_time_series(dirname) -> pd.DataFrame:
    """Build one row of summary statistics per entry found in ``dirname``.

    Every directory entry is handed to ``process_file`` on a thread pool
    (with a tqdm progress bar). The returned frame has columns
    ``Stat_0 .. Stat_{k-1}`` followed by ``id``; ``k`` is the length of the
    first entry's statistics vector.
    """
    entries = os.listdir(dirname)

    def _summarise(entry):
        return process_file(entry, dirname)

    with ThreadPoolExecutor() as pool:
        progress = tqdm(pool.map(_summarise, entries), total=len(entries))
        results = list(progress)

    # Split the (stats, id) pairs into two parallel tuples.
    stats, indexes = zip(*results)

    n_stats = len(stats[0])
    df = pd.DataFrame(stats, columns=[f"Stat_{i}" for i in range(n_stats)])
    df['id'] = indexes

    return df

In [3]:
# Load data
# Tabular competition files, read from the Kaggle input directory.
train = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/train.csv')
test = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/test.csv')
# The sample submission is used later only for its 'id' column.
sample = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/sample_submission.csv')

# Per-participant summary statistics of the time-series parquet files
# (one row per directory entry; see load_time_series).
train_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet")
test_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_test.parquet")

# Last expression of the cell: renders the training statistics frame.
train_ts



100%|██████████| 996/996 [01:19<00:00, 12.50it/s]
100%|██████████| 2/2 [00:00<00:00,  9.15it/s]


Unnamed: 0,Stat_0,Stat_1,Stat_2,Stat_3,Stat_4,Stat_5,Stat_6,Stat_7,Stat_8,Stat_9,Stat_10,Stat_11,Stat_12,Stat_13,Stat_14,Stat_15,Stat_16,Stat_17,Stat_18,Stat_19,Stat_20,Stat_21,Stat_22,Stat_23,Stat_24,Stat_25,Stat_26,Stat_27,Stat_28,Stat_29,Stat_30,Stat_31,Stat_32,Stat_33,Stat_34,Stat_35,Stat_36,Stat_37,Stat_38,Stat_39,Stat_40,Stat_41,Stat_42,Stat_43,Stat_44,Stat_45,Stat_46,Stat_47,Stat_48,Stat_49,Stat_50,Stat_51,Stat_52,Stat_53,Stat_54,Stat_55,Stat_56,Stat_57,Stat_58,Stat_59,Stat_60,Stat_61,Stat_62,Stat_63,Stat_64,Stat_65,Stat_66,Stat_67,Stat_68,Stat_69,Stat_70,Stat_71,id
0,1.850391,3.580182,1.738203,5.314874,89.422226,0.0,2626.199951,4187.0,8.639500e+13,7.0,2.0,57.0,-1.812031,-2.631380,-1.798073,0.0,-89.987045,0.0,0.0,3829.000000,0.000000e+00,1.0,2.0,15.0,-0.054638,-0.163923,-0.114302,0.045252,-7.805897,0.000000,46.009533,4027.514893,5.415475e+13,4.438860,2.000000,30.202068,-0.701660,-0.619076,-0.536432,0.007953,-32.948602,0.000000,2.520257,3958.000000,4.325125e+13,3.0,2.0,17.0,0.437897,0.148919,0.223770,0.036048,13.095750,0.0,24.750000,4146.0,6.978000e+13,6.0,2.0,38.0,0.633126,0.513286,0.500372,0.132576,34.917873,0.000000,205.862213,108.451317,1.876976e+13,1.825557,0.000000,11.773107,0745c390
1,1.928769,3.234613,2.475326,3.966906,89.080330,1.0,2628.199951,4146.0,8.639500e+13,7.0,2.0,243.0,-1.807955,-2.887664,-1.004992,0.0,-89.654587,0.0,0.0,3098.166748,0.000000e+00,1.0,2.0,223.0,0.113277,0.093139,-0.106038,0.028960,-6.065619,0.046508,56.437958,3829.466064,4.331149e+13,3.840885,2.000000,232.909103,-0.231743,-0.257600,-0.595426,0.000367,-37.326844,0.000000,4.000000,3724.000000,2.128500e+13,2.0,2.0,228.0,0.517859,0.542323,0.312333,0.020598,18.462269,0.0,27.490936,3958.0,6.511000e+13,5.0,2.0,238.0,0.507897,0.541129,0.603787,0.096825,44.034721,0.208482,206.625092,167.600983,2.509136e+13,1.957999,0.000000,5.701968,eaab7a96
2,1.021510,1.016589,1.746797,5.066334,86.987267,0.0,2618.199951,4183.0,8.636500e+13,7.0,3.0,134.0,-1.903281,-3.150104,-1.020313,0.0,-89.540176,0.0,0.0,3853.000000,4.500000e+10,1.0,3.0,97.0,-0.499738,0.046381,-0.181152,0.056544,-11.934993,0.000000,77.305130,4106.425781,4.481677e+13,3.148264,3.000000,100.144516,-0.873151,-0.255299,-0.485521,0.005643,-30.154542,0.000000,2.918126,4089.625000,2.888500e+13,3.0,3.0,98.0,-0.242422,0.381953,0.088555,0.048282,5.009753,0.0,21.022933,4140.0,6.094500e+13,4.0,3.0,100.0,0.454021,0.510668,0.412588,0.140594,27.367514,0.000000,274.848145,50.734318,2.038156e+13,1.169176,0.000000,5.653936,8ec2cc63
3,5.908000,2.083693,1.269051,6.134459,89.976074,0.0,2502.000000,6000.0,8.639500e+13,7.0,4.0,72.0,-1.684624,-2.405738,-1.023798,0.0,-89.968369,0.0,0.0,3468.000000,0.000000e+00,1.0,1.0,48.0,0.007430,0.007583,-0.196510,0.053544,-12.847143,0.000000,9.369678,3958.604492,4.836642e+13,4.273992,2.303057,60.025017,-0.530198,-0.412805,-0.556091,0.009947,-34.965618,0.000000,0.893617,3841.000000,3.526000e+13,3.0,1.0,53.0,0.536801,0.443383,0.084469,0.057278,4.816339,0.0,6.200000,4064.0,6.330000e+13,6.0,4.0,67.0,0.586100,0.542189,0.474437,0.103401,32.552841,0.000000,54.104408,122.706802,1.868773e+13,2.023705,1.487018,7.396456,b2987a65
4,3.231563,1.033620,1.071875,2.774382,89.300034,0.0,1046.800049,4199.0,8.601500e+13,7.0,4.0,76.0,-1.675859,-1.071042,-1.012266,0.0,-89.770241,0.0,0.0,3815.083252,3.500000e+10,1.0,4.0,20.0,0.086653,-0.115162,-0.138969,0.040399,-11.009835,0.000000,5.049157,3992.347656,5.833895e+13,4.541829,4.000000,46.192024,-0.224805,-0.444297,-0.685736,0.005364,-46.348264,0.000000,1.438378,3837.333252,5.161375e+13,3.0,4.0,32.0,0.544297,0.153125,0.347474,0.043690,20.726226,0.0,4.942201,4087.0,7.393625e+13,7.0,4.0,69.0,0.509845,0.494897,0.639449,0.090201,47.933723,0.000000,15.590773,126.121590,2.146206e+13,2.081796,0.000000,18.615358,7b8842c3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
991,1.491908,3.059684,2.099614,3.669502,89.025551,1.0,2576.399902,4191.0,8.639500e+13,7.0,4.0,161.0,-1.407426,-1.014350,-1.020204,0.0,-89.820068,0.0,0.0,3098.166748,0.000000e+00,1.0,4.0,138.0,-0.067798,0.006292,0.201008,0.038587,13.204196,0.005480,8.857800,3851.464844,4.327464e+13,3.911399,4.000000,149.392853,-0.601601,-0.312998,-0.234954,0.001691,-14.056123,0.000000,1.166667,3747.000000,2.141875e+13,2.0,4.0,144.0,0.438858,0.308362,0.662152,0.039577,42.374405,0.0,5.867146,3970.0,6.497625e+13,6.0,4.0,155.0,0.591072,0.481551,0.565229,0.093812,38.950920,0.070666,53.159111,166.704880,2.502804e+13,1.951508,0.000000,6.595387,cd68643b
992,1.353594,0.994583,0.996484,1.786410,81.665283,0.0,1526.599976,4194.0,8.514000e+13,7.0,2.0,130.0,-1.064844,-1.012995,-1.033333,0.0,-89.104843,0.0,0.0,3829.000000,4.325000e+12,1.0,1.0,75.0,0.097154,-0.356072,-0.043487,0.036847,-6.769969,0.000000,35.192787,4002.345703,4.686980e+13,3.730544,1.548954,101.264435,-0.000378,-0.797474,-0.947604,0.008236,-70.773838,0.000000,7.116813,3835.000000,4.487000e+13,3.0,1.0,75.0,0.221654,-0.087578,0.632786,0.038638,38.104811,0.0,19.533333,4181.0,4.759750e+13,5.0,2.0,126.0,0.269882,0.530971,0.701824,0.068496,51.202812,0.000000,77.692108,166.620529,7.143143e+12,1.230283,0.497806,25.009487,f8ff0bc8
993,0.999923,1.043029,1.547813,3.692727,89.333710,1.0,2592.199951,4178.0,8.639500e+13,7.0,1.0,79.0,-1.508058,-2.958281,-1.013423,0.0,-89.887924,0.0,0.0,3098.166748,0.000000e+00,1.0,1.0,56.0,-0.147508,-0.047232,-0.242875,0.027135,-18.903458,0.222337,10.387013,3841.772705,4.316802e+13,4.002807,1.000000,67.532288,-0.552659,-0.354082,-0.850300,0.000000,-58.557291,0.000000,0.555556,3741.000000,2.137000e+13,2.0,1.0,62.0,0.140716,0.280936,0.231454,0.012770,13.528161,0.0,5.281850,3958.0,6.502500e+13,6.0,1.0,73.0,0.478085,0.499994,0.622155,0.109624,48.017563,0.410910,75.709877,164.142853,2.506494e+13,1.929882,0.000000,6.580971,db23fbe4
994,1.004674,0.981576,0.999219,1.673958,88.629547,0.0,1875.199951,4183.0,8.639500e+13,7.0,1.0,155.0,-1.073320,-1.455156,-1.016536,0.0,-87.998444,0.0,0.0,4073.000000,0.000000e+00,1.0,1.0,153.0,-0.441574,-0.080691,-0.270330,0.037183,-17.535593,0.000000,11.325677,4123.798828,4.597792e+13,2.487963,1.000000,154.201294,-0.831641,-0.369779,-0.664401,0.009702,-41.512409,0.000000,2.748235,4099.000000,2.505000e+13,1.0,1.0,154.0,-0.214362,0.210247,0.034375,0.039810,1.885406,0.0,10.699164,4146.0,6.662500e+13,2.0,1.0,155.0,0.502446,0.457471,0.470241,0.064660,32.590225,0.000000,35.017689,28.219002,2.436134e+13,2.188225,0.000000,0.726917,687c85e7


In [4]:
# Min-max scale the time-series statistics. The scaler is fitted on the
# training rows only and then reused unchanged for the test rows.
scaler = MinMaxScaler()

train_stats = train_ts.drop(columns='id')
scale_train_ts_cols = train_stats.columns
scale_train_ts = pd.DataFrame(scaler.fit_transform(train_stats), columns=scale_train_ts_cols)
scale_train_ts['id'] = train_ts['id']  # re-attach the join key

test_stats = test_ts.drop(columns='id')
scale_test_ts_cols = test_stats.columns
scale_test_ts = pd.DataFrame(scaler.transform(test_stats), columns=scale_test_ts_cols)
scale_test_ts['id'] = test_ts['id']  # re-attach the join key




In [22]:
# Names of the time-series statistic columns (everything except the join key)
time_series_cols = [col for col in train_ts.columns if col != "id"]

# add new features for csv
# train = create_feature_csv(train)
# test = create_feature_csv(test)





In [6]:
# Left-join the scaled time-series statistics onto the tabular rows, then
# discard the join key so it is not used as a feature.
train = train.merge(scale_train_ts, how="left", on='id').drop(columns='id')
test = test.merge(scale_test_ts, how="left", on='id').drop(columns='id')

In [7]:
# choose features
# Columns kept for modelling: the listed tabular columns, the target 'sii',
# and every time-series statistic column appended at the end.
# 'sii' is filtered out again when the test frame is selected below.
featuresCols = ['Basic_Demos-Enroll_Season', 'Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-Season', 'CGAS-CGAS_Score', 'Physical-Season', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'Fitness_Endurance-Season', 'Fitness_Endurance-Max_Stage',
                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',
                'FGC-Season', 'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',
                'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone', 'BIA-Season',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'PAQ_A-Season', 'PAQ_A-PAQ_A_Total', 'PAQ_C-Season',
                'PAQ_C-PAQ_C_Total', 'SDS-Season', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T', 'PreInt_EduHx-Season',
                'PreInt_EduHx-computerinternet_hoursday', 'sii'] + time_series_cols

# Keep the chosen columns and only the training rows that have a target value.
train = train.loc[:, featuresCols].dropna(subset='sii')

# The test frame has no target column, so select everything except 'sii'.
test_feature_cols = [col for col in featuresCols if col != "sii"]
test = test[test_feature_cols]

In [8]:
# categorical encoding 
# String-valued '*-Season' columns that must be integer-encoded before
# modelling; the same list is passed to CatBoost as its `cat_features`.
not_numeric_data = ['Basic_Demos-Enroll_Season', 'CGAS-Season', 'Physical-Season', 
          'Fitness_Endurance-Season', 'FGC-Season', 'BIA-Season', 
          'PAQ_A-Season', 'PAQ_C-Season', 'SDS-Season', 'PreInt_EduHx-Season']

def categorical_encoding(df, columns, ref_df=None):
    """Replace string category columns with integer codes, in place.

    Missing values in ``df`` are first replaced by the literal label
    ``'Missing'`` so that "not recorded" becomes a level of its own.

    Parameters
    ----------
    df : DataFrame
        Frame to encode. It is modified in place and also returned.
    columns : iterable of str
        Names of the columns to encode.
    ref_df : DataFrame, optional
        Frame that defines the label vocabulary. It must hold the *original
        labels* (not previously encoded integer codes); otherwise no label
        of ``df`` is found in the vocabulary and every value becomes -1.
        When omitted, the vocabulary is taken from ``df`` itself.

    Returns
    -------
    DataFrame
        ``df`` with each listed column replaced by its integer codes.
        A label that is absent from the vocabulary is encoded as -1.
    """
    for col in columns:
        df[col] = df[col].fillna('Missing')  # NaN becomes an explicit level
        if ref_df is not None:
            # Apply the same NaN -> 'Missing' rule to the reference before
            # deriving the vocabulary; otherwise 'Missing' would be unknown
            # (-1) here while getting a regular code when the reference
            # frame itself is encoded.
            categories = ref_df[col].fillna('Missing').astype('category').cat.categories
            df[col] = pd.Categorical(df[col], categories=categories).codes
        else:
            df[col] = df[col].astype('category').cat.codes
    return df

# Snapshot the season labels of train BEFORE encoding it: categorical_encoding
# overwrites those columns with integer codes in place. Using the encoded
# frame as the reference vocabulary would leave no test label in it, so every
# test season value would be encoded as -1.
# NaN is filled here so that 'Missing' is part of the reference vocabulary,
# matching how train itself is encoded.
train_season_labels = train[not_numeric_data].fillna('Missing')

train = categorical_encoding(train, not_numeric_data)
test = categorical_encoding(test, not_numeric_data, ref_df=train_season_labels)
        

In [9]:
def quadratic_weighted_kappa(y_true, y_pred):
    """Return Cohen's kappa between ``y_true`` and ``y_pred`` with quadratic weights."""
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

def threshold_Rounder(non_rounded_pred, thresholds):
    """Bucket raw predictions into the classes 0-3 using three cut points.

    The cut points are tested in order and the first one a value falls
    strictly below decides its class (0, 1 or 2); a value below none of
    them gets class 3. Only ``thresholds[0]``, ``[1]`` and ``[2]`` are read.
    """
    first, second, third = thresholds[0], thresholds[1], thresholds[2]
    below_cut = [
        non_rounded_pred < first,
        non_rounded_pred < second,
        non_rounded_pred < third,
    ]
    # np.select picks the choice of the first condition that holds.
    return np.select(below_cut, [0, 1, 2], default=3)

def evaluate_predictions(thresholds, y_true, non_rounded_pred):
    """Objective for threshold tuning: negated quadratic weighted kappa.

    The raw predictions are bucketed with ``thresholds`` and scored against
    ``y_true``. The sign is flipped so that a minimiser maximises the kappa.
    """
    bucketed = threshold_Rounder(non_rounded_pred, thresholds)
    kappa = quadratic_weighted_kappa(y_true, bucketed)
    return -kappa

In [10]:
def train_and_evaluate_model(model_class, test_data):
    """Cross-validate a regressor on the global ``train`` frame and build a submission.

    For every stratified fold a fresh clone of ``model_class`` is fitted.
    Its raw predictions for the held-out rows are stored out-of-fold and its
    predictions for ``test_data`` are stored per fold. The three cut points
    that turn raw predictions into the classes 0-3 are then tuned with
    Nelder-Mead against the out-of-fold predictions and applied to the
    fold-averaged test predictions.

    Relies on module-level names: ``train`` (must contain the ``sii``
    target), ``sample`` (supplies the submission ``id`` column),
    ``n_splits`` and ``SEED``.

    Parameters
    ----------
    model_class : estimator
        Despite the name, an unfitted estimator *instance*; it is cloned
        once per fold and never fitted itself.
    test_data : DataFrame
        Feature rows to predict, passed directly to ``model.predict``.
        NOTE(review): the rows are assumed to be in the same order as
        ``sample['id']`` — confirm.

    Returns
    -------
    tuple
        ``(submission, model, tKappa)``: the submission frame with columns
        ``id`` and ``sii``, the model fitted on the LAST fold only, and the
        quadratic weighted kappa of the out-of-fold predictions after
        threshold tuning.
    """
    X = train.drop(['sii'], axis=1)
    y = train['sii']

    SKF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    # SKF = RepeatedStratifiedKFold (n_splits=n_splits, n_repeats = 3, random_state=SEED)

    train_result = []
    test_result = []

    # Raw out-of-fold predictions: every training row is filled exactly once,
    # by the fold in which it was held out.
    non_rounded_pred = np.zeros(len(y), dtype=float)
    # One column of test predictions per fold; averaged after the loop.
    test_preds = np.zeros((len(test_data), n_splits))

    for fold, (train_idx, val_idx) in enumerate(tqdm(SKF.split(X, y), desc="Training Folds", total=n_splits)):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        model = clone(model_class)
        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)

        non_rounded_pred[val_idx] = y_val_pred

        # Per-fold scores use plain rounding; the tuned thresholds are only
        # available after all folds have been predicted.
        train_kappa = quadratic_weighted_kappa(y_train, y_train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val, y_val_pred.round(0).astype(int))

        train_result.append(train_kappa)
        test_result.append(val_kappa)

        test_preds[:, fold] = model.predict(test_data)

        print(f"Fold {fold+1} - Train QWK: {train_kappa:.4f}, Validation QWK: {val_kappa:.4f}")

    print(f"Mean Train QWK --> {np.mean(train_result):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(test_result):.4f}")

    # Tune the three cut points on the out-of-fold predictions, starting
    # from the plain-rounding boundaries.
    KappaOptimizer = minimize(evaluate_predictions,
                              x0=[0.5, 1.5, 2.5], args=(y, non_rounded_pred),
                              method='Nelder-Mead')

    predicted_tuned = threshold_Rounder(non_rounded_pred, KappaOptimizer.x)
    tKappa = quadratic_weighted_kappa(y, predicted_tuned)

    print(f"----> || Optimized QWK SCORE :: {tKappa:.3f}")

    # Average the fold models' test predictions, then bucket them with the
    # tuned cut points.
    tpm = test_preds.mean(axis=1)
    tpTuned = threshold_Rounder(tpm, KappaOptimizer.x)

    submission = pd.DataFrame({
        'id': sample['id'],
        'sii': tpTuned
    })

    return submission,model,tKappa

In [11]:
# def objective(trial):
#     XGBoostParams = {
#         'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.05),
#         'max_depth': trial.suggest_int('max_depth', 6, 8),
#         'n_estimators': trial.suggest_int('n_estimators', 180, 500),
#         'subsample': trial.suggest_uniform('subsample', 0.7, 0.9),
#         'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.7, 0.9),
#         'reg_alpha': trial.suggest_loguniform('reg_alpha', 1, 7),
#         'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-5, 3),
#         'tree_method': 'gpu_hist',
#         'random_state': SEED
#     }

#     # CatBoostParams = {
#     #    'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.05),  
#     #    'depth': trial.suggest_int('depth', 6, 8),  
#     #    'iterations': trial.suggest_int('iterations', 200, 400),  
#     #    'l2_leaf_reg': trial.suggest_loguniform('l2_leaf_reg', 2, 7),  
#     #    'border_count': 128,  
#     #    'random_seed': SEED, 
#     #    'task_type': 'GPU', 
#     #    'verbose': 0  
#     # }

#     # LgbmParams = {
#     #    'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.05),
#     #    'max_depth': trial.suggest_int('max_depth', 10, 14),
#     #    'num_leaves': 60,
#     #    'min_data_in_leaf': 14,
#     #    'feature_fraction': trial.suggest_uniform('feature_fraction', 0.7, 0.9),
#     #    'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.7, 0.9),
#     #    'bagging_freq': 2,
#     #    'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-5, 3),
#     #    'lambda_l2': trial.suggest_loguniform('lambda_l2', 1, 6),
#     #    'random_state': 42,
#     #    'verbose': -1,
#     #    'n_estimators': trial.suggest_int('n_estimators', 200, 500)  
#     # }
    
#     # model_class = CatBoostRegressor(**CatBoostParams)
#     # model_class = LGBMRegressor(**LgbmParams)
#     model_class = XGBRegressor(**XGBoostParams)
#     submission, model, tKappa = train_and_evaluate_model(model_class, test)
#     return tKappa
    


# sampler = optuna.samplers.TPESampler(seed=SEED)  # Tree-structured Parzen Estimator: proposes new parameters based on the results of earlier trials
# # Stop a trial early when its intermediate result is worse than the median of previous trials at the same step
# pruner = optuna.pruners.MedianPruner(n_warmup_steps=10)
# study = optuna.create_study(direction="maximize", sampler=sampler, pruner=pruner)
# study.optimize(objective, n_trials=60)

# print("Best trial:")
# trial = study.best_trial
# print(trial.params)
# print(f"Best QWK Score: {trial.value}")

In [12]:
# LightGBM hyperparameters (passed to LGBMRegressor below). The keys use
# LightGBM's native parameter names (min_data_in_leaf, feature_fraction, ...).
# NOTE(review): the provenance of these values is not shown in this notebook.
Params = {
    'learning_rate': 0.04213252223,
    'max_depth': 12,
    'num_leaves': 478,
    'min_data_in_leaf': 13,
    'feature_fraction': 0.893,
    'bagging_fraction': 0.784,
    'bagging_freq': 4,
    'lambda_l1': 10,  
    'lambda_l2': 0.01  
}


# XGBoost parameters
XGB_Params = {
    'learning_rate': 0.05,
    'max_depth': 6,
    'n_estimators': 234,
    'subsample': 0.7912213,
    'colsample_bytree': 0.8,
    'reg_alpha': 1,  
    'reg_lambda': 5,  
    'random_state': SEED
}


# CatBoost parameters. `cat_features` names the season columns, so the model
# has to be fitted on data in which those column names are available.
CatBoost_Params = {
    'learning_rate': 0.05,
    'depth': 6,
    'iterations': 212,
    'random_seed': SEED,
    'cat_features': not_numeric_data,
    'verbose': 0,
    'l2_leaf_reg': 10  # L2 regularisation coefficient (original note: "Increase this value")
}

# Create model instances
# These are unfitted estimators; they are only fitted through clones later on.
Light = LGBMRegressor(**Params, random_state=SEED, verbose=-1, n_estimators=300)
XGB_Model = XGBRegressor(**XGB_Params)
CatBoost_Model = CatBoostRegressor(**CatBoost_Params)

In [20]:
# # fine tune Voting Regressor
# from scipy.optimize import minimize 
# def optimize_weights(weights):

#     print(f"Current Weights: {weights}")
#     ensemble = VotingRegressor(estimators=[
#         ('lightgbm', Light),
#         ('xgboost', XGB_Model),
#         ('catboost', CatBoost_Model)
#     ], weights=weights)

#     submission, model, tkappa = train_and_evaluate_model(ensemble, test)
#     return -tkappa  

# # Initial initial weight
# initial_weights = [1.0, 1.0, 1.0]

# result = minimize(
#     optimize_weights, 
#     initial_weights, 
#     method='Nelder-Mead', 
#     options={'disp': True, 'maxiter': 100} 
# )

# best_weights = result.x
# print("Best Weights:", best_weights)
# Fixed ensemble weights, in the order the estimators are listed when the
# VotingRegressor is built: lightgbm, xgboost, catboost.
# NOTE(review): presumably the result of the commented-out weight search
# above — confirm.
best_weights = [4.9,5.2,4.9]

In [21]:
# Combine models using Voting Regressor
# The weights are matched to the estimators by position.
voting_model = VotingRegressor(estimators=[
    ('lightgbm', Light),
    ('xgboost', XGB_Model),
    ('catboost', CatBoost_Model)
],weights=best_weights)


# Cross-validate the ensemble, tune the rounding thresholds and predict the
# test set. `model` is the ensemble fitted on the last fold only.
submission, model, tkappa = train_and_evaluate_model(voting_model, test)

# Write the predictions to the working directory.
submission.to_csv('submission.csv', index=False)

Training Folds:  20%|██        | 1/5 [00:15<01:03, 15.90s/it]

Fold 1 - Train QWK: 0.7659, Validation QWK: 0.3730


Training Folds:  40%|████      | 2/5 [00:31<00:47, 15.77s/it]

Fold 2 - Train QWK: 0.7677, Validation QWK: 0.4313


Training Folds:  60%|██████    | 3/5 [00:46<00:30, 15.50s/it]

Fold 3 - Train QWK: 0.7782, Validation QWK: 0.4110


Training Folds:  80%|████████  | 4/5 [01:02<00:15, 15.66s/it]

Fold 4 - Train QWK: 0.7802, Validation QWK: 0.3602


Training Folds: 100%|██████████| 5/5 [01:17<00:00, 15.48s/it]

Fold 5 - Train QWK: 0.7724, Validation QWK: 0.3967
Mean Train QWK --> 0.7729
Mean Validation QWK ---> 0.3945





----> || Optimized QWK SCORE :: 0.457
