# Import Dependency

In [1]:
%load_ext cudf.pandas

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from sklearn.metrics import *
import xgboost as xgb
from sklearn.model_selection import train_test_split, KFold

from tqdm import tqdm
import os

import warnings
warnings.filterwarnings("ignore")

  if entities is not ():


# Configuration

In [2]:
class config:
    data_dir = '/kaggle/input/playground-series-s6e1'
    sub_path = '/kaggle/input/playground-series-s6e1/sample_submission.csv'
    org_path = '/kaggle/input/exam-score-prediction-dataset/Exam_Score_Prediction.csv'
    seed = 42
    random_state = 42
    test_size = 0.2
    target = 'exam_score'
    V = 1

cfg = config()

# Data Ingestion

In [3]:
%%time

train = pd.read_csv(os.path.join(cfg.data_dir,'train.csv'),index_col='id')
test  = pd.read_csv(os.path.join(cfg.data_dir,'test.csv'),index_col='id')
org   = pd.read_csv(cfg.org_path)

CPU times: user 1.07 s, sys: 392 ms, total: 1.46 s
Wall time: 1.85 s


# Feature Engineering

In [4]:
cat_cols = [col for col in test.columns if test[col].dtype in ['O']]
num_cols = [col for col in test.columns if test[col].dtype in ['float64','int64']]
BASE  = [col for col in train.columns if col not in [cfg.target]]

print("CAT COLS",len(cat_cols))
print(cat_cols)

print("NUM_COLS", len(num_cols))
print(num_cols)

print("BASE_COLS", len(BASE))
print(BASE)

CAT COLS 7
['gender', 'course', 'internet_access', 'sleep_quality', 'study_method', 'facility_rating', 'exam_difficulty']
NUM_COLS 4
['age', 'study_hours', 'class_attendance', 'sleep_hours']
BASE_COLS 11
['age', 'gender', 'course', 'study_hours', 'class_attendance', 'internet_access', 'sleep_hours', 'sleep_quality', 'study_method', 'facility_rating', 'exam_difficulty']


## Remove Outliers

In [5]:
for col in num_cols:
    # Calculate bounds on TRAIN only to avoid leakage
    lower = train[col].quantile(0.01)
    upper = train[col].quantile(0.99)
    
    # Clip both Train and Test to these bounds
    train[col] = train[col].clip(lower, upper)
    test[col] = test[col].clip(lower, upper)

## Original Feature

In [6]:
%%time

ORIG = []
for col in BASE:
    # MEAN
    mean_map = org.groupby(col)[cfg.target].mean()
    new_mean_col_name = f"orig_mean_{col}"
    mean_map.name = new_mean_col_name
    
    train = train.merge(mean_map, on=col, how='left')
    test = test.merge(mean_map, on=col, how='left')
    ORIG.append(new_mean_col_name)

    # COUNT
    new_count_col_name = f"orig_count_{col}"
    count_map = org.groupby(col).size().reset_index(name=new_count_col_name)
    
    train = train.merge(count_map, on=col, how='left')
    test = test.merge(count_map, on=col, how='left')
    ORIG.append(new_count_col_name)

print(len(ORIG), 'Orig Features Created!!')

22 Orig Features Created!!
CPU times: user 1.01 s, sys: 198 ms, total: 1.21 s
Wall time: 1.22 s


In [7]:
for col in ORIG:
    print(col,end='')
    if 'mean' in col:
        train[col] = train[col].fillna(org[cfg.target].mean())
        test[col] = test[col].fillna(org[cfg.target].mean())
    else:
        train[col] = train[col].fillna(0)
        test[col] = test[col].fillna(0)

orig_mean_ageorig_count_ageorig_mean_genderorig_count_genderorig_mean_courseorig_count_courseorig_mean_study_hoursorig_count_study_hoursorig_mean_class_attendanceorig_count_class_attendanceorig_mean_internet_accessorig_count_internet_accessorig_mean_sleep_hoursorig_count_sleep_hoursorig_mean_sleep_qualityorig_count_sleep_qualityorig_mean_study_methodorig_count_study_methodorig_mean_facility_ratingorig_count_facility_ratingorig_mean_exam_difficultyorig_count_exam_difficulty

## Round Features

In [8]:
%%time

ROUND = []
for k in range(1,3):
    n = f"round{k}"
    train[n] = train["study_hours"].round(k)
    test[n] = test["study_hours"].round(k)
    ROUND.append(n)

print(f"We have {len(ROUND)} Round Features")
print(ROUND)

We have 2 Round Features
['round1', 'round2']
CPU times: user 7.85 ms, sys: 1.02 ms, total: 8.88 ms
Wall time: 19.2 ms


## Original Score

In [9]:
NEW_COLS = []
tmp = org.groupby('study_hours').exam_score.mean()
tmp.name = "org_score"
train = train.merge(tmp,on='study_hours',how='left')
test  = test.merge(tmp,on='study_hours',how='left')
NEW_COLS.append("org_score")

## Origianl Price from Rounded Study hours.

In [10]:
for k in range(1,3):
    n = f"round{k}"
    org[n] = org["study_hours"].round(k)
    tmp = org.groupby(n).exam_score.mean()
    tmp.name = f"org_score_r{k}"
    train = train.merge(tmp, on=n, how="left")
    test = test.merge(tmp, on=n, how="left")
    NEW_COLS.append(f"org_score_r{k}")

print(f"We have {len(NEW_COLS)} New Features")
print(NEW_COLS)

We have 3 New Features
['org_score', 'org_score_r1', 'org_score_r2']


## Digit Extraction

In [11]:
for k in range(1,3):
    train[f'digit_{k}'] = ((train['study_hours']*10**k)%10).fillna(-1).astype("int8")
    test[f"digit_{k}"]  = ((test['study_hours']*10**k)%10).fillna(-1).astype('int8')
DIGITS = [f"digit_{k}" for k in range(1,3)] 
print(f"We have {len(DIGITS)} Digit Features")
print(DIGITS)

We have 2 Digit Features
['digit_1', 'digit_2']


## Interaction Features

In [12]:
DIGIT_INT = []
for i in range(2):
    for j in range(i+1,2):
        n = f"digit_{i+1}_{j+1}"
        train[n] = ((train[f'digit_{i+1}']+1)*11 + train[f'digit_{j+1}']+1).astype("int8")
        test[n] = ((test[f'digit_{i+1}']+1)*11 + test[f'digit_{j+1}']+1).astype("int8")
        DIGIT_INT.append(n)

print(f"We have {len(DIGIT_INT)} Digit Features")
print(DIGIT_INT)

We have 1 Digit Features
['digit_1_2']


## Combinational Features

In [13]:
COMBO = []
for col in cat_cols:
    comb = pd.concat([train[col],test[col]],axis=0)
    tmp,_ = pd.factorize(comb)
    train[col] = tmp[:len(train)]
    test[col]  = tmp[len(train):]
    n = f"{col}_sh"
    train[n] = train[col]*100 + train['study_hours']
    test[n] = test[col]*100 + test['study_hours']
    COMBO.append(n)

print(f"We engineer {len(COMBO)} new columns!")
print( COMBO )

We engineer 7 new columns!
['gender_sh', 'course_sh', 'internet_access_sh', 'sleep_quality_sh', 'study_method_sh', 'facility_rating_sh', 'exam_difficulty_sh']


## Bigram Feature

In [14]:
BIGRAM = []
for i in range(len(cat_cols)):
    for j in range(i,len(cat_cols)):
        new_col = f"{cat_cols[i]}_{cat_cols[j]}"
        train[new_col] = train[cat_cols[i]].astype(str)+'_'+train[cat_cols[j]].astype(str)
        test[new_col]  = test[cat_cols[i]].astype(str)+'_'+test[cat_cols[j]].astype(str)
        comb = pd.concat([train[new_col],test[new_col]],axis=0)
        tmp,_ = pd.factorize(comb)
        train[new_col] = tmp[:len(train)]
        test[new_col]  = tmp[len(train):]
        BIGRAM.append(new_col)

print(f"We engineer {len(BIGRAM)} new columns!")
print( BIGRAM )

We engineer 28 new columns!
['gender_gender', 'gender_course', 'gender_internet_access', 'gender_sleep_quality', 'gender_study_method', 'gender_facility_rating', 'gender_exam_difficulty', 'course_course', 'course_internet_access', 'course_sleep_quality', 'course_study_method', 'course_facility_rating', 'course_exam_difficulty', 'internet_access_internet_access', 'internet_access_sleep_quality', 'internet_access_study_method', 'internet_access_facility_rating', 'internet_access_exam_difficulty', 'sleep_quality_sleep_quality', 'sleep_quality_study_method', 'sleep_quality_facility_rating', 'sleep_quality_exam_difficulty', 'study_method_study_method', 'study_method_facility_rating', 'study_method_exam_difficulty', 'facility_rating_facility_rating', 'facility_rating_exam_difficulty', 'exam_difficulty_exam_difficulty']


## Distance Features

In [15]:
def add_engineered_features(df):
    eps = 1e-5
    df['_study_hours_sin'] = np.sin(2*np.pi*df['study_hours']/12)
    df['_class_attendance_sin'] = np.sin(2*np.pi*df['class_attendance']/12)
    for col in num_cols:
        df[f'log_{col}'] = np.log1p(df[col])
        df[f'{col}_sq'] = df[col] ** 2
    df['age_squared'] = df['age'] ** 2
    df['study_att'] = df['study_hours'] * df['class_attendance']
    df['study_sleep'] = df['study_hours'] * df['sleep_hours']
    df['att_sleep'] = df['class_attendance'] * df['sleep_hours']
    df['age_study'] = df['age'] * df['study_hours']
    df['study_over_sleep'] = df['study_hours'] / (df['sleep_hours'] + eps)
    df['att_over_sleep'] = df['class_attendance'] / (df['sleep_hours'] + eps)
    df['att_over_study'] = df['class_attendance'] / (df['study_hours'] + eps)
    df['high_att_high_study'] = ((df['class_attendance'] >= 90) & (df['study_hours'] >= 6)).astype(int)
    df['ideal_sleep'] = ((df['sleep_hours'] >= 7) & (df['sleep_hours'] <= 9)).astype(int)
    df['high_study'] = (df['study_hours'] >= 7).astype(int)
    df['study_bin'] = pd.cut(df['study_hours'], 5, labels=False)
    df['att_bin'] = pd.cut(df['class_attendance'], 5, labels=False)
    df['sleep_bin'] = pd.cut(df['sleep_hours'], 5, labels=False)
    df['age_bin'] = pd.cut(df['age'], 5, labels=False)
    df['efficiency'] = (df['study_hours'] * df['class_attendance']) / (df['sleep_hours'] + 1)
    df['sleep_gap_8'] = (df['sleep_hours'] - 8).abs()
    df['att_gap_100'] = (df['class_attendance'] - 100).abs()
    return df

In [16]:
train = add_engineered_features(train)
test  = add_engineered_features(test)

In [17]:
PUBLIC_FETR = [
    "_study_hours_sin",
    "_class_attendance_sin",
    "log_study_hours", "study_hours_sq",
    "log_class_attendance", "class_attendance_sq",
    "log_sleep_hours", "sleep_hours_sq",
    "log_age", "age_sq",
    "age_squared",
    "study_att",
    "study_sleep",
    "att_sleep",
    "age_study",
    "study_over_sleep",
    "att_over_sleep",
    "att_over_study",
    "high_att_high_study",
    "ideal_sleep",
    "high_study",
    "study_bin",
    "att_bin",
    "sleep_bin",
    "age_bin",
    "efficiency",
    "sleep_gap_8",
    "att_gap_100"
]

print(f"We engineer {len(PUBLIC_FETR)} new columns!")
print( PUBLIC_FETR )

We engineer 28 new columns!
['_study_hours_sin', '_class_attendance_sin', 'log_study_hours', 'study_hours_sq', 'log_class_attendance', 'class_attendance_sq', 'log_sleep_hours', 'sleep_hours_sq', 'log_age', 'age_sq', 'age_squared', 'study_att', 'study_sleep', 'att_sleep', 'age_study', 'study_over_sleep', 'att_over_sleep', 'att_over_study', 'high_att_high_study', 'ideal_sleep', 'high_study', 'study_bin', 'att_bin', 'sleep_bin', 'age_bin', 'efficiency', 'sleep_gap_8', 'att_gap_100']


## META features

In [18]:
meta_train_1 = pd.read_csv("/kaggle/input/hill-climbing-for-ensembling/oof.csv")
meta_test_1  = pd.read_csv("/kaggle/input/hill-climbing-for-ensembling/submission.csv")

meta_train_2 = pd.read_csv('/kaggle/input/ens-ft-transformer-tabm-autogluon-xgboost-resnet/oof.csv')
meta_test_2  = pd.read_csv('/kaggle/input/ens-ft-transformer-tabm-autogluon-xgboost-resnet/submission.csv')

meta_train_3 = pd.read_csv('/kaggle/input/s6e1-meta-learner-lr-lb-8-54477/lr_ensemble_oof.csv')
meta_test_3  = pd.read_csv('/kaggle/input/s6e1-meta-learner-lr-lb-8-54477/lr_ensemble_sub.csv')

train["meta_1"] = meta_train_1['exam_score']
test['meta_1']  = meta_test_1['exam_score']

train['meta_2'] = meta_train_2['exam_score']
test['meta_2']  = meta_test_2['exam_score']

train['meta_3'] = meta_train_3['exam_score']
test['meta_3']  = meta_test_3['exam_score']

META_FETR = ['meta_1','meta_2','meta_3']

## Formula

In [19]:
def formula(df):
    f = (6*df.study_hours + 0.35*df.class_attendance + 1.5*df.sleep_hours +
                 5*(df.sleep_quality=='good') + -5*(df.sleep_quality=='poor') +
                 10*(df.study_method=='coaching') + 5*(df.study_method=='mixed') + 2*(df.study_method=='group study') + 1*(df.study_method=='online videos') +
                 4*(df.facility_rating=='high') + -4*(df.facility_rating=='low') )
    return f

train['meta_0'] = formula(train)
test['meta_0']  = formula(test)
META_FETR.append('meta_0')

print(f"We engineer {len(META_FETR)} new columns!")
print( META_FETR )

We engineer 4 new columns!
['meta_1', 'meta_2', 'meta_3', 'meta_0']


# Target Encoding

In [20]:
from sklearn.base import BaseEstimator, TransformerMixin

class TargetEncoder(BaseEstimator, TransformerMixin):
    """
    Target Encoder that supports multiple aggregation functions,
    internal cross-validation for leakage prevention, and smoothing.

    Parameters
    ----------
    cols_to_encode : list of str
        List of column names to be target encoded.

    aggs : list of str, default=['mean']
        List of aggregation functions to apply. Any function accepted by
        pandas' `.agg()` method is supported, such as:
        'mean', 'std', 'var', 'min', 'max', 'skew', 'nunique', 
        'count', 'sum', 'median'.
        Smoothing is applied only to the 'mean' aggregation.

    cv : int, default=5
        Number of folds for cross-validation in fit_transform.

    smooth : float or 'auto', default='auto'
        The smoothing parameter `m`. A larger value puts more weight on the 
        global mean. If 'auto', an empirical Bayes estimate is used.
        
    drop_original : bool, default=False
        If True, the original columns to be encoded are dropped.
    """
    def __init__(self, cols_to_encode, aggs=['mean'], cv=5, smooth='auto', drop_original=False):
        self.cols_to_encode = cols_to_encode
        self.aggs = aggs
        self.cv = cv
        self.smooth = smooth
        self.drop_original = drop_original
        self.mappings_ = {}
        self.global_stats_ = {}

    def fit(self, X, y):
        """
        Learn mappings from the entire dataset.
        These mappings are used for the transform method on validation/test data.
        """
        temp_df = X.copy()
        temp_df['target'] = y

        # Learn global statistics for each aggregation
        for agg_func in self.aggs:
            self.global_stats_[agg_func] = y.agg(agg_func)

        # Learn category-specific mappings
        for col in self.cols_to_encode:
            self.mappings_[col] = {}
            for agg_func in self.aggs:
                mapping = temp_df.groupby(col)['target'].agg(agg_func)
                self.mappings_[col][agg_func] = mapping
        
        return self

    def transform(self, X):
        """
        Apply learned mappings to the data.
        Unseen categories are filled with global statistics.
        """
        X_transformed = X.copy()
        for col in self.cols_to_encode:
            for agg_func in self.aggs:
                new_col_name = f'TE_{col}_{agg_func}'
                map_series = self.mappings_[col][agg_func]
                X_transformed[new_col_name] = X[col].map(map_series)
                X_transformed[new_col_name].fillna(self.global_stats_[agg_func], inplace=True)
        
        if self.drop_original:
            X_transformed.drop(columns=self.cols_to_encode, inplace=True)
            
        return X_transformed

    def fit_transform(self, X, y):
        """
        Fit and transform the data using internal cross-validation to prevent leakage.
        """
        # First, fit on the entire dataset to get global mappings for transform method
        self.fit(X, y)

        # Initialize an empty DataFrame to store encoded features
        encoded_features = pd.DataFrame(index=X.index)
        
        kf = KFold(n_splits=self.cv, shuffle=True, random_state=42)

        for train_idx, val_idx in kf.split(X, y):
            X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
            X_val = X.iloc[val_idx]
            
            temp_df_train = X_train.copy()
            temp_df_train['target'] = y_train

            for col in self.cols_to_encode:
                # --- Calculate mappings only on the training part of the fold ---
                for agg_func in self.aggs:
                    new_col_name = f'TE_{col}_{agg_func}'
                    
                    # Calculate global stat for this fold
                    fold_global_stat = y_train.agg(agg_func)
                    
                    # Calculate category stats for this fold
                    mapping = temp_df_train.groupby(col)['target'].agg(agg_func)

                    # --- Apply smoothing only for 'mean' aggregation ---
                    if agg_func == 'mean':
                        counts = temp_df_train.groupby(col)['target'].count()
                        
                        m = self.smooth
                        if self.smooth == 'auto':
                            # Empirical Bayes smoothing
                            variance_between = mapping.var()
                            avg_variance_within = temp_df_train.groupby(col)['target'].var().mean()
                            if variance_between > 0:
                                m = avg_variance_within / variance_between
                            else:
                                m = 0  # No smoothing if no variance between groups
                        
                        # Apply smoothing formula
                        smoothed_mapping = (counts * mapping + m * fold_global_stat) / (counts + m)
                        encoded_values = X_val[col].map(smoothed_mapping)
                    else:
                        encoded_values = X_val[col].map(mapping)
                    
                    # Store encoded values for the validation fold
                    encoded_features.loc[X_val.index, new_col_name] = encoded_values.fillna(fold_global_stat)

        # Merge with original DataFrame
        X_transformed = X.copy()
        for col in encoded_features.columns:
            X_transformed[col] = encoded_features[col]
            
        if self.drop_original:
            X_transformed.drop(columns=self.cols_to_encode, inplace=True)
            
        return X_transformed

# Model Training

In [21]:
FEATURES = ORIG + num_cols + cat_cols + ROUND + COMBO + DIGITS + DIGIT_INT + NEW_COLS + PUBLIC_FETR + META_FETR + BIGRAM
print(f"Features {len(FEATURES)}")
print(FEATURES)

Features 108
['orig_mean_age', 'orig_count_age', 'orig_mean_gender', 'orig_count_gender', 'orig_mean_course', 'orig_count_course', 'orig_mean_study_hours', 'orig_count_study_hours', 'orig_mean_class_attendance', 'orig_count_class_attendance', 'orig_mean_internet_access', 'orig_count_internet_access', 'orig_mean_sleep_hours', 'orig_count_sleep_hours', 'orig_mean_sleep_quality', 'orig_count_sleep_quality', 'orig_mean_study_method', 'orig_count_study_method', 'orig_mean_facility_rating', 'orig_count_facility_rating', 'orig_mean_exam_difficulty', 'orig_count_exam_difficulty', 'age', 'study_hours', 'class_attendance', 'sleep_hours', 'gender', 'course', 'internet_access', 'sleep_quality', 'study_method', 'facility_rating', 'exam_difficulty', 'round1', 'round2', 'gender_sh', 'course_sh', 'internet_access_sh', 'sleep_quality_sh', 'study_method_sh', 'facility_rating_sh', 'exam_difficulty_sh', 'digit_1', 'digit_2', 'digit_1_2', 'org_score', 'org_score_r1', 'org_score_r2', '_study_hours_sin', '_c

In [22]:
STATS = ["mean","std","count","nunique","median","min","max","skew"]
STATS2 = ["mean","std"]

In [23]:
params = {
    "objective": "reg:squarederror",
    'learning_rate': 0.007,
    'max_depth': 7,
    # 'scale_pos_weight':1.2,
    'subsample': 0.8,
    'reg_lambda':3.0, 
    'colsample_bynode': 0.3,
    'min_child_weight':10,
    'n_jobs': -1,
    'eval_metric': 'rmse',
    'device': 'cuda',  
    'enable_categorical' : True
    }

In [24]:
kf = KFold(n_splits = 5, random_state = cfg.random_state, shuffle = True)

X = train[FEATURES]
y = train[cfg.target]
oof = np.zeros(len(train))
preds = np.zeros(len(test))

In [25]:
%%time

for fold,(train_indx,test_indx) in enumerate(kf.split(X,y)):
    print(f"#### Fold {fold+1} ####")
    x_train, y_train = X.iloc[train_indx], y.iloc[train_indx]
    x_valid , y_valid  = X.iloc[test_indx],  y.iloc[test_indx]
    x_test = test[FEATURES].copy()

    TE1 = TargetEncoder(cols_to_encode=num_cols, cv=5, smooth=1.0,aggs=STATS,drop_original=False)
    x_train = TE1.fit_transform(x_train,y_train)
    x_valid = TE1.transform(x_valid)
    x_test  = TE1.transform(x_test)

    TE2 = TargetEncoder(cols_to_encode=ROUND, cv=5, smooth=1.0,aggs=STATS2,drop_original=True)
    x_train = TE2.fit_transform(x_train,y_train)
    x_valid = TE2.transform(x_valid)
    x_test  = TE2.transform(x_test)

    TE3 = TargetEncoder(cols_to_encode=COMBO, cv=5, smooth=1.0,aggs=STATS2,drop_original=True)
    x_train = TE3.fit_transform(x_train,y_train)
    x_valid = TE3.transform(x_valid)
    x_test  = TE3.transform(x_test)

    TE4 = TargetEncoder(cols_to_encode=cat_cols, cv=5, smooth=1.0,aggs=STATS2,drop_original=False)
    x_train = TE4.fit_transform(x_train,x_train['study_hours'])
    x_valid = TE4.transform(x_valid)
    x_test  = TE4.transform(x_test)

    TE5 = TargetEncoder(cols_to_encode=DIGIT_INT, cv=5, smooth=1.0,aggs=STATS,drop_original=False)
    x_train = TE5.fit_transform(x_train,y_train)
    x_valid = TE5.transform(x_valid)
    x_test  = TE5.transform(x_test)

    # TE6 = TargetEncoder(cols_to_encode=BIGRAM, cv=5, smooth=1.0,aggs=STATS,drop_original=False)
    # x_train = TE6.fit_transform(x_train,y_train)
    # x_valid = TE6.transform(x_valid)
    # x_test  = TE6.transform(x_test)

    # TE7 = TargetEncoder(cols_to_encode=BIGRAM, cv=5, smooth=1.0,aggs=STATS,drop_original=False)
    # x_train = TE7.fit_transform(x_train,x_train['study_hours'])
    # x_valid = TE7.transform(x_valid)
    # x_test  = TE7.transform(x_test)

    # x_train[cat_cols] = x_train[cat_cols].astype('category')
    # x_valid[cat_cols] = x_valid[cat_cols].astype('category')
    # x_test[cat_cols]  = x_test[cat_cols].astype('category')
    
    dtrain = xgb.DMatrix(x_train,label=y_train)
    dval   = xgb.DMatrix(x_valid,label=y_valid)
    dtest  = xgb.DMatrix(x_test)

    params['random_state'] = fold+cfg.random_state
    
    model  = xgb.train(
        params = params,
        dtrain = dtrain,
        num_boost_round = 2_000,
        evals = [(dtrain,'train'),(dval,'valid')],
        early_stopping_rounds = 200,
        verbose_eval = 500
    )
    oof[test_indx] = model.predict(dval, iteration_range=(0,model.best_iteration+1))
    preds += model.predict(dtest, iteration_range=(0,model.best_iteration+1))
    score = mean_squared_error(y_valid,oof[test_indx])
    print(f"FOLD {fold+1} RMSE : {np.sqrt(score):.5f}")

preds /= 5

#### Fold 1 ####
[0]	train-rmse:18.82735	valid-rmse:18.75553
[500]	train-rmse:8.54601	valid-rmse:8.59393
[934]	train-rmse:8.48820	valid-rmse:8.58213
FOLD 1 RMSE : 8.58152
#### Fold 2 ####
[0]	train-rmse:18.81600	valid-rmse:18.79873
[500]	train-rmse:8.54564	valid-rmse:8.59684
[929]	train-rmse:8.48897	valid-rmse:8.58577
FOLD 2 RMSE : 8.58502
#### Fold 3 ####
[0]	train-rmse:18.81403	valid-rmse:18.80391
[500]	train-rmse:8.54663	valid-rmse:8.59509
[981]	train-rmse:8.48495	valid-rmse:8.58082
FOLD 3 RMSE : 8.58035
#### Fold 4 ####
[0]	train-rmse:18.79925	valid-rmse:18.86361
[500]	train-rmse:8.54480	valid-rmse:8.60454
[1000]	train-rmse:8.48240	valid-rmse:8.58928
[1125]	train-rmse:8.47137	valid-rmse:8.58975
FOLD 4 RMSE : 8.58911
#### Fold 5 ####
[0]	train-rmse:18.80543	valid-rmse:18.84062
[500]	train-rmse:8.53929	valid-rmse:8.62510
[1000]	train-rmse:8.47695	valid-rmse:8.60926
[1033]	train-rmse:8.47411	valid-rmse:8.60945
FOLD 5 RMSE : 8.60903
CPU times: user 13min 4s, sys: 4min 2s, total: 17min 

In [26]:
overall_rmse = mean_squared_error(y,oof)
print(f"OVERALL ROC SCORE: {np.sqrt(overall_rmse):.5f}")

OVERALL ROC SCORE: 8.58901


In [27]:
np.save(f'oof{cfg.V}.npy', oof)
np.save(f'preds{cfg.V}.npy', preds)

In [28]:
sub = pd.read_csv(cfg.sub_path)
sub[cfg.target] = preds
display(sub.head())
sub.to_csv("submission.csv",index=False)

Unnamed: 0,id,exam_score
0,630000,69.512566
1,630001,67.525011
2,630002,90.708458
3,630003,56.885264
4,630004,46.820876
