In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import seaborn as sns
import polars as pl
import warnings
import os

from sklearn.base import clone
from tqdm import tqdm
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import StratifiedKFold
from scipy.optimize import minimize
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor, RandomForestRegressor, GradientBoostingRegressor

from IPython.display import clear_output
from colorama import Fore, Style
import lightgbm as lgb

# Hide FutureWarning messages so cell output stays readable.
warnings.filterwarnings('ignore', category=FutureWarning)
sns.set(style="whitegrid")
%matplotlib inline

# SEED is passed as random_state to the CV splitter and the LightGBM models below;
# n_splits is the number of stratified CV folds.
SEED = 100
n_splits = 6

## Data Processing

### Overview

In [2]:
# Read the four competition CSV files from the Kaggle input directory.
_COMPETITION_DIR = '/kaggle/input/child-mind-institute-problematic-internet-use'
train, test, sample, data_dictionary = [
    pd.read_csv(f'{_COMPETITION_DIR}/{stem}.csv')
    for stem in ('train', 'test', 'sample_submission', 'data_dictionary')
]

In [3]:
# Quick look at the training table: its dimensions and first rows.
print(f"Train shape: {train.shape}")
train.head()

Train shape: (3960, 82)


Unnamed: 0,id,Basic_Demos-Enroll_Season,Basic_Demos-Age,Basic_Demos-Sex,CGAS-Season,CGAS-CGAS_Score,Physical-Season,Physical-BMI,Physical-Height,Physical-Weight,...,PCIAT-PCIAT_18,PCIAT-PCIAT_19,PCIAT-PCIAT_20,PCIAT-PCIAT_Total,SDS-Season,SDS-SDS_Total_Raw,SDS-SDS_Total_T,PreInt_EduHx-Season,PreInt_EduHx-computerinternet_hoursday,sii
0,00008ff9,Fall,5,0,Winter,51.0,Fall,16.877316,46.0,50.8,...,4.0,2.0,4.0,55.0,,,,Fall,3.0,2.0
1,000fd460,Summer,9,0,,,Fall,14.03559,48.0,46.0,...,0.0,0.0,0.0,0.0,Fall,46.0,64.0,Summer,0.0,0.0
2,00105258,Summer,10,1,Fall,71.0,Fall,16.648696,56.5,75.6,...,2.0,1.0,1.0,28.0,Fall,38.0,54.0,Summer,2.0,0.0
3,00115b9f,Winter,9,0,Fall,71.0,Summer,18.292347,56.0,81.6,...,3.0,4.0,1.0,44.0,Summer,31.0,45.0,Winter,0.0,1.0
4,0016bb22,Spring,18,1,Summer,,,,,,...,,,,,,,,,,


In [4]:
print(f"Test shape: {test.shape}")
# Keep the ids now: the `id` column is dropped from `test` further down,
# but the submission frame built in fit_and_predict still needs it.
test_ids = test['id']
test.head()

Test shape: (20, 59)


Unnamed: 0,id,Basic_Demos-Enroll_Season,Basic_Demos-Age,Basic_Demos-Sex,CGAS-Season,CGAS-CGAS_Score,Physical-Season,Physical-BMI,Physical-Height,Physical-Weight,...,BIA-BIA_TBW,PAQ_A-Season,PAQ_A-PAQ_A_Total,PAQ_C-Season,PAQ_C-PAQ_C_Total,SDS-Season,SDS-SDS_Total_Raw,SDS-SDS_Total_T,PreInt_EduHx-Season,PreInt_EduHx-computerinternet_hoursday
0,00008ff9,Fall,5,0,Winter,51.0,Fall,16.877316,46.0,50.8,...,32.6909,,,,,,,,Fall,3.0
1,000fd460,Summer,9,0,,,Fall,14.03559,48.0,46.0,...,27.0552,,,Fall,2.34,Fall,46.0,64.0,Summer,0.0
2,00105258,Summer,10,1,Fall,71.0,Fall,16.648696,56.5,75.6,...,,,,Summer,2.17,Fall,38.0,54.0,Summer,2.0
3,00115b9f,Winter,9,0,Fall,71.0,Summer,18.292347,56.0,81.6,...,45.9966,,,Winter,2.451,Summer,31.0,45.0,Winter,0.0
4,0016bb22,Spring,18,1,Summer,,,,,,...,,Summer,1.04,,,,,,,


Age: `Basic_Demos-Age`

Sex: `Basic_Demos-Sex`

Internet Usage: `PreInt_EduHx-computerinternet_hoursday`

CGAS score: `CGAS-CGAS_Score`    (đánh giá chức năng tổng quát của trẻ em, điểm càng thấp càng có vấn đề)

Physical: `Physical-BMI`, `Physical-Height`, `Physical-Weight`, `Physical-Waist_Circumference`, `Physical-Diastolic_BP`, `Physical-HeartRate`, `Physical-Systolic_BP` (các thang đo sức khỏe vật lý) 

FGC: `Fitness_Endurance-Max_Stage`, `Fitness_Endurance-Time_Mins`, `Fitness_Endurance-Time_Sec`, `FGC-FGC_CU`, `FGC-FGC_CU_Zone`, `FGC-FGC_GSND`,
       `FGC-FGC_GSND_Zone`, `FGC-FGC_GSD`, `FGC-FGC_GSD_Zone`, `FGC-FGC_PU`,
       `FGC-FGC_PU_Zone`, `FGC-FGC_SRL`, `FGC-FGC_SRL_Zone`, `FGC-FGC_SRR`,
       `FGC-FGC_SRR_Zone`, `FGC-FGC_TL`, `FGC-FGC_TL_Zone`
       (các thang đo đánh giá tim mạch, sức mạnh cơ bắp, linh hoạt qua các bài kiểm tra sức bền thể chất)


BIA: `BIA-BIA_Activity_Level_num`, `BIA-BIA_BMC`, `BIA-BIA_BMI`,
       `BIA-BIA_BMR`, `BIA-BIA_DEE`, `BIA-BIA_ECW`, `BIA-BIA_FFM`,
       `BIA-BIA_FFMI`, `BIA-BIA_FMI`, `BIA-BIA_Fat`, `BIA-BIA_Frame_num`,
       `BIA-BIA_ICW`, `BIA-BIA_LDM`, `BIA-BIA_LST`, `BIA-BIA_SMM`,
       `BIA-BIA_TBW` (theo dõi các yếu tố thành phần cơ thể như nước, mỡ, cơ bắp)

SDS: đánh giá rối loạn giấc ngủ ở trẻ

PAQ: đánh giá mức độ tham gia các hoạt động thể chất trong 10 ngày gần nhất

PCIAT: thang đo hành vi và đặc tính khi sử dụng Internet quá mức (gồm 20 câu hỏi)

### Extract features from actigraphy (time-series data)

In [5]:
def load_time_series(dirname):
    """Summarise every actigraphy series stored under ``dirname``.

    Each entry of ``dirname`` is expected to be a sub-directory containing a
    ``part-0.parquet`` file. For every series the ``step`` column is dropped,
    ``DataFrame.describe()`` is computed, its first row (the count) is
    discarded and the remaining statistics are flattened into one feature row.

    Parameters
    ----------
    dirname : str
        Directory holding one sub-directory per series. The id is taken from
        the part of the sub-directory name after the first ``=``
        (presumably ``id=<participant>`` - verify against the dataset layout).

    Returns
    -------
    pandas.DataFrame
        One row per series with columns ``stat_0 .. stat_{k-1}`` plus ``id``.
        An empty directory yields an empty frame that still has the ``id``
        column, so a later merge on ``id`` keeps working.
    """
    # Sort so the row order does not depend on the file system's listing order.
    ids = sorted(os.listdir(dirname))
    indexes = []
    stats = []
    for idname in tqdm(ids):
        series = pd.read_parquet(os.path.join(dirname, idname, 'part-0.parquet'))
        series = series.drop('step', axis=1)
        # iloc[1:] skips the first describe() row so only the remaining statistics are kept.
        stats.append(series.describe().iloc[1:].values.reshape(-1))
        indexes.append(idname.split('=')[1])

    # Guard the empty case: the original indexed stats[0] unconditionally.
    n_stats = len(stats[0]) if stats else 0
    features = pd.DataFrame(stats, columns=[f"stat_{i}" for i in range(n_stats)])
    features['id'] = indexes
    return features

In [6]:
train_ts = load_time_series(
    "/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet"
)
test_ts = load_time_series(
    "/kaggle/input/child-mind-institute-problematic-internet-use/series_test.parquet"
)
# Every column except the id is a summary statistic of the time series.
time_series_cols = [col for col in train_ts.columns if col != "id"]

100%|██████████| 996/996 [02:38<00:00,  6.28it/s]
100%|██████████| 2/2 [00:00<00:00,  9.08it/s]


In [7]:
# Left joins keep every tabular row, including rows without a time series.
train = train.merge(train_ts, on='id', how='left')
test = test.merge(test_ts, on='id', how='left')

In [8]:
# The identifier is not a feature; remove it from both frames.
train = train.drop(columns=['id'])
test = test.drop(columns=['id'])

### Impute and convert category columns

In [9]:
# Features are defined by the test frame, so columns that exist only in
# train (apart from the target) are excluded from modelling.
feature_cols = list(test.columns)
# Names of the non-numeric columns. Stored as a plain list of names
# instead of a DataFrame holding a copy of the data.
cat_cols = test.select_dtypes(exclude=['number']).columns.tolist()

# .copy() makes `train` an independent frame, so the column assignments in
# later cells do not write into a slice of the original table.
train = train[feature_cols + ['sii']].copy()

In [10]:
def convert_cat_cols(df, cols=None):
    """Encode season columns as numbers, in place, and return the frame.

    'Spring', 'Summer', 'Fall' and 'Winter' become 1, 2, 3 and 4. Missing
    values become 0. Values that are already numeric are left unchanged, so
    running the function twice gives the same result. Any other value cannot
    be converted and also becomes 0.

    The conversion to a numeric dtype is done explicitly with
    ``pd.to_numeric`` rather than relying on ``Series.replace`` silently
    downcasting an object column, which newer pandas versions deprecate.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; the listed columns are overwritten.
    cols : iterable of str, optional
        Columns to encode. Defaults to the module-level ``cat_cols``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object.
    """
    season_codes = {'Spring': 1, 'Summer': 2, 'Fall': 3, 'Winter': 4}
    if cols is None:
        cols = cat_cols
    for col in cols:
        # Map known season names and pass every other value through untouched.
        recoded = df[col].map(lambda value: season_codes.get(value, value))
        df[col] = pd.to_numeric(recoded, errors='coerce').fillna(0)
    return df

# Apply the same season encoding to both frames so their columns stay comparable.
train = convert_cat_cols(train) 
test = convert_cat_cols(test)

### Drop rows with missing `sii` values

In [11]:
# Only rows that carry a target value can be used for supervised training.
train = train.loc[train['sii'].notna()].copy()
train.shape, test.shape

((2736, 143), (20, 142))

### Variation 1: Adding features

In [12]:
# (new column, left operand, operator, right operand), listed in the order
# in which the new columns are appended to the frame.
_INTERACTION_FEATURES = [
    ('Age_BP', 'Basic_Demos-Age', '*', 'Physical-Systolic_BP'),  # Blood pressure trends with age
    ('Sex_HR', 'Basic_Demos-Sex', '*', 'Physical-HeartRate'),  # Cardiovascular patterns by sex
    ('BMI_Age', 'Physical-BMI', '*', 'Basic_Demos-Age'),  # BMI scaled by age
    ('BMI_PAQC', 'Physical-BMI', '*', 'PAQ_C-PAQ_C_Total'),  # Physical activity scaled by body composition

    ('BMI_Internet_Hours', 'Physical-BMI', '*', 'PreInt_EduHx-computerinternet_hoursday'),  # BMI x internet hours
    ('Age_Internet_Hours', 'Basic_Demos-Age', '*', 'PreInt_EduHx-computerinternet_hoursday'),  # Internet hours scaled by age
    ('PAQC_Internet_Hours', 'PAQ_C-PAQ_C_Total', '*', 'PreInt_EduHx-computerinternet_hoursday'),  # Internet hours scaled by physical activity
    ('Sleep_Internet_Hours', 'SDS-SDS_Total_T', '*', 'PreInt_EduHx-computerinternet_hoursday'),  # Internet hours scaled by sleep disturbance
    ('PAQC_CGAS', 'PAQ_C-PAQ_C_Total', '*', 'CGAS-CGAS_Score'),  # Global assessment vs. physical activity
    ('Sleep_CGAS', 'SDS-SDS_Total_T', '*', 'CGAS-CGAS_Score'),  # Global assessment vs. sleep disturbances

    ('BP_HR', 'Physical-Systolic_BP', '*', 'Physical-HeartRate'),  # Cardiovascular health metrics
    ('BP_prof', 'Physical-Systolic_BP', '*', 'Physical-Diastolic_BP'),  # Overall blood pressure profile
    ('BMR_HR', 'BIA-BIA_BMR', '/', 'Physical-HeartRate'),  # Basal Metabolic Rate scaled by heart rate
    ('HR_Sleep', 'Physical-HeartRate', '/', 'SDS-SDS_Total_T'),  # Heart rate vs. sleep disturbances

    ('BFP_BMI', 'BIA-BIA_Fat', '/', 'BIA-BIA_BMI'),  # Body Fat Percentage and BMI ratio
    ('FFMI_BFP', 'BIA-BIA_FFMI', '/', 'BIA-BIA_Fat'),  # Fat-Free Mass Index and Body Fat Percentage ratio
    ('FMI_BFP', 'BIA-BIA_FMI', '/', 'BIA-BIA_Fat'),  # Fat Mass Index and Body Fat Percentage ratio
    ('LST_TBW', 'BIA-BIA_LST', '/', 'BIA-BIA_TBW'),  # Lean Soft Tissue to Total Body Water ratio
    ('BFP_BMR', 'BIA-BIA_Fat', '*', 'BIA-BIA_BMR'),  # Body Fat Percentage multiplied by Basal Metabolic Rate
    ('BFP_DEE', 'BIA-BIA_Fat', '*', 'BIA-BIA_DEE'),  # Body Fat Percentage multiplied by Daily Energy Expenditure
    ('BMR_Weight', 'BIA-BIA_BMR', '/', 'Physical-Weight'),  # Basal Metabolic Rate scaled by weight
    ('DEE_Weight', 'BIA-BIA_DEE', '/', 'Physical-Weight'),  # Daily Energy Expenditure scaled by weight
    ('SMM_Height', 'BIA-BIA_SMM', '/', 'Physical-Height'),  # Skeletal Muscle Mass scaled by height
    ('Muscle_to_Fat', 'BIA-BIA_SMM', '/', 'BIA-BIA_FMI'),  # Muscle-to-Fat Ratio
    ('Hydration_Status', 'BIA-BIA_TBW', '/', 'Physical-Weight'),  # Hydration Status
    ('ICW_TBW', 'BIA-BIA_ICW', '/', 'BIA-BIA_TBW'),  # Intracellular Water to Total Body Water ratio
]


def feature_engineer(df):
    """Append pairwise product and ratio features to ``df`` and return it.

    The frame is modified in place: one new column is added per entry of
    ``_INTERACTION_FEATURES``, in that order. A ratio whose denominator is
    zero would evaluate to +/-inf; those values are replaced by NaN so they
    are treated like any other missing value by the imputation step that
    follows, instead of making it fail on non-finite input.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing all source columns named in ``_INTERACTION_FEATURES``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the derived columns added.
    """
    for new_col, left, op, right in _INTERACTION_FEATURES:
        if op == '*':
            df[new_col] = df[left] * df[right]
        else:
            df[new_col] = df[left] / df[right]

    derived = [spec[0] for spec in _INTERACTION_FEATURES]
    df[derived] = df[derived].replace([np.inf, -np.inf], np.nan)
    return df

In [13]:
# Add the same derived columns to both frames, then show the resulting shapes.
train = feature_engineer(train)
test = feature_engineer(test)

train.shape, test.shape

((2736, 169), (20, 168))

### Variation 2: Perform numerical imputation

In [14]:
from sklearn.impute import KNNImputer

# Numeric columns are taken from `test`, so the `sii` target is never imputed.
num_cols = test.select_dtypes(include=['float64', 'int64']).columns

# Fit the imputer on the training rows only, then reuse it for the test rows.
imputer = KNNImputer(n_neighbors=3)
train[num_cols] = imputer.fit_transform(train[num_cols])
test[num_cols] = imputer.transform(test[num_cols])

---
## Model

### Helper Functions (use for evaluation)

In [15]:
## Helper functions
def quadratic_weighted_kappa(y_true, y_pred):
    """Return Cohen's kappa between two label vectors, using quadratic weights."""
    kappa = cohen_kappa_score(y_true, y_pred, weights='quadratic')
    return kappa
    
def threshold_Rounder(oof_non_rounded, thresholds):
    """Turn continuous predictions into class labels 0-3 using three cut points.

    The cut points are tested in order and the first one a value falls below
    decides its label; values below none of them get label 3.
    """
    low, mid, high = thresholds[0], thresholds[1], thresholds[2]
    below = [
        oof_non_rounded < low,
        oof_non_rounded < mid,
        oof_non_rounded < high,
    ]
    # np.select picks the first condition that holds, like the nested np.where form.
    return np.select(below, [0, 1, 2], default=3)

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    """Objective for the threshold search: negative QWK of the thresholded predictions.

    The sign is flipped because the optimiser minimises its objective.
    """
    labels = threshold_Rounder(oof_non_rounded, thresholds)
    score = quadratic_weighted_kappa(y_true, labels)
    return -score

### Train model and predict result

In [16]:
def fit_and_predict(model_class, test_data):
    """Cross-validate a regressor, tune rounding thresholds and predict ``test_data``.

    Relies on module-level state: the ``train`` frame (features plus ``sii``),
    ``n_splits`` and ``SEED`` for the stratified split, and ``test_ids`` for
    the submission's ``id`` column.

    For every fold a fresh clone of ``model_class`` is fitted, its predictions
    for the held-out rows are stored as out-of-fold (OOF) predictions, and
    ``test_data`` is predicted. The three class cut points are then optimised
    on the OOF predictions with Nelder-Mead (maximising quadratic weighted
    kappa) and applied to the fold-averaged test predictions.

    The per-fold training-set predictions, rounded OOF array and kappa lists
    of the earlier version were computed but never used, so they are gone.

    Parameters
    ----------
    model_class : estimator
        Unfitted scikit-learn compatible regressor; it is cloned per fold.
    test_data : pandas.DataFrame
        Rows to predict, with the same feature columns as ``train``.

    Returns
    -------
    (pandas.DataFrame, float)
        The submission frame (``id``, ``sii``) and the QWK of the
        threshold-tuned OOF predictions.

    Raises
    ------
    AssertionError
        If the threshold optimisation reports that it did not converge.
    """
    X = train.drop(['sii'], axis=1)
    y = train['sii']

    SKF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)

    oof_non_rounded = np.zeros(len(y), dtype=float)
    test_preds = np.zeros((len(test_data), n_splits))

    for fold, (train_idx, val_idx) in enumerate(tqdm(SKF.split(X, y), desc="Training Folds", total=n_splits)):
        model = clone(model_class)
        model.fit(X.iloc[train_idx], y.iloc[train_idx])

        oof_non_rounded[val_idx] = model.predict(X.iloc[val_idx])
        test_preds[:, fold] = model.predict(test_data)

        # Drop the fitted model's training log before the next fold starts.
        clear_output(wait=True)

    KappaOPtimizer = minimize(evaluate_predictions,
                              x0=[0.5, 1.5, 2.5], args=(y, oof_non_rounded),
                              method='Nelder-Mead')
    assert KappaOPtimizer.success, "Optimization did not converge."

    oof_tuned = threshold_Rounder(oof_non_rounded, KappaOPtimizer.x)
    tKappa = quadratic_weighted_kappa(y, oof_tuned)

    # Average the fold models' raw predictions before applying the tuned cut points.
    tpm = test_preds.mean(axis=1)
    tpTuned = threshold_Rounder(tpm, KappaOPtimizer.x)

    submission = pd.DataFrame({
        'id': test_ids,
        'sii': tpTuned
    })

    return submission, tKappa

### Tuning hyperparameters

In [17]:
def param_tunning(max_evals,type,train_data,scoring_data):
    """Tune one gradient-boosting model with hyperopt's TPE search.

    Each trial builds a regressor from the sampled parameters and scores it
    with ``fit_and_predict``; the loss is the negative threshold-tuned
    quadratic weighted kappa returned by that function.

    Fixes over the earlier version:
    - Trials call ``fit_and_predict``; the old name ``TrainML`` is not defined.
    - The undefined ``cat_c`` is no longer passed to CatBoost.
    - ``fmin`` reports ``hp.choice`` dimensions as indices, so the result is
      converted back to real values with ``space_eval``.
    - The returned dictionary holds exactly the parameters the objective gave
      to the model: the ``lambda_l2`` key typo is fixed, XGBoost now receives
      ``reg_alpha``/``reg_lambda``, and CatBoost's ``border_count`` is reported.

    Parameters
    ----------
    max_evals : int
        Number of hyperopt trials.
    type : str
        Model family to tune: 'LGBM', 'XGB' or 'CAT'. The name shadows the
        builtin but is kept so existing keyword calls keep working.
    train_data : pandas.DataFrame
        Not used; ``fit_and_predict`` reads the module-level ``train`` frame.
        Kept for backward compatibility.
    scoring_data : pandas.DataFrame
        Passed to ``fit_and_predict`` as the frame to predict on.

    Returns
    -------
    dict
        Best parameter values for the chosen model family, ready to be
        unpacked into the corresponding regressor constructor.

    Raises
    ------
    ValueError
        If ``type`` is not one of the supported model families.
    """
    from hyperopt import fmin, tpe, hp, Trials, STATUS_OK, space_eval
    import xgboost as xgb

    reported_keys = {
        'LGBM': ['learning_rate', 'n_estimators', 'subsample', 'max_depth', 'min_data_in_leaf',
                 'feature_fraction', 'bagging_fraction', 'bagging_freq', 'lambda_l1', 'lambda_l2'],
        'XGB': ['learning_rate', 'n_estimators', 'subsample', 'colsample_bytree', 'max_depth',
                'reg_alpha', 'reg_lambda'],
        'CAT': ['learning_rate', 'iterations', 'max_depth', 'l2_leaf_reg', 'random_strength',
                'bagging_temperature', 'border_count'],
    }
    if type not in reported_keys:
        raise ValueError(f"Unknown model type {type!r}; expected one of {sorted(reported_keys)}")

    def build_model(params):
        """Create the regressor for ``type`` from one sampled parameter set."""
        if type == 'LGBM':
            return lgb.LGBMRegressor(
                learning_rate=params['learning_rate'],
                n_estimators=int(params['n_estimators']),
                subsample=params['subsample'],
                max_depth=int(params['max_depth']),
                min_data_in_leaf=int(params['min_data_in_leaf']),
                feature_fraction=params['feature_fraction'],
                bagging_fraction=params['bagging_fraction'],
                bagging_freq=int(params['bagging_freq']),
                lambda_l1=params['lambda_l1'],
                lambda_l2=params['lambda_l2'],
                random_state=SEED,
                verbose=-1
            )
        if type == 'XGB':
            return xgb.XGBRegressor(
                learning_rate=params['learning_rate'],
                n_estimators=int(params['n_estimators']),
                subsample=params['subsample'],
                colsample_bytree=params['colsample_bytree'],
                max_depth=int(params['max_depth']),
                reg_alpha=params['reg_alpha'],
                reg_lambda=params['reg_lambda'],
                objective='reg:squarederror',
                verbosity=0,
                eval_metric='rmse',
                random_state=SEED,
                enable_categorical=True
            )
        # type == 'CAT'. The season columns are already numeric at this point,
        # so no cat_features are declared.
        return CatBoostRegressor(
            learning_rate=params['learning_rate'],
            iterations=int(params['iterations']),
            max_depth=int(params['max_depth']),
            l2_leaf_reg=params['l2_leaf_reg'],
            random_strength=params['random_strength'],
            bagging_temperature=params['bagging_temperature'],
            border_count=int(params['border_count']),
            random_state=SEED,
            verbose=0
        )

    def objective(params):
        """hyperopt objective: negative tuned QWK of the model built from ``params``."""
        _submission, kappa = fit_and_predict(build_model(params), scoring_data)
        return {'loss': -kappa, 'status': STATUS_OK}

    # One shared space for all three families; each model reads only its own
    # keys. 'min_child_weight' is sampled but not consumed by any model.
    space = {
        'learning_rate': hp.uniform('learning_rate', 0.01, 0.1),
        'n_estimators': hp.quniform('n_estimators', 200, 300, 50),
        'iterations': hp.quniform('iterations', 150, 250, 50),
        'min_data_in_leaf': hp.choice('min_data_in_leaf', list(range(10, 21))),
        'lambda_l1': hp.loguniform('lambda_l1', np.log(6), np.log(12)),
        'lambda_l2': hp.loguniform('lambda_l2', np.log(0.01), np.log(0.1)),
        'bagging_fraction': hp.uniform('bagging_fraction', 0.7, 0.9),
        'feature_fraction': hp.uniform('feature_fraction', 0.8, 1.0),
        'bagging_freq': hp.choice('bagging_freq', [4]),
        'subsample': hp.uniform('subsample', 0.75, 0.85),
        'colsample_bytree': hp.uniform('colsample_bytree', 0.7, 0.9),
        'max_depth': hp.quniform('max_depth', 6, 12, 1),
        'l2_leaf_reg': hp.loguniform('l2_leaf_reg', np.log(5), np.log(15)),
        'random_strength': hp.uniform('random_strength', 0.1, 0.2),
        'bagging_temperature': hp.uniform('bagging_temperature', 0.01, 0.1),
        'border_count': hp.quniform('border_count', 128, 255, 1),
        'reg_alpha': hp.uniform('reg_alpha', 0.5, 2),
        'reg_lambda': hp.uniform('reg_lambda', 3, 7),
        'min_child_weight': hp.uniform('min_child_weight', 0.5, 2)
    }

    trials = Trials()

    best_raw = fmin(fn=objective,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=max_evals,
                    trials=trials)

    # fmin reports hp.choice dimensions as list indices; space_eval maps the
    # raw result back onto the actual values of the search space.
    best = dict(space_eval(space, best_raw))

    # quniform samples are floats; the models expect integers for these.
    for key in ('max_depth', 'n_estimators', 'iterations', 'border_count',
                'min_data_in_leaf', 'bagging_freq'):
        best[key] = int(best[key])

    best_filtered = {key: best[key] for key in reported_keys[type]}

    print(f"--->{type} best parameters:{best_filtered}")

    return best_filtered

In [18]:
# best_params_lgbm=param_tunning(max_evals=100,type='LGBM',train_data=train,scoring_data=test)
# LGBM_best_parameters = {
#     'bagging_fraction': 0.8823877190424483, 
#     'bagging_freq': 0, 
#     'feature_fraction': 0.9687818251276876, 
#     'lambda_l1': 11.083702499316473, 
#     'learning_rate': 0.015047557793121483, 
#     'max_depth': 12, 'min_data_in_leaf': 7, 
#     'n_estimators': 300, 
#     'subsample': 0.8397302322292359
# }

In [19]:
# best_params_xgb=param_tunning(max_evals=100,type='XGB',train_data=train,scoring_data=test)
# XGB best parameters:{
    # 'colsample_bytree': 0.8824818923210358, 
    # 'learning_rate': 0.010202971192339372, 
    # 'max_depth': 6, 
    # 'n_estimators': 250, 
    # 'reg_alpha': 1.7453122201668874, 
    # 'reg_lambda': 5.784871182330073, 
    # 'subsample': 0.7515130656632484
# }

In [20]:
# best_params_catboost=param_tunning(max_evals=100,type='CAT',train_data=train,scoring_data=test)
# CAT best parameters:{
#     'bagging_temperature': 0.09868359430137408,
#     'iterations': 150,
#     'l2_leaf_reg': 13.675035261530743,
#     'learning_rate': 0.03609529519492104,
#     'max_depth': 6,
#     'random_strength': 0.1687460842854431
# }

In [21]:
# Hyperparameters copied from earlier param_tunning runs (the commented-out cells above).
# NOTE(review): 'bagging_freq': 0 and 'min_data_in_leaf': 7 lie outside the search space
# declared in param_tunning (hp.choice over [4] and over range(10, 21)). They look like
# hp.choice indices rather than the selected values - confirm before reusing them.
LGBM_best_parameters = {
    'bagging_fraction': 0.8823877190424483, 
    'bagging_freq': 0, 
    'feature_fraction': 0.9687818251276876, 
    'lambda_l1': 11.083702499316473, 
    'learning_rate': 0.015047557793121483, 
    'max_depth': 12, 'min_data_in_leaf': 7, 
    'n_estimators': 300, 
    'subsample': 0.8397302322292359
}
# NOTE(review): in param_tunning as written, reg_alpha and reg_lambda are sampled but not
# passed to the XGBoost model, so these two values were presumably not tuned - verify.
XGB_best_parameters={
    'colsample_bytree': 0.8824818923210358, 
    'learning_rate': 0.010202971192339372, 
    'max_depth': 6, 
    'n_estimators': 250, 
    'reg_alpha': 1.7453122201668874, 
    'reg_lambda': 5.784871182330073, 
    'subsample': 0.7515130656632484
}
CAT_best_parameters={
    'bagging_temperature': 0.09868359430137408,
    'iterations': 150,
    'l2_leaf_reg': 13.675035261530743,
    'learning_rate': 0.03609529519492104,
    'max_depth': 6,
    'random_strength': 0.1687460842854431
}

In [22]:
# Build one regressor per library from the stored hyperparameters.
LGBM_Model = LGBMRegressor(
    random_state=SEED,
    device='cpu',
    **LGBM_best_parameters,
)
XGB_Model = XGBRegressor(
    tree_method='gpu_hist',
    **XGB_best_parameters,
)
CatBoost_Model = CatBoostRegressor(
    task_type='GPU',
    **CAT_best_parameters,
)

In [23]:
# Weighted average of the three boosters; the weights follow the estimator order.
ensemble_members = [
    ('lightgbm', LGBM_Model),
    ('xgboost', XGB_Model),
    ('catboost', CatBoost_Model),
]
voting_model = VotingRegressor(estimators=ensemble_members, weights=[6.0, 5.0, 2.0])

# fit_and_predict returns (submission, tuned OOF kappa); only the submission is shown.
sub, _cv_kappa = fit_and_predict(voting_model, test)

sub

Training Folds: 100%|██████████| 6/6 [00:27<00:00,  4.61s/it]


Unnamed: 0,id,sii
0,00008ff9,0
1,000fd460,0
2,00105258,0
3,00115b9f,0
4,0016bb22,1
5,001f3379,1
6,0038ba98,1
7,0068a485,0
8,0069fbed,1
9,0083e397,1


In [24]:
# Write the predictions to submission.csv without the DataFrame index column.
sub.to_csv('submission.csv', index=False)