## 1. Import libraries<a class="anchor" id="1"></a>

[Back to Table of Contents](#0.1)

In [None]:
! pip install iterative-stratification

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting iterative-stratification
  Downloading iterative_stratification-0.1.7-py3-none-any.whl (8.5 kB)
Installing collected packages: iterative-stratification
Successfully installed iterative-stratification-0.1.7


In [None]:
import sys

import numpy as np
import random
import pandas as pd
import os
import copy
import gc

import matplotlib.pyplot as plt 
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.preprocessing import QuantileTransformer
from sklearn.feature_selection import VarianceThreshold, SelectKBest
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.modules.loss import _WeightedLoss

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Use the fully qualified option key: the bare 'max_columns' pattern is ambiguous on
# pandas versions that also define 'styler.render.max_columns' and raises OptionError.
pd.set_option('display.max_columns', 2000)

In [None]:
# Colab-specific: mount Google Drive so files under /content/drive can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the training table from the mounted Drive (hard-coded Colab path).
train_df = pd.read_csv('/content/drive/MyDrive/train.csv')

In [None]:
# Load the test table from the mounted Drive (hard-coded Colab path).
test_df = pd.read_csv('/content/drive/MyDrive/test.csv')

In [None]:
# Columns excluded from modelling: the 'ID' column plus six X_* features.
# The list is declared once so train and test always drop exactly the same set,
# and the frames are reassigned instead of mutated with inplace=True.
DROPPED_COLUMNS = ['X_04','X_23','X_47','X_48','X_10','X_11','ID']
train_df = train_df.drop(columns=DROPPED_COLUMNS)
test_df = test_df.drop(columns=DROPPED_COLUMNS)

In [None]:
# Engineered ratio feature: X_03 divided element-wise by X_07, added to both frames.
train_df['X_3/7'] = train_df['X_03'].div(train_df['X_07'])
test_df['X_3/7'] = test_df['X_03'].div(test_df['X_07'])

In [None]:
# Feature matrices: every column whose name contains "X" (regex search, not a prefix match).
train_features = train_df.filter(regex = "X")
test_features = test_df.filter(regex = "X")

# Targets: every training column whose name contains "Y".
train_y = train_df.filter(regex = "Y")

### 2.1. Commit now <a class="anchor" id="2.1"></a>

> 들여쓴 블록



[Back to Table of Contents](#0.1)

In [None]:
# Settings for the current run.
n_component = 16  # number of PCA components generated in section 4.3

VarianceThreshold_for_FS = 0.9  # threshold passed to VarianceThreshold in section 4.4
Dropout_Model = 0.25  # dropout probability read by the Model class
print('n_component :' , n_component)

n_component : 16


### 2.2 Previous commits <a class="anchor" id="2.2"></a>

[Back to Table of Contents](#0.1)

In [None]:
# Empty log table; each row filled in below records one commit's settings and scores.
commits_df = pd.DataFrame(columns = ['n_commit', 'n_component', 'train_features','VarianceThreshold_for_FS', 'Dropout_Model', 'LB_score', 'CV_logloss'])

### Commit 0 (parameters from https://www.kaggle.com/riadalmadani/pytorch-cv-0-0145-lb-0-01839, commit 8)

In [None]:
# Display (rows, columns) of the training feature frame.
train_features.shape

(39607, 51)

In [None]:
# Fill in row 0 of the commit log with the settings and scores of commit 0.
n=0
commits_df.loc[n, 'n_commit'] = 0  # commit number
commits_df.loc[n, 'n_component'] = 16  # number of PCA output features

commits_df.loc[n, 'VarianceThreshold_for_FS'] = 0.8  # threshold used by VarianceThreshold for feature selection
# NOTE(review): 56 does not match the 51 columns displayed just above - confirm which run this value belongs to.
commits_df.loc[n, 'train_features'] = 56  # number of features in the training frame after FE and before modelling

commits_df.loc[n, 'Dropout_Model'] = 0  # dropout used in the model
commits_df.loc[n, 'CV_logloss'] = 0  # cross-validation metric (placeholder value 0)
commits_df.loc[n, 'LB_score'] = 0  # leaderboard score after submitting (placeholder value 0)

### 2.3 Parameters and LB score visualization <a class="anchor" id="2.3"></a>

[Back to Table of Contents](#0.1)

In [None]:
# Add a constant 'seed' column (42) to every logged commit row.
commits_df['seed'] = 42

In [None]:
# Find and mark the commit with the minimum LB score.
commits_df['LB_score'] = pd.to_numeric(commits_df['LB_score'])
# Sort ascending so the lowest LB score (ties broken by CV_logloss) ends up in row 0.
commits_df = commits_df.sort_values(by=['LB_score', 'CV_logloss'], ascending = True).reset_index(drop=True)
# Flag column: 1 for the best row, 0 for every other row.
commits_df['min'] = 0
commits_df.loc[0, 'min'] = 1
commits_df

Unnamed: 0,n_commit,n_component,train_features,VarianceThreshold_for_FS,Dropout_Model,LB_score,CV_logloss,seed,min
0,0,16,56,0.8,0,0,0,42,1


In [None]:
# # Interactive plot with results of parameters tuning
# fig = px.scatter_3d(commits_df, x='train_features', y='CV_logloss', z='LB_score', color = 'min', 
#                     symbol = 'seed',
#                     title='Parameters and LB score visualization of MoA solutions')
# fig.update(layout=dict(title=dict(x=0.1)))

## 4. FE & Data Preprocessing <a class="anchor" id="4"></a>

[Back to Table of Contents](#0.1)

### 4.1 RankGauss<a class="anchor" id="4.1"></a>

[Back to Table of Contents](#0.1)

In [None]:
# RankGauss: map every feature onto a normal distribution through its quantiles.
# Each column gets its own transformer, fitted on the training values only and
# then applied to both the training and the test column.
for col in train_features.columns:
    transformer = QuantileTransformer(n_quantiles=100, random_state=0, output_distribution="normal")

    raw_vec = train_features[col].values.reshape(-1, 1)
    test_vec = test_features[col].values.reshape(-1, 1)

    train_features[col] = transformer.fit(raw_vec).transform(raw_vec).ravel()
    test_features[col] = transformer.transform(test_vec).ravel()

### 4.2 Seed<a class="anchor" id="4.2"></a>

[Back to Table of Contents](#0.1)

In [None]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and all
    CUDA devices), exports ``PYTHONHASHSEED``, and puts cuDNN in deterministic
    mode so repeated runs with the same seed follow the same random stream.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects hashing in interpreters started after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; silently ignored without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels between runs; turn it off.
    torch.backends.cudnn.benchmark = False


seed_everything(seed=42)

### 4.3 PCA features<a class="anchor" id="4.3"></a>

[Back to Table of Contents](#0.1)

In [None]:
# Display (rows, columns) of the training features before the PCA columns are added.
train_features.shape

(39607, 51)

In [None]:
# PCA features: fit PCA on the stacked train + test features, split the components
# back into train/test by row position, and append them as pca_0..pca_{n_component-1}.

# Stack train rows on top of test rows (train first, so the positional split below works).
data = pd.concat([pd.DataFrame(train_features), pd.DataFrame(test_features)])
data2 = (PCA(n_components=n_component, random_state=42).fit_transform(data))
# First len(train) rows belong to train, last len(test) rows belong to test.
train2 = data2[:train_features.shape[0]]; test2 = data2[-test_features.shape[0]:]

train2 = pd.DataFrame(train2, columns=[f'pca_{i}' for i in range(n_component)])
test2 = pd.DataFrame(test2, columns=[f'pca_{i}' for i in range(n_component)])

# Column-wise concat aligns on the index of each frame.
train_features = pd.concat((train_features, train2), axis=1)
test_features = pd.concat((test_features, test2), axis=1)

In [None]:
# Display (rows, columns) of the training features after the PCA columns were added.
train_features.shape

(39607, 67)

In [None]:
# Preview the first five rows of the transformed features plus PCA columns.
train_features.head(5)

Unnamed: 0,X_01,X_02,X_03,X_05,X_06,X_07,X_08,X_09,X_12,X_13,X_14,X_15,X_16,X_17,X_18,X_19,X_20,X_21,X_22,X_24,X_25,X_26,X_27,X_28,X_29,X_30,X_31,X_32,X_33,X_34,X_35,X_36,X_37,X_38,X_39,X_40,X_41,X_42,X_43,X_44,X_45,X_46,X_49,X_50,X_51,X_52,X_53,X_54,X_55,X_56,X_3/7,pca_0,pca_1,pca_2,pca_3,pca_4,pca_5,pca_6,pca_7,pca_8,pca_9,pca_10,pca_11,pca_12,pca_13,pca_14,pca_15
0,0.870846,-5.199338,0.037988,-1.571091,1.876359,0.430727,-2.165418,0.199581,-1.508944,1.399657,-1.120205,-0.403108,-1.690622,0.282216,-0.375793,-1.335178,-0.076032,-1.168949,-0.834427,-2.322575,-0.63527,-0.987837,-1.144237,-1.508944,2.582669,2.322575,1.549706,1.955661,1.470545,0.852495,-0.515705,-0.799083,1.304923,-2.049594,-1.786156,-1.876359,0.44466,-1.807354,1.639976,-1.746017,2.399574,-1.639976,-1.83395,1.10152,0.706687,1.471329,1.150214,-0.493553,-0.186024,-0.695262,-0.260615,-1.540519,1.931141,6.340495,-4.554213,1.000365,-0.545088,-0.036837,-0.616814,-3.147477,1.45543,2.012483,-3.570198,-0.811626,0.854204,1.801731,1.89548
1,0.515705,5.199338,-0.650837,-0.76471,1.029957,-0.082381,-2.22678,-0.012033,0.308666,1.399657,-1.4342,-1.690622,-0.178175,-0.191052,-1.029957,-2.502106,-0.559592,-2.549931,-0.216904,-0.430727,0.216904,0.255962,0.152506,1.4342,1.746017,2.322575,1.399657,1.746017,1.008673,-0.63527,-0.747859,-0.799083,0.165327,-0.255962,-0.472789,-0.362241,-0.889495,-0.714776,-0.987837,-0.63527,-0.416878,-1.639976,-1.449607,0.535601,0.80682,1.785439,-0.811146,-0.041775,0.870222,-0.724532,-0.394691,8.829335,2.993104,5.754309,0.017732,3.02237,-1.786248,2.138157,-1.217996,-0.454488,1.104894,0.751667,-2.751081,-0.429387,0.318333,0.728578,1.53955
2,1.470545,-5.199338,-1.073988,1.051717,1.029957,-0.021101,-0.684986,0.9048,-0.604585,0.335333,-0.44466,-0.714776,-1.955661,-0.191052,-0.714776,-2.322575,-1.4342,-1.955661,-0.947401,-1.335178,-1.051717,-1.168949,-0.714776,-0.44466,1.639976,2.322575,1.549706,1.955661,1.144237,0.282216,-0.747859,-1.366707,1.639976,-0.666564,-0.731217,-0.44466,-1.876359,-0.714776,-0.747859,-0.947401,-0.216904,-1.120205,-1.25704,0.368716,-0.052719,1.317171,0.17506,-0.216499,0.405759,-1.266358,-0.659458,-1.582487,1.862391,5.668267,-2.817971,2.530055,-0.932375,1.590164,-1.624078,1.728486,1.243618,0.512749,-2.597395,0.719716,1.740719,0.444956,1.735954
3,1.168949,-5.199338,0.063341,-0.403108,2.358176,0.063341,0.016556,0.435273,-1.955661,2.322575,-2.322575,-1.690622,-1.955661,-0.191052,-1.639976,-2.166107,-1.876359,-1.807354,-1.4342,-1.955661,-1.399657,-1.549706,-1.144237,-1.120205,-1.304923,1.746017,1.470545,2.322575,1.144237,-0.799083,0.714776,0.947401,0.063341,-0.216904,-0.13971,-0.088734,-0.203961,-2.049594,-0.076032,-1.746017,1.335178,5.199338,-0.084258,0.60469,0.331225,0.193611,0.790052,0.519635,1.542183,1.259694,-0.010567,-1.599035,-1.149774,-0.041362,-5.949109,3.307005,-1.943117,2.57312,-0.774211,-0.800656,1.903706,2.033716,-2.552516,-1.501552,1.179105,-0.408579,-0.762438
4,0.515705,-5.199338,-1.275817,-0.304243,0.203961,0.57446,-0.807346,0.113836,-1.029957,0.698526,-0.781781,-1.399657,-1.4342,-0.57446,-1.029957,-2.322575,-1.029957,-2.049594,-0.947401,-0.714776,-1.051717,0.0,0.650837,2.76046,-0.889495,2.322575,1.470545,2.322575,1.829398,0.0,-1.508944,-0.53022,0.559592,-1.120205,-1.029957,-0.834427,-0.889495,-2.34397,-0.515705,-1.549706,1.335178,5.199338,-1.22071,1.995423,0.915304,-0.61024,1.224744,-0.484813,0.233703,-1.049862,-1.175887,-1.483618,2.346502,-0.577938,-3.710309,3.695619,-1.206369,0.502561,0.627043,-0.314877,2.585618,0.601344,-2.678048,0.575943,1.288453,0.768755,0.825997


### 4.4 Feature selection<a class="anchor" id="4.4"></a>

[Back to Table of Contents](#0.1)

In [None]:
# Stack train and test features row-wise for inspection. DataFrame.append was
# removed in pandas 2.0; pd.concat produces the same stacked frame.
data = pd.concat([train_features, test_features])
data

Unnamed: 0,X_01,X_02,X_03,X_05,X_06,X_07,X_08,X_09,X_12,X_13,X_14,X_15,X_16,X_17,X_18,X_19,X_20,X_21,X_22,X_24,X_25,X_26,X_27,X_28,X_29,X_30,X_31,X_32,X_33,X_34,X_35,X_36,X_37,X_38,X_39,X_40,X_41,X_42,X_43,X_44,X_45,X_46,X_49,X_50,X_51,X_52,X_53,X_54,X_55,X_56,X_3/7,pca_0,pca_1,pca_2,pca_3,pca_4,pca_5,pca_6,pca_7,pca_8,pca_9,pca_10,pca_11,pca_12,pca_13,pca_14,pca_15
0,0.870846,-5.199338,0.037988,-1.571091,1.876359,0.430727,-2.165418e+00,0.199581,-1.508944,1.399657,-1.120205,-0.403108,-1.690622,0.282216,-0.375793,-1.335178,-0.076032,-1.168949,-0.834427,-2.322575,-0.635270,-0.987837,-1.144237,-1.508944,2.582669,2.322575,1.549706,1.955661,1.470545,0.852495,-0.515705,-0.799083,1.304923,-2.049594,-1.786156,-1.876359,0.444660,-1.807354,1.639976,-1.746017,2.399574,-1.639976,-1.833950,1.101520,0.706687,1.471329,1.150214,-0.493553,-0.186024,-0.695262,-0.260615,-1.540519,1.931141,6.340495,-4.554213,1.000365,-0.545088,-0.036837,-0.616814,-3.147477,1.455430,2.012483,-3.570198,-0.811626,0.854204,1.801731,1.895480
1,0.515705,5.199338,-0.650837,-0.764710,1.029957,-0.082381,-2.226780e+00,-0.012033,0.308666,1.399657,-1.434200,-1.690622,-0.178175,-0.191052,-1.029957,-2.502106,-0.559592,-2.549931,-0.216904,-0.430727,0.216904,0.255962,0.152506,1.434200,1.746017,2.322575,1.399657,1.746017,1.008673,-0.635270,-0.747859,-0.799083,0.165327,-0.255962,-0.472789,-0.362241,-0.889495,-0.714776,-0.987837,-0.635270,-0.416878,-1.639976,-1.449607,0.535601,0.806820,1.785439,-0.811146,-0.041775,0.870222,-0.724532,-0.394691,8.829335,2.993104,5.754309,0.017732,3.022370,-1.786248,2.138157,-1.217996,-0.454488,1.104894,0.751667,-2.751081,-0.429387,0.318333,0.728578,1.539550
2,1.470545,-5.199338,-1.073988,1.051717,1.029957,-0.021101,-6.849861e-01,0.904800,-0.604585,0.335333,-0.444660,-0.714776,-1.955661,-0.191052,-0.714776,-2.322575,-1.434200,-1.955661,-0.947401,-1.335178,-1.051717,-1.168949,-0.714776,-0.444660,1.639976,2.322575,1.549706,1.955661,1.144237,0.282216,-0.747859,-1.366707,1.639976,-0.666564,-0.731217,-0.444660,-1.876359,-0.714776,-0.747859,-0.947401,-0.216904,-1.120205,-1.257040,0.368716,-0.052719,1.317171,0.175060,-0.216499,0.405759,-1.266358,-0.659458,-1.582487,1.862391,5.668267,-2.817971,2.530055,-0.932375,1.590164,-1.624078,1.728486,1.243618,0.512749,-2.597395,0.719716,1.740719,0.444956,1.735954
3,1.168949,-5.199338,0.063341,-0.403108,2.358176,0.063341,1.655580e-02,0.435273,-1.955661,2.322575,-2.322575,-1.690622,-1.955661,-0.191052,-1.639976,-2.166107,-1.876359,-1.807354,-1.434200,-1.955661,-1.399657,-1.549706,-1.144237,-1.120205,-1.304923,1.746017,1.470545,2.322575,1.144237,-0.799083,0.714776,0.947401,0.063341,-0.216904,-0.139710,-0.088734,-0.203961,-2.049594,-0.076032,-1.746017,1.335178,5.199338,-0.084258,0.604690,0.331225,0.193611,0.790052,0.519635,1.542183,1.259694,-0.010567,-1.599035,-1.149774,-0.041362,-5.949109,3.307005,-1.943117,2.573120,-0.774211,-0.800656,1.903706,2.033716,-2.552516,-1.501552,1.179105,-0.408579,-0.762438
4,0.515705,-5.199338,-1.275817,-0.304243,0.203961,0.574460,-8.073456e-01,0.113836,-1.029957,0.698526,-0.781781,-1.399657,-1.434200,-0.574460,-1.029957,-2.322575,-1.029957,-2.049594,-0.947401,-0.714776,-1.051717,0.000000,0.650837,2.760460,-0.889495,2.322575,1.470545,2.322575,1.829398,0.000000,-1.508944,-0.530220,0.559592,-1.120205,-1.029957,-0.834427,-0.889495,-2.343970,-0.515705,-1.549706,1.335178,5.199338,-1.220710,1.995423,0.915304,-0.610240,1.224744,-0.484813,0.233703,-1.049862,-1.175887,-1.483618,2.346502,-0.577938,-3.710309,3.695619,-1.206369,0.502561,0.627043,-0.314877,2.585618,0.601344,-2.678048,0.575943,1.288453,0.768755,0.825997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,0.101452,-5.199338,-1.120205,1.549706,-0.764710,0.458679,1.233649e-01,1.244781,-1.029957,-0.114185,0.295415,-0.403108,0.501298,-0.191052,-0.025322,-0.650837,-1.144237,-1.008673,-1.304923,0.375793,-0.295415,0.000000,-0.714776,0.101452,-0.191052,-0.362241,1.304923,0.076032,-0.229884,0.544847,1.639976,-0.530220,-0.178175,-0.714776,-1.194396,-0.987837,-0.889495,1.746017,0.781781,0.530220,-0.908458,5.199338,2.361110,-0.531745,-0.889455,0.095836,-0.508205,0.034397,0.369600,-0.024441,-1.002601,-1.626921,2.198372,-1.016082,-1.668412,0.806594,0.547143,0.057908,-1.501859,2.075482,-0.880173,0.840845,0.480756,0.812578,-0.013924,-1.760668,-0.311185
39604,0.101452,-5.199338,-2.264083,0.574460,-0.764710,1.558978,1.162243e-01,0.910922,-1.508944,0.698526,-0.114185,-0.101452,0.165327,0.781781,0.403108,-0.416878,-2.322575,-0.714776,-1.746017,-1.955661,-1.399657,-0.987837,-0.348756,-0.927754,-0.666564,-1.120205,1.073988,0.076032,1.144237,-0.635270,0.362241,0.947401,1.639976,-0.444660,-0.416878,-0.321971,-0.889495,-0.714776,0.362241,-0.947401,0.714776,5.199338,2.359139,-0.527028,-1.344334,-0.047762,-1.505949,-0.155577,-0.091226,0.624394,-2.248725,-1.573259,1.112996,-0.496701,-4.691829,1.191605,2.189610,0.291691,0.692286,2.589022,0.586767,1.080183,0.017013,0.006428,-1.154942,-1.232502,-1.983046
39605,0.515705,-5.199338,-1.220640,0.799083,-0.242903,0.740701,-1.982771e-01,1.546420,0.308666,0.698526,-0.444660,-0.714776,-0.178175,0.282216,-0.375793,-0.416878,-1.746017,-0.714776,-1.593219,-0.139710,1.051717,-0.764710,0.403108,0.101452,0.255962,1.470545,0.870846,-0.764710,1.399657,0.852495,-0.515705,1.639976,-1.508944,-1.008673,-1.549706,-1.275817,-0.889495,1.247754,0.559592,0.126937,-0.650837,5.199338,2.348253,-0.376973,-1.212050,0.021310,-0.745491,-0.401482,-0.355686,-0.153209,-1.293773,-1.594434,2.828926,-1.070217,-1.383463,2.098143,0.860539,0.194327,-1.788552,1.303767,-0.759125,1.039111,-0.499669,0.543512,0.732556,-0.436771,-2.179879
39606,-0.282216,5.199338,-2.085356,0.764710,-1.247754,1.508336,-6.957291e-16,0.763840,-0.191052,0.698526,0.295415,-0.403108,0.747859,0.781781,0.403108,-0.870846,-1.144237,-1.807354,-1.073988,-0.139710,0.747859,0.000000,0.650837,0.101452,0.731217,0.375793,0.308666,0.852495,0.515705,0.282216,1.639976,0.947401,-0.178175,0.000000,-0.216904,-0.088734,-0.203961,1.008673,0.152506,0.530220,-0.908458,5.199338,2.347973,-1.734555,-1.080605,-0.585808,-1.318818,0.239299,-0.850937,0.683718,-2.167242,8.797174,3.139248,-1.246587,-0.440827,1.169823,1.925785,1.506497,-0.387791,2.242512,-0.623047,0.434325,0.538669,0.335346,-0.335674,-0.810692,-1.486925


In [None]:
var_thresh = VarianceThreshold(VarianceThreshold_for_FS)

# Fit the variance filter on the training features only, then apply the same fitted
# column mask to the test features. Previously the "test" matrix was sliced out of
# the transformed *training* array, so it never contained any test rows.
train_features_transformed = var_thresh.fit_transform(train_features)
test_features_transformed = var_thresh.transform(test_features)

# Kept so any code that reads `data_transformed` still finds the transformed training array.
data_transformed = train_features_transformed

# NOTE(review): the next visible cells keep using train_features / test_features,
# so this selection does not currently change the model inputs.
train_features.shape

(39607, 67)

In [None]:
# Reset to a clean 0..n-1 index so features and targets line up row by row.
train = train_features.reset_index(drop=True)
test = test_features.reset_index(drop=True)

target = train_y.reset_index(drop=True)

# Training frame = feature columns followed by target columns.
train = pd.concat([train, target] ,axis=1)

In [None]:
# Plain Python list holding the names of the target columns.
target_cols = target.columns.tolist()

### 4.5 CV folds<a class="anchor" id="4.5"></a>

[Back to Table of Contents](#0.1)

In [None]:
# Work on a copy so `train` itself does not gain the fold column.
folds = train.copy()

# 7-way multilabel-stratified split, stratified on the target frame.
mskf = MultilabelStratifiedKFold(n_splits=7)

# Tag each row with the index of the fold in which it is a validation row.
for f, (t_idx, v_idx) in enumerate(mskf.split(X=train, y=target)):
    folds.loc[v_idx, 'kfold'] = int(f)

# Store the fold labels as integers.
folds['kfold'] = folds['kfold'].astype(int)
folds

Unnamed: 0,X_01,X_02,X_03,X_05,X_06,X_07,X_08,X_09,X_12,X_13,X_14,X_15,X_16,X_17,X_18,X_19,X_20,X_21,X_22,X_24,X_25,X_26,X_27,X_28,X_29,X_30,X_31,X_32,X_33,X_34,X_35,X_36,X_37,X_38,X_39,X_40,X_41,X_42,X_43,X_44,X_45,X_46,X_49,X_50,X_51,X_52,X_53,X_54,X_55,X_56,X_3/7,pca_0,pca_1,pca_2,pca_3,pca_4,pca_5,pca_6,pca_7,pca_8,pca_9,pca_10,pca_11,pca_12,pca_13,pca_14,pca_15,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14,kfold
0,0.870846,-5.199338,0.037988,-1.571091,1.876359,0.430727,-2.165418,0.199581,-1.508944,1.399657,-1.120205,-0.403108,-1.690622,0.282216,-0.375793,-1.335178,-0.076032,-1.168949,-0.834427,-2.322575,-0.635270,-0.987837,-1.144237,-1.508944,2.582669,2.322575,1.549706,1.955661,1.470545,0.852495,-0.515705,-0.799083,1.304923,-2.049594,-1.786156,-1.876359,0.444660,-1.807354,1.639976,-1.746017,2.399574,-1.639976,-1.833950,1.101520,0.706687,1.471329,1.150214,-0.493553,-0.186024,-0.695262,-0.260615,-1.540519,1.931141,6.340495,-4.554213,1.000365,-0.545088,-0.036837,-0.616814,-3.147477,1.455430,2.012483,-3.570198,-0.811626,0.854204,1.801731,1.895480,2.056,1.456,1.680,10.502,29.632,16.083,4.276,-25.381,-25.529,-22.769,23.792,-25.470,-25.409,-25.304,3
1,0.515705,5.199338,-0.650837,-0.764710,1.029957,-0.082381,-2.226780,-0.012033,0.308666,1.399657,-1.434200,-1.690622,-0.178175,-0.191052,-1.029957,-2.502106,-0.559592,-2.549931,-0.216904,-0.430727,0.216904,0.255962,0.152506,1.434200,1.746017,2.322575,1.399657,1.746017,1.008673,-0.635270,-0.747859,-0.799083,0.165327,-0.255962,-0.472789,-0.362241,-0.889495,-0.714776,-0.987837,-0.635270,-0.416878,-1.639976,-1.449607,0.535601,0.806820,1.785439,-0.811146,-0.041775,0.870222,-0.724532,-0.394691,8.829335,2.993104,5.754309,0.017732,3.022370,-1.786248,2.138157,-1.217996,-0.454488,1.104894,0.751667,-2.751081,-0.429387,0.318333,0.728578,1.539550,1.446,1.184,1.268,18.507,33.179,16.736,3.229,-26.619,-26.523,-22.574,24.691,-26.253,-26.497,-26.438,5
2,1.470545,-5.199338,-1.073988,1.051717,1.029957,-0.021101,-0.684986,0.904800,-0.604585,0.335333,-0.444660,-0.714776,-1.955661,-0.191052,-0.714776,-2.322575,-1.434200,-1.955661,-0.947401,-1.335178,-1.051717,-1.168949,-0.714776,-0.444660,1.639976,2.322575,1.549706,1.955661,1.144237,0.282216,-0.747859,-1.366707,1.639976,-0.666564,-0.731217,-0.444660,-1.876359,-0.714776,-0.747859,-0.947401,-0.216904,-1.120205,-1.257040,0.368716,-0.052719,1.317171,0.175060,-0.216499,0.405759,-1.266358,-0.659458,-1.582487,1.862391,5.668267,-2.817971,2.530055,-0.932375,1.590164,-1.624078,1.728486,1.243618,0.512749,-2.597395,0.719716,1.740719,0.444956,1.735954,1.251,0.665,0.782,14.082,31.801,17.080,2.839,-26.238,-26.216,-22.169,24.649,-26.285,-26.215,-26.370,2
3,1.168949,-5.199338,0.063341,-0.403108,2.358176,0.063341,0.016556,0.435273,-1.955661,2.322575,-2.322575,-1.690622,-1.955661,-0.191052,-1.639976,-2.166107,-1.876359,-1.807354,-1.434200,-1.955661,-1.399657,-1.549706,-1.144237,-1.120205,-1.304923,1.746017,1.470545,2.322575,1.144237,-0.799083,0.714776,0.947401,0.063341,-0.216904,-0.139710,-0.088734,-0.203961,-2.049594,-0.076032,-1.746017,1.335178,5.199338,-0.084258,0.604690,0.331225,0.193611,0.790052,0.519635,1.542183,1.259694,-0.010567,-1.599035,-1.149774,-0.041362,-5.949109,3.307005,-1.943117,2.573120,-0.774211,-0.800656,1.903706,2.033716,-2.552516,-1.501552,1.179105,-0.408579,-0.762438,1.464,1.079,1.052,16.975,34.503,17.143,3.144,-25.426,-25.079,-21.765,24.913,-25.254,-25.021,-25.345,6
4,0.515705,-5.199338,-1.275817,-0.304243,0.203961,0.574460,-0.807346,0.113836,-1.029957,0.698526,-0.781781,-1.399657,-1.434200,-0.574460,-1.029957,-2.322575,-1.029957,-2.049594,-0.947401,-0.714776,-1.051717,0.000000,0.650837,2.760460,-0.889495,2.322575,1.470545,2.322575,1.829398,0.000000,-1.508944,-0.530220,0.559592,-1.120205,-1.029957,-0.834427,-0.889495,-2.343970,-0.515705,-1.549706,1.335178,5.199338,-1.220710,1.995423,0.915304,-0.610240,1.224744,-0.484813,0.233703,-1.049862,-1.175887,-1.483618,2.346502,-0.577938,-3.710309,3.695619,-1.206369,0.502561,0.627043,-0.314877,2.585618,0.601344,-2.678048,0.575943,1.288453,0.768755,0.825997,0.983,0.646,0.689,15.047,32.602,17.569,3.138,-25.376,-25.242,-21.072,25.299,-25.072,-25.195,-24.974,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39602,-0.714776,-5.199338,-1.876359,0.852495,-1.746017,0.834427,-1.483185,1.595785,-0.604585,0.335333,-0.114185,-0.101452,1.247754,0.282216,0.403108,-0.203961,-1.593219,-1.168949,-0.834427,-0.430727,0.486994,-0.269066,0.403108,-0.444660,0.731217,-0.362241,0.698526,0.076032,1.073988,-1.073988,1.639976,-0.139710,-0.178175,-0.375793,-0.635270,-0.559592,-0.544847,0.515705,-0.295415,0.530220,-0.908458,5.199338,2.350683,-0.133876,-0.098773,-0.840400,-0.507828,-1.230909,-0.669710,0.129135,-1.653414,-1.578357,2.695564,-1.099540,-1.358931,1.330029,1.333211,0.342798,-0.524273,2.177622,-1.191015,0.219029,-0.439466,0.883423,-1.897821,-1.190716,-0.275749,1.382,1.215,1.263,10.874,29.194,16.582,3.410,-26.486,-26.581,-22.772,24.261,-26.491,-26.584,-26.580,6
39603,-0.714776,5.199338,-1.639976,0.173889,-1.746017,0.269066,-0.882746,0.861705,1.247754,-0.559592,-0.444660,-0.714776,0.165327,-0.947401,-0.375793,-0.764710,-1.144237,-1.335178,-1.746017,0.126937,0.747859,0.000000,0.650837,-0.165327,0.731217,1.470545,1.470545,0.852495,1.639976,-1.275817,-0.515705,-0.799083,-1.029957,0.781781,0.698526,0.666564,-0.889495,0.764710,0.152506,0.530220,-0.650837,-2.572352,2.351599,-0.551502,-2.047601,0.669796,-1.019062,-1.000264,0.832795,-1.148603,-1.131743,8.845366,3.014333,6.543622,0.612625,2.348508,0.371551,2.239546,0.403742,2.076913,-1.646390,-1.478797,-0.259669,0.478665,0.603770,-1.482115,0.160168,1.482,0.606,1.083,8.759,29.859,15.659,3.406,-27.308,-27.203,-24.674,23.427,-27.250,-27.334,-27.325,2
39604,0.101452,-5.199338,-0.834427,0.666564,-0.764710,0.721798,-0.857471,-0.487386,0.308666,-0.114185,0.295415,0.191052,0.165327,0.282216,0.403108,0.063341,-0.781781,-1.008673,-0.947401,0.375793,-0.050661,-0.269066,-0.714776,0.101452,-0.191052,1.029957,0.781781,0.852495,0.335333,0.544847,0.967422,0.650837,-5.199338,-0.501298,-0.834427,-0.604585,-0.544847,0.764710,0.362241,0.530220,-0.416878,-2.322575,-2.341354,0.316328,0.960214,-0.684816,0.208234,0.557908,0.420502,-1.891781,-1.070145,-1.616993,2.845541,6.416991,-0.631191,-0.155247,0.104485,0.656173,-1.262385,1.031860,0.007006,-0.391306,0.300912,0.301696,0.067184,1.996187,1.209444,1.117,1.154,0.993,13.159,24.720,16.823,3.215,-26.502,-26.687,-22.577,24.301,-26.388,-26.425,-26.601,3
39605,-0.714776,-5.199338,-1.220640,0.208272,-1.247754,0.880132,-0.242903,1.000618,-1.955661,0.698526,-0.444660,-0.101452,-0.430727,0.282216,0.403108,-0.416878,-1.876359,-0.050661,-1.304923,-1.335178,-2.322575,-0.987837,-1.470545,-0.444660,-1.304923,-0.362241,0.308666,0.076032,1.073988,-0.799083,-0.203961,0.152506,0.747859,-1.807354,-1.786156,-1.508944,-1.220640,-0.229884,-0.987837,-0.635270,-0.908458,5.199338,2.361809,-0.451702,-1.802614,0.445148,0.403171,-0.512097,-0.205802,-0.794066,-1.420324,-1.585843,0.418670,-0.463801,-4.417791,1.714727,1.365056,-1.458977,-1.835674,2.128679,0.515858,0.120184,-0.318808,0.503071,-1.762642,-1.716506,-1.502264,0.895,0.187,0.477,9.123,26.412,15.757,4.216,-26.760,-26.634,-24.066,23.305,-26.536,-26.751,-26.635,5


In [None]:
# Shape report: train (features + targets), folds (train + kfold), test (features), target.
print(train.shape)
print(folds.shape)
print(test.shape)
print(target.shape)

(39607, 81)
(39607, 82)
(39608, 67)
(39607, 14)


### 4.6 Dataset Classes<a class="anchor" id="4.6"></a>

[Back to Table of Contents](#0.1)

In [None]:
class Dataset:
    """Map-style dataset pairing one feature row with one target row.

    Both arrays are indexed as ``array[idx, :]``, so they must be two-dimensional
    and share the same row order.
    """

    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        n_rows = self.features.shape[0]
        return n_rows

    def __getitem__(self, idx):
        """Return ``{'x': features, 'y': targets}`` for row ``idx`` as float tensors."""
        x_row = self.features[idx, :]
        y_row = self.targets[idx, :]
        return {
            'x': torch.tensor(x_row, dtype=torch.float),
            'y': torch.tensor(y_row, dtype=torch.float),
        }
    
class TestDataset:
    """Map-style dataset for inference: yields feature rows only, no targets."""

    def __init__(self, features):
        self.features = features

    def __len__(self):
        n_rows = self.features.shape[0]
        return n_rows

    def __getitem__(self, idx):
        """Return ``{'x': features}`` for row ``idx`` as a float tensor."""
        x_row = self.features[idx, :]
        return {'x': torch.tensor(x_row, dtype=torch.float)}

In [None]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Network to optimise; switched to training mode.
        optimizer: Optimizer stepped once per batch.
        scheduler: LR scheduler, also stepped once per batch.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        dataloader: Iterable of dicts with ``'x'`` and ``'y'`` tensors.
        device: Device the batches are moved to.

    Returns:
        Mean of the per-batch loss values.
    """
    model.train()
    batch_losses = []

    for batch in dataloader:
        optimizer.zero_grad()
        features = batch['x'].to(device)
        labels = batch['y'].to(device)

        loss = loss_fn(model(features), labels)
        loss.backward()

        optimizer.step()
        scheduler.step()  # the schedule advances per batch, not per epoch

        batch_losses.append(loss.item())

    return sum(batch_losses) / len(dataloader)


def valid_fn(model, loss_fn, dataloader, device):
    """Evaluate ``model`` on a validation loader.

    The forward passes run under ``torch.no_grad()`` so no autograd graph is
    built during validation; the returned values are unchanged.

    Args:
        model: Network to evaluate; switched to eval mode.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        dataloader: Iterable of dicts with ``'x'`` and ``'y'`` tensors.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean_batch_loss, predictions)`` where ``predictions`` is the
        NumPy concatenation of all batch outputs in loader order.
    """
    model.eval()
    final_loss = 0
    valid_preds = []

    with torch.no_grad():
        for data in dataloader:
            inputs, targets = data['x'].to(device), data['y'].to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)

            final_loss += loss.item()
            valid_preds.append(outputs.detach().cpu().numpy())

    final_loss /= len(dataloader)
    valid_preds = np.concatenate(valid_preds)

    return final_loss, valid_preds

def inference_fn(model, dataloader, device):
    """Run ``model`` over ``dataloader`` and return all predictions.

    Args:
        model: Network to evaluate; switched to eval mode.
        dataloader: Iterable of dicts with an ``'x'`` tensor.
        device: Device the batches are moved to.

    Returns:
        NumPy array with the batch outputs concatenated in loader order.
    """
    model.eval()

    with torch.no_grad():
        batch_preds = [
            model(batch['x'].to(device)).detach().cpu().numpy()
            for batch in dataloader
        ]

    return np.concatenate(batch_preds)

### 4.7 Smoothing<a class="anchor" id="4.7"></a>

[Back to Table of Contents](#0.1)

In [None]:
class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are pulled towards 0.5 by ``smoothing`` before the loss is computed:
    ``t * (1 - smoothing) + 0.5 * smoothing``.

    Args:
        weight: Optional rescaling weight forwarded to
            ``F.binary_cross_entropy_with_logits``.
        reduction: ``'mean'`` (default), ``'sum'``, or any other value to get the
            unreduced element-wise loss.
        smoothing: Smoothing factor in ``[0, 1)``.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth(targets: torch.Tensor, n_labels: int, smoothing=0.0):
        """Return smoothed targets; ``n_labels`` is accepted but not used."""
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE loss between logits ``inputs`` and ``targets``."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)
        # Ask for the element-wise loss: with the functional default ('mean') the
        # value is already a scalar, so 'sum' and 'none' would be silently ignored.
        loss = F.binary_cross_entropy_with_logits(inputs, targets, self.weight,
                                                  reduction='none')

        if self.reduction == 'sum':
            loss = loss.sum()
        elif self.reduction == 'mean':
            loss = loss.mean()

        return loss

### 4.8 Preprocessing<a class="anchor" id="4.8"></a>

[Back to Table of Contents](#0.1)

In [None]:
# Every column of `folds` that is not a target; at this point the list still contains 'kfold'.
feature_cols = [c for c in folds.columns if c not in target_cols]
len(feature_cols)

68

## 5. Modeling<a class="anchor" id="5"></a>

[Back to Table of Contents](#0.1)

In [None]:
# Remove the fold-assignment column from the model inputs. String equality (!=) is
# required: `is not` compares object identity, which only matches through string
# interning and raises a SyntaxWarning on Python 3.8+.
feature_cols = [col for col in feature_cols if col != "kfold"]

In [None]:
# Hyperparameters for training.

DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')  # use the GPU when one is available
EPOCHS = 25  # epochs per fold; also sizes the OneCycleLR schedule
BATCH_SIZE = 128  # batch size for the train and validation loaders
LEARNING_RATE = 1e-3  # lr passed to Adam
WEIGHT_DECAY = 1e-5  # weight decay passed to Adam
NFOLDS = 7  # presumably mirrors n_splits=7 used when building the folds - TODO confirm usage
EARLY_STOPPING_STEPS = 10  # copied into run_training as early_stopping_steps
EARLY_STOP = False  # NOTE(review): usage is not visible in this part of the notebook

# Network dimensions derived from the prepared column lists.
num_features=len(feature_cols)
num_targets=len(target_cols)
hidden_size=1500

In [None]:
class Model(nn.Module):
    """Fully connected network with two hidden layers.

    Each stage applies batch norm (and, from the second stage on, dropout) in
    front of a weight-normalised linear layer; hidden stages use leaky ReLU.
    The dropout probability comes from the notebook-level ``Dropout_Model``.

    Args:
        num_features: Width of the input vector.
        num_targets: Width of the output vector.
        hidden_size: Width of both hidden layers.
    """

    def __init__(self, num_features, num_targets, hidden_size):
        super().__init__()

        # Stage 1: input -> hidden
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, hidden_size))

        # Stage 2: hidden -> hidden
        self.batch_norm2 = nn.BatchNorm1d(hidden_size)
        self.dropout2 = nn.Dropout(Dropout_Model)
        self.dense2 = nn.utils.weight_norm(nn.Linear(hidden_size, hidden_size))

        # Stage 3: hidden -> output
        self.batch_norm3 = nn.BatchNorm1d(hidden_size)
        self.dropout3 = nn.Dropout(Dropout_Model)
        self.dense3 = nn.utils.weight_norm(nn.Linear(hidden_size, num_targets))

    def forward(self, x):
        """Return the raw (un-activated) outputs for a batch ``x``."""
        hidden = F.leaky_relu(self.dense1(self.batch_norm1(x)))
        hidden = F.leaky_relu(self.dense2(self.dropout2(self.batch_norm2(hidden))))
        return self.dense3(self.dropout3(self.batch_norm3(hidden)))
    
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a label-smoothed one-hot distribution.

    The true class receives ``1 - smoothing``; the remaining mass is spread
    evenly as ``smoothing / (classes - 1)`` over the other classes.

    Args:
        classes: Number of classes.
        smoothing: Probability mass moved away from the true class.
        dim: Dimension over which the softmax and the final sum are taken.
    """

    def __init__(self, classes, smoothing=0.0, dim=-1):
        super().__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the mean smoothed cross-entropy of logits ``pred`` vs class ids ``target``."""
        log_probs = pred.log_softmax(dim=self.dim)

        with torch.no_grad():
            off_value = self.smoothing / (self.cls - 1)
            true_dist = torch.full_like(log_probs, off_value)
            # Overwrite the true-class slot of every row with the confidence mass.
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)

        per_sample = torch.sum(-true_dist * log_probs, dim=self.dim)
        return torch.mean(per_sample)

In [None]:
import torch.nn as nn

class NRMSELoss(torch.nn.Module):
    """Weighted sum of per-target normalised RMSE values.

    For every target column the RMSE between prediction and ground truth is
    divided by the mean absolute ground-truth value. The first ``num_heavy``
    columns are weighted by ``heavy_weight`` and the rest by ``light_weight``.
    The defaults reproduce the original fixed formula (14 targets, first 8
    weighted 1.2, remaining 6 weighted 1.0).

    The per-target values are stacked instead of written into a pre-allocated
    CPU float32 tensor, so the result stays on the inputs' device and dtype.

    Note the argument order of ``forward``: ground truth first, predictions
    second, which is the reverse of the ``loss_fn(outputs, targets)`` calls in
    ``train_fn`` and ``valid_fn``.

    Args:
        num_targets: Number of leading target columns that are scored.
        num_heavy: How many of those columns receive ``heavy_weight``.
        heavy_weight: Weight of the first ``num_heavy`` columns.
        light_weight: Weight of the remaining columns.
    """

    def __init__(self, num_targets=14, num_heavy=8, heavy_weight=1.2, light_weight=1.0):
        super(NRMSELoss,self).__init__()
        self.num_targets = num_targets
        self.num_heavy = num_heavy
        self.heavy_weight = heavy_weight
        self.light_weight = light_weight

    def forward(self, gt, preds):
        """Return the weighted NRMSE score as a scalar tensor.

        Raises:
            IndexError: If either input has fewer than ``num_targets`` columns.
        """
        criterion = nn.MSELoss()
        per_target = []
        for idx in range(self.num_targets):
            rmse = torch.sqrt(criterion(preds[:,idx], gt[:,idx]))
            per_target.append(rmse / torch.mean(torch.abs(gt[:,idx])))
        all_nrmse = torch.stack(per_target)

        heavy = torch.sum(all_nrmse[:self.num_heavy])
        light = torch.sum(all_nrmse[self.num_heavy:])
        score = self.heavy_weight * heavy + self.light_weight * light
        return score

In [None]:
def run_training(fold, seed):
    """Train one model with `fold` held out, then predict the test set.

    Reads the notebook globals `folds`, `test`, `feature_cols`, `target_cols`
    and the hyper-parameter constants (BATCH_SIZE, EPOCHS, LEARNING_RATE,
    WEIGHT_DECAY, EARLY_STOP, EARLY_STOPPING_STEPS, DEVICE, ...). The weights
    of the epoch with the lowest validation loss are saved to
    ``FOLD{fold}_.pth`` and reloaded for inference.

    Args:
        fold: Value of the `kfold` column that is held out for validation.
        seed: Passed to `seed_everything` before anything else is built.

    Returns:
        (oof, predictions): `oof` has one row per row of `folds` and is zero
        everywhere except the held-out rows, which hold the validation
        predictions of the best epoch. `predictions` is the output of
        `inference_fn` on the test loader.
    """
    seed_everything(seed)

    train = folds
    test_ = test

    # Index labels of the held-out rows; used below to place the validation
    # predictions into `oof`.
    val_idx = train[train['kfold'] == fold].index

    train_df = train[train['kfold'] != fold].reset_index(drop=True)
    valid_df = train[train['kfold'] == fold].reset_index(drop=True)

    x_train, y_train = train_df[feature_cols].values, train_df[target_cols].values
    x_valid, y_valid = valid_df[feature_cols].values, valid_df[target_cols].values

    train_dataset = Dataset(x_train, y_train)
    valid_dataset = Dataset(x_valid, y_valid)
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

    model = Model(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=hidden_size,
    )
    model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3,
                                              max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(trainloader))

    # Mean squared error is used for both the training and the validation loss.
    loss_fn = nn.MSELoss()
    loss_tr = nn.MSELoss()

    early_stopping_steps = EARLY_STOPPING_STEPS
    early_step = 0

    # Same width as the accumulator in run_k_fold, which adds this array to its own.
    oof = np.zeros((len(train), len(target_cols)))
    best_loss = np.inf

    for epoch in range(EPOCHS):

        train_loss = train_fn(model, optimizer, scheduler, loss_tr, trainloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, train_loss: {train_loss}")
        valid_loss, valid_preds = valid_fn(model, loss_fn, validloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, valid_loss: {valid_loss}")

        if valid_loss < best_loss:
            best_loss = valid_loss
            oof[val_idx] = valid_preds
            torch.save(model.state_dict(), f"FOLD{fold}_.pth")
            # Patience counts consecutive epochs without improvement, so an
            # improvement resets it. Without the reset, training stopped after
            # EARLY_STOPPING_STEPS bad epochs in total.
            early_step = 0

        elif EARLY_STOP:
            early_step += 1
            if early_step >= early_stopping_steps:
                break

    # --------------------- PREDICTION ---------------------

    x_test = test_[feature_cols].values
    testdataset = TestDataset(x_test)
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)

    # Fresh model loaded with the best checkpoint saved above, not the weights
    # of the last epoch.
    model = Model(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=hidden_size,
    )
    model.load_state_dict(torch.load(f"FOLD{fold}_.pth"))
    model.to(DEVICE)

    predictions = inference_fn(model, testloader, DEVICE)

    return oof, predictions

## 6. Prediction & Submission <a class="anchor" id="6"></a>

[Back to Table of Contents](#0.1)

In [None]:
def run_k_fold(NFOLDS, seed):
    """Train every fold for one seed and combine the per-fold results.

    Args:
        NFOLDS: Number of folds; folds ``0 .. NFOLDS - 1`` are trained in order.
        seed: Forwarded unchanged to `run_training` for every fold.

    Returns:
        (oof, predictions): the sum of the per-fold out-of-fold arrays and the
        mean of the per-fold test predictions.
    """
    n_targets = len(target_cols)
    oof_total = np.zeros((len(train), n_targets))
    test_mean = np.zeros((len(test), n_targets))

    for fold_id in range(NFOLDS):
        fold_oof, fold_pred = run_training(fold_id, seed)
        oof_total += fold_oof
        test_mean += fold_pred / NFOLDS

    return oof_total, test_mean

In [None]:
# Seed ensemble: repeat the full K-fold run once per seed and average the results.

SEED = list(range(7))
n_seeds = len(SEED)

oof = np.zeros((len(train), len(target_cols)))
predictions = np.zeros((len(test), len(target_cols)))

for seed in SEED:
    oof_, predictions_ = run_k_fold(NFOLDS, seed)
    predictions += predictions_ / n_seeds
    oof += oof_ / n_seeds

# NOTE: this replaces the target columns of `train` with the averaged
# out-of-fold predictions, and fills the same columns of `test` with the
# averaged test predictions.
test[target_cols] = predictions
train[target_cols] = oof


FOLD: 0, EPOCH: 0, train_loss: 321.70312074073274
FOLD: 0, EPOCH: 0, valid_loss: 30.387679714626735
FOLD: 0, EPOCH: 1, train_loss: 3.993540336763052
FOLD: 0, EPOCH: 1, valid_loss: 1.7041094779968262
FOLD: 0, EPOCH: 2, train_loss: 2.032679166112627
FOLD: 0, EPOCH: 2, valid_loss: 1.5673438297377693
FOLD: 0, EPOCH: 3, train_loss: 1.8298534477563728
FOLD: 0, EPOCH: 3, valid_loss: 1.4693806383344863
FOLD: 0, EPOCH: 4, train_loss: 1.733657863803376
FOLD: 0, EPOCH: 4, valid_loss: 1.4627172576056586
FOLD: 0, EPOCH: 5, train_loss: 1.677184990922311
FOLD: 0, EPOCH: 5, valid_loss: 1.4521985199716356
FOLD: 0, EPOCH: 6, train_loss: 1.6337316349933022
FOLD: 0, EPOCH: 6, valid_loss: 1.4495931916766696
FOLD: 0, EPOCH: 7, train_loss: 1.6182159584267695
FOLD: 0, EPOCH: 7, valid_loss: 1.4809521277745565
FOLD: 0, EPOCH: 8, train_loss: 1.587279074174121
FOLD: 0, EPOCH: 8, valid_loss: 1.4505805439419217
FOLD: 0, EPOCH: 9, train_loss: 1.5746740290993138
FOLD: 0, EPOCH: 9, valid_loss: 1.441299765639835
FOLD: 

In [None]:
test[target_cols]

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,1.388079,1.141563,1.079478,13.135708,30.678589,16.586047,3.258197,-26.269745,-26.290824,-22.417452,24.354717,-26.218874,-26.208853,-26.232697
1,1.437255,1.176708,1.102875,12.915499,30.472148,16.684115,3.237006,-26.169162,-26.181721,-22.254208,24.275018,-26.115714,-26.110141,-26.126633
2,1.417059,1.105214,1.058075,15.082008,32.100709,16.212655,3.003773,-25.889930,-25.904976,-22.110499,24.519632,-25.838147,-25.834243,-25.837679
3,1.440128,1.121571,1.055231,15.108599,32.226511,17.050032,3.015458,-25.686553,-25.704325,-21.667135,24.603150,-25.633667,-25.631148,-25.636540
4,1.397496,1.080309,1.009285,14.732486,31.793409,17.048567,3.078990,-25.639875,-25.656549,-21.814923,24.696498,-25.587776,-25.583687,-25.592119
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,1.286007,0.985085,0.988809,12.357791,30.510367,16.458530,3.262439,-26.611812,-26.607789,-22.736527,24.036420,-26.539774,-26.537774,-26.549160
39604,1.236609,0.930779,0.933246,13.176504,31.173713,16.453708,3.169635,-26.538800,-26.531045,-22.802750,24.199197,-26.465561,-26.468045,-26.477340
39605,1.270567,0.954837,0.960783,13.193591,31.142487,16.469504,3.186035,-26.529553,-26.528287,-22.654592,24.137909,-26.460265,-26.459034,-26.469508
39606,1.255097,0.945117,0.948694,13.237718,31.117002,16.546775,3.185914,-26.523537,-26.518255,-22.741371,24.226191,-26.450910,-26.452883,-26.463986


In [None]:
# Out-of-fold predictions live in train[target_cols] (written by the
# seed-averaging cell); missing values are scored as 0.
valid_results = train.drop(columns=target_cols)
valid_results = pd.concat([train[target_cols], valid_results], axis=1).fillna(0)

# The ground truth cannot be read from `train`: its target columns were
# overwritten with the predictions, which would make y_true == y_pred.
# `folds` is the frame run_training took its targets from, and `oof` was filled
# using its row index.
# NOTE(review): assumes `folds` is a separate copy of `train` and still holds
# the original targets; confirm where `folds` is created.
y_true = folds[target_cols].values
y_pred = valid_results[target_cols].values

# NRMSELoss is an nn.Module: build the instance first, then call it with
# tensors. Passing the arrays to the constructor raised the TypeError.
score = NRMSELoss()(
    torch.tensor(y_true, dtype=torch.float32),
    torch.tensor(y_pred, dtype=torch.float32),
).item()
print("CV NRMSE: ", score)

TypeError: ignored

In [None]:

from sklearn.metrics import mean_squared_error

def lg_nrmse(gt, preds):
    """Weighted sum of per-column normalised RMSE over the first 14 columns.

    For each column the RMSE between `preds` and `gt` is divided by the mean
    absolute value of `gt` in that column. The first 8 columns (Y_01 to Y_08)
    are weighted 1.2 and the next 6 are weighted 1.0.

    Args:
        gt: 2-D array-like of ground-truth values (rows x columns).
        preds: 2-D array-like of predictions, same shape as `gt`.

    Returns:
        The score as a NumPy float.

    Raises:
        IndexError: If fewer than 14 columns are supplied.
        ValueError: If `gt` and `preds` differ in shape.
    """
    gt = np.asarray(gt, dtype=float)[:, :14]
    preds = np.asarray(preds, dtype=float)[:, :14]

    if gt.shape[1] < 14:
        raise IndexError("lg_nrmse needs at least 14 target columns")
    if gt.shape != preds.shape:
        raise ValueError(f"shape mismatch: gt {gt.shape} vs preds {preds.shape}")

    # RMSE is computed directly with NumPy: mean_squared_error(..., squared=False)
    # is deprecated and no longer accepted by recent scikit-learn releases.
    rmse = np.sqrt(np.mean((gt - preds) ** 2, axis=0))
    # Sum of the per-target NRMSE values; Y_01 to Y_08 carry a 20% higher weight.
    all_nrmse = rmse / np.mean(np.abs(gt), axis=0)
    score = 1.2 * np.sum(all_nrmse[:8]) + 1.0 * np.sum(all_nrmse[8:14])
    return score

In [None]:
# Load the sample submission file from the mounted Drive; the next cell reuses
# its non-target columns as the skeleton of the submission.
sample_submission = pd.read_csv('/content/drive/MyDrive/sample_submission.csv')

In [None]:
# Keep the non-target columns of the sample submission and append the
# predicted target columns from `test` side by side.
sub = pd.concat(
    [sample_submission.drop(columns=target_cols), test[target_cols]],
    axis=1,
)

In [None]:
sub

Unnamed: 0,ID,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,TEST_00001,1.388079,1.141563,1.079478,13.135708,30.678589,16.586047,3.258197,-26.269745,-26.290824,-22.417452,24.354717,-26.218874,-26.208853,-26.232697
1,TEST_00002,1.437255,1.176708,1.102875,12.915499,30.472148,16.684115,3.237006,-26.169162,-26.181721,-22.254208,24.275018,-26.115714,-26.110141,-26.126633
2,TEST_00003,1.417059,1.105214,1.058075,15.082008,32.100709,16.212655,3.003773,-25.889930,-25.904976,-22.110499,24.519632,-25.838147,-25.834243,-25.837679
3,TEST_00004,1.440128,1.121571,1.055231,15.108599,32.226511,17.050032,3.015458,-25.686553,-25.704325,-21.667135,24.603150,-25.633667,-25.631148,-25.636540
4,TEST_00005,1.397496,1.080309,1.009285,14.732486,31.793409,17.048567,3.078990,-25.639875,-25.656549,-21.814923,24.696498,-25.587776,-25.583687,-25.592119
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,TEST_39604,1.286007,0.985085,0.988809,12.357791,30.510367,16.458530,3.262439,-26.611812,-26.607789,-22.736527,24.036420,-26.539774,-26.537774,-26.549160
39604,TEST_39605,1.236609,0.930779,0.933246,13.176504,31.173713,16.453708,3.169635,-26.538800,-26.531045,-22.802750,24.199197,-26.465561,-26.468045,-26.477340
39605,TEST_39606,1.270567,0.954837,0.960783,13.193591,31.142487,16.469504,3.186035,-26.529553,-26.528287,-22.654592,24.137909,-26.460265,-26.459034,-26.469508
39606,TEST_39607,1.255097,0.945117,0.948694,13.237718,31.117002,16.546775,3.185914,-26.523537,-26.518255,-22.741371,24.226191,-26.450910,-26.452883,-26.463986


In [None]:
# Write the submission to the current working directory, without the DataFrame index.
sub.to_csv('submission_dee.csv', index=False)

In [None]:
sub.shape

(39608, 15)

In [None]:
# Load the two prediction files that are blended in the cell below.
# NOTE(review): the earlier cell saves 'submission_dee.csv' to the working
# directory, while this reads 'submission_de.csv' from Drive; confirm this is
# the intended file and not a stale copy.
xg=pd.read_csv('/content/drive/MyDrive/xb_op.csv')
dee=pd.read_csv('/content/drive/MyDrive/submission_de.csv')

In [None]:
dee

Unnamed: 0,ID,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,TEST_00001,1.406372,1.147632,1.087531,13.234566,30.809424,16.754725,3.247308,-26.260815,-26.285035,-22.358318,24.406179,-26.211799,-26.200416,-26.224554
1,TEST_00002,1.433825,1.161293,1.095300,12.899568,30.509578,16.668013,3.232052,-26.190968,-26.205496,-22.280114,24.268941,-26.141255,-26.134367,-26.151486
2,TEST_00003,1.413143,1.103595,1.055341,14.904961,32.157268,16.247088,3.013412,-25.939796,-25.951615,-22.167084,24.578230,-25.887120,-25.881373,-25.887445
3,TEST_00004,1.436698,1.114572,1.045946,15.188222,32.269532,16.975235,2.995590,-25.660403,-25.677094,-21.685817,24.599730,-25.607743,-25.600719,-25.610909
4,TEST_00005,1.392308,1.071342,1.001657,14.703822,31.733916,16.970309,3.080498,-25.642096,-25.659806,-21.840517,24.653902,-25.590529,-25.583979,-25.594569
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,TEST_39604,1.283522,0.973178,0.984186,12.379939,30.616074,16.410236,3.241947,-26.606812,-26.603413,-22.746561,24.033887,-26.534501,-26.531918,-26.545293
39604,TEST_39605,1.250144,0.932469,0.943179,13.203278,31.304400,16.520762,3.163161,-26.544261,-26.536944,-22.784249,24.239840,-26.471828,-26.472341,-26.483135
39605,TEST_39606,1.282012,0.959506,0.969278,13.185411,31.250283,16.446111,3.170680,-26.534831,-26.533844,-22.661839,24.157115,-26.466280,-26.462978,-26.475908
39606,TEST_39607,1.263644,0.946354,0.955714,13.185192,31.157196,16.546064,3.180505,-26.529599,-26.525357,-22.744099,24.229091,-26.459288,-26.457593,-26.471215


In [None]:
# Blend the two prediction files column by column: 70% `xg` + 30% `dee`.
# One vectorised expression replaces fourteen copy-pasted per-column lines,
# and the weights are named so they can be tuned in one place.
XG_WEIGHT = 0.7
DEE_WEIGHT = 0.3
blend_cols = [f'Y_{i:02d}' for i in range(1, 15)]  # 'Y_01' ... 'Y_14'

# Start from a copy of `xg` so every non-target column (e.g. ID) is kept as is.
ensemble = xg.copy()
ensemble[blend_cols] = xg[blend_cols] * XG_WEIGHT + dee[blend_cols] * DEE_WEIGHT

In [None]:
ensemble

Unnamed: 0,ID,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,TEST_00001,1.448311,1.236891,1.127814,13.819839,31.186037,16.534501,3.111968,-26.077538,-26.141246,-22.206487,24.540352,-26.038504,-26.044405,-26.092843
1,TEST_00002,1.470628,1.226680,1.133888,13.386016,30.842684,16.592342,3.173091,-26.151496,-26.167586,-22.297249,24.311315,-26.082814,-26.102816,-26.110243
2,TEST_00003,1.457465,1.153651,1.123495,14.470927,31.817542,16.678564,3.073718,-25.918457,-25.885299,-22.025977,24.562326,-25.817261,-25.831824,-25.780535
3,TEST_00004,1.465415,1.159168,1.061337,14.914618,32.664065,17.055414,3.045533,-25.632829,-25.640306,-21.751710,24.882296,-25.612306,-25.577110,-25.596737
4,TEST_00005,1.351484,1.006864,0.968315,14.944911,31.662645,16.917157,3.105859,-25.682251,-25.718008,-21.959547,24.725148,-25.590271,-25.602373,-25.602977
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,TEST_39604,1.265734,0.982443,0.996393,12.525655,30.705989,16.597222,3.216403,-26.516329,-26.502268,-22.844821,24.239449,-26.429644,-26.430922,-26.456274
39604,TEST_39605,1.234310,0.889913,0.919805,13.965244,31.446893,16.717344,3.196000,-26.415546,-26.411004,-22.781710,24.455350,-26.387901,-26.368649,-26.374821
39605,TEST_39606,1.224383,0.932595,0.958650,13.000281,31.024385,16.568639,3.165357,-26.527315,-26.513326,-22.840712,24.245853,-26.458563,-26.452205,-26.496909
39606,TEST_39607,1.213177,0.868279,0.926525,13.333311,31.189714,16.715612,3.168659,-26.465753,-26.466685,-22.803271,24.412309,-26.432759,-26.430030,-26.444838


In [None]:

# Write the blended predictions to the current working directory, without the DataFrame index.
ensemble.to_csv('ensemble_08.csv', index = False)

[Go to Top](#0)