In [1445]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.stats import spearmanr
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_squared_log_error
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
import torch
import torchsort
from scipy import stats
from sklearn.decomposition import PCA

In [2]:
# loading the train datasets
x_train = pd.read_csv('x_train.csv')
y_train = pd.read_csv('y_train.csv')

In [3]:
# removing the useless data
x_train = x_train.drop('DE_FR_EXCHANGE' , axis=1)
x_train = x_train.drop('FR_NET_EXPORT' , axis=1)
x_train = x_train.drop('DE_NET_EXPORT' , axis=1)

In [4]:
x_train

Unnamed: 0,ID,DAY_ID,COUNTRY,DE_CONSUMPTION,FR_CONSUMPTION,FR_DE_EXCHANGE,DE_NET_IMPORT,FR_NET_IMPORT,DE_GAS,FR_GAS,...,FR_RESIDUAL_LOAD,DE_RAIN,FR_RAIN,DE_WIND,FR_WIND,DE_TEMP,FR_TEMP,GAS_RET,COAL_RET,CARBON_RET
0,1054,206,FR,0.210099,-0.427458,0.606523,,-0.692860,0.441238,-0.213766,...,-0.444661,-0.172680,-0.556356,-0.790823,-0.283160,-1.069070,-0.063404,0.339041,0.124552,-0.002445
1,2049,501,FR,-0.022399,-1.003452,0.022063,0.573520,1.130838,0.174773,0.426940,...,-1.183194,-1.240300,-0.770457,1.522331,0.828412,0.437419,1.831241,-0.659091,0.047114,-0.490365
2,1924,687,FR,1.395035,1.978665,-1.021305,0.622021,1.682587,2.351913,2.122241,...,1.947273,-0.480700,-0.313338,0.431134,0.487608,0.684884,0.114836,0.535974,0.743338,0.204952
3,297,720,DE,-0.983324,-0.849198,0.839586,0.270870,-0.563230,0.487818,0.194659,...,-0.976974,-1.114838,-0.507570,-0.499409,-0.236249,0.350938,-0.417514,0.911652,-0.296168,1.073948
4,1101,818,FR,0.143807,-0.617038,0.924990,,-0.990324,0.238693,-0.240862,...,-0.526267,-0.541465,-0.424550,-1.088158,-1.011560,0.614338,0.729495,0.245109,1.526606,2.614378
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1489,459,809,DE,1.529204,1.106682,1.855327,0.218658,-1.450426,1.810665,1.388269,...,0.509514,,,,,,,0.876984,0.819520,1.320373
1490,1674,887,FR,1.618582,1.752840,-0.611392,-0.449153,0.152146,1.972779,1.558300,...,1.666252,,,,,,,0.932633,-0.085690,0.356356
1491,748,1083,DE,0.856399,0.489199,0.255778,1.531544,0.829568,2.108764,1.866399,...,0.358120,0.207905,0.404763,-0.594595,0.894011,0.256338,0.402316,-1.112899,-0.237835,0.067152
1492,1454,1133,FR,0.560689,-0.343777,0.830239,0.304856,-1.210230,-0.003973,0.869742,...,-0.184862,-0.682815,-0.390304,-0.972088,-1.501930,1.215528,1.338708,0.962812,-5.392852,-0.843812


In [313]:
# exctractig the bulk dataset into Germany and France
x_train_de = x_train.copy()
columnsde = x_train_de.columns
for col in columnsde:
    if (not(col=='COUNTRY')):
        x_train_de[col].fillna(x_train_de[col].mean(), inplace=True)
x_train_de = x_train_de[x_train_de['COUNTRY'] == 'DE']

x_train_fr = x_train.copy()
columnsfr = x_train_fr.columns
for col in columnsfr:
    if (not(col=='COUNTRY')):
        x_train_fr[col].fillna(x_train_fr[col].mean(), inplace=True)
x_train_fr = x_train_fr[x_train_fr['COUNTRY'] == 'FR']

In [314]:
x_train_fr

Unnamed: 0,ID,DAY_ID,COUNTRY,DE_CONSUMPTION,FR_CONSUMPTION,FR_DE_EXCHANGE,DE_NET_IMPORT,FR_NET_IMPORT,DE_GAS,FR_GAS,...,FR_RESIDUAL_LOAD,DE_RAIN,FR_RAIN,DE_WIND,FR_WIND,DE_TEMP,FR_TEMP,GAS_RET,COAL_RET,CARBON_RET
0,1054,206,FR,0.210099,-0.427458,0.606523,0.256332,-0.692860,0.441238,-0.213766,...,-0.444661,-0.172680,-0.556356,-0.790823,-0.283160,-1.069070,-0.063404,0.339041,0.124552,-0.002445
1,2049,501,FR,-0.022399,-1.003452,0.022063,0.573520,1.130838,0.174773,0.426940,...,-1.183194,-1.240300,-0.770457,1.522331,0.828412,0.437419,1.831241,-0.659091,0.047114,-0.490365
2,1924,687,FR,1.395035,1.978665,-1.021305,0.622021,1.682587,2.351913,2.122241,...,1.947273,-0.480700,-0.313338,0.431134,0.487608,0.684884,0.114836,0.535974,0.743338,0.204952
4,1101,818,FR,0.143807,-0.617038,0.924990,0.256332,-0.990324,0.238693,-0.240862,...,-0.526267,-0.541465,-0.424550,-1.088158,-1.011560,0.614338,0.729495,0.245109,1.526606,2.614378
5,1520,467,FR,-0.295296,-0.765120,0.717490,1.117139,0.200305,1.533595,0.306422,...,-0.860628,-0.962519,-0.193837,-0.871600,-0.917234,0.102046,0.472708,0.891049,0.861408,1.124457
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1483,1776,510,FR,0.422357,-0.704613,1.019784,0.977214,-1.112333,1.147306,-0.456156,...,-0.795046,1.848129,1.021354,1.070862,0.805043,-0.322520,-0.555211,1.946355,0.867074,1.322433
1486,1401,985,FR,0.117491,0.944372,-1.171116,-1.403843,-0.499653,-0.110824,1.320758,...,0.634867,-0.371909,-0.441164,-0.666013,-0.658130,0.324165,0.829517,0.494188,1.011794,1.472650
1487,1728,905,FR,0.968724,0.459382,-0.996808,-0.101161,1.048997,2.360936,0.715357,...,0.251190,-0.291708,-0.152293,1.128147,1.697829,-2.413150,-2.069991,1.145686,0.335645,0.606318
1490,1674,887,FR,1.618582,1.752840,-0.611392,-0.449153,0.152146,1.972779,1.558300,...,1.666252,-0.037831,0.019357,0.109480,0.123099,0.009451,0.008404,0.932633,-0.085690,0.356356


In [1886]:
# extracting nd array data
xdatade = x_train_de.iloc[: , 3:].values
ydatade = y_train['TARGET'].values[x_train['COUNTRY'].values=='DE']

xdatafr = x_train_fr.iloc[: , 3:].values
ydatafr = y_train['TARGET'].values[x_train['COUNTRY'].values=='FR']

In [1841]:
# removing above 5std
outsde = []
for i in range (xdatade.shape[1]):
    zscores = np.abs(stats.zscore(xdatade[:,i]))
    for i in range(xdatade.shape[0]):
        if (zscores[i]>5):
            if not(i in outsde):
                outsde.append(i)
zscores = np.abs(stats.zscore(ydatade))
for i in range(ydatade.shape[0]):
    if (zscores[i]>5):
        if not(i in outsde):
            outsde.append(i)
xdatade = np.delete(xdatade, outsde, axis=0)
ydatade = np.delete(ydatade, outsde, axis=0)


outsfr = []
for i in range (xdatafr.shape[1]):
    zscores = np.abs(stats.zscore(xdatafr[:,i]))
    for i in range(xdatafr.shape[0]):
        if (zscores[i]>5):
            if not(i in outsfr):
                outsfr.append(i)
zscores = np.abs(stats.zscore(ydatafr))
for i in range(ydatafr.shape[0]):
    if (zscores[i]>5):
        if not(i in outsfr):
            outsfr.append(i)
xdatafr = np.delete(xdatafr, outsfr, axis=0)
ydatafr = np.delete(ydatafr, outsfr, axis=0)

In [1984]:
# removing the features with low correlation
xcompde = xdatade.copy()
delsde = []
for i in range(xcompde.shape[1]):
    corr = spearmanr(xcompde[:,i], ydatade).correlation
    if (abs(corr)<0.1):
        delsde.append(i)
xcompdefinal = np.delete(xcompde,delsde,axis=1)

xcompfr = xdatafr.copy()
delsfr = []
for i in range(xcompfr.shape[1]):
    corr = spearmanr(xcompfr[:,i], ydatafr).correlation
    if (abs(corr)<0.1):
        delsfr.append(i)
xcompfrfinal = np.delete(xcompfr,delsfr,axis=1)

In [1874]:
# using PCA to remove redundent features
xcompdefinal = (xcompdefinal - np.mean(xcompdefinal, axis=0)) / np.std(xcompdefinal, axis=0)
pca = PCA(n_components=0.95)
xcompdefinal = pca.fit_transform(xdatade)

xcompfrfinal = (xcompfrfinal - np.mean(xcompfrfinal, axis=0)) / np.std(xcompfrfinal, axis=0)
pca = PCA(n_components=0.95)
xcompfrfinal = pca.fit_transform(xdatafr)

In [1985]:
res = []
for i in range(100):
    # train-test split
    X_trainde, X_testde, Y_trainde, Y_testde = train_test_split(xcompdefinal, ydatade, test_size=0.5, random_state=i)

    X_trainfr, X_testfr, Y_trainfr, Y_testfr = train_test_split(xcompfrfinal, ydatafr, test_size=0.5, random_state=i)
    # Z-score (standardize) each column
    X_trainde = (X_trainde - np.mean(X_trainde, axis=0)) / np.std(X_trainde, axis=0)
    X_testde = (X_testde - np.mean(X_testde, axis=0)) / np.std(X_testde, axis=0)

    X_trainfr = (X_trainfr - np.mean(X_trainfr, axis=0)) / np.std(X_trainfr, axis=0)
    X_testfr = (X_testfr - np.mean(X_testfr, axis=0)) / np.std(X_testfr, axis=0)
    # model validation

    model = SVR(kernel='linear', C=10, epsilon=0.1)
    model.fit(X_trainde, Y_trainde)

    # Make predictions
    Y_predde = model.predict(X_testde)


    model = SVR(kernel='linear', C=10, epsilon=0.1)
    model.fit(X_trainfr, Y_trainfr)

    # Make predictions
    Y_predfr = model.predict(X_testfr)

    res.append(spearmanr(np.concatenate((Y_predfr, Y_predde)), np.concatenate((Y_testfr, Y_testde))).correlation)

In [1986]:
print(np.max(res))
print(np.mean(res))
print(np.min(res))

0.3386555610112837
0.2775376011511977
0.2086356093468671


In [1944]:
# train-test split
X_trainde, X_testde, Y_trainde, Y_testde = train_test_split(xcompdefinal, ydatade, test_size=0.5, random_state=17)

X_trainfr, X_testfr, Y_trainfr, Y_testfr = train_test_split(xcompfrfinal, ydatafr, test_size=0.5, random_state=17)

In [1945]:
# Z-score (standardize) each column
X_trainde = (X_trainde - np.mean(X_trainde, axis=0)) / np.std(X_trainde, axis=0)
X_testde = (X_testde - np.mean(X_testde, axis=0)) / np.std(X_testde, axis=0)

X_trainfr = (X_trainfr - np.mean(X_trainfr, axis=0)) / np.std(X_trainfr, axis=0)
X_testfr = (X_testfr - np.mean(X_testfr, axis=0)) / np.std(X_testfr, axis=0)

In [1946]:
# model validation

model = SVR(kernel='linear', C=10, epsilon=1)
model.fit(X_trainde, Y_trainde)

# Make predictions
Y_predde = model.predict(X_testde)
msede = mean_squared_error(Y_testde, Y_predde)
print('GR')
print(msede)
print(spearmanr(Y_predde, Y_testde).correlation)

model = SVR(kernel='linear', C=10, epsilon=1)
model.fit(X_trainfr, Y_trainfr)

# Make predictions
Y_predfr = model.predict(X_testfr)
msefr = mean_squared_error(Y_testfr, Y_predfr)
print('FR')
print(msefr)
print(spearmanr(Y_predfr, Y_testfr).correlation)

print('overall')
print(spearmanr(np.concatenate((Y_predfr, Y_predde)), np.concatenate((Y_testfr, Y_testde))).correlation)

GR
1.1876453758645327
0.189033846178177
FR
1.317739844116926
0.058605295589203514
overall
0.13314983969060856


In [1171]:
x_test = pd.read_csv('x_test_final.csv')

In [1172]:
# removing the useless data
x_test = x_test.drop('DE_FR_EXCHANGE' , axis=1)
x_test = x_test.drop('FR_NET_EXPORT' , axis=1)
x_test = x_test.drop('DE_NET_EXPORT' , axis=1)

In [1173]:
x_test

Unnamed: 0,ID,DAY_ID,COUNTRY,DE_CONSUMPTION,FR_CONSUMPTION,FR_DE_EXCHANGE,DE_NET_IMPORT,FR_NET_IMPORT,DE_GAS,FR_GAS,...,FR_RESIDUAL_LOAD,DE_RAIN,FR_RAIN,DE_WIND,FR_WIND,DE_TEMP,FR_TEMP,GAS_RET,COAL_RET,CARBON_RET
0,1115,241,FR,0.340083,-0.433604,0.423521,-0.165333,-0.519419,1.524963,0.423845,...,-0.222525,-0.513180,-0.182048,-0.982546,-0.876632,0.880491,0.692242,0.569419,-0.029697,-0.929256
1,1202,1214,FR,0.803209,0.780411,-0.601610,-0.342802,-0.555367,1.064102,1.807256,...,0.857739,-0.340595,-0.301094,-0.759816,-1.221443,-0.616617,-0.737496,0.251251,0.753646,0.664086
2,1194,1047,FR,0.795540,0.721954,-1.179158,-1.620928,-0.666901,0.128004,-0.043300,...,0.447967,0.796475,-0.367248,0.376055,-0.483363,0.865138,0.120079,-1.485642,-0.326450,-0.349747
3,1084,1139,FR,0.172555,-0.723427,0.044539,,0.205276,1.046069,-0.174150,...,-0.561295,-0.542606,-0.013291,-0.791119,-0.894309,0.239153,0.457457,-0.746863,2.262654,0.642069
4,1135,842,FR,0.949714,0.420236,-0.617391,-0.608561,0.240856,1.376753,1.413967,...,0.503567,-0.230291,-0.609203,-0.744986,-1.196282,0.176557,0.312557,-2.219626,-0.509272,-0.488341
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
649,879,213,DE,0.267684,-0.901332,-0.749769,-1.198565,1.614458,-0.601501,0.012450,...,-1.057186,1.591562,-0.284628,2.272121,1.647899,-0.838634,-0.463684,-1.071829,0.720641,-1.842072
650,673,943,DE,1.389207,1.466549,-0.661778,-0.610986,1.934012,0.993839,1.962333,...,1.055349,0.155454,-0.168436,1.290495,2.199105,-0.329098,-1.172309,-1.290355,-0.897009,-0.925825
651,1641,261,FR,1.377650,1.781671,-0.557006,0.326698,1.462676,3.215415,2.157089,...,1.824478,-0.825759,-0.266881,-1.247165,-0.666308,-0.807636,-1.260777,0.931572,0.151169,0.474347
652,712,1082,DE,1.232202,1.971074,-0.537247,1.109420,1.464287,2.534455,2.202872,...,1.964292,-0.145241,-0.358861,0.195383,-0.103433,1.352906,-0.485161,0.995363,-0.152287,2.067306


In [1925]:
# exctractig the bulk the test dataset into Germany and France
x_test_de = x_test.copy()
columnsde = x_test_de.columns
for col in columnsde:
    if (not(col=='COUNTRY')):
        x_test_de[col].fillna(x_test_de[col].mean(), inplace=True)
x_test_de = x_test_de[x_test_de['COUNTRY'] == 'DE']

x_test_fr = x_test.copy()
columnsfr = x_test_fr.columns
for col in columnsfr:
    if (not(col=='COUNTRY')):
        x_test_fr[col].fillna(x_test_fr[col].mean(), inplace=True)
x_test_fr = x_test_fr[x_test_fr['COUNTRY'] == 'FR']

In [1926]:
x_test_fr

Unnamed: 0,ID,DAY_ID,COUNTRY,DE_CONSUMPTION,FR_CONSUMPTION,FR_DE_EXCHANGE,DE_NET_IMPORT,FR_NET_IMPORT,DE_GAS,FR_GAS,...,FR_RESIDUAL_LOAD,DE_RAIN,FR_RAIN,DE_WIND,FR_WIND,DE_TEMP,FR_TEMP,GAS_RET,COAL_RET,CARBON_RET
0,1115,241,FR,0.340083,-0.433604,0.423521,-0.165333,-0.519419,1.524963,0.423845,...,-0.222525,-0.513180,-0.182048,-0.982546,-0.876632,0.880491,0.692242,0.569419,-0.029697,-0.929256
1,1202,1214,FR,0.803209,0.780411,-0.601610,-0.342802,-0.555367,1.064102,1.807256,...,0.857739,-0.340595,-0.301094,-0.759816,-1.221443,-0.616617,-0.737496,0.251251,0.753646,0.664086
2,1194,1047,FR,0.795540,0.721954,-1.179158,-1.620928,-0.666901,0.128004,-0.043300,...,0.447967,0.796475,-0.367248,0.376055,-0.483363,0.865138,0.120079,-1.485642,-0.326450,-0.349747
3,1084,1139,FR,0.172555,-0.723427,0.044539,0.221057,0.205276,1.046069,-0.174150,...,-0.561295,-0.542606,-0.013291,-0.791119,-0.894309,0.239153,0.457457,-0.746863,2.262654,0.642069
4,1135,842,FR,0.949714,0.420236,-0.617391,-0.608561,0.240856,1.376753,1.413967,...,0.503567,-0.230291,-0.609203,-0.744986,-1.196282,0.176557,0.312557,-2.219626,-0.509272,-0.488341
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
635,1341,1160,FR,-0.706132,-0.412952,0.458735,0.111726,-0.672802,1.264901,-0.014317,...,-0.691930,1.792669,-0.638401,-0.742850,-0.181850,-1.388265,-1.658348,0.419918,-0.575021,-0.246239
637,1500,599,FR,-0.459724,-0.970210,1.528448,1.955702,-1.140573,1.599161,-0.575089,...,-1.167055,-0.732933,-0.536472,-0.865626,-0.559115,-0.650628,0.408559,0.122640,0.011485,-0.950733
639,1340,854,FR,-1.531444,-0.365023,-0.324053,-0.767021,0.074149,1.277669,0.311021,...,-0.316312,0.258509,-0.373126,-0.619784,-0.816250,-1.383641,-2.158179,-0.209040,-1.750241,0.004109
651,1641,261,FR,1.377650,1.781671,-0.557006,0.326698,1.462676,3.215415,2.157089,...,1.824478,-0.825759,-0.266881,-1.247165,-0.666308,-0.807636,-1.260777,0.931572,0.151169,0.474347


In [1987]:
# extracting nd array data
xdatade = x_train_de.iloc[: , 3:].values
ydatade = y_train['TARGET'].values[x_train['COUNTRY'].values=='DE']

xdatafr = x_train_fr.iloc[: , 3:].values
ydatafr = y_train['TARGET'].values[x_train['COUNTRY'].values=='FR']

In [1915]:
# removing above 5std
outsde = []
for i in range (xdatade.shape[1]):
    zscores = np.abs(stats.zscore(xdatade[:,i]))
    for i in range(xdatade.shape[0]):
        if (zscores[i]>5):
            if not(i in outsde):
                outsde.append(i)
zscores = np.abs(stats.zscore(ydatade))
for i in range(ydatade.shape[0]):
    if (zscores[i]>5):
        if not(i in outsde):
            outsde.append(i)
xdatade = np.delete(xdatade, outsde, axis=0)
ydatade = np.delete(ydatade, outsde, axis=0)


outsfr = []
for i in range (xdatafr.shape[1]):
    zscores = np.abs(stats.zscore(xdatafr[:,i]))
    for i in range(xdatafr.shape[0]):
        if (zscores[i]>5):
            if not(i in outsfr):
                outsfr.append(i)
zscores = np.abs(stats.zscore(ydatafr))
for i in range(ydatafr.shape[0]):
    if (zscores[i]>5):
        if not(i in outsfr):
            outsfr.append(i)
xdatafr = np.delete(xdatafr, outsfr, axis=0)
ydatafr = np.delete(ydatafr, outsfr, axis=0)

In [1988]:
# removing the features with low correlation for the train set
xcompde = xdatade.copy()
delsde = []
for i in range(xcompde.shape[1]):
    corr = spearmanr(xcompde[:,i], ydatade).correlation
    if (abs(corr)<0.1):
        delsde.append(i)
xcompdefinal = np.delete(xcompde,delsde,axis=1)
xcompdefinal = (xcompdefinal - np.mean(xcompdefinal, axis=0)) / np.std(xcompdefinal, axis=0)


xcompfr = xdatafr.copy()
delsfr = []
for i in range(xcompfr.shape[1]):
    corr = spearmanr(xcompfr[:,i], ydatafr).correlation
    if (abs(corr)<0.1):
        delsfr.append(i)
xcompfrfinal = np.delete(xcompfr,delsfr,axis=1)
xcompfrfinal = (xcompfrfinal - np.mean(xcompfrfinal, axis=0)) / np.std(xcompfrfinal, axis=0)

In [1989]:
# extracting nd array data for the test
xtestde = x_test_de.iloc[: , 3:].values
xtestde = (xtestde - np.mean(xtestde, axis=0)) / np.std(xtestde, axis=0)
xtestde = np.delete(xtestde,delsde,axis=1)



xtestfr = x_test_fr.iloc[: , 3:].values
xtestfr = (xtestfr - np.mean(xtestfr, axis=0)) / np.std(xtestfr, axis=0)
xtestfr = np.delete(xtestfr,delsfr,axis=1)

In [1990]:
xcompde.shape

(643, 29)

In [1991]:
len(delsde)

20

In [1992]:
def spearman_loss_lgb(ytrue, ypred):
    
    def corrcoef(target, pred):
        pred_n = pred - pred.mean()
        target_n = target - target.mean()
        pred_n = pred_n / pred_n.norm()
        target_n = target_n / target_n.norm()
        return -1*(pred_n * target_n).sum()

    def differentiable_spearman(target, pred, regularization="l2", regularization_strength=1.0,):
        pred = torchsort.soft_rank(
            pred,
            regularization=regularization,
            regularization_strength=regularization_strength,
        )
        return corrcoef(target, pred / pred.shape[-1])
    
    lenypred = ypred.shape[0]
    lenytrue = ytrue.shape[0]

    ypred_th = torch.tensor(ypred.reshape(1, lenypred), requires_grad=True)
    ytrue_th = torch.tensor(ytrue.reshape(1, lenytrue))

    loss = differentiable_spearman(ytrue_th, ypred_th, regularization_strength=1e-2)
    # print(f'Current loss:{loss}')

    # calculate gradient and convert to numpy
    loss_grads = torch.autograd.grad(loss, ypred_th)[0]
    loss_grads = loss_grads.to('cpu').detach().numpy()

    # return gradient and ones instead of Hessian diagonal
    return loss_grads[0], np.ones(loss_grads.shape)[0]

In [1993]:
# Make predictions DE
model = SVR(kernel='linear', C=10, epsilon=0.1)
model.fit(xcompdefinal, ydatade)
ypredde = model.predict(xtestde)

# Make predictions FR
model = SVR(kernel='linear', C=10, epsilon=0.1)
model.fit(xcompfrfinal, ydatafr)
ypredfr = model.predict(xtestfr)

In [1994]:
final = pd.read_csv('y_test_random_final.csv')

In [1995]:
fr = 0
de = 0
for i in range (final.shape[0]):
    if x_test['COUNTRY'].values[i]=='FR':
        final.iloc[i,1] = ypredfr[fr]
        fr = fr + 1
    else :
        final.iloc[i,1] = ypredde[de]
        de = de + 1

In [1996]:
final

Unnamed: 0,ID,TARGET
0,1115,-0.024293
1,1202,0.048497
2,1194,-0.083432
3,1084,0.034615
4,1135,-0.071057
...,...,...
649,879,-0.542389
650,673,-0.160914
651,1641,0.038590
652,712,0.361037


In [1997]:
final.to_csv('y_pred.csv' , index = False)

In [1998]:
final.describe()

Unnamed: 0,ID,TARGET
count,654.0,654.0
mean,1075.192661,-0.021874
std,625.699109,0.202357
min,1.0,-1.005675
25%,528.5,-0.082634
50%,1060.5,-0.010327
75%,1631.5,0.071392
max,2147.0,0.602067


In [1999]:
y_train.describe()

Unnamed: 0,ID,TARGET
count,1494.0,1494.0
mean,1072.759036,0.089934
std,618.013179,1.034582
min,0.0,-6.519268
25%,540.25,-0.219861
50%,1077.5,0.0
75%,1597.5,0.269719
max,2146.0,7.786578
