# Gradient Boost 

In [2]:
# Importing libraries
#Importing all libraries 
import os 
import csv
import pandas as pd 
import matplotlib.pyplot as plt 
import numpy as np
import joblib 
import sklearn 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error as MSE
from sklearn.ensemble import GradientBoostingRegressor as GBR

In [3]:
# Data Files 
testFile='./data/test.csv'
trainFile='./data/train.csv'
df=pd.read_csv(trainFile)
df.head()


Unnamed: 0,latitude,longitude,year,month,day,cp,e,ilspf,lsp,pev,tcrw,tp,swvl1,swvl2,ro
0,17.75,77.25,2019,5,30,0.0,-0.000114,0.0,0.0,-0.001029,0.0,0.0,0.206655,0.219927,0.0
1,17.0,77.25,2018,10,23,1e-05,-0.000437,0.0,0.0,-0.000713,0.00196,1e-05,0.334029,0.33247,4.875474e-07
2,17.25,77.0,2019,11,25,0.0,-0.000361,0.0,0.0,-0.000659,0.0,0.0,0.279484,0.327522,3.046822e-06
3,17.0,77.25,2018,10,2,4e-05,-0.000458,0.0,1.945253e-05,-0.000642,0.002063,6e-05,0.38543,0.34014,4.875474e-07
4,17.75,78.0,2018,11,3,2e-06,-0.000173,0.0,4.139729e-07,-0.000341,0.0,2e-06,0.238017,0.265713,1.95019e-06


In [4]:
def preProcessing(dataframe):
    #Split into X,y 
    #Scale them
    df1=dataframe[['cp','e','ilspf','lsp','pev','tcrw','tp','swvl1','swvl2','ro']]
    X = df1.iloc[:,:8].values.astype(float)
    y = df1.iloc[:,-1].values.astype(float)
    X=np.reshape(X,(-1,8))
    y=np.reshape(y,(-1,1))
    sc_X = StandardScaler()
    sc_y = StandardScaler()
    X = sc_X.fit_transform(X)
    y = sc_y.fit_transform(y)
    return(X,y,sc_X,sc_y)

In [5]:
# Training it on the file.
X,y,sx,sy=preProcessing(df)
print(X.shape)
print(y.shape)

(17337, 8)
(17337, 1)


In [6]:
# Testing 
X_test,y_test,sxt,syt=preProcessing(pd.read_csv(testFile))
print(X_test.shape)
print(y_test.shape)

(913, 8)
(913, 1)


# GBR Training

In [8]:
params = {'n_estimators': 70,
          'max_depth': 5,
          'min_samples_split': 10,
          'learning_rate': 0.0001}

reg =GBR(**params)
model=reg.fit(X,y.ravel())

In [9]:
#Testing Loss Generation. 
# Loss Generation. 
y_pred_test = sy.inverse_transform((reg.predict(X_test)))
loss=1/913*(MSE(sy.inverse_transform(y_test),y_pred_test))
print(loss)

1.174910528259821e-11


In [10]:
# Storing the files 
joblib.dump(model,str('./model_files/GradientBoost')+'.sav')

['./model_files/GradientBoost.sav']

In [11]:
model1=joblib.load('./model_files/GradientBoost.sav')
y_pred_test_1 = sy.inverse_transform((model1.predict(sx.transform(X_test))))
df1=pd.DataFrame(y_pred_test_1)
df1.to_csv('results.csv')
