In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Read the training and test tables from CSV files in the working directory.
df_train, df_test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [3]:
df_train.head(3)

Unnamed: 0,id,Sex,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,0,male,36,189.0,82.0,26.0,101.0,41.0,150.0
1,1,female,64,163.0,60.0,8.0,85.0,39.7,34.0
2,2,female,51,161.0,64.0,7.0,84.0,39.8,29.0


In [4]:
df_test.head(3)

Unnamed: 0,id,Sex,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,750000,male,45,177.0,81.0,7.0,87.0,39.8
1,750001,male,26,200.0,97.0,20.0,101.0,40.5
2,750002,female,29,188.0,85.0,16.0,102.0,40.4


In [5]:
df_train.drop('id',axis=1,inplace=True)

In [6]:
df_test.drop('id',axis=1,inplace=True)

In [7]:
from sklearn.preprocessing import LabelEncoder

In [8]:
label = LabelEncoder()

In [9]:
# Learn the category -> integer mapping from the training column only, then
# apply that same mapping to both frames so their codes agree.
label.fit(df_train['Sex'])
df_train['Sex'] = label.transform(df_train['Sex'])
df_test['Sex'] = label.transform(df_test['Sex'])

In [10]:
from sklearn.linear_model import LinearRegression

In [11]:
model = LinearRegression()

In [12]:
from sklearn.preprocessing import PolynomialFeatures

In [13]:
poly = PolynomialFeatures()

In [14]:
from sklearn.preprocessing import StandardScaler

In [15]:
scaler = StandardScaler()

In [16]:
from sklearn.pipeline import Pipeline

In [17]:
pipe = Pipeline([('scaler',scaler),('poly',poly),('model',model)])

In [18]:
pipe

In [19]:
from sklearn.model_selection import GridSearchCV

In [20]:
# Candidate polynomial degrees (1 through 5) for the 'poly' pipeline step.
params = dict(poly__degree=list(range(1, 6)))

In [21]:
final_model = GridSearchCV(pipe,param_grid=params,verbose=2,cv=3)

In [22]:
# Target is the 'Calories' column; features are every other training column.
# The test frame has no target column and is used as-is.
y_train = df_train['Calories']
X_train = df_train.drop(columns='Calories')
X_test = df_test

In [23]:
final_model.fit(X_train,y_train)

Fitting 3 folds for each of 5 candidates, totalling 15 fits
[CV] END .....................................poly__degree=1; total time=   0.4s
[CV] END .....................................poly__degree=1; total time=   0.3s
[CV] END .....................................poly__degree=1; total time=   0.3s
[CV] END .....................................poly__degree=2; total time=   1.7s
[CV] END .....................................poly__degree=2; total time=   1.9s
[CV] END .....................................poly__degree=2; total time=   1.5s
[CV] END .....................................poly__degree=3; total time=   8.7s
[CV] END .....................................poly__degree=3; total time=   7.7s
[CV] END .....................................poly__degree=3; total time=   7.8s
[CV] END .....................................poly__degree=4; total time=  23.6s
[CV] END .....................................poly__degree=4; total time=  22.0s
[CV] END .....................................pol

In [24]:
final_model.best_params_

{'poly__degree': 5}

In [25]:
final_model.best_score_

np.float64(0.9966876819778593)

In [26]:
y_pred = final_model.predict(X_test)

In [27]:
df_sub = pd.read_csv('sample_submission.csv')

In [28]:
df_sub['Calories'] = y_pred

In [30]:
df_sub['Calories'] = np.clip(df_sub['Calories'],0,None)

In [31]:
df_sub.to_csv('submission2.csv',index=False,index_label=False)

In [47]:
df_csv = pd.read_csv('sub4.csv')

In [None]:
from sklearn.linear_model import SGDRegressor

In [130]:
model = SGDRegressor()

In [131]:
scaler = StandardScaler()

In [132]:
pipe = Pipeline([('scaler',scaler),('model',model)])

In [133]:
pipe

In [134]:
# Hyper-parameter grid for the 'model' step of the SGD pipeline.
# 4 penalties x 4 alphas x 3 iteration caps x 4 initial learning rates x
# 2 early-stopping settings = 384 candidates.
params = dict(
    model__penalty=['l2', 'l1', 'elasticnet', None],
    model__alpha=[0.001, 0.01, 0.1, 1],
    model__max_iter=[100, 500, 1000],
    model__eta0=[0.001, 0.01, 0.1, 1],
    model__early_stopping=[True, False],
)

In [135]:
final_model = GridSearchCV(pipe,param_grid=params,verbose=2,cv=3)

In [136]:
# Rebuild the feature/target split: 'Calories' is the target, all remaining
# columns are features; the test frame is used unchanged.
y_train = df_train['Calories']
X_train = df_train.drop(columns='Calories')
X_test = df_test

In [137]:
final_model.fit(X_train,y_train)

Fitting 3 folds for each of 384 candidates, totalling 1152 fits
[CV] END model__alpha=0.001, model__early_stopping=True, model__eta0=0.001, model__max_iter=100, model__penalty=l2; total time=   0.9s
[CV] END model__alpha=0.001, model__early_stopping=True, model__eta0=0.001, model__max_iter=100, model__penalty=l2; total time=   0.9s
[CV] END model__alpha=0.001, model__early_stopping=True, model__eta0=0.001, model__max_iter=100, model__penalty=l2; total time=   0.8s
[CV] END model__alpha=0.001, model__early_stopping=True, model__eta0=0.001, model__max_iter=100, model__penalty=l1; total time=   1.2s
[CV] END model__alpha=0.001, model__early_stopping=True, model__eta0=0.001, model__max_iter=100, model__penalty=l1; total time=   1.2s
[CV] END model__alpha=0.001, model__early_stopping=True, model__eta0=0.001, model__max_iter=100, model__penalty=l1; total time=   1.2s
[CV] END model__alpha=0.001, model__early_stopping=True, model__eta0=0.001, model__max_iter=100, model__penalty=elasticnet; to



[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l2; total time=  11.3s
[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l2; total time=   9.3s




[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l1; total time=  15.4s




[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l1; total time=  15.1s
[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=elasticnet; total time=   7.9s
[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=elasticnet; total time=  15.4s




[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=elasticnet; total time=  16.0s




[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=None; total time=  12.1s




[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=None; total time=  11.7s




[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=None; total time=  11.8s
[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l2; total time=  17.1s
[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l2; total time=  18.7s
[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l2; total time=   8.0s
[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l1; total time=  12.0s
[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l1; total time=  19.5s
[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l1; total time=  15.8s
[CV] END model__alpha=0.001, model__early_stopping=False, model__eta0=1, m



[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l2; total time=  11.4s
[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l1; total time=   7.2s




[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l1; total time=  14.7s




[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l1; total time=  14.7s
[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=elasticnet; total time=  14.3s




[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=elasticnet; total time=  14.8s
[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=elasticnet; total time=  14.0s
[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=None; total time=   9.7s




[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=None; total time=  12.0s
[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=None; total time=   9.8s
[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l2; total time=  13.2s
[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l2; total time=  10.2s
[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l2; total time=  11.6s
[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l1; total time=  17.9s
[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l1; total time=  16.0s
[CV] END model__alpha=0.01, model__early_stopping=False, model__eta0=1, model__



[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l2; total time=  11.6s
[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l2; total time=   9.5s




[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l1; total time=  15.5s




[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l1; total time=  15.2s
[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l1; total time=  11.8s




[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=elasticnet; total time=  16.0s
[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=elasticnet; total time=  12.8s
[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=elasticnet; total time=  14.8s




[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=None; total time=  11.3s




[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=None; total time=  11.1s




[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=None; total time=  11.0s
[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l2; total time=  14.1s
[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l2; total time=  14.9s
[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l2; total time=  12.0s
[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l1; total time=  16.9s
[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l1; total time=  11.6s
[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l1; total time=  12.5s
[CV] END model__alpha=0.1, model__early_stopping=False, model__eta0=1, model__max_iter=5



[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l1; total time=  15.5s




[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l1; total time=  15.4s
[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=l1; total time=   9.4s
[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=elasticnet; total time=   9.8s
[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=elasticnet; total time=  13.6s




[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=elasticnet; total time=  15.7s




[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=None; total time=  11.1s




[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=None; total time=  11.2s




[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=100, model__penalty=None; total time=  11.1s
[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l2; total time=   9.7s
[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l2; total time=   7.0s
[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l2; total time=  13.0s
[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l1; total time=  11.2s
[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l1; total time=  17.2s
[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penalty=l1; total time=  18.9s
[CV] END model__alpha=1, model__early_stopping=False, model__eta0=1, model__max_iter=500, model__penal

In [138]:
final_model.best_params_

{'model__alpha': 0.01,
 'model__early_stopping': False,
 'model__eta0': 0.001,
 'model__max_iter': 1000,
 'model__penalty': None}

In [139]:
final_model.best_score_

np.float64(0.9683647530799421)

In [140]:
df_sub = pd.read_csv('sample_submission.csv')

In [141]:
y_pred3 = final_model.predict(X_test)

In [142]:
df_sub['Calories'] = y_pred3

In [None]:
df_sub.to_csv('final_.csv',in)

In [24]:
df_train.corr()

Unnamed: 0,Age,Duration,Heart_Rate,Body_Temp,Calories
Age,1.0,0.015656,0.017037,0.030275,0.145683
Duration,0.015656,1.0,0.875327,0.903066,0.959908
Heart_Rate,0.017037,0.875327,1.0,0.795972,0.908748
Body_Temp,0.030275,0.903066,0.795972,1.0,0.828671
Calories,0.145683,0.959908,0.908748,0.828671,1.0


In [68]:
# Feature/target split for the untuned SGD model: target is 'Calories',
# features are the remaining columns, test frame is passed through as-is.
y_train = df_train['Calories']
X_train = df_train.drop(columns='Calories')
X_test = df_test

In [69]:
scaler = StandardScaler()

In [70]:
model = SGDRegressor(max_iter=100)

In [71]:
pipe = Pipeline([('scaler', scaler), ('model', model)])

In [72]:
pipe

In [73]:
pipe.fit(X_train, y_train)

In [74]:
y_pred = pipe.predict(X_test)

In [75]:
df_sub['Calories'] = y_pred

In [76]:
df_sub.to_csv('sub5.csv',index=False,index_label=False)