## Modeling

In [1]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from sklearn.base import BaseEstimator,TransformerMixin
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

import pipeline

# Seed NumPy's global RNG so steps that draw from it repeat on a fresh run.
np.random.seed(42)
# Seaborn defaults: 7x7 figures and a 1.2x font scale.
sns.set(font_scale=1.2, rc={'figure.figsize': [7, 7]})

In [2]:
# Read the training and testing CSV files from the working directory.
df_train = pd.read_csv('training_data.csv')
df_test = pd.read_csv('testing_data.csv')

**Split data into features (X) and targets (y)**

In [3]:
# Features: every column except 'casual', 'registered' and 'count'.
X_train = df_train.drop(columns=['casual', 'registered', 'count'])
# Targets: 'casual' and 'registered' kept together as a two-column frame.
y_train = df_train[['casual', 'registered']]

In [4]:
# Features: every column except 'casual', 'registered' and 'count'.
X_test = df_test.drop(columns=['casual', 'registered', 'count'])
# Targets: 'casual' and 'registered' kept together as a two-column frame.
y_test = df_test[['casual', 'registered']]

**Import pipeline**

In [5]:
# Instantiate the custom steps from the local `pipeline` module and load the
# preprocessor that was previously saved to disk.
feature_engineering=pipeline.feature_engineering()
select_features=pipeline.select_features()
# NOTE(review): joblib.load unpickles arbitrary Python objects — only load
# 'preprocessor.pickle' when it comes from a trusted source.
# Presumably the stored preprocessor is already fitted, since no cell shown
# here calls fit on it — confirm against the notebook that produced the file.
preprocessor=joblib.load('preprocessor.pickle')

In [6]:
# Chain feature engineering, column selection and the loaded preprocessor.
# The transformer instances created in the previous cell are reused here
# instead of being constructed a second time, so those variables are no
# longer dead and there is a single source for each step.
pipe = Pipeline(steps=[
    ('feature_engineering', feature_engineering),
    ('select_features', select_features),
    ('preprocessor', preprocessor),
])

In [7]:
# Labels applied to the array returned by the pipeline.
# Assumed to match the pipeline's output column order — verify against the
# select_features / preprocessor steps.
columns = [
    'temp', 'humidity', 'windspeed',
    'season', 'holiday', 'workingday', 'weather',
    'hour', 'month_name', 'day_of_week',
    'is_rush_hour', 'is_weekend',
]

In [8]:
# Run the training features through the pipeline and label the resulting array.
X_train_trans = pd.DataFrame(data=pipe.transform(X_train), columns=columns)
X_train_trans

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col]=X[col].astype('float64')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col]=X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col]=X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

Unnamed: 0,temp,humidity,windspeed,season,holiday,workingday,weather,hour,month_name,day_of_week,is_rush_hour,is_weekend
0,-0.810848,0.218929,-0.834323,3.0,0.0,0.0,1.0,20.0,9.0,2.0,0.0,1.0
1,0.982374,1.100479,-0.834323,0.0,0.0,1.0,0.0,5.0,5.0,6.0,0.0,0.0
2,2.142695,-1.181181,0.868389,0.0,0.0,0.0,0.0,16.0,1.0,2.0,1.0,1.0
3,1.193342,-0.558910,-1.564639,0.0,0.0,0.0,0.0,11.0,1.0,2.0,0.0,1.0
4,-0.705364,1.619038,-0.712876,2.0,0.0,0.0,1.0,4.0,0.0,3.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
8703,-1.865685,-0.403342,-0.712876,1.0,0.0,0.0,0.0,3.0,4.0,2.0,0.0,1.0
8704,-1.127299,-0.040351,0.746941,3.0,0.0,0.0,0.0,9.0,2.0,2.0,1.0,1.0
8705,-1.127299,-0.714478,0.503231,3.0,0.0,0.0,0.0,16.0,2.0,3.0,1.0,1.0
8706,-0.599881,-2.322010,2.693364,1.0,0.0,0.0,0.0,7.0,3.0,2.0,1.0,1.0


In [9]:
# Baseline forest handed to the hyperparameter search; the search replaces
# n_estimators and max_features for each candidate it evaluates.
model = RandomForestRegressor(
    n_estimators=50,
    max_features=1,
    random_state=100,
)

In [10]:
# Search space for the random forest. RandomizedSearchCV and scipy.stats are
# imported in the import cell at the top of the notebook rather than here, so
# all dependencies are visible in one place.
param_distributions = {
    'n_estimators': [50, 100, 200],
    # randint's upper bound is exclusive, so max_features is drawn from 1..11.
    'max_features': stats.randint(low=1, high=12),
}

In [11]:
# Randomised hyperparameter search: 20 sampled candidates, default CV and scoring.
# An explicit random_state makes the sampled candidates independent of whatever
# has already consumed NumPy's global RNG, so re-running this cell on its own
# repeats the same search instead of silently drawing new candidates.
random_search = RandomizedSearchCV(
    model,
    param_distributions=param_distributions,
    n_iter=20,
    random_state=42,
)
random_search.fit(X_train_trans, y_train)

In [12]:
# Hyperparameter combination that achieved the best mean cross-validated score.
random_search.best_params_

{'max_features': 5, 'n_estimators': 200}

In [13]:
# Mean cross-validated score of the best candidate (no `scoring` was passed,
# so this is the estimator's own score, i.e. R² for a regressor).
random_search.best_score_

0.8690175665230562

In [14]:
# Build the final forest from the tuned hyperparameters instead of copying
# them by hand, so this cell cannot drift from the search result.
# best_params_ only holds the searched keys (n_estimators, max_features).
model = RandomForestRegressor(**random_search.best_params_, random_state=100)

In [15]:
# Fit the final forest on the transformed training features against both target columns.
model.fit(X_train_trans,y_train)

In [16]:
# In-sample predictions, used below to measure how closely the model fits the training data.
train_preds=model.predict(X_train_trans)

In [17]:
# Training MSE. scikit-learn metrics take (y_true, y_pred); MSE is symmetric,
# so the value is unchanged, but the conventional order stays correct if an
# asymmetric metric is swapped in later.
mean_squared_error(y_train, train_preds)

238.06400812278355

In [18]:
# Coefficient of determination (R²) of the final model on the training set.
model.score(X_train_trans,y_train)

0.9822677845975722

In [19]:
# Run the test features through the same pipeline and label the resulting array.
X_test_trans = pd.DataFrame(data=pipe.transform(X_test), columns=columns)
X_test_trans

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col]=X[col].astype('float64')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col]=X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col]=X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

Unnamed: 0,temp,humidity,windspeed,season,holiday,workingday,weather,hour,month_name,day_of_week,is_rush_hour,is_weekend
0,1.720760,-0.144063,-1.564639,0.0,0.0,1.0,0.0,11.0,5.0,5.0,0.0,0.0
1,-2.076652,-0.403342,-0.834323,1.0,1.0,0.0,0.0,6.0,4.0,1.0,0.0,0.0
2,-0.177946,0.789343,0.016625,2.0,0.0,1.0,0.0,4.0,8.0,4.0,0.0,0.0
3,1.193342,0.011505,0.016625,0.0,0.0,1.0,1.0,10.0,1.0,6.0,0.0,0.0
4,-0.494397,0.737488,-0.469980,3.0,1.0,0.0,0.0,3.0,9.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2172,-1.021816,-0.507054,-1.564639,1.0,0.0,1.0,1.0,23.0,3.0,4.0,0.0,0.0
2173,0.454956,-0.662622,0.746941,2.0,0.0,0.0,2.0,17.0,8.0,3.0,1.0,1.0
2174,-0.916332,0.218929,1.112099,1.0,0.0,1.0,2.0,5.0,4.0,5.0,0.0,0.0
2175,1.298825,0.218929,-0.469980,0.0,0.0,1.0,0.0,2.0,5.0,6.0,0.0,0.0


In [20]:
# Predictions for the test features, used below for out-of-sample evaluation.
test_preds=model.predict(X_test_trans)

In [21]:
# Test MSE. scikit-learn metrics take (y_true, y_pred); MSE is symmetric,
# so the value is unchanged, but the conventional order stays correct if an
# asymmetric metric is swapped in later.
mean_squared_error(y_test, test_preds)

1870.8219294702217

In [22]:
# Coefficient of determination (R²) of the final model on the test set.
model.score(X_test_trans,y_test)

0.8630340141218897

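**That's good:** the test R² (≈0.863) is close to the cross-validated R² from the search (≈0.869), so the tuned forest generalizes about as well as the search predicted. The much higher training R² (≈0.982) and lower training MSE (≈238 vs ≈1871 on the test set) show that it still overfits the training data to some extent.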