In [5]:
from data_preprocessing import decompress_pickle
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
import pickle

In [6]:
X_test = decompress_pickle("../data/partitioned/BikeRental_X_test.pbz2")
X_train = decompress_pickle("../data/partitioned/BikeRental_X_train.pbz2")
Y_test = decompress_pickle("../data/partitioned/BikeRental_Y_test.pbz2")
Y_train =decompress_pickle("../data/partitioned/BikeRental_Y_train.pbz2")


In [9]:
X_test.shape

(3509, 12)

In [10]:
X_train.shape

(14035, 12)

In [11]:
X_test = X_test.drop("datetime", axis = 1)
X_train = X_train.drop("datetime", axis = 1)

In [12]:
X_test.shape

(3509, 11)

In [13]:
X_train.shape

(14035, 11)

In [14]:
Y_train.shape

(14035,)

In [15]:
Y_test.shape

(3509,)

In [16]:
NN_regr_CV = MLPRegressor(solver='lbfgs', max_iter=10, random_state=0)

In [17]:
param_grid = { 
    "hidden_layer_sizes": [(1,), (2,)],
    # left out identity activation function due to its linearity
    "activation": ["logistic", "tanh"], 
    "alpha": [0.0, 0.1],
}

In [18]:
NN_regr_CV_model = GridSearchCV(estimator=NN_regr_CV, param_grid=param_grid, cv=5)

In [19]:
NN_regr_CV_model.fit(X_train, Y_train)

GridSearchCV(cv=5,
             estimator=MLPRegressor(max_iter=10, random_state=0,
                                    solver='lbfgs'),
             param_grid={'activation': ['logistic', 'tanh'],
                         'alpha': [0.0, 0.1],
                         'hidden_layer_sizes': [(1,), (2,)]})

In [20]:
print(NN_regr_CV_model.best_params_)

{'activation': 'logistic', 'alpha': 0.1, 'hidden_layer_sizes': (2,)}


In [21]:
NN_regr_CV = NN_regr_CV.set_params(**NN_regr_CV_model.best_params_)

In [22]:
NN_regr_CV.fit(X_train, Y_train)

MLPRegressor(activation='logistic', alpha=0.1, hidden_layer_sizes=(2,),
             max_iter=10, random_state=0, solver='lbfgs')

In [23]:
Y_train_pred = NN_regr_CV.predict(X_train)

In [24]:
Y_train_dev = sum((Y_train-Y_train_pred)**2)

In [25]:
Y_train_mean = Y_train.mean()
Y_train_meandev = sum((Y_train-Y_train_mean)**2)

In [26]:
r2 = 1 - Y_train_dev/Y_train_meandev
print("R2 :", r2)

R2 : 0.23825544944415122


In [27]:
Y_test_pred = NN_regr_CV.predict(X_test)

In [28]:
Y_test_dev = sum((Y_test-Y_test_pred)**2)

In [29]:
Y_test_meandev = sum((Y_test-Y_train_mean)**2)

In [30]:
pseudor2 = 1 - Y_test_dev/Y_test_meandev
print("Pseudo-R2 :", pseudor2)

Pseudo-R2 : 0.23092910557181112


In [45]:
import pandas as pd

In [57]:
r2_df = pd.DataFrame(data=[r2], columns=["r2"])


In [58]:
pseudor2_df = pd.DataFrame(data=[pseudor2], columns=["pseudor2"])


In [59]:
r_squared_values = pd.concat([r2_df, pseudor2_df], axis=1)


In [60]:
r_squared_values

Unnamed: 0,r2,pseudor2
0,0.238255,0.230929


In [61]:
optimal_parameters = pd.DataFrame(NN_regr_CV_model.best_params_)


In [62]:
optimal_parameters

Unnamed: 0,activation,alpha,hidden_layer_sizes
0,logistic,0.1,2


In [60]:
Y_test_pred

array([ 0.56395187,  0.38366951,  0.28715363, ...,  0.07505116,
        0.03568653, -0.0189239 ])

In [62]:
# create resulting dataframe (full dataframe (incl. unnormalized) for last 20%)

In [37]:
Y_test_pred

array([0.25029175, 0.25226582, 0.25356673, ..., 0.25270876, 0.25401649,
       0.2553255 ])

In [93]:
prediction_Y = pd.DataFrame(data=Y_test_pred, columns=["cnt"])
prediction_Y

Unnamed: 0,cnt
0,0.250292
1,0.252266
2,0.253567
3,0.256655
4,0.257311
...,...
3504,0.243835
3505,0.246857
3506,0.252709
3507,0.254016


In [None]:
kannst die echten Werte ausrechnen durch: X_scaled = X_std * (max - min) + min

In [108]:
max_min_cnt = decompress_pickle("../data/preprocessed/cnt_min_max.pbz2")
max_cnt = max_min_cnt[1]

KeyError: 1

In [115]:
max_cnt = max_min_cnt.iloc[0,0]

In [117]:
min_cnt = max_min_cnt.iloc[0,1]

In [116]:
max_cnt

977.0

In [118]:
min_cnt

1.0

In [120]:
norm_prediction_Y = prediction_Y * (max_cnt - min_cnt) + min_cnt

In [121]:
norm_prediction_Y

Unnamed: 0,cnt
0,245.284745
1,247.211443
2,248.481130
3,251.495380
4,252.135258
...,...
3504,238.983112
3505,241.931987
3506,247.643751
3507,248.920090


In [94]:
prediction_X = pd.DataFrame.reset_index(X_test)
prediction_X

Unnamed: 0,index,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,hum,windspeed
0,14035,3.0,1.0,8,19,0.0,1,1.0,2.0,0.755102,0.66,0.122840
1,14036,3.0,1.0,8,20,0.0,1,1.0,2.0,0.734694,0.70,0.157870
2,14037,3.0,1.0,8,21,0.0,1,1.0,2.0,0.714286,0.74,0.000000
3,14038,3.0,1.0,8,22,0.0,1,1.0,1.0,0.714286,0.74,0.122840
4,14039,3.0,1.0,8,23,0.0,1,1.0,1.0,0.714286,0.79,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
3504,17539,1.0,1.0,12,19,0.0,0,1.0,2.0,0.244898,0.60,0.193018
3505,17540,1.0,1.0,12,20,0.0,0,1.0,2.0,0.244898,0.60,0.193018
3506,17541,1.0,1.0,12,21,0.0,0,1.0,1.0,0.244898,0.60,0.193018
3507,17542,1.0,1.0,12,22,0.0,0,1.0,1.0,0.244898,0.56,0.157870


In [122]:
result_df = pd.concat([prediction_X, norm_prediction_Y], axis=1)


In [123]:
result_df

Unnamed: 0,index,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,hum,windspeed,cnt
0,14035,3.0,1.0,8,19,0.0,1,1.0,2.0,0.755102,0.66,0.122840,245.284745
1,14036,3.0,1.0,8,20,0.0,1,1.0,2.0,0.734694,0.70,0.157870,247.211443
2,14037,3.0,1.0,8,21,0.0,1,1.0,2.0,0.714286,0.74,0.000000,248.481130
3,14038,3.0,1.0,8,22,0.0,1,1.0,1.0,0.714286,0.74,0.122840,251.495380
4,14039,3.0,1.0,8,23,0.0,1,1.0,1.0,0.714286,0.79,0.000000,252.135258
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3504,17539,1.0,1.0,12,19,0.0,0,1.0,2.0,0.244898,0.60,0.193018,238.983112
3505,17540,1.0,1.0,12,20,0.0,0,1.0,2.0,0.244898,0.60,0.193018,241.931987
3506,17541,1.0,1.0,12,21,0.0,0,1.0,1.0,0.244898,0.60,0.193018,247.643751
3507,17542,1.0,1.0,12,22,0.0,0,1.0,1.0,0.244898,0.56,0.157870,248.920090


In [None]:
# de-normalize

In [None]:
# save dataframe

In [None]:
# save model

In [32]:
import os

In [35]:
os.mkdir("../python/NN_MLP_files")

In [36]:
pickle.dump(NN_regr_CV, open("../python/NN_MLP_files/NN_MLP_saved", "wb"))

In [68]:
# load model
test_load = pickle.load(open("NN_MLP_saved", "rb"))

In [70]:
test_load

MLPRegressor(activation='tanh', alpha=0.1, hidden_layer_sizes=(10,),
             max_iter=10000, random_state=0, solver='lbfgs')

In [71]:
tryout_predicting = test_load.predict(X_test)

In [72]:
tryout_predicting

array([ 0.56395187,  0.38366951,  0.28715363, ...,  0.07505116,
        0.03568653, -0.0189239 ])

In [74]:
NN_regr_CV_model.best_params_

{'activation': 'tanh',
 'alpha': 0.1,
 'hidden_layer_sizes': (10,),
 'learning_rate': 'constant'}

In [127]:
test = decompress_pickle("../python/NN_MLP_files/optimal_parameters.pbz2")


In [128]:
test

Unnamed: 0,activation,alpha,hidden_layer_sizes
0,tanh,0.1,10
