In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error

from keras.models import Sequential, load_model
from keras.layers import LSTM, GRU, Dense
from keras.layers import Dropout,Flatten
from keras.callbacks import ModelCheckpoint, EarlyStopping

Using TensorFlow backend.


In [2]:
np.random.seed(123)

## MLR Model

In [3]:
file_path = 'C:/Users/Abund/Downloads/bike_project/Bikeshare/Data/'
file_name = 'daily.csv'
df = pd.read_csv(file_path + file_name, index_col=0, header=0)

In [4]:
df.head()

Unnamed: 0_level_0,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
instant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,2011-01-01,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985
2,2011-01-02,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
3,2011-01-03,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
4,2011-01-04,1,0,1,0,2,1,1,0.2,0.212122,0.590435,0.160296,108,1454,1562
5,2011-01-05,1,0,1,0,3,1,1,0.226957,0.22927,0.436957,0.1869,82,1518,1600


In [5]:
# select X by ditching first column with dates
X = df.iloc[:,1:]

In [6]:
# select y by popping out count column
y = X.pop('cnt')

In [7]:
# split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=123)

In [8]:
y_train = y_train.values.reshape(-1,1)
y_test = y_test.values.reshape(-1,1)
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()

In [9]:
sc_X = MinMaxScaler(feature_range=(0,1))
sc_y = MinMaxScaler(feature_range=(0,1))

sc_X.fit(X_train)
sc_y.fit(y_train)

y_train = sc_y.transform(y_train)
y_test = sc_y.transform(y_test)
X_train = sc_X.transform(X_train)
X_test = sc_X.transform(X_test)

In [10]:
# performing exhaustive search on my variables  using mean squared error
lr = LinearRegression()
efs = EFS(lr, min_features = 1, max_features = 13, scoring='neg_mean_squared_error',print_progress=True, cv=5)

In [11]:
# create efs fit
efs = efs.fit(X_train,y_train)

Features: 5792/8191IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Features: 8191/8191

In [12]:
print('best features:', efs.best_idx_)

best features: (5, 11, 12)


In [13]:
# subset best features for MLR
X_train_best = X_train[:,[5,11,12]]
X_test_best = X_test[:,[5,11,12]]

In [14]:
# create mlr
mlr = LinearRegression()
# fit mlr
mlr.fit(X_train_best,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [15]:
y_hat = sc_y.inverse_transform(mlr.predict(X_test_best)).flatten()
y_t = sc_y.inverse_transform(y_test).flatten()

In [16]:
predictions = pd.DataFrame({'Actual': y_t, 'Predicted': y_hat})

In [17]:
predictions.head()

Unnamed: 0,Actual,Predicted
0,6421.0,6421.0
1,3389.0,3389.0
2,2252.0,2252.0
3,3747.0,3747.0
4,4708.0,4708.0


In [18]:
print(f'''
Coefficients: {mlr.coef_}
Intercept:    {mlr.intercept_}
Mean Squared Error: {mean_squared_error(y_t, y_hat)}
''')


Coefficients: [[-1.44877549e-16  3.99390601e-01  8.11672331e-01]]
Intercept:    [4.4408921e-16]
Mean Squared Error: 2.2969912311806812e-24



## LTSM RNN Model

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, shuffle=False)

In [26]:
y_train = y_train.values.reshape(-1,1)
y_test = y_test.values.reshape(-1,1)
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()

In [24]:
def createSeq(x_ds, y_ds, look_back,foresight):
    X,Y = [], []
    for i in range(len(x_ds)-look_back-foresight):
        obs = x_ds[i:(i+look_back),:]
        X.append(obs)
        Y.append(y_ds[i+(look_back+foresight),1])
    return np.array(X), np.array(Y)

In [21]:
foresights = [1,2,3,4]
look_backs = [1,2,3,4]
batch_sizes = [4, 8, 16, 32, 64]
patience_sizes = [10,20,30,40,50,75,100]
results = {}
i = 0
for foresight in foresights:
    for look_back in look_backs:
        for batches in batch_sizes:
            for patiences in patience_sizes:    
                train_x, train_y = createSeq(X_train_best,y_train,foresight,look_back)
                test_x, test_y = createSeq(X_test_best,y_test,foresight,look_back)
                lstm_model = Sequential()
                lstm_model.add(LSTM(units=30,input_shape=(train_x.shape[1],train_x.shape[2]), dropout=0.1, recurrent_dropout = 0.1, return_sequences=True))
                lstm_model.add(LSTM(units=30, return_sequences=True))
                lstm_model.add(LSTM(units=30))
                lstm_model.add(Dense(units=1, activation='linear'))
                lstm_model.compile(loss = 'mae',optimizer='adam',metrics = ['mean_absolute_error'])
                network_name = 'lstm'
                filepath = network_name + "_best_model.hdf5"
                es = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=patiences)
                chk = ModelCheckpoint(filepath, monitor='val_loss',verbose=0,save_best_only=True,mode='min')
                callbacks_list = [es,chk]
                lstm_network = lstm_model.fit(train_x,train_y,validation_data=(val_x,val_y),epochs=4000,batch_size=batches,callbacks=callbacks_list,verbose=0)
                print(batches, patiences)
                _, val_acc = lstm_model.evaluate(val_x, val_y, verbose=0)
                results[i] = {'foresight':foresight,
                              'lookback':look_back,
                              'batchsize':batches,
                              'patience':patiences,
                              'val_loss': val_acc}
                i += 1 
        


IndexError: tuple index out of range