In [1]:
# get bike data
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import pandas as pd
data_path = 'Bike-Sharing-Dataset/hour.csv'

rides = pd.read_csv(data_path)


In [2]:
dummy_fields = ['season', 'weathersit', 'mnth', 'hr', 'weekday']
for each in dummy_fields:
    dummies = pd.get_dummies(rides[each], prefix=each, drop_first=False)
    rides = pd.concat([rides, dummies], axis=1)

fields_to_drop = ['instant', 'dteday', 'season', 'weathersit', 
                  'weekday', 'atemp', 'mnth', 'workingday', 'hr']
data = rides.drop(fields_to_drop, axis=1)


In [3]:
quant_features = ['casual', 'registered', 'cnt', 'temp', 'hum', 'windspeed']
# Store scalings in a dictionary so we can convert back later
scaled_features = {}
for each in quant_features:
    mean, std = data[each].mean(), data[each].std()
    scaled_features[each] = [mean, std]
    data.loc[:, each] = (data[each] - mean)/std

In [4]:
# Save the last 21 days 
test_data = data[-21*24:]
data = data[:-21*24]

# Separate the data into features and targets
target_fields = ['cnt', 'casual', 'registered']
features, targets = data.drop(target_fields, axis=1), data[target_fields]
test_features, test_targets = test_data.drop(target_fields, axis=1), test_data[target_fields]

In [5]:
# Hold out the last 60 days of the remaining data as a validation set
train_features, train_targets = features[:-60*24], targets[:-60*24]
val_features, val_targets = features[-60*24:], targets[-60*24:]



In [6]:
import numpy as np
import matplotlib.pyplot as plt
import math

class Deep(object):    
    def __init__(self, sizes, actype='R'):
            self.depth = len(sizes)-1
            weights = []
            biases = []
            for m,n in zip(sizes[:-1],sizes[1:]):
                weights.append(np.random.randn(m,n)/np.sqrt(m))
                biases.append(np.zeros(n))
            self.weights = weights
            self.biases = biases
            self.actype = actype
        
    def act(self,u,c=1):
        if self.actype == 'R':
            return np.maximum(u,0)
        else:
            return 1./(1.+np.exp(-c*u))
        
    def act_diff(self,u,c=1):
        if self.actype == 'R':
            return (u>0)
        else:
            return c*np.exp(-c*u)/(1+np.exp(-c*u))**2
                
    def loss(self, X , y=None, reg=0.0):
        weights = self.weights
        biases = self.biases
        depth = self.depth
        pas=[np.dot(X,weights[0])+biases[0]]
        
        #Pass into the next layers
        for i in range(1,depth):
            pas.append(np.dot(self.act(pas[-1]),weights[i])+biases[i])
        
        #Calculate the loss function
        out = pas[-1]    
        L = np.mean((out-y)**2)
        
        L += reg * sum([np.linalg.norm(weights[i])**2 for i in range(depth)])
        weights_grad = []
        biases_grad = []
        
        #NOTE: it is not exactly the gradient of the loss function
        grad = 2*(out-y)

        #Backprogation here
        for i in range(depth-1,0,-1):
            weights_grad.append(self.act(pas[i-1]).T.dot(grad))
            biases_grad.append(sum(grad,0))
            grad = grad.dot(weights[i].T)*self.act_diff(pas[i-1])
            
        #The last one is calculated a bit differently    
        weights_grad.append(X.T.dot(grad))
        biases_grad.append(sum(grad,0))
        
        #Reorder the grads
        grads = {'weights': weights_grad[: : -1], 'biases': biases_grad[: : -1]}     

        return L, grads
    

    def train(self, X, y, X_val, y_val,
              learning_rate=1e-3, learning_rate_decay=0.95,
              reg=5e-6, epochs = 1000,
              batch_size=200, verbose=False):
        losses = {'train':[],'validation':[]}
        depth = self.depth
        data_size = len(X)
        perm = np.arange(data_size)
        iter_epoch = int(max(data_size/batch_size,1))
        n_iter = epochs * iter_epoch
        start =0 
        epoch =1
        for it in range(n_iter):
            X_batch = X[perm[start:start+batch_size],:]
            y_batch = y[perm[start:start+batch_size]]
            L, grads = self.loss(X_batch,y_batch,reg)

            for i in range(depth):
                self.weights[i] -= learning_rate*grads['weights'][i]
                self.biases[i] -= learning_rate*grads['biases'][i]
        
            if start + batch_size> data_size:
                training_accuracy = np.mean((self.predict(X)-y)**2)
                vali_accuracy = np.mean((self.predict(X_val) - y_val)**2)
                sys.stdout.write('\r Epoch: %d... Loss: %f ... training accuracy: %f...validation accuracy: %f ' %(epoch, L,training_accuracy,vali_accuracy))
                losses['train'].append(training_accuracy)    
                losses['validation'].append(vali_accuracy)

                start =0
                epoch += 1
                np.random.shuffle(perm)
            else:
                start = start + batch_size
            
        return losses
    
    def predict(self,X):
        pas = np.dot(X,self.weights[0])+self.biases[0]
        for i in range(1,self.depth):
            pas = np.dot(self.act(pas),self.weights[i])+self.biases[i]
        return pas



In [None]:
import sys


X = train_features.values
y = np.array(train_targets['cnt'].values,ndmin=2).T

X_val = val_features.values
y_val = np.array(val_targets['cnt'].values,ndmin=2).T

input_size = X.shape[1]
sizes=[input_size,40, 5, 2, 1]
learning_rate=1e-3
learning_rate_decay=1.
reg=5e-6 
#reg=0.
epochs = 10
batch_size=200

In [None]:
bike_deep= Deep(sizes,actype='R')
L =bike_deep.train(X,y,X_val,y_val,learning_rate=learning_rate, reg = reg,verbose=True)

 Epoch: 437... Loss: 0.026549 ... training accuracy: 0.029392...validation accuracy: 0.166970 

In [None]:
losses =L
plt.plot(losses['train'], label='Training loss')
plt.plot(losses['validation'], label='Validation loss')
plt.legend()
plt.ylim(ymax=.5)

In [None]:
fig, ax = plt.subplots(figsize=(8,4))

mean, std = scaled_features['cnt']
predictions = bike_deep.predict(test_features.values)*std + mean

ax.plot(predictions, label='Prediction')
ax.plot((test_targets['cnt']*std + mean).values, label='Data')
ax.set_xlim(right=len(predictions))
ax.legend()

dates = pd.to_datetime(rides.iloc[test_data.index]['dteday'])
dates = dates.apply(lambda d: d.strftime('%b %d'))
ax.set_xticks(np.arange(len(dates))[12::24])
_ = ax.set_xticklabels(dates[12::24], rotation=45)