# Regression Performance Dashboard for Bike Sharing Dataset

In [2]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestRegressor

from evidently.dashboard import Dashboard
from evidently.tabs import DriftTab, NumTargetDriftTab, RegressionPerformanceTab

## Bike Sharing Demand Data

More information about the dataset can be found in the UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset

Acknowledgement: Fanaee-T, Hadi, and Gama, Joao, 'Event labeling combining ensemble detectors and background knowledge', Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg

In [3]:
# Load the CSV, parsing the `dteday` column as datetimes so it can later
# be used as the report's datetime column.
raw_data = pd.read_csv(
    'Bike-Sharing-Dataset/day.csv',
    header=0,
    sep=',',
    parse_dates=['dteday'],
)

In [12]:
# Reference window: the first 120 rows, which the model is fitted on.
# Production window: the next 30 rows, scored as the "current" data.
# .copy() gives each frame its own data, so adding the `prediction` column
# later does not write into a slice of raw_data (which would raise
# pandas' SettingWithCopyWarning and leave the result ambiguous).
ref_data = raw_data.iloc[:120].copy()
prod_data = raw_data.iloc[120:150].copy()

In [29]:
# Display the reference frame.
# NOTE(review): this cell's execution count (In [29]) is later than every
# other cell, and its saved output includes a `prediction` column that is
# only added in the prediction cell further down. On a fresh
# Restart & Run All that column will not be present at this point.
ref_data

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt,prediction
0,1,2011-01-01,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985,1105.7
1,2,2011-01-02,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801,1121.2
2,3,2011-01-03,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349,1386.4
3,4,2011-01-04,1,0,1,0,2,1,1,0.200000,0.212122,0.590435,0.160296,108,1454,1562,1524.4
4,5,2011-01-05,1,0,1,0,3,1,1,0.226957,0.229270,0.436957,0.186900,82,1518,1600,1615.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,116,2011-04-26,2,0,4,0,2,1,1,0.631667,0.594083,0.729167,0.326500,678,3722,4400,4226.4
116,117,2011-04-27,2,0,4,0,3,1,2,0.620000,0.575142,0.835417,0.312200,547,3325,3872,3959.7
117,118,2011-04-28,2,0,4,0,4,1,2,0.617500,0.578929,0.700833,0.320908,569,3489,4058,3897.4
118,119,2011-04-29,2,0,4,0,5,1,1,0.510000,0.497463,0.457083,0.240063,878,3717,4595,3714.4


## Regression Model

In [20]:
# Column roles: the value being predicted and the timestamp column.
target, datetime = 'cnt', 'dteday'

# Model inputs, grouped by how the report should treat them.
numerical_features = [
    'mnth',
    'temp',
    'atemp',
    'hum',
    'windspeed',
]
categorical_features = [
    'season',
    'holiday',
    'weekday',
    'workingday',
    'weathersit',
]

# Full input list: numerical columns first, then categorical ones.
features = [*numerical_features, *categorical_features]

In [21]:
# Random forest with a fixed seed; all other hyperparameters are left at
# the library defaults.
model = RandomForestRegressor(random_state=0)

In [22]:
# Fit on the reference window only: selected feature columns as inputs,
# the target column as the label.
X_ref = ref_data[features]
y_ref = ref_data[target]
model.fit(X_ref, y_ref)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=0, verbose=0, warm_start=False)

In [23]:
# Score both windows with the fitted model and store the result in a
# `prediction` column on each frame (reference first, then production).
for frame in (ref_data, prod_data):
    frame['prediction'] = model.predict(frame[features])

## Regression Performance Report

In [24]:
# Map each dashboard role to the corresponding column name(s) in the
# reference and production frames.
column_mapping = {
    'target': target,
    'prediction': 'prediction',
    'datetime': datetime,
    'numerical_features': numerical_features,
    'categorical_features': categorical_features,
}

In [25]:
# Build the dashboard from the reference and production frames, with the
# regression performance tab as its only tab.
dashboard = Dashboard(
    ref_data,
    prod_data,
    column_mapping=column_mapping,
    tabs=[RegressionPerformanceTab],
)

In [26]:
# Show the dashboard as this cell's output.
# NOTE(review): presumably rendered inline in the notebook — confirm
# against the evidently version in use.
dashboard.show()

In [28]:
# Write the dashboard to an HTML file; the path is relative, so it is
# resolved against the current working directory.
report_path = 'regression_perfomance_mnth.html'
dashboard.save(report_path)