# Bike Rentals

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from itertools import combinations

import pdpexplorer
from pdpexplorer.pdp import partial_dependence

In [2]:
# Read the bike-sharing CSV and discard its 'yr' column.
df_original = (
    pd.read_csv('bike-sharing.csv')
    .drop(columns='yr')
)

In [3]:
# Columns to expand into one indicator column per distinct value.
categories = ['weathersit']

# One-hot encode only the listed columns; every other column is carried over.
df_one_hot = pd.get_dummies(data=df_original, columns=categories)

In [4]:
# Model inputs: the one-hot encoded frame without the 'label' and
# 'prediction' columns.
df_X = df_one_hot.drop(['label', 'prediction'], axis='columns')

In [5]:
# Regression target: the 'label' column as a NumPy array.
y = df_original.loc[:, 'label'].to_numpy()

In [6]:
# Fit a 20-tree random forest on the feature matrix.
# A fixed random_state makes the fitted model (and therefore everything
# computed from regr.predict below) identical across kernel restarts;
# without it each run bootstraps/splits differently.
regr = RandomForestRegressor(n_estimators=20, random_state=0)
regr.fit(df_X, y)

In [7]:
# Feature names are taken from the *un-encoded* frame, so 'weathersit'
# appears once here rather than as its one-hot columns.
features = [
    name
    for name in df_original.columns
    if name not in ('label', 'prediction')
]

# Every unordered pair of distinct features.
pairs = [*combinations(features, 2)]

In [8]:
# For each original categorical feature, a list of
# (one-hot column name, value label) tuples.
# NOTE(review): the second element is presumably the human-readable name
# shown for that category — confirm against the pdpexplorer docs.
one_hot_features = {
    'weathersit': list(zip(
        ('weathersit_1', 'weathersit_2', 'weathersit_3'),
        ('clear', 'mist', 'rain'),
    ))
}

In [9]:
# Per-feature lookup from the integer code stored in the data to a text label.
# NOTE(review): weekday codes 0 and 6 both map to 'S'; confirm that duplicate
# labels are acceptable to whatever consumes this mapping.
feature_value_mappings = {
    # Codes 1..4 -> season name.
    'season': dict(enumerate(('winter', 'spring', 'summer', 'fall'), start=1)),
    # Codes 0..6 -> one-letter day abbreviation.
    'weekday': dict(enumerate('SMTWRFS')),
}

In [10]:
# Compute the partial-dependence data that is later handed to the widget.
# The model is evaluated on a random subset of the feature matrix:
#   * n is capped at len(df_X) so the cell does not raise ValueError when the
#     frame has fewer than 1000 rows (sampling is without replacement);
#   * random_state is fixed so the same rows are drawn on every run.
# NOTE(review): `resolution` and `n_jobs` are passed through unchanged; see the
# pdpexplorer documentation for their exact meaning.
pd_data = partial_dependence(
    predict=regr.predict,
    df=df_X.sample(n=min(1000, len(df_X)), random_state=0),
    features=features,
    one_hot_features=one_hot_features,
    feature_value_mappings=feature_value_mappings,
    resolution=20,
    n_jobs=4,
)

Calculating 11 one-way PDPs


100%|██████████████████████████████████████████| 11/11 [00:02<00:00,  4.58PDP/s]


Clustering one-way PDPs
Calculating 25 two-way PDPs


100%|██████████████████████████████████████████| 25/25 [00:03<00:00,  7.60PDP/s]


In [11]:
# Build the explorer widget from the model's predict function and the
# precomputed partial-dependence data.
w = pdpexplorer.PDPExplorerWidget(predict=regr.predict, pd_data=pd_data, height=650)

# Bare last expression so the notebook renders the widget as the cell output.
w

PDPExplorerWidget(dataset={'season': [3, 3, 4, 3, 3, 4, 3, 4, 3, 2, 3, 2, 4, 3, 4, 4, 4, 3, 3, 3, 4, 4, 2, 2, …