# Bike Rentals

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from itertools import combinations

import pdpexplorer
from pdpexplorer.pdp import partial_dependence

In [2]:
# Read the bike-sharing dataset from the working directory and discard the 'yr' column.
df_original = (
    pd.read_csv('bike-sharing.csv')
    .drop('yr', axis='columns')
)

In [3]:
# Categorical columns to expand into one indicator column per category value.
categories = ['weathersit']

# One-hot encode only the listed columns of the loaded frame.
df_one_hot = pd.get_dummies(data=df_original, columns=categories)

In [4]:
# Map each one-hot-encoded feature to its indicator columns, as
# (column_name, category_value) pairs.
#
# pd.get_dummies names indicator columns '<feature>_<value>' (default separator '_'),
# so match on the full '<feature>_' prefix and slice it off:
#   - a bare startswith(feature) would also pick up unrelated columns whose names
#     merely begin with the same characters;
#   - split('_')[-1] would truncate category values that themselves contain '_'.
feature_to_one_hot = {
    original_feat: [
        (feat, feat[len(original_feat) + 1:])
        for feat in df_one_hot.columns
        if feat.startswith(original_feat + '_')
    ]
    for original_feat in categories
}
feature_to_one_hot

{'weathersit': [('weathersit_1', '1'),
  ('weathersit_2', '2'),
  ('weathersit_3', '3')]}

In [5]:
# Model inputs: the one-hot frame without the 'label' and 'prediction' columns.
df_X = df_one_hot.drop(['label', 'prediction'], axis='columns')

In [6]:
# Regression target: the 'label' column as a NumPy array.
y = df_original.loc[:, 'label'].to_numpy()

In [7]:
# Fit a 20-tree random forest on the one-hot features.
# random_state is fixed because forest training is randomized: without a seed,
# every Restart & Run All trains a different model, so the partial dependence
# values derived from regr.predict would not be reproducible.
regr = RandomForestRegressor(n_estimators=20, random_state=0)
regr.fit(df_X, y)

In [8]:
# Features to explain: every original (pre-one-hot) column except 'label' and 'prediction'.
features = [
    col
    for col in df_original.columns
    if col not in ('label', 'prediction')
]

# Every unordered pair of those features.
pairs = [*combinations(features, 2)]

In [9]:
# Precompute partial dependence data for every single feature and every feature pair,
# using the fitted forest's predict function on the one-hot frame.
pd_data = partial_dependence(
    # model and data
    predict=regr.predict,
    df=df_X,
    one_hot_features=feature_to_one_hot,
    # which plots to compute
    one_way_features=features,
    two_way_feature_pairs=pairs,
    # computation settings (presumably sample size and grid size; confirm in the pdpexplorer docs)
    resolution=20,
    n_instances=1000,
    n_jobs=1,
)

In [10]:
# Build the explorer widget from the precomputed partial dependence data,
# passing the same predict function, frame and one-hot mapping.
w = pdpexplorer.PDPExplorerWidget(
    pd_data=pd_data,
    predict=regr.predict,
    df=df_X,
    one_hot_features=feature_to_one_hot,
    height=650,
    n_jobs=8,
)

# Last expression of the cell, so the widget is displayed.
w

PDPExplorerWidget(double_pdps=[{'num_features': 2, 'kind': 'quantitative', 'id': 'hr_season', 'x_feature': 'hr…

In [27]:
# Take the first single-feature PDP entry whose id is 'mnth'
# (raises IndexError if the widget holds no such entry).
mnth = [
    pdp
    for pdp in w.single_pdps
    if pdp['id'] == 'mnth'
][0]
mnth

{'num_features': 1,
 'kind': 'quantitative',
 'id': 'mnth',
 'x_feature': 'mnth',
 'x_values': [5, 6, 7, 8, 9, 10, 11, 12],
 'mean_predictions': [255.64538333333334,
  258.94533333333334,
  261.6419833333333,
  267.57809999999995,
  272.763,
  272.74665000000005,
  268.27225,
  266.3776],
 'mean_predictions_centered': [-9.850904166666652,
  -6.550954166666656,
  -3.8543041666666795,
  2.0818124999999554,
  7.266712499999983,
  7.250362500000051,
  2.7759624999999915,
  0.8813124999999786],
 'pdp_min': 255.64538333333334,
 'pdp_max': 272.763,
 'ice': {'ice_min': 2.05,
  'ice_max': 937.55,
  'centered_ice_min': -190.60000000000002,
  'centered_ice_max': 375.99999999999994,
  'mean_min': 255.01698428619193,
  'mean_max': 832.3000000000001,
  'centered_mean_min': 0.0,
  'centered_mean_max': 367.81666666666666,
  'p10_min': -16.490000000000002,
  'p90_max': 373.71,
  'clusters': [{'id': 0,
    'ice_lines': [[460.5,
      462.65,
      462.65,
      462.65,
      458.15,
      458.15,
      

In [30]:
# Collect the 'rules' entry of every ICE cluster in the 'mnth' PDP.
[
    ice_cluster['rules']
    for ice_cluster in mnth['ice']['clusters']
]

[[{'conditions': [],
   'num_instances': 1000.0,
   'num_correct': 997.0,
   'accuracy': 0.997}],
 []]