In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from itertools import combinations

from pdpexplorer import partial_dependence, PDPExplorerWidget

In [None]:
# Location of the hourly bike-share CSV (a GitHub gist).
# NOTE(review): the long hex segment after 'raw/' looks like a pinned gist
# revision, which would keep the downloaded contents stable -- confirm.
dataset_url = (
    'https://gist.githubusercontent.com/DanielKerrigan/'
    'f324b392dc9a58d8bd8f8d79e1101a12/raw/'
    'c3b4760c9facfac26bcab2cd7465c4cab88ef304/bike-hour.csv'
)

In [None]:
# Download the CSV into a DataFrame and discard the 'yr' column.
df_original = (
    pd.read_csv(dataset_url)
    .drop('yr', axis='columns')
)

In [None]:
# Preview the first five rows (bare last expression -> rich table display).
df_original.head(n=5)

In [None]:
# One-hot encode 'weathersit'. The generated columns are named
# 'weathersit_<value>' and take the place of the original column; these are
# the column names that `one_hot_features` refers to later on.
df_one_hot = pd.get_dummies(
    data=df_original,
    columns=['weathersit'],
    prefix=['weathersit'],
    prefix_sep='_',
)

In [None]:
# Model inputs: every encoded column except the target 'cnt'.
df_X = df_one_hot.drop('cnt', axis='columns')

In [None]:
# Regression target: the 'cnt' column as a NumPy array.
y = df_original.loc[:, 'cnt'].to_numpy()

In [None]:
# Fit a 20-tree random forest on the one-hot encoded features.
# random_state is fixed so that the bootstrap samples and split choices --
# and therefore the predictions fed into the partial-dependence computation
# below -- are identical on every Restart & Run All.
regr = RandomForestRegressor(n_estimators=20, random_state=0)
# Bare last expression: fit() returns the estimator, so the cell displays it.
regr.fit(df_X, y)

In [None]:
# Names of the pre-encoding columns, excluding the target 'cnt'.
# Iterating a DataFrame yields its column labels in order.
features = [name for name in df_original if name != 'cnt']

In [None]:
# Maps the original feature name to its one-hot columns, each paired with a
# human-readable label: ('weathersit_1', 'clear') ... ('weathersit_4', 'storm').
one_hot_features = {
    'weathersit': [
        (f'weathersit_{code}', label)
        for code, label in enumerate(('clear', 'mist', 'rain', 'storm'), start=1)
    ]
}

In [None]:
# Display labels for integer-coded features, keyed by feature name and then
# by encoded value.
feature_value_mappings = {
    # Season codes start at 1.
    'season': dict(enumerate(('winter', 'spring', 'summer', 'fall'), start=1)),
    # Weekday codes 0..6 -> one-letter labels ('R' stands in for the second
    # 'T' day).
    # NOTE(review): codes 0 and 6 both map to 'S', so the two days are
    # indistinguishable by label -- confirm this is intended.
    'weekday': dict(enumerate('SMTWRFS')),
}

In [None]:
# Work on a random sample of at most 1000 rows to keep the partial-dependence
# computation cheap.
# - random_state makes the sample (and every plot derived from it) identical
#   across runs.
# - min(...) avoids the ValueError that DataFrame.sample raises when asked
#   for more rows than exist (sampling is without replacement by default).
subset = df_X.sample(n=min(1000, len(df_X)), random_state=0)

In [None]:
# Compute the partial-dependence data for the sampled rows using the fitted
# forest's predict function.
pd_data = partial_dependence(
    # Data and model.
    df=subset,
    predict=regr.predict,
    # Features are given by their pre-encoding names; the two mappings tell
    # the library how 'weathersit' is one-hot encoded and how to label coded
    # values.
    features=features,
    feature_value_mappings=feature_value_mappings,
    one_hot_features=one_hot_features,
    # NOTE(review): presumably 4 parallel workers and 20 grid points per
    # feature -- confirm against the pdpexplorer documentation.
    n_jobs=4,
    resolution=20,
)

In [None]:
# Build the interactive explorer from the precomputed partial-dependence data,
# the same sampled rows, and the model's predict function.
w = PDPExplorerWidget(
    pd_data=pd_data,
    df=subset,
    predict=regr.predict,
    height=650,  # NOTE(review): presumably pixels -- confirm
)

# Bare last expression so the notebook renders the widget.
w