In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from monomoy import compute_widget_data, ExampleWidget

In [2]:
# Location of the hourly bike-sharing CSV, hosted as a raw GitHub gist.
# NOTE(review): the path contains a revision hash, which presumably pins the
# file contents to one version of the gist — confirm.
dataset_url = (
    'https://gist.githubusercontent.com/DanielKerrigan/'
    'f324b392dc9a58d8bd8f8d79e1101a12/raw/'
    'c3b4760c9facfac26bcab2cd7465c4cab88ef304/bike-hour.csv'
)

In [3]:
# Download the CSV and discard the 'yr' column before any further processing.
# NOTE(review): 'yr' is presumably redundant with 'days_since_2011' — confirm.
df_original = (
    pd.read_csv(dataset_url)
    .drop('yr', axis='columns')
)

In [4]:
# Show the loaded frame as the cell output to check its columns and row count.
df_original

Unnamed: 0,days_since_2011,season,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,cnt
0,0,1,1,0,0,6,0,1,0.24,0.2879,0.81,0.0000,16
1,0,1,1,1,0,6,0,1,0.22,0.2727,0.80,0.0000,40
2,0,1,1,2,0,6,0,1,0.22,0.2727,0.80,0.0000,32
3,0,1,1,3,0,6,0,1,0.24,0.2879,0.75,0.0000,13
4,0,1,1,4,0,6,0,1,0.24,0.2879,0.75,0.0000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17374,730,1,12,19,0,1,1,2,0.26,0.2576,0.60,0.1642,119
17375,730,1,12,20,0,1,1,2,0.26,0.2576,0.60,0.1642,89
17376,730,1,12,21,0,1,1,1,0.26,0.2576,0.60,0.1642,90
17377,730,1,12,22,0,1,1,1,0.26,0.2727,0.56,0.1343,61


In [5]:
# One-hot encode the categorical weather-situation code. All other columns
# pass through unchanged; the new columns are named 'weathersit_<code>'.
df_one_hot = pd.get_dummies(
    data=df_original,
    columns=['weathersit'],
)

In [6]:
# Peek at the first five rows to verify the new 'weathersit_*' columns.
df_one_hot.head(n=5)

Unnamed: 0,days_since_2011,season,mnth,hr,holiday,weekday,workingday,temp,atemp,hum,windspeed,cnt,weathersit_1,weathersit_2,weathersit_3,weathersit_4
0,0,1,1,0,0,6,0,0.24,0.2879,0.81,0.0,16,True,False,False,False
1,0,1,1,1,0,6,0,0.22,0.2727,0.8,0.0,40,True,False,False,False
2,0,1,1,2,0,6,0,0.22,0.2727,0.8,0.0,32,True,False,False,False
3,0,1,1,3,0,6,0,0.24,0.2879,0.75,0.0,13,True,False,False,False
4,0,1,1,4,0,6,0,0.24,0.2879,0.75,0.0,1,True,False,False,False


In [7]:
# Model inputs: every encoded column except the prediction target 'cnt'.
df_X = df_one_hot.drop('cnt', axis='columns')

In [8]:
# Prediction target: the 'cnt' column as a NumPy array, in the same row order
# as the frame it was taken from.
y = df_original.loc[:, 'cnt'].to_numpy()

In [9]:
# Fit a small random forest on the complete data set (no train/test split);
# its `predict` method is what the widget-data computation and the widget
# itself receive further down.
# A fixed random_state makes the fitted trees — and therefore every
# downstream prediction and plot — identical across Restart & Run All.
regr = RandomForestRegressor(n_estimators=20, random_state=0)
regr.fit(df_X, y)

In [10]:
# Feature names come from the pre-encoding frame, so 'weathersit' is listed
# once rather than as its four dummy columns; the target 'cnt' is excluded.
features = df_original.columns[df_original.columns != 'cnt'].tolist()

In [11]:
# For each one-hot encoded feature: a list of (dummy column name, display
# label) pairs. The dummy columns are 'weathersit_1' .. 'weathersit_4', in
# the same order as the labels below.
one_hot_features = {
    'weathersit': [
        (f'weathersit_{code}', label)
        for code, label in enumerate(('clear', 'mist', 'rain', 'storm'), start=1)
    ]
}

In [12]:
# Display labels for integer-coded features, keyed by feature name and then
# by the raw code stored in the data.
feature_value_mappings = {
    # Season codes start at 1.
    'season': dict(enumerate(('winter', 'spring', 'summer', 'fall'), start=1)),
    # Weekday codes start at 0; each is shown as a single letter, so codes 0
    # and 6 both map to 'S'.
    'weekday': dict(enumerate('SMTWRFS')),
}

In [13]:
# Work with a random sample of 1000 rows rather than the full frame.
# The sample is seeded so that the same rows are drawn on every run and the
# widget output is reproducible.
subset = df_X.sample(n=1000, random_state=0)

In [14]:
# Ground-truth targets for the sampled rows. `y` is a NumPy array aligned
# positionally with `df_X`, while `subset.index` holds index *labels*, so
# translate labels to row positions first. With the default 0..n-1 index the
# result is unchanged, but this stays correct if the frame is ever filtered,
# re-sorted or re-indexed upstream.
labels = y[df_X.index.get_indexer(subset.index)]

In [15]:
# Precompute the data the widget consumes. On the recorded run this call
# reported "Calculating 12 one-way PDPs", which matches the 12 names in
# `features`.
data = compute_widget_data(
    predict=regr.predict,
    df=subset,  # sampled rows in the one-hot encoded layout the model was fit on
    features=features,  # names from the pre-encoding frame ('weathersit' listed once)
    one_hot_features=one_hot_features,
    feature_value_mappings=feature_value_mappings,
    resolution=20,  # NOTE(review): presumably grid points evaluated per feature — confirm
    n_jobs=1  # NOTE(review): presumably the number of parallel workers — confirm
)

Calculating 12 one-way PDPs


100%|███████████████████████████████████████████| 12/12 [00:00<00:00, 17.62it/s]


In [21]:
# Build the widget from the same model, the same sampled rows and the
# precomputed `data`.
w = ExampleWidget(
    predict=regr.predict,
    df=subset,
    labels=labels,  # target values selected for the rows in `subset`
    data=data,
    height=650  # NOTE(review): presumably the rendered height in pixels — confirm
)

# A bare name as the cell's last expression makes the notebook display it.
w

ExampleWidget(dataset={'days_since_2011': [255, 454, 79, 133, 54, 399, 588, 496, 378, 331, 465, 194, 701, 265,…