In [0]:
!pip install --upgrade tables
!pip install eli5

In [0]:
import numpy as np
import pandas as pd

from sklearn.dummy import DummyRegressor
from sklearn.tree import DecisionTreeRegressor

from sklearn.metrics import mean_absolute_error as mae
from sklearn.model_selection import cross_val_score

import eli5
from eli5.sklearn import PermutationImportance

In [0]:
cd /content/drive/My Drive/Colab Notebooks/matrix/matrix_two/dw_matrix_car

## Wczytywanie danych

In [0]:
# Read the dataset stored in data/car.h5 (path is relative to the current working directory).
df = pd.read_hdf('data/car.h5')
# Display (number of rows, number of columns) of the loaded frame.
df.shape

In [0]:
# Display all column names of the loaded frame.
df.columns

## Dummy model

In [0]:
# Display only the names of the numeric-dtype columns.
df.select_dtypes(np.number).columns

In [0]:
# Baseline: fit a DummyRegressor on a single column and report its MAE
# on the very same rows it was fitted on (no train/test split here).
feats = ['car_id']
X, y = df[feats].values, df['price_value'].values

# fit() returns the estimator itself, so construction and fitting can be chained.
model = DummyRegressor().fit(X, y)

y_pred = model.predict(X)
mae(y, y_pred)

In [0]:
# Display every column whose name contains the substring 'price'.
[col for col in df.columns if 'price' in col]

In [0]:
# Count how many rows there are for each distinct value of 'price_currency'.
df['price_currency'].value_counts()

In [0]:
# Keep only the rows whose price is not quoted in EUR.
# .copy() makes the result an independent frame: new columns are assigned to `df` further
# down, and writing into a filtered slice of the original frame can raise
# SettingWithCopyWarning (and, depending on the pandas version, behave ambiguously).
df = df[df['price_currency'] != 'EUR'].copy()
df.shape

## Features

In [0]:
SUFFIX_CAT = '__cat'

# Integer-encode each column with factorize(). Columns whose name already contains the
# suffix are overwritten in place; every other column gets a new '<name>__cat' companion.
for feat in df.columns:
  # Columns holding lists are skipped (only the first row is probed to detect them).
  # .iloc[0] is positional: a label lookup `[0]` would raise KeyError whenever the row
  # labelled 0 has been removed by earlier filtering of `df`.
  if isinstance(df[feat].iloc[0], list): continue

  factorized_values = df[feat].factorize()[0]
  if SUFFIX_CAT in feat:
    df[feat] = factorized_values
  else:
    df[feat + SUFFIX_CAT] = factorized_values

In [0]:
# Model inputs: every column whose name contains the category suffix,
# excluding anything with 'price' in its name (those are derived from the target).
cat_feats = [
  name for name in df.columns
  if SUFFIX_CAT in name and 'price' not in name
]
len(cat_feats)

In [0]:
X = df[ cat_feats ].values
y  = df['price_value'].values

# random_state is fixed so repeated runs give the same trees: scikit-learn permutes the
# features at every split, so equally good splits may otherwise be chosen differently per run.
model = DecisionTreeRegressor(max_depth=5, random_state=0)
# 3-fold cross-validation; the scorer returns *negative* MAE, so values closer to 0 are better.
scores = cross_val_score(model, X, y, cv=3, scoring='neg_mean_absolute_error')
np.mean(scores)

In [0]:
# Fit one tree on all rows; random_state makes the fitted tree (and therefore the
# importances computed from it) reproducible across runs, matching the seed used below.
m = DecisionTreeRegressor(max_depth=5, random_state=0)
m.fit(X, y)

# Permutation importance is computed here on the same data the tree was trained on.
imp = PermutationImportance(m, random_state=0).fit(X, y)
eli5.show_weights(imp, feature_names=cat_feats)

In [0]:
def group_and_barplot(feat_groupby, feat_agg='price_value', agg_funcs=[np.mean, np.median, np.size], feat_sort='mean',top=50, subplots=True):
  """Bar-plot aggregates of one column, grouped by another column of the global ``df``.

  Args:
    feat_groupby: column (or columns) of ``df`` to group by.
    feat_agg: column whose values are aggregated within each group.
    agg_funcs: aggregation functions passed to ``.agg``; each yields one result column.
    feat_sort: name of the aggregated column used for descending sorting.
    top: number of leading groups (after sorting) to plot.
    subplots: forwarded to ``DataFrame.plot``; draws one panel per aggregate when true.

  Returns:
    Whatever ``DataFrame.plot(kind='bar', ...)`` returns for the aggregated table.
  """
  per_group = df.groupby(feat_groupby)[feat_agg]
  stats = per_group.agg(agg_funcs)
  ranked = stats.sort_values(by=feat_sort, ascending=False)
  leaders = ranked.head(top)
  return leaders.plot(kind='bar', figsize=(15, 5), subplots=subplots)

# Plot the default aggregates of 'price_value' for each value of the 'param_napęd' column
# ("napęd" is Polish for "drive"; presumably the drivetrain type, TODO confirm).
# The trailing ';' suppresses the textual repr of the returned plot object.
group_and_barplot('param_napęd');
