In [4]:
!pip install --upgrade tables
!pip install eli5
!pip install xgboost

Requirement already up-to-date: tables in /usr/local/lib/python3.6/dist-packages (3.6.1)


In [0]:
import pandas as pd
import numpy as np
from sklearn.dummy import DummyRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error as mae
from sklearn.model_selection import cross_val_score, KFold
import eli5
from eli5.sklearn import PermutationImportance
import xgboost as xgb


In [11]:
cd "/content/drive/My Drive/Colab Notebooks/matrix/matrix_two"

/content/drive/My Drive/Colab Notebooks/matrix/matrix_two


In [56]:
df =pd.read_hdf('data/car.h5')
df.shape
df = df[df['price_currency'] != 'EUR']
df.shape

(106290, 155)

In [0]:
SUFIX_CAT = '__cat'
for feat in df.columns:
  if isinstance(df[feat][0], list): continue
  factorized_valus = df[feat].factorize()[0]
  if SUFIX_CAT in feat:
    df[feat] = factorized_valus
  else:
    df[feat + SUFIX_CAT] = factorized_valus

In [58]:
cat_feats = [x for x in df.columns if SUFIX_CAT in x]
cat_feats = [x for x in cat_feats if 'price' not in x]
len(cat_feats)

151

In [59]:
X = df[cat_feats].values
Y = df['price_value'].values

model = DecisionTreeRegressor(max_depth = 5)
scores = cross_val_score(model, X, Y, cv =3, 
                         scoring = 'neg_mean_absolute_error' )
np.mean(scores), np.std(scores)

(-19566.588937368328, 90.61814865165907)

In [0]:
def run_model(model, feats):
  X = df[feats].values
  y = df['price_value'].values

  #model = DecisionTreeRegressor(max_depth = 5)
  scores = cross_val_score(model, X, y, cv =3, 
                          scoring = 'neg_mean_absolute_error' )
  return np.mean(scores), np.std(scores)

In [26]:
#Decision Tree
run_model(DecisionTreeRegressor(max_depth = 5), cat_feats)

(-19566.588937368324, 90.6181486516617)

In [27]:
#Random Forest
model = RandomForestRegressor(max_depth= 5, n_estimators= 50, 
                              random_state=0)
run_model(model, cat_feats)



(-18734.2072708522, 109.87074106274046)

In [31]:
#XGBoost
xgb_params= {
    'max_depth' : 5, 
    'n_estimators':50, 
    'lerning_rate' :0.1,
    'seed' : 0
}
run_model(xgb.XGBRegressor(**xgb_params), feats)




(-13039.290196724838, 109.36715375706265)

In [32]:
m = xgb.XGBRegressor(max_depth=5, n_estimators=50, 
                     learning_rate=0.1, seed=0)
m.fit(X,Y)

imp = PermutationImportance(m, random_state=0).fit(X,Y)
eli5.show_weights(imp, feature_names = cat_feats)



Weight,Feature
0.1209  ± 0.0019,param_napęd__cat
0.1175  ± 0.0030,param_rok-produkcji__cat
0.1113  ± 0.0013,param_stan__cat
0.0625  ± 0.0019,param_skrzynia-biegów__cat
0.0527  ± 0.0016,param_faktura-vat__cat
0.0461  ± 0.0015,param_moc__cat
0.0275  ± 0.0008,param_marka-pojazdu__cat
0.0230  ± 0.0004,param_typ__cat
0.0227  ± 0.0007,feature_kamera-cofania__cat
0.0191  ± 0.0007,param_pojemność-skokowa__cat


In [60]:
feats = ['param_napęd__cat','param_rok-produkcji__cat','param_stan__cat','param_skrzynia-biegów__cat','param_faktura-vat__cat','param_moc__cat','param_marka-pojazdu__cat','param_typ__cat','feature_kamera-cofania__cat','param_pojemność-skokowa__cat','seller_name__cat','param_kod-silnika__cat','param_model-pojazdu__cat','feature_wspomaganie-kierownicy__cat','param_wersja__cat','feature_czujniki-parkowania-przednie__cat','feature_asystent-pasa-ruchu__cat','feature_regulowane-zawieszenie__cat','feature_system-start-stop__cat','feature_światła-led__cat']
run_model(xgb.XGBRegressor(**xgb_params), feats)




(-13039.290196724838, 109.36715375706265)

In [61]:
df['param_rok-produkcji'].unique()

array(['2018', '2011', '2015', '2009', '2017', '2012', '2013', '2007',
       '2001', '2016', '2006', '2008', '2004', '1999', '2000', '2010',
       '2005', '2002', '1998', '2014', '2003', '1982', '1995', '1997',
       '1992', '1993', '1994', '1996', '1989', '1988', '1967', '1987',
       '1959', '1990', '1991', '1974', None, '1975', '1973', '1985',
       '1984', '1986', '1981', '1979', '1960', '1983', '1978', '1964',
       '1980', '1972', '1969', '1956', '1966', '1977', '1971', '1963',
       '1953', '1961', '1952', '1949', '1976', '1965', '1937', '1968',
       '1958', '1962', '1955', '1970', '1933', '1929', '1957', '1944',
       '1954', '1932', '1936', '1947', '1948'], dtype=object)

In [71]:
df['param_rok-produkcji'] = df['param_rok-produkcji'].map(lambda x: -1 if str(x) == 'None' else int(x))

feats_22 = ['param_napęd__cat','param_rok-produkcji','param_stan__cat','param_skrzynia-biegów__cat','param_faktura-vat__cat','param_moc__cat','param_marka-pojazdu__cat','param_typ__cat','feature_kamera-cofania__cat','param_pojemność-skokowa__cat','seller_name__cat','param_kod-silnika__cat','param_model-pojazdu__cat','feature_wspomaganie-kierownicy__cat','param_wersja__cat','feature_czujniki-parkowania-przednie__cat','feature_asystent-pasa-ruchu__cat','feature_regulowane-zawieszenie__cat','feature_system-start-stop__cat','feature_światła-led__cat']
run_model(xgb.XGBRegressor(**xgb_params), feats_22)



(-11197.83713694348, 98.22041147876314)

In [72]:
df['param_rok-produkcji'].unique()

array([2018, 2011, 2015, 2009, 2017, 2012, 2013, 2007, 2001, 2016, 2006,
       2008, 2004, 1999, 2000, 2010, 2005, 2002, 1998, 2014, 2003, 1982,
       1995, 1997, 1992, 1993, 1994, 1996, 1989, 1988, 1967, 1987, 1959,
       1990, 1991, 1974,   -1, 1975, 1973, 1985, 1984, 1986, 1981, 1979,
       1960, 1983, 1978, 1964, 1980, 1972, 1969, 1956, 1966, 1977, 1971,
       1963, 1953, 1961, 1952, 1949, 1976, 1965, 1937, 1968, 1958, 1962,
       1955, 1970, 1933, 1929, 1957, 1944, 1954, 1932, 1936, 1947, 1948])

In [74]:
df['param_rok-produkcji'] = df['param_rok-produkcji'].map(lambda x: -1 if str(x) == 'None' else int(x))

df['param_moc'] = df['param_moc'].map(lambda x: -1 if str(x) == 'None' else  int(x.split(' ')[0]) )
df['param_moc'].unique()


array([ 90, 115, 262, 110, 310, 105, 140, 175, 125, 185, 190, 440, 141,
       200, 224,  75,  99, 184, 109, 233, 116,  68, 286, 126, 160, 135,
       120, 272,  -1, 150, 180, 136, 102, 131, 218, 245, 170, 112, 250,
       252,  73, 100, 313, 101, 285,  70, 383, 174, 277, 132, 130, 215,
        60, 330, 163, 177,  98,  78, 189, 156, 143,  69, 113,  65, 122,
        82, 251,  95, 197, 235, 238, 171, 381, 400, 178,  80, 165,  85,
       258, 142, 204, 124,  55, 144, 231, 248, 152, 181, 210, 340, 129,
       147,  50,  54, 290, 306, 193,  77, 164,  96, 194, 111, 166, 206,
       118, 360, 211, 271, 455, 280, 106, 114, 421,  74, 213, 121, 275,
       435, 384, 326,  88, 220, 260,  64,  86, 128, 256, 240, 244, 162,
       237, 350,  35, 265, 202, 133,  83, 117, 146,  92, 192, 145, 525,
       254, 182, 328, 367, 148, 456,  97, 270, 107, 108, 203, 155,  94,
        93, 241,  20,  71, 173,  58, 205, 236,   1, 557,  84, 457,  72,
       295, 134, 425, 228,  81, 230, 201,  87, 234, 299, 585, 20

In [76]:
feats3 = ['param_napęd__cat','param_rok-produkcji','param_stan__cat','param_skrzynia-biegów__cat','param_faktura-vat__cat','param_moc','param_marka-pojazdu__cat','param_typ__cat','feature_kamera-cofania__cat','param_pojemność-skokowa__cat','seller_name__cat','param_kod-silnika__cat','param_model-pojazdu__cat','feature_wspomaganie-kierownicy__cat','param_wersja__cat','feature_czujniki-parkowania-przednie__cat','feature_asystent-pasa-ruchu__cat','feature_regulowane-zawieszenie__cat','feature_system-start-stop__cat','feature_światła-led__cat']
run_model(xgb.XGBRegressor(**xgb_params), feats3)



(-9602.94111071797, 57.96672683246094)

In [0]:
df['param_pojemność-skokowa'] = df['param_pojemność-skokowa'].map(lambda x: -1 if str(x) == 'None' else  int(x.split('cm')[0].replace(" ", '')) )
#df['param_pojemność-skokowa'].unique()

In [80]:
feats4 = ['param_napęd__cat','param_rok-produkcji','param_stan__cat','param_skrzynia-biegów__cat','param_faktura-vat__cat','param_moc','param_marka-pojazdu__cat','param_typ__cat','feature_kamera-cofania__cat','param_pojemność-skokowa','seller_name__cat','param_kod-silnika__cat','param_model-pojazdu__cat','feature_wspomaganie-kierownicy__cat','param_wersja__cat','feature_czujniki-parkowania-przednie__cat','feature_asystent-pasa-ruchu__cat','feature_regulowane-zawieszenie__cat','feature_system-start-stop__cat','feature_światła-led__cat']
run_model(xgb.XGBRegressor(**xgb_params), feats4)



(-9449.513980284812, 81.47168211987172)