# Decision Tree

In [1]:
from mllab.DecisionTree import DecisionTree

In [11]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import numpy.linalg as la
from pprint import pprint

# Car Recommendation

In [173]:
#shell scripts for downloading the data and placing it in a corresponding directory
!mkdir datasets/CAR 
!curl -o datasets/CAR/data "http://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data"
!curl -o datasets/CAR/description "http://archive.ics.uci.edu/ml/machine-learning-databases/car/car.names"
#download the description and display it here.
#!cat datasets/CAR/description

mkdir: datasets/CAR: File exists
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 51867  100 51867    0     0  79672      0 --:--:-- --:--:-- --:--:-- 79550
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  3097  100  3097    0     0   6943      0 --:--:-- --:--:-- --:--:--  6928


In [3]:
# csv-file has no header, so we define it manually
col_names = ['price_buy', 'price_main', 'n_doors', 'n_persons', 'lug_boot', 'safety', 'recommendation']
df = pd.read_csv("datasets/CAR/data", header=None, names=col_names)
# All attributes are categorical - a mix of strings and integers.
# We simply map the categorical values of each attribute to a set of distinct integers
ai2an_map = col_names
ai2aiv2aivn_map = []
enc_cols = []
for col in df.columns:
    df[col] = df[col].astype('category')
    a = np.array(df[col].cat.codes.values).reshape((-1,1))
    enc_cols.append(a)
    ai2aiv2aivn_map.append(list(df[col].cat.categories.values))  
# Get the data as numpy 2d-matrix (n_samples, n_features)
dataset = np.hstack(enc_cols)
X, y = dataset[:,:6], dataset[:,6]
print(X.shape, y.shape)

(1728, 6) (1728,)


In [4]:
dataset = pd.DataFrame(dataset)
dataset = dataset.rename({0:'price_buy', 1:'price_main', 2:'n_doors', 3:'n_persons', 4:'lug_boot', 5:'safety', 6:'recommentaion'}, axis=1)

In [5]:
dataset.shape

(1728, 7)

In [6]:
trainx = dataset[:1500].drop('recommentaion', axis=1)
trainy = dataset[:1500]['recommentaion']

testx = dataset[1500:].drop('recommentaion', axis=1)
testy = dataset[1500:]['recommentaion']

__Train Decision Tree__

In [7]:
dtree = DecisionTree()
dtree.train(trainx, trainy)

__Test on unseen data__

In [8]:
y_pred = dtree.infer(testx)

__Accuracy__

In [9]:
from sklearn import metrics
print('Accuracy: %2.2f %%' % (100. * metrics.accuracy_score(testy, y_pred)))

Accuracy: 73.68 %


__To view the learned tree__

In [10]:
dtree.tree

{'safety': {0: {'n_persons': {0.0: 2.0,
    1.0: {'price_buy': {0.0: {'price_main': {0.0: 0.0,
        1.0: 0.0,
        2.0: 0.0,
        3.0: 2.0}},
      1.0: {'price_main': {0.0: {'lug_boot': {0.0: 3.0,
          1.0: {'n_doors': {0.0: 0.0, 1.0: 0.0, 2.0: 3.0, 3.0: 3.0}},
          2.0: 0.0}},
        3.0: 0.0}},
      2.0: {'price_main': {0.0: 0.0,
        1.0: {'lug_boot': {0.0: 3.0,
          1.0: {'n_doors': {0.0: 1.0, 1.0: 1.0, 2.0: 3.0, 3.0: 3.0}},
          2.0: 1.0}},
        2.0: {'lug_boot': {0.0: 3.0,
          1.0: {'n_doors': {0.0: 0.0, 1.0: 0.0, 2.0: 3.0, 3.0: 3.0}},
          2.0: 0.0}},
        3.0: 0.0}},
      3.0: {'price_main': {0.0: 2.0, 1.0: 0.0, 2.0: 0.0, 3.0: 2.0}}}},
    2.0: {'price_buy': {0.0: {'price_main': {0.0: {'n_doors': {0.0: {'lug_boot': {0.0: 0.0,
            1.0: 0.0,
            2.0: 2.0}},
          1.0: 0.0,
          2.0: 0.0,
          3.0: 0.0}},
        1.0: {'n_doors': {0.0: {'lug_boot': {0.0: 0.0, 1.0: 0.0, 2.0: 2.0}},
          1.0: 0.0

In [12]:
pprint(dtree.tree)

{'safety': {0: {'n_persons': {0.0: 2.0,
                              1.0: {'price_buy': {0.0: {'price_main': {0.0: 0.0,
                                                                       1.0: 0.0,
                                                                       2.0: 0.0,
                                                                       3.0: 2.0}},
                                                  1.0: {'price_main': {0.0: {'lug_boot': {0.0: 3.0,
                                                                                          1.0: {'n_doors': {0.0: 0.0,
                                                                                                            1.0: 0.0,
                                                                                                            2.0: 3.0,
                                                                                                            3.0: 3.0}},
                                                            