In [1]:
import numpy as np
import matplotlib.pyplot as plt
from env import *
from agent import *
from time import time
%matplotlib inline
import pandas as pd
from skmultiflow.data import FileStream
from skmultiflow.trees import HoeffdingTreeClassifier
from skmultiflow.evaluation import EvaluatePrequential, EvaluateHoldout

In [2]:
# Read every field of the nursery file as a string, then discard the rows whose
# label (last column) is 'recommend'.
data = np.genfromtxt('../data/nursery.data', dtype=str, delimiter=',')
data = data[data[:, -1] != 'recommend']

# Fixed seed so the in-place row shuffle (and hence the split) is reproducible.
np.random.seed(0)
np.random.shuffle(data)

# Small-sample experiment: train on the first 1000 shuffled rows and test on
# everything from row 10000 onwards; rows 1000-9999 are left unused.
# Slice the training part as data[:10000] instead to train on the full split.
data_train = data[:1000]
data_test = data[10000:]

In [3]:
# Number of distinct values observed in each feature column of the training
# split (the last column is the label and is excluded).
categories = [len(set(column)) for column in data_train[:, :-1].T]

# Every feature index is mapped to the same query reward of -0.5.
rewards_queries = {feature: -.5 for feature in range(len(categories))}

In [4]:
# Build the environment from the raw (string) training rows and the per-feature
# query rewards.
# NOTE(review): r_plus / r_minus are presumably the rewards for a correct and an
# incorrect final classification -- confirm against EnvironmentReal in env.py.
env = EnvironmentReal(
    data_train,
    rewards_queries,
    r_plus=5,
    r_minus=-10,
)

In [5]:
# Display the encoding tables held by the environment: one dict per column
# index, mapping each category string to an integer code. Index 8 is the label
# column (the last column of the data).
env.maps

{0: {'great_pret': 0, 'pretentious': 1, 'usual': 2},
 1: {'critical': 0,
  'improper': 1,
  'less_proper': 2,
  'proper': 3,
  'very_crit': 4},
 2: {'complete': 0, 'completed': 1, 'foster': 2, 'incomplete': 3},
 3: {'1': 0, '2': 1, '3': 2, 'more': 3},
 4: {'convenient': 0, 'critical': 1, 'less_conv': 2},
 5: {'convenient': 0, 'inconv': 1},
 6: {'nonprob': 0, 'problematic': 1, 'slightly_prob': 2},
 7: {'not_recom': 0, 'priority': 1, 'recommended': 2},
 8: {'not_recom': 0, 'priority': 1, 'spec_prior': 2, 'very_recom': 3}}

In [6]:
# Encode the training split into integer arrays for the Hoeffding tree.
#
# `env.preprocess` is called once per raw row; its first return value is stored
# as that row's feature vector and its second as the row's label.
# NOTE(review): the returned values are presumably the integer codes listed in
# `env.maps` -- confirm against EnvironmentReal.preprocess.
#
# The builtin `int` is used as dtype: `np.int` was only ever an alias for it,
# was deprecated in NumPy 1.20 and removed in 1.24, where it raises
# AttributeError.
data_x = np.empty_like(data_train[:, :-1], dtype=int)
data_y = np.empty_like(data_train[:, -1], dtype=int)
for i, row in enumerate(data_train):
    features, label = env.preprocess(row)
    data_x[i, :] = np.asarray(features, dtype=int)
    data_y[i] = int(label)
    

In [7]:
# Hoeffding tree with the library's default hyper-parameters.
# NOTE(review): no `nominal_attributes` are declared, so the tree will treat the
# encoded features as numeric (the rule dump further down shows threshold
# splits such as "Att (3) <= 1.730"). If the features are category codes,
# consider passing their indices as `nominal_attributes` -- TODO confirm.
ht = HoeffdingTreeClassifier()

In [8]:
# Fit the tree on the encoded training arrays in one batch call and report the
# wall-clock duration in seconds.
start_time = time()
ht.fit(data_x, data_y)
elapsed = time() - start_time
print(elapsed)

0.5132889747619629


In [9]:
# Encode the held-out split with the same environment used for training.
#
# NOTE: this rebinds `data_x` / `data_y`; from here on they hold the TEST
# features and labels, not the training ones.
#
# `env.preprocess` is called once per raw row; its first return value is stored
# as that row's feature vector and its second as the row's label.
#
# The builtin `int` is used as dtype: `np.int` was only ever an alias for it,
# was deprecated in NumPy 1.20 and removed in 1.24, where it raises
# AttributeError.
data_x = np.empty_like(data_test[:, :-1], dtype=int)
data_y = np.empty_like(data_test[:, -1], dtype=int)
for i, row in enumerate(data_test):
    features, label = env.preprocess(row)
    data_x[i, :] = np.asarray(features, dtype=int)
    data_y[i] = int(label)
    

In [10]:
# Predict a label for every encoded test row (`data_x` holds the test features
# at this point, since the preceding cell rebinds it from `data_test`).
predictions = ht.predict(data_x)

In [11]:
# Test accuracy: fraction of predictions that equal the encoded test labels.
(predictions == data_y).mean()

0.8404327248140635

In [24]:
# Print the learned rules one per line; displaying the bare string shows its
# repr, with every line break rendered as a literal "\n".
# NOTE(review): this cell's recorded execution count (24) is out of sequence,
# and the recorded thresholds (e.g. "Att (6) > 7.270") exceed the code ranges
# listed in `env.maps`, so the saved output may come from a different kernel
# state -- re-run with Restart & Run All to confirm.
print(ht.get_rules_description())

'Att (3) <= 1.730 and Att (3) <= 0.450 and Att (6) <= 7.270 and Att (2) <= 3.450 | class: 1\nAtt (3) <= 1.730 and Att (3) <= 0.450 and Att (6) <= 7.270 and Att (2) > 3.450 and Att (1) <= 3.270 and Att (6) <= 0.180 | class: 1\nAtt (3) <= 1.730 and Att (3) <= 0.450 and Att (6) <= 7.270 and Att (2) > 3.450 and Att (1) <= 3.270 and Att (6) > 0.180 | class: 0\nAtt (3) <= 1.730 and Att (3) <= 0.450 and Att (6) <= 7.270 and Att (2) > 3.450 and Att (1) > 3.270 | class: 0\nAtt (3) <= 1.730 and Att (3) <= 0.450 and Att (6) > 7.270 and Att (0) <= 2.360 | class: 0\nAtt (3) <= 1.730 and Att (3) <= 0.450 and Att (6) > 7.270 and Att (0) > 2.360 | class: 0\nAtt (3) <= 1.730 and Att (3) > 0.450 and Att (0) <= 1.090 and Att (7) <= 5.000 | class: 1\nAtt (3) <= 1.730 and Att (3) > 0.450 and Att (0) <= 1.090 and Att (7) > 5.000 | class: 1\nAtt (3) <= 1.730 and Att (3) > 0.450 and Att (0) > 1.090 and Att (0) <= 5.270 and Att (2) <= 5.180 | class: 1\nAtt (3) <= 1.730 and Att (3) > 0.450 and Att (0) > 1.090 a