In [2]:
from cesnet_datazoo.datasets import CESNET_QUIC22
from cesnet_datazoo.config import DatasetConfig, AppSelection, ValidationApproach

dataset = CESNET_QUIC22("~/datasets/CESNET-QUIC22/", size="XS")

common_params = {
    "dataset": dataset,
    "apps_selection": AppSelection.ALL_KNOWN,
    "train_period_name": "W-2022-44",
    "val_approach": ValidationApproach.SPLIT_FROM_TRAIN,
    "train_val_split_fraction": 0.2,
    "use_packet_histograms": True,
}
dataset_config = DatasetConfig(**common_params)
dataset.set_dataset_config_and_initialize(dataset_config)
train_dataframe = dataset.get_train_df(flatten_ppi=True)
val_dataframe = dataset.get_val_df(flatten_ppi=True)
test_dataframe = dataset.get_test_df(flatten_ppi=True)

Loading data from dataloader


100%|██████████| 8162/8162 [00:08<00:00, 979.74it/s] 


Loading data from dataloader


100%|██████████| 192/192 [00:04<00:00, 47.79it/s]


Loading data from dataloader


100%|██████████| 1247/1247 [00:08<00:00, 151.52it/s]


In [3]:
for i in train_dataframe.loc[0].items():
    print(i)

('IPT_1', 0.0)
('IPT_2', 3.0)
('IPT_3', 0.0)
('IPT_4', 0.0)
('IPT_5', 111.0)
('IPT_6', 0.0)
('IPT_7', 0.0)
('IPT_8', 5.0)
('IPT_9', 1.0)
('IPT_10', 0.0)
('IPT_11', 0.0)
('IPT_12', 1.0)
('IPT_13', 0.0)
('IPT_14', 5.0)
('IPT_15', 1.0)
('IPT_16', 3.0)
('IPT_17', 1575.0)
('IPT_18', 1.0)
('IPT_19', 0.0)
('IPT_20', 26.0)
('IPT_21', 13.0)
('IPT_22', 29.0)
('IPT_23', 28.0)
('IPT_24', 1.0)
('IPT_25', 15.0)
('IPT_26', 4593.0)
('IPT_27', 1.0)
('IPT_28', 70.0)
('IPT_29', 0.0)
('IPT_30', 0.0)
('DIR_1', 1.0)
('DIR_2', -1.0)
('DIR_3', -1.0)
('DIR_4', -1.0)
('DIR_5', -1.0)
('DIR_6', -1.0)
('DIR_7', -1.0)
('DIR_8', 1.0)
('DIR_9', 1.0)
('DIR_10', -1.0)
('DIR_11', -1.0)
('DIR_12', 1.0)
('DIR_13', -1.0)
('DIR_14', 1.0)
('DIR_15', 1.0)
('DIR_16', 1.0)
('DIR_17', 1.0)
('DIR_18', 1.0)
('DIR_19', -1.0)
('DIR_20', 1.0)
('DIR_21', -1.0)
('DIR_22', -1.0)
('DIR_23', 1.0)
('DIR_24', 1.0)
('DIR_25', -1.0)
('DIR_26', 1.0)
('DIR_27', -1.0)
('DIR_28', -1.0)
('DIR_29', -1.0)
('DIR_30', -1.0)
('SIZE_1', 1232.0)
('SIZE_2

In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score

from dataclasses import dataclass
import random

from pyqlearning.q_learning import QLearning

import numpy as np

@dataclass
class State_key:
    X_used : np.ndarray
    y_used : np.ndarray

    def __hash__(self):
        X_bytes = self.X_used.tobytes()
        y_bytes = self.y_used.tobytes()

        return hash((X_bytes, y_bytes))
    
    def __eq__(self, other):
        if not isinstance(other, State_key) or self.__hash__() != other.__hash__():
            return False
        return np.array_equal(self.X_used, other.X_used) and np.array_equal(self.y_used, other.y_used)

X = train_dataframe.drop(columns="APP").to_numpy()[:1000]
y = train_dataframe["APP"].to_numpy()[:1000]

X_test = test_dataframe.drop(columns="APP").to_numpy()[:100]
y_test = test_dataframe["APP"].to_numpy()[:100]

class Q(QLearning):
    # last_acc = 0
    # i_not_used = set()
    # X_used = np.ndarray()
    # y_used = np.ndarray()
    # clf = RandomForestClassifier()

    def update_state(self, state_key, action_key):
        return state_key

    def initialize(self, len, cols):
        self.X_used = np.ndarray(shape = (1, cols))
        self.y_used = np.ndarray(shape = (1, cols))
        self.last_acc = 0
        self.clf = RandomForestClassifier()
        self.i_not_used = set(range(len))

    def extract_possible_actions(self, state_key):
        return list(self.i_not_used)
    
    def select_action(self, state_key, next_action_list):
        epsilon_greedy_flag = bool(np.random.binomial(n=1, p=self.epsilon_greedy_rate))

        if epsilon_greedy_flag is False:
            action_key = random.choice(next_action_list)
        else:
            action_key = self.predict_next_action(state_key, next_action_list)

        self.i_not_used.remove(action_key)

        return action_key

    def train_clf(self, state_key):
        if self.t == 1:
            self.clf.fit(state_key.X_used[self.t - 1].reshape(1, -1), 
                        state_key.y_used[self.t - 1].reshape(1, -1))
        else:
            self.clf.fit(state_key.X_used[:(self.t - 1)], 
                        state_key.y_used[:(self.t - 1)])

    def test_acc(self, state_key):
        self.train_clf(state_key)

        predict_arr = self.clf.predict(X_test)

        return f1_score(y_test, predict_arr, average='weighted')

    def observe_reward_value(self, state_key, action_key):
        state_key.X_used[self.t - 1] = X[action_key]
        state_key.y_used[self.t - 1] = y[action_key]

        cur_acc = self.test_acc(state_key)
        reward = cur_acc - self.last_acc
        self.last_acc = cur_acc

        return reward

q = Q()
q.epsilon_greedy_rate = 0.1
q.alpha_value = 0.9
q.gamma_value = 0.9

print(y.shape)

iters = 100

for i in range(200):
    print(f"iter {i}\n")
    q.initialize(len(X), X.shape[1])
    state_key = State_key(np.ndarray(shape = (iters, X.shape[1])), 
                          np.ndarray(shape = (iters,)))
    
    q.learn(state_key, iters)
    predict_arr = q.clf.predict(X_test)
    print(f"q_learning_acc: {accuracy_score(y_test, predict_arr):.4f}")

clf = RandomForestClassifier()
clf.fit(X, y)
predict_arr = clf.predict(X_test)
print(f"base_model_acc: {accuracy_score(y_test, predict_arr):.4f}")


(1000,)
iter 0

q_learning_acc: 0.0600
iter 1

q_learning_acc: 0.0500
iter 2

q_learning_acc: 0.0000
iter 3

q_learning_acc: 0.0300
iter 4



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 5

q_learning_acc: 0.0500
iter 6

q_learning_acc: 0.0000
iter 7

q_learning_acc: 0.0000
iter 8

q_learning_acc: 0.0200
iter 9



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 10

q_learning_acc: 0.1000
iter 11

q_learning_acc: 0.0000
iter 12

q_learning_acc: 0.0100
iter 13



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0900
iter 14

q_learning_acc: 0.0400
iter 15

q_learning_acc: 0.0100
iter 16

q_learning_acc: 0.0000
iter 17



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 18

q_learning_acc: 0.0000
iter 19

q_learning_acc: 0.0500
iter 20

q_learning_acc: 0.0200
iter 21



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 22

q_learning_acc: 0.0300
iter 23

q_learning_acc: 0.0100
iter 24

q_learning_acc: 0.0400
iter 25



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 26

q_learning_acc: 0.0300
iter 27

q_learning_acc: 0.0600
iter 28



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 29

q_learning_acc: 0.0100
iter 30

q_learning_acc: 0.0400
iter 31



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0000
iter 32

q_learning_acc: 0.0100
iter 33

q_learning_acc: 0.0900
iter 34



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.1000
iter 35

q_learning_acc: 0.0200
iter 36

q_learning_acc: 0.0300
iter 37



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 38

q_learning_acc: 0.0300
iter 39

q_learning_acc: 0.0200
iter 40



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.1000
iter 41

q_learning_acc: 0.0400
iter 42



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 43

q_learning_acc: 0.0200
iter 44



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 45

q_learning_acc: 0.0100
iter 46



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 47

q_learning_acc: 0.0400
iter 48



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 49

q_learning_acc: 0.0500
iter 50



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 51

q_learning_acc: 0.0600
iter 52



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 53

q_learning_acc: 0.0100
iter 54



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0100
iter 55

q_learning_acc: 0.0200
iter 56



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 57

q_learning_acc: 0.0000
iter 58



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 59

q_learning_acc: 0.0300
iter 60



  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 61



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 62



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 63



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 64



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0000
iter 65



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 66



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0100
iter 67



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0900
iter 68



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 69



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0000
iter 70



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0000
iter 71



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 72



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0100
iter 73



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 74



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0100
iter 75



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 76



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0900
iter 77



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0000
iter 78



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0000
iter 79



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 80



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 81



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0700
iter 82



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 83



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 84



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 85



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 86



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 87



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0900
iter 88



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 89



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 90



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 91



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 92



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 93



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 94



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0100
iter 95



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0100
iter 96



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0700
iter 97



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 98



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 99



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 100



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 101



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0800
iter 102



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 103



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0800
iter 104



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 105



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 106



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0900
iter 107



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 108



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 109



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 110



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 111



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0700
iter 112



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 113



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0900
iter 114



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 115



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 116



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0100
iter 117



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0800
iter 118



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 119



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 120



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 121



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 122



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0100
iter 123



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 124



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0700
iter 125



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0100
iter 126



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 127



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 128



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0900
iter 129



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 130



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0900
iter 131



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 132



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0800
iter 133



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 134



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 135



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 136



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0800
iter 137



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 138



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 139



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 140



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.1100
iter 141



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 142



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 143



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 144



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 145



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 146



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 147



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 148



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0000
iter 149



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 150



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 151



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0100
iter 152



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0800
iter 153



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.1000
iter 154



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 155



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 156



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 157



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0900
iter 158



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 159



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 160



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 161



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 162



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0800
iter 163



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 164



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0100
iter 165



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 166



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.1300
iter 167



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 168



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 169



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 170



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 171



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 172



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 173



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0700
iter 174



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0800
iter 175



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 176



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.1100
iter 177



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 178



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 179



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0700
iter 180



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.1100
iter 181



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 182



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 183



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0900
iter 184



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 185



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0400
iter 186



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 187



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 188



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0900
iter 189



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 190



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0300
iter 191



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0700
iter 192



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0200
iter 193



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 194



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 195



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0800
iter 196



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0500
iter 197



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.0600
iter 198



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.2900
iter 199



  return fit_method(estimator, *args, **kwargs)


q_learning_acc: 0.2700
base_model_acc: 0.4900
