In [3]:
from cesnet_datazoo.datasets import CESNET_QUIC22
from cesnet_datazoo.config import DatasetConfig, AppSelection, ValidationApproach

dataset = CESNET_QUIC22("~/datasets/CESNET-QUIC22/", size="XS")

common_params = {
    "dataset": dataset,
    "apps_selection": AppSelection.ALL_KNOWN,
    "train_period_name": "W-2022-44",
    "val_approach": ValidationApproach.SPLIT_FROM_TRAIN,
    "train_val_split_fraction": 0.2,
    "use_packet_histograms": True,
}
dataset_config = DatasetConfig(**common_params)
dataset.set_dataset_config_and_initialize(dataset_config)
train_dataframe = dataset.get_train_df(flatten_ppi=True)
val_dataframe = dataset.get_val_df(flatten_ppi=True)
test_dataframe = dataset.get_test_df(flatten_ppi=True)

Loading data from dataloader


100%|██████████| 8162/8162 [00:08<00:00, 910.60it/s] 


Loading data from dataloader


100%|██████████| 192/192 [00:03<00:00, 48.73it/s]


Loading data from dataloader


100%|██████████| 1247/1247 [00:09<00:00, 135.30it/s]


In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score

from dataclasses import dataclass
import random

from pyqlearning.q_learning import QLearning

import numpy as np

from copy import deepcopy

@dataclass
class State_key:
    used : np.array

    def __hash__(self):
        return hash(self.used.tobytes())
    
    def __eq__(self, other):
        if not isinstance(other, State_key) or self.__hash__() != other.__hash__():
            return False
        return np.array_equal(self.used, other.used)

X = train_dataframe.drop(columns="APP").to_numpy()[:10000]
y = train_dataframe["APP"].to_numpy()[:10000]

X_test = test_dataframe.drop(columns="APP").to_numpy()[:1000]
y_test = test_dataframe["APP"].to_numpy()[:1000]

class Q(QLearning):
    def update_state(self, state_key, action_key):
        new_state = deepcopy(state_key)
        new_state.used[self.t - 1] = action_key
        return new_state

    def initialize(self, len, cols, iters, batch):
        self.batch = batch
        self.X_used = np.ndarray(shape = (iters*batch, cols))
        self.y_used = np.ndarray(shape = (iters*batch,))
        self.last_acc = 0
        self.i_not_used = set(range(len))

    def extract_possible_actions(self, state_key):
        return list(self.i_not_used)
    
    def select_action(self, state_key, next_action_list):
        epsilon_greedy_flag = bool(np.random.binomial(n=1, p=self.epsilon_greedy_rate))

        if epsilon_greedy_flag is False:
            action_key = random.choice(next_action_list)
        else:
            action_key = self.predict_next_action(state_key, next_action_list)

        return action_key


    def train_clf(self, clf):
        if self.t == 1:
            clf.fit(self.X_used[self.t - 1].reshape(1, -1), 
                         self.y_used[self.t - 1].ravel())
        else:
            clf.fit(self.X_used[:(self.t - 1)], 
                         self.y_used[:(self.t - 1)])

    def test_acc(self, state_key):
        clf = RandomForestClassifier()
        self.train_clf(clf)

        predict_arr = clf.predict(X_test)

        return f1_score(y_test, predict_arr, average='weighted')

    def observe_reward_value(self, state_key, action_key):
        from_index = action_key*self.batch
        to_index = (self.t - 1)*self.batch

        self.X_used[to_index:to_index + self.batch] = X[from_index:from_index + self.batch]
        self.y_used[to_index:to_index + self.batch] = y[from_index:from_index + self.batch]

        cur_acc = self.test_acc(state_key)
        reward = cur_acc - self.last_acc
        self.last_acc = cur_acc

        self.save_r_df(state_key, reward)

        return reward

q = Q()
q.epsilon_greedy_rate = 0.9
q.alpha_value = 0.1
q.gamma_value = 0.9

iters = 100
batch = 10

clf = RandomForestClassifier()
clf.fit(X, y)
predict_arr = clf.predict(X_test)
print(f"base_model_acc: {accuracy_score(y_test, predict_arr):.4f}")

clf = RandomForestClassifier()
clf.fit(X[:1000], y[:1000])
predict_arr = clf.predict(X_test)
print(f"10%_model_acc: {accuracy_score(y_test, predict_arr):.4f}")

epochs = 100

for i in range(epochs):
    print(f"\niter {i}")
    q.initialize(int(len(X)/batch),  X.shape[1], iters, batch)
    state_key = State_key(np.zeros(iters))

    q.epsilon_greedy_rate = i / epochs
    
    q.learn(state_key, iters)
    clf = RandomForestClassifier()
    clf.fit(q.X_used, q.y_used)
    predict_arr = clf.predict(X_test)
    print(f"q_learning_acc: {accuracy_score(y_test, predict_arr):.4f}")

base_model_acc: 0.6910
10%_model_acc: 0.5490

iter 0
q_learning_acc: 0.5520

iter 1
q_learning_acc: 0.5320

iter 2
q_learning_acc: 0.5270

iter 3
q_learning_acc: 0.5400

iter 4
q_learning_acc: 0.5450

iter 5
q_learning_acc: 0.5610

iter 6
q_learning_acc: 0.5450

iter 7
q_learning_acc: 0.5480

iter 8
q_learning_acc: 0.5400

iter 9
q_learning_acc: 0.5340

iter 10
q_learning_acc: 0.5560

iter 11
q_learning_acc: 0.5300

iter 12
q_learning_acc: 0.5420

iter 13
q_learning_acc: 0.5330

iter 14
q_learning_acc: 0.5370

iter 15
q_learning_acc: 0.5390

iter 16
q_learning_acc: 0.5360

iter 17
q_learning_acc: 0.5590

iter 18
q_learning_acc: 0.5330

iter 19
q_learning_acc: 0.5470

iter 20
q_learning_acc: 0.5060

iter 21
q_learning_acc: 0.5710

iter 22
q_learning_acc: 0.5460

iter 23
q_learning_acc: 0.5410

iter 24
q_learning_acc: 0.5330

iter 25
q_learning_acc: 0.5470

iter 26
q_learning_acc: 0.5640

iter 27
q_learning_acc: 0.5640

iter 28
q_learning_acc: 0.5230

iter 29
q_learning_acc: 0.5150

iter