In [1]:
from cesnet_datazoo.datasets import CESNET_QUIC22
from cesnet_datazoo.config import DatasetConfig, AppSelection, ValidationApproach

# Open the CESNET-QUIC22 dataset stored under the user's home directory, size="XS".
dataset = CESNET_QUIC22("~/datasets/CESNET-QUIC22/", size="XS")

# Settings forwarded unchanged to DatasetConfig as keyword arguments.
common_params = dict(
    dataset=dataset,
    apps_selection=AppSelection.ALL_KNOWN,
    train_period_name="W-2022-44",
    val_approach=ValidationApproach.SPLIT_FROM_TRAIN,
    train_val_split_fraction=0.2,
    use_packet_histograms=True,
)
dataset_config = DatasetConfig(**common_params)
dataset.set_dataset_config_and_initialize(dataset_config)

# Load the three splits as dataframes (flatten_ppi=True), in train -> val -> test order.
train_dataframe, val_dataframe, test_dataframe = [
    load_split(flatten_ppi=True)
    for load_split in (dataset.get_train_df, dataset.get_val_df, dataset.get_test_df)
]

Loading data from dataloader


100%|██████████| 8162/8162 [00:08<00:00, 973.58it/s] 


Loading data from dataloader


100%|██████████| 192/192 [00:03<00:00, 49.25it/s]


Loading data from dataloader


100%|██████████| 1247/1247 [00:08<00:00, 150.91it/s]


In [2]:
# Show every (column name, value) pair of the training row labelled 0, one tuple per line.
first_row = train_dataframe.loc[0]
for column_value_pair in first_row.items():
    print(column_value_pair)

('IPT_1', 0.0)
('IPT_2', 3.0)
('IPT_3', 0.0)
('IPT_4', 0.0)
('IPT_5', 111.0)
('IPT_6', 0.0)
('IPT_7', 0.0)
('IPT_8', 5.0)
('IPT_9', 1.0)
('IPT_10', 0.0)
('IPT_11', 0.0)
('IPT_12', 1.0)
('IPT_13', 0.0)
('IPT_14', 5.0)
('IPT_15', 1.0)
('IPT_16', 3.0)
('IPT_17', 1575.0)
('IPT_18', 1.0)
('IPT_19', 0.0)
('IPT_20', 26.0)
('IPT_21', 13.0)
('IPT_22', 29.0)
('IPT_23', 28.0)
('IPT_24', 1.0)
('IPT_25', 15.0)
('IPT_26', 4593.0)
('IPT_27', 1.0)
('IPT_28', 70.0)
('IPT_29', 0.0)
('IPT_30', 0.0)
('DIR_1', 1.0)
('DIR_2', -1.0)
('DIR_3', -1.0)
('DIR_4', -1.0)
('DIR_5', -1.0)
('DIR_6', -1.0)
('DIR_7', -1.0)
('DIR_8', 1.0)
('DIR_9', 1.0)
('DIR_10', -1.0)
('DIR_11', -1.0)
('DIR_12', 1.0)
('DIR_13', -1.0)
('DIR_14', 1.0)
('DIR_15', 1.0)
('DIR_16', 1.0)
('DIR_17', 1.0)
('DIR_18', 1.0)
('DIR_19', -1.0)
('DIR_20', 1.0)
('DIR_21', -1.0)
('DIR_22', -1.0)
('DIR_23', 1.0)
('DIR_24', 1.0)
('DIR_25', -1.0)
('DIR_26', 1.0)
('DIR_27', -1.0)
('DIR_28', -1.0)
('DIR_29', -1.0)
('DIR_30', -1.0)
('SIZE_1', 1232.0)
('SIZE_2

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score

from dataclasses import dataclass
import random

from pyqlearning.q_learning import QLearning

import numpy as np

from copy import deepcopy

@dataclass
class State_key:
    """Hashable wrapper around a NumPy array so it can serve as a state key.

    Hashing is based on the raw bytes of ``used``; equality additionally
    requires the other object to be a ``State_key`` with an element-wise
    equal array.
    """

    # Array carried by this key (the driver cell fills it with chosen action indices).
    used : np.array

    def __eq__(self, other):
        """Return True only for another ``State_key`` with the same hash and equal array."""
        same_kind = isinstance(other, State_key)
        # The hash comparison keeps equality consistent with __hash__ (byte-level identity).
        if same_kind and self.__hash__() == other.__hash__():
            return np.array_equal(self.used, other.used)
        return False

    def __hash__(self):
        """Hash the byte representation of the wrapped array."""
        raw_bytes = self.used.tobytes()
        return hash(raw_bytes)

# Candidate pool for the agent: features and "APP" labels of the first 1000 training rows.
train_features = train_dataframe.drop(columns="APP")
train_labels = train_dataframe["APP"]
X = train_features.to_numpy()[:1000]
y = train_labels.to_numpy()[:1000]

# Evaluation set used for rewards and reported scores: first 100 test rows.
test_features = test_dataframe.drop(columns="APP")
test_labels = test_dataframe["APP"]
X_test = test_features.to_numpy()[:100]
y_test = test_labels.to_numpy()[:100]

class Q(QLearning):
    """Q-learning agent that chooses which training rows to feed a classifier.

    An action is a row index into the module-level ``X`` / ``y`` arrays. Each
    chosen row is appended to ``X_used`` / ``y_used``; the reward is the change
    in weighted F1 (measured on ``X_test`` / ``y_test``) of a fresh
    ``RandomForestClassifier`` trained on the rows chosen so far.

    ``self.t``, ``self.epsilon_greedy_rate``, ``predict_next_action`` and
    ``save_r_df`` are not defined here; they come from the ``QLearning`` base.
    ``self.t`` is treated as a 1-based step counter throughout this class.
    """

    def update_state(self, state_key, action_key):
        """Return the successor state with ``action_key`` recorded at the current step.

        The incoming ``state_key`` is copied first and left untouched: mutating it
        would change the hash of an object the caller may still be using as a key
        for the state in which the action was taken.
        """
        next_state_key = deepcopy(state_key)
        next_state_key.used[self.t - 1] = action_key
        return next_state_key

    def initialize(self, len, cols, iters):
        """Reset the per-episode buffers.

        Args:
            len: number of candidate rows; actions are ``0 .. len - 1``.
                (The name shadows the builtin and is kept for compatibility.)
            cols: number of feature columns in each row.
            iters: buffer capacity, i.e. the maximum number of rows one episode
                can select.
        """
        # Zero-filled rather than uninitialised memory, so rows that are never
        # written cannot hold arbitrary (possibly NaN/inf) values.
        self.X_used = np.zeros(shape=(iters, cols))
        self.y_used = np.zeros(shape=(iters,))
        self.last_acc = 0
        self.i_not_used = set(range(len))

    def extract_possible_actions(self, state_key):
        """Return the row indices that have not been selected in this episode."""
        return list(self.i_not_used)

    def select_action(self, state_key, next_action_list):
        """Pick the next row index and mark it as used.

        Note the convention: ``epsilon_greedy_rate`` is the probability of taking
        the *greedy* action (``predict_next_action``); otherwise a uniformly
        random candidate is chosen.
        """
        take_greedy = bool(np.random.binomial(n=1, p=self.epsilon_greedy_rate))

        if take_greedy:
            action_key = self.predict_next_action(state_key, next_action_list)
        else:
            action_key = random.choice(next_action_list)

        # Each row may be selected at most once per episode.
        self.i_not_used.remove(action_key)

        return action_key

    def train_clf(self, clf):
        """Fit ``clf`` on every row selected so far in this episode.

        At step ``t`` rows ``0 .. t-1`` have been written by
        ``observe_reward_value``, so the slice ``[:t]`` includes the row that was
        just chosen (slicing to ``t - 1`` would leave it out of its own reward).
        """
        n_selected = self.t
        clf.fit(self.X_used[:n_selected], self.y_used[:n_selected])

    def test_acc(self, state_key):
        """Train a fresh random forest on the selected rows and score it.

        Returns the weighted F1 score on ``X_test`` / ``y_test`` (despite the
        method name, this is not plain accuracy). ``state_key`` is unused.
        """
        clf = RandomForestClassifier()
        self.train_clf(clf)

        predict_arr = clf.predict(X_test)

        return f1_score(y_test, predict_arr, average='weighted')

    def observe_reward_value(self, state_key, action_key):
        """Add the chosen row to the training subset and return the score delta.

        The reward is the weighted F1 after adding row ``action_key`` minus the
        score from the previous step (0 at the start of an episode). The reward
        is also recorded through ``save_r_df``.
        """
        slot = self.t - 1
        self.X_used[slot] = X[action_key]
        self.y_used[slot] = y[action_key]

        cur_acc = self.test_acc(state_key)
        reward = cur_acc - self.last_acc
        self.last_acc = cur_acc

        self.save_r_df(state_key, reward)

        return reward

q = Q()
# Q.select_action draws the greedy action with probability epsilon_greedy_rate,
# so 0.2 means 20% greedy / 80% random exploration during training episodes.
q.epsilon_greedy_rate = 0.2
# NOTE(review): presumably the learning rate and discount factor of the
# QLearning base class - confirm against the pyqlearning documentation.
q.alpha_value = 0.9
q.gamma_value = 0.9

# Rows selected per episode, and number of episodes to run.
iters = 100
n_episodes = 50

for i in range(n_episodes):
    print(f"\niter {i}")
    # Fresh selection buffers and an all-zero state for every episode.
    q.initialize(len(X), X.shape[1], iters)
    state_key = State_key(np.zeros(iters))

    if i == n_episodes - 1:
        # Final episode: always take the greedy action. With the convention used
        # in Q.select_action a rate of 0 would make every choice random instead.
        q.epsilon_greedy_rate = 1
    q.learn(state_key, iters)

    # Score a classifier trained only on the rows the agent selected this episode.
    clf = RandomForestClassifier()
    clf.fit(q.X_used, q.y_used)
    predict_arr = clf.predict(X_test)
    print(f"q_learning_acc: {accuracy_score(y_test, predict_arr):.4f}")

# Baseline: the same classifier trained on the whole candidate pool.
clf = RandomForestClassifier()
clf.fit(X, y)
predict_arr = clf.predict(X_test)
print(f"base_model_acc: {accuracy_score(y_test, predict_arr):.4f}")



iter 0
q_learning_acc: 0.2600

iter 1
q_learning_acc: 0.3700

iter 2
q_learning_acc: 0.3200

iter 3
q_learning_acc: 0.2400

iter 4
q_learning_acc: 0.2400

iter 5
q_learning_acc: 0.2900

iter 6
q_learning_acc: 0.3500

iter 7
q_learning_acc: 0.3000

iter 8
