In [2]:
from cesnet_datazoo.datasets import CESNET_QUIC22
from cesnet_datazoo.config import DatasetConfig, AppSelection, ValidationApproach

dataset = CESNET_QUIC22("~/datasets/CESNET-QUIC22/", size="XS")

common_params = {
    "dataset": dataset,
    "apps_selection": AppSelection.ALL_KNOWN,
    "train_period_name": "W-2022-44",
    "val_approach": ValidationApproach.SPLIT_FROM_TRAIN,
    "train_val_split_fraction": 0.2,
    "use_packet_histograms": True,
}
dataset_config = DatasetConfig(**common_params)
dataset.set_dataset_config_and_initialize(dataset_config)
train_dataframe = dataset.get_train_df(flatten_ppi=True)
val_dataframe = dataset.get_val_df(flatten_ppi=True)
test_dataframe = dataset.get_test_df(flatten_ppi=True)

Loading data from dataloader


100%|██████████| 8162/8162 [00:08<00:00, 941.92it/s] 


Loading data from dataloader


100%|██████████| 192/192 [00:03<00:00, 49.67it/s]


Loading data from dataloader


100%|██████████| 1247/1247 [00:08<00:00, 145.83it/s]


In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score

from dataclasses import dataclass
import random

from pyqlearning.q_learning import QLearning

import numpy as np

X = train_dataframe.drop(columns="APP").to_numpy()[:1000]
y = train_dataframe["APP"].to_numpy()[:1000]

X_test = test_dataframe.drop(columns="APP").to_numpy()[:100]
y_test = test_dataframe["APP"].to_numpy()[:100]

X_final_test = test_dataframe.drop(columns="APP").to_numpy()[:10000]
y_final_test = test_dataframe["APP"].to_numpy()[:10000]

class Q(QLearning):
    # last_acc = 0
    # i_not_used = set()
    # X_used = np.ndarray()
    # y_used = np.ndarray()
    # clf = RandomForestClassifier()

    def update_state(self, state_key, action_key):
        return 0

    def initialize(self, len, iters, cols):
        self.X_used = np.ndarray(shape = (iters, cols))
        self.y_used = np.ndarray(shape = (iters,))
        self.last_acc = 0
        self.clf = RandomForestClassifier()
        self.i_not_used = set(range(len))

    def extract_possible_actions(self, state_key):
        return list(self.i_not_used)
    
    def select_action(self, state_key, next_action_list):
        epsilon_greedy_flag = bool(np.random.binomial(n=1, p=self.epsilon_greedy_rate))

        if epsilon_greedy_flag is False:
            action_key = random.choice(next_action_list)
        else:
            action_key = self.predict_next_action(state_key, next_action_list)

        self.i_not_used.remove(action_key)

        return int(action_key)

    def train_clf(self):
        if self.t == 1:
            self.clf.fit(self.X_used[self.t - 1].reshape(1, -1), 
                         self.y_used[self.t - 1].ravel())
        else:
            self.clf.fit(self.X_used[:(self.t - 1)], 
                         self.y_used[:(self.t - 1)])

    def test_acc(self):
        self.train_clf()

        predict_arr = self.clf.predict(X_test)

        return f1_score(y_test, predict_arr, average='weighted')

    def observe_reward_value(self, state_key, action_key):
        self.X_used[self.t - 1] = X[action_key]
        self.y_used[self.t - 1] = y[action_key]

        cur_acc = self.test_acc()
        reward = cur_acc - self.last_acc
        self.last_acc = cur_acc

        self.save_r_df(state_key, reward)

        return reward

q = Q()
q.epsilon_greedy_rate = 0.1
q.alpha_value = 0.9
q.gamma_value = 0.9

iters = 100

for i in range(40):
    print(f"iter {i}\n")
    q.initialize(len(X), iters, X.shape[1])
    state_key = 0
    
    if i == 39:
        q.epsilon_gredy_rate = 0
        
    q.learn(state_key, iters)
    predict_arr = q.clf.predict(X_final_test)
    print(f"q_learning_acc: {accuracy_score(y_final_test, predict_arr):.4f}")

clf = RandomForestClassifier()
clf.fit(X, y)
predict_arr = clf.predict(X_final_test)
print(f"base_model_acc: {accuracy_score(y_final_test, predict_arr):.4f}")


iter 0

q_learning_acc: 0.3174
iter 1

q_learning_acc: 0.3445
iter 2

q_learning_acc: 0.3535
iter 3

q_learning_acc: 0.3387
iter 4

q_learning_acc: 0.3206
iter 5

q_learning_acc: 0.3401
iter 6

q_learning_acc: 0.2544
iter 7

q_learning_acc: 0.3156
iter 8

q_learning_acc: 0.3581
iter 9

q_learning_acc: 0.3569
iter 10

q_learning_acc: 0.3678
iter 11

q_learning_acc: 0.3527
iter 12

q_learning_acc: 0.3698
iter 13

q_learning_acc: 0.3466
iter 14

q_learning_acc: 0.3499
iter 15

q_learning_acc: 0.3441
iter 16

q_learning_acc: 0.3669
iter 17

q_learning_acc: 0.3557
iter 18

q_learning_acc: 0.3649
iter 19

q_learning_acc: 0.3636
iter 20

q_learning_acc: 0.3684
iter 21

q_learning_acc: 0.3342
iter 22

q_learning_acc: 0.3514
iter 23

q_learning_acc: 0.3190
iter 24

q_learning_acc: 0.3384
iter 25

q_learning_acc: 0.3511
iter 26

q_learning_acc: 0.3532
iter 27

q_learning_acc: 0.3397
iter 28

q_learning_acc: 0.3581
iter 29

q_learning_acc: 0.3530
iter 30

q_learning_acc: 0.3835
iter 31

q_learnin

In [6]:
q_df = q.q_df
q_df = q_df.sort_values(by=["q_value"], ascending=False)
print(q_df)

    state_key  action_key   q_value
0          39         112  0.229820
0           3         210  0.223521
0         199         236  0.221046
0          25         712  0.220169
0         808         612  0.219948
..        ...         ...       ...
0         671         583 -0.070342
0         811         222 -0.070377
0         691         817 -0.072917
0         893         501 -0.074189
0         753         948 -0.075150

[18835 rows x 3 columns]
