In [2]:
from cesnet_datazoo.datasets import CESNET_QUIC22
from cesnet_datazoo.config import DatasetConfig, AppSelection, ValidationApproach

dataset = CESNET_QUIC22("~/datasets/CESNET-QUIC22/", size="XS")

common_params = {
    "dataset": dataset,
    "apps_selection": AppSelection.ALL_KNOWN,
    "train_period_name": "W-2022-44",
    "val_approach": ValidationApproach.SPLIT_FROM_TRAIN,
    "train_val_split_fraction": 0.2,
    "use_packet_histograms": True,
}
dataset_config = DatasetConfig(**common_params)
dataset.set_dataset_config_and_initialize(dataset_config)
train_dataframe = dataset.get_train_df(flatten_ppi=True)
val_dataframe = dataset.get_val_df(flatten_ppi=True)
test_dataframe = dataset.get_test_df(flatten_ppi=True)

Loading data from dataloader


100%|██████████| 8162/8162 [00:08<00:00, 979.38it/s] 


Loading data from dataloader


100%|██████████| 192/192 [00:03<00:00, 48.67it/s]


Loading data from dataloader


100%|██████████| 1247/1247 [00:08<00:00, 145.86it/s]


In [32]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score

from dataclasses import dataclass
import random

from pyqlearning.qlearning.boltzmann_q_learning import BoltzmannQLearning

import numpy as np

from copy import copy, deepcopy

@dataclass
class State_key:
    used : np.array

    def __hash__(self):
        return hash(self.used.tobytes())
    
    def __eq__(self, other):
        if not isinstance(other, State_key) or self.__hash__() != other.__hash__():
            return False
        return np.array_equal(self.used, other.used)

X = train_dataframe.drop(columns="APP").to_numpy()[:1000]
y = train_dataframe["APP"].to_numpy()[:1000]

X_test = test_dataframe.drop(columns="APP").to_numpy()[:100]
y_test = test_dataframe["APP"].to_numpy()[:100]

class Q(BoltzmannQLearning):
    def get_index(self):
        return (self.t - 1) % self.iters

    def update_state(self, state_key, action_key):
        if self.t % self.iters == 0 and self.total_iters > self.t:
            self.initialize(self.len, X.shape[1], self.iters, self.total_iters)
            return State_key(np.zeros(self.iters))
        
        self.i_not_used.remove(action_key)
        new_state = deepcopy(state_key)
        new_state.used[self.get_index()] = action_key
        return new_state

    def initialize(self, len, cols, iters, total_iters):
        self.total_iters = total_iters
        self.iters = iters
        self.len = len
        self.X_used = np.ndarray(shape = (iters, cols))
        self.y_used = np.ndarray(shape = (iters,))
        self.last_acc = 0
        self.i_not_used = set(range(len))

    def extract_possible_actions(self, state_key):
        return list(self.i_not_used)

    def train_clf(self, clf):
        index = self.get_index()

        if index == 0:
            clf.fit(self.X_used[index].reshape(1, -1), 
                    self.y_used[index].ravel())
        else:
            clf.fit(self.X_used[:index], 
                    self.y_used[:index])

    def test_acc(self):
        clf = RandomForestClassifier()
        self.train_clf(clf)

        predict_arr = clf.predict(X_test)

        return accuracy_score(y_test, predict_arr)

    def observe_reward_value(self, state_key, action_key):
        index = self.get_index()
        self.X_used[index] = X[action_key]
        self.y_used[index] = y[action_key]

        cur_acc = self.test_acc()
        reward = cur_acc - self.last_acc
        self.last_acc = cur_acc

        self.save_r_df(state_key, reward)

        return reward
    
    def update_q(self, state_key, action_key, reward_value, next_max_q):
        '''
        Update Q-Value.

        Args:
            state_key:              The key of state.
            action_key:             The key of action.
            reward_value:           R-Value(Reward).
            next_max_q:             Maximum Q-Value.

        '''

        print(self.q_df)
        # Now Q-Value.
        q = self.extract_q_df(state_key, action_key)
        # Update Q-Value.
        new_q = q + self.alpha_value * (reward_value + (self.gamma_value * next_max_q) - q)
        # Save updated Q-Value.
        self.save_q_df(state_key, action_key, new_q)

q = Q()
q.epsilon_greedy_rate = 0.75
q.alpha_value = 0.9
q.gamma_value = 0.9

iters = 100
total_iters = 100*10

clf = RandomForestClassifier()
clf.fit(X, y)
predict_arr = clf.predict(X_test)
print(f"base_model_acc: {accuracy_score(y_test, predict_arr):.4f}")

clf = RandomForestClassifier()
clf.fit(X[:iters], y[:iters])
predict_arr = clf.predict(X_test)
print(f"10%_model_acc: {accuracy_score(y_test, predict_arr):.4f}")

# for i in range(100):
    # print(f"iter {i}\n")
q.initialize(len(X), X.shape[1], iters, total_iters)
state_key = State_key(np.zeros(iters))

q.learn(state_key, total_iters)
clf = RandomForestClassifier()
clf.fit(q.X_used, q.y_used)
predict_arr = clf.predict(X_test)
print(f"q_learning_acc: {accuracy_score(y_test, predict_arr):.4f}")

    # for i in np.sort(state_key.used):
    #     print(i)

q_df = q.q_df
q_df = q_df.sort_values(by=["q_value"], ascending=False)
print(q_df.head())



base_model_acc: 0.4900
10%_model_acc: 0.3700
                                           state_key  action_key  q_value
0  State_key(used=array([441.,   0.,   0.,   0., ...         603      0.0
                                           state_key  action_key  q_value
0  State_key(used=array([441., 603.,   0.,   0., ...         365    0.000
0  State_key(used=array([0., 0., 0., 0., 0., 0., ...         441    0.018
0  State_key(used=array([441.,   0.,   0.,   0., ...         603    0.000
                                           state_key  action_key  q_value
0  State_key(used=array([441., 603., 365.,   0., ...         291    0.000
0  State_key(used=array([441.,   0.,   0.,   0., ...         603    0.000
0  State_key(used=array([441., 603.,   0.,   0., ...         365    0.000
0  State_key(used=array([0., 0., 0., 0., 0., 0., ...         441    0.018
                                           state_key  action_key  q_value
0  State_key(used=array([441., 603., 365., 291., ...         778   

In [28]:
print(q_df.shape)

for index, row in q_df.iterrows():
    print(f"Row {index}: {row.to_dict()}")


(101, 3)
Row 0: {'state_key': State_key(used=array([318., 143., 554., 497., 535., 346., 189., 628., 111., 702.,  75.,
       348., 253., 270., 372., 252., 505., 387., 838., 859., 280., 551.,
       587., 399., 971., 539., 147., 873., 755., 841., 443., 555., 475.,
       343., 138., 849.,  23., 260., 108., 100., 256., 467., 953., 723.,
       338., 213., 136., 911., 619., 515., 517., 623., 489., 567., 606.,
       626., 910., 673., 153., 351., 782., 869., 843., 503., 970., 590.,
       174., 596., 390., 622., 494., 621., 376., 788., 616., 638., 137.,
       255., 413., 965., 230., 502., 811., 631., 386., 444., 272., 226.,
       110., 999., 834., 435., 700.,  68., 976., 269., 942.,   0.,   0.,
         0.])), 'action_key': 947, 'q_value': 0.11412359844205036}
Row 0: {'state_key': State_key(used=array([318., 143., 554., 497., 535., 346., 189., 628., 111., 702.,  75.,
       348., 253., 270., 372., 252., 505., 387., 838., 859., 280., 551.,
       587., 399., 971., 539., 147., 873., 755., 