In [2]:
from cesnet_datazoo.datasets import CESNET_QUIC22
from cesnet_datazoo.config import DatasetConfig, AppSelection, ValidationApproach

dataset = CESNET_QUIC22("~/datasets/CESNET-QUIC22/", size="XS")

common_params = {
    "dataset": dataset,
    "apps_selection": AppSelection.ALL_KNOWN,
    "train_period_name": "W-2022-44",
    "val_approach": ValidationApproach.SPLIT_FROM_TRAIN,
    "train_val_split_fraction": 0.2,
    "use_packet_histograms": True,
}
dataset_config = DatasetConfig(**common_params)
dataset.set_dataset_config_and_initialize(dataset_config)
train_dataframe = dataset.get_train_df(flatten_ppi=True)
val_dataframe = dataset.get_val_df(flatten_ppi=True)
test_dataframe = dataset.get_test_df(flatten_ppi=True)

Loading data from dataloader


100%|██████████| 8162/8162 [00:07<00:00, 1030.05it/s]


Loading data from dataloader


100%|██████████| 192/192 [00:03<00:00, 49.39it/s]


Loading data from dataloader


100%|██████████| 1247/1247 [00:07<00:00, 156.23it/s]


In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score

from dataclasses import dataclass
import random

from pyqlearning.qlearning.boltzmann_q_learning import BoltzmannQLearning

import numpy as np

from copy import copy, deepcopy

@dataclass
class State_key:
    used : np.array

    def __hash__(self):
        return hash(self.used.tobytes())
    
    def __eq__(self, other):
        if not isinstance(other, State_key) or self.__hash__() != other.__hash__():
            return False
        return np.array_equal(self.used, other.used)

X = train_dataframe.drop(columns="APP").to_numpy()[:10000]
y = train_dataframe["APP"].to_numpy()[:10000]

X_test = test_dataframe.drop(columns="APP").to_numpy()[:1000]
y_test = test_dataframe["APP"].to_numpy()[:1000]

class Q(BoltzmannQLearning):
    def update_state(self, state_key, action_key):
        state_key.used[self.t - 1] = action_key
        return deepcopy(state_key)

    def initialize(self, len, cols, iters):
        self.X_used = np.ndarray(shape = (iters, cols))
        self.y_used = np.ndarray(shape = (iters,))
        self.last_acc = 0
        self.i_not_used = set(range(len))

    def extract_possible_actions(self, state_key):
        return list(self.i_not_used)
    
    def select_action(self, state_key, next_action_list):
        if self.q_df is None or self.q_df.shape[0] == 0:
            action = random.choice(next_action_list)
            self.i_not_used.remove(action)
            return action

        next_action_b_df = self.__calculate_boltzmann_factor(state_key, next_action_list)

        if next_action_b_df.shape[0] == 1:
            action = next_action_b_df["action_key"].values[0]
            self.i_not_used.remove(action)
            return action
        elif next_action_b_df.shape[0] == 0:
            action = random.choice(next_action_list)
            self.i_not_used.remove(action)
            return action
        

        prob = np.random.random()
        next_action_b_df = next_action_b_df.sort_values(by=["boltzmann_factor"])

        i = 0
        while prob > next_action_b_df.iloc[i, :]["boltzmann_factor"] + next_action_b_df.iloc[i + 1, :]["boltzmann_factor"]:
            i += 1
            if i + 1 >= next_action_b_df.shape[0]:
                break

        max_b_action_key = next_action_b_df.iloc[i, :]["action_key"]

        if type(max_b_action_key) == np.int64:
            self.i_not_used.remove(max_b_action_key)
            return max_b_action_key
        else:
            action = random.choice(next_action_list)
            self.i_not_used.remove(action)
            return action


    def train_clf(self, clf):
        if self.t == 1:
            clf.fit(self.X_used[self.t - 1].reshape(1, -1), 
                    self.y_used[self.t - 1].ravel())
        else:
            clf.fit(self.X_used[:(self.t - 1)], 
                    self.y_used[:(self.t - 1)])

    def test_acc(self):
        clf = RandomForestClassifier()
        self.train_clf(clf)

        predict_arr = clf.predict(X_test)

        return accuracy_score(y_test, predict_arr)

    def observe_reward_value(self, state_key, action_key):
        self.X_used[self.t - 1] = X[action_key]
        self.y_used[self.t - 1] = y[action_key]

        cur_acc = self.test_acc()
        reward = cur_acc - self.last_acc
        self.last_acc = cur_acc

        self.save_r_df(state_key, reward)

        return reward
    
    
    def __calculate_sigmoid(self):
        sigmoid = 1 / np.log(self.t * self.time_rate + 1.1)
        return sigmoid

    def __calculate_boltzmann_factor(self, state_key, next_action_list):
        sigmoid = self.__calculate_sigmoid()
        q_df = self.q_df[self.q_df.state_key == state_key]
        q_df = q_df[q_df.isin(next_action_list)]
        q_df["boltzmann_factor"] = q_df["q_value"] / sigmoid
        q_df["boltzmann_factor"] = q_df["boltzmann_factor"].apply(np.exp)
        q_df["boltzmann_factor"] = q_df["boltzmann_factor"] / q_df["boltzmann_factor"].sum()
        
        return q_df

q = Q()
q.epsilon_greedy_rate = 0.1
q.alpha_value = 0.9
q.gamma_value = 0.9

iters = 1000

clf = RandomForestClassifier()
clf.fit(X, y)
predict_arr = clf.predict(X_test)
print(f"base_model_acc: {accuracy_score(y_test, predict_arr):.4f}")

clf = RandomForestClassifier()
clf.fit(X[:iters], y[:iters])
predict_arr = clf.predict(X_test)
print(f"10%_model_acc: {accuracy_score(y_test, predict_arr):.4f}")

for i in range(100):
    print(f"iter {i}\n")
    q.initialize(len(X), X.shape[1], iters)
    state_key = State_key(np.zeros(iters))
    
    q.learn(state_key, iters)
    clf = RandomForestClassifier()
    clf.fit(q.X_used, q.y_used)
    predict_arr = clf.predict(X_test)
    print(f"q_learning_acc: {accuracy_score(y_test, predict_arr):.4f}")

    # for i in np.sort(state_key.used):
    #     print(i)

q_df = q.q_df
q_df = q_df.sort_values(by=["q_value"], ascending=False)
print(q_df.head())



base_model_acc: 0.6910
10%_model_acc: 0.3620
iter 0



KeyboardInterrupt: 

In [None]:
for index, row in q_df.iterrows():
    print(f"Row {index}: {row.to_dict()}")


Row 0: {'state_key': State_key(used=array([216., 164., 953., 352., 311., 542., 347., 166., 716., 748., 346.,
        81.,  10., 564., 987.,  91., 851., 478.,  44.,  92., 859., 840.,
       461., 650.,   6., 625., 440., 621., 784., 148., 106., 299.,  64.,
       914., 411., 885., 750., 207., 415., 829., 990., 211., 246., 963.,
       443., 172., 267., 645., 657.,  46., 156., 974.,  50., 436., 345.,
       572., 927., 846., 301., 554.,  89., 335., 452., 222., 689., 563.,
       258.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.])), 'action_key': 258, 'q_value': 0.054}
Row 0: {'state_key': State_key(used=array([216.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,