In [2]:
from cesnet_datazoo.datasets import CESNET_QUIC22
from cesnet_datazoo.config import DatasetConfig, AppSelection, ValidationApproach

dataset = CESNET_QUIC22("~/datasets/CESNET-QUIC22/", size="XS")

common_params = {
    "dataset": dataset,
    "apps_selection": AppSelection.ALL_KNOWN,
    "train_period_name": "W-2022-44",
    "val_approach": ValidationApproach.SPLIT_FROM_TRAIN,
    "train_val_split_fraction": 0.2,
    "use_packet_histograms": True,
}
dataset_config = DatasetConfig(**common_params)
dataset.set_dataset_config_and_initialize(dataset_config)
train_dataframe = dataset.get_train_df(flatten_ppi=True)
val_dataframe = dataset.get_val_df(flatten_ppi=True)
test_dataframe = dataset.get_test_df(flatten_ppi=True)

Loading data from dataloader


100%|██████████| 8162/8162 [00:08<00:00, 997.03it/s] 


Loading data from dataloader


100%|██████████| 192/192 [00:03<00:00, 48.16it/s]


Loading data from dataloader


100%|██████████| 1247/1247 [00:08<00:00, 140.12it/s]


In [12]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score

from pyqlearning.q_learning import QLearning

from dataclasses import dataclass
from collections import defaultdict
import random

import numpy as np
import pandas as pd
from math import log

from tqdm import tqdm

X = train_dataframe.drop(columns="APP").to_numpy()
y = train_dataframe["APP"].to_numpy()

# X_test = test_dataframe.drop(columns="APP").to_numpy()[:10000]
# y_test = test_dataframe["APP"].to_numpy()[:10000]

# same amount of samples from all classes
def create_balanced_test_data():
    grouped = test_dataframe.groupby('APP')

    X_arr = np.ndarray(shape = (10100, X.shape[1]))
    y_arr = np.ndarray(shape = (10100,))

    for index, i in enumerate(grouped):
        X_temp = i[1].drop(columns="APP").to_numpy()
        y_temp = i[1]["APP"].to_numpy()

        X_arr[index*100:(index*100)+100] = X_temp[:100]
        y_arr[index*100:(index*100)+100] = y_temp[:100]

    return (X_arr, y_arr)


@dataclass
class State_key:
    percent_of_class : int  # 1e-2, 1e-1. 5e-1, 1, more than 1
    predict_proba    : int  # 0-25, 25-50, 50-75, 75-100
    correct_predict  : bool # True or False
    # bytes_client     : int  # log3, 9 buckets
    # bytes_server     : int  # log5, 7 buckets
    duration         : int

    def __hash__(self):
        return self.percent_of_class + \
               self.predict_proba * 5 + \
               int(self.correct_predict) * 20 + \
               self.duration * 40
            #    self.bytes_client * 40 + \
            #    self.bytes_server * 360 + \
               
    
    def __eq__(self, other):
        if not isinstance(other, State_key) or self.__hash__() != other.__hash__():
            return False
        
        return True
        # return self.percent_of_class == other.percent_of_class and \
        #        self.predict_proba == other.predict_proba and \
        #        self.percent_used == other.percent_used and \
        #        self.correct_predict == other.correct_predict
        
class Q(QLearning):
    def get_clf_prediction(self, index):

        proba = self.clf.predict_proba(X[index].reshape(1, -1))[0]
        hit = (self.clf.predict(X[index].reshape(1, -1)) == y[index])[0]

        return (proba, hit)

    def class_percent_into_discrete(self, percent):
        for i in range(len(self.CLASS_PERCENT_VALUES)):
            if percent < self.CLASS_PERCENT_VALUES[i]:
                return i
            
        return len(self.CLASS_PERCENT_VALUES)
    
    def predict_proba_into_discrete(self, proba):
        for i in range(len(self.PREDICT_PROBA_VALUES)):
            if proba < self.PREDICT_PROBA_VALUES[i]:
                return i
            
        return len(self.PREDICT_PROBA_VALUES)

    def percent_used_into_discrete(self, percent):
        for i, el in enumerate(self.PERCENT_USED_VALUES):
            if percent < el:
                return i
            
        return len(self.PERCENT_USED_VALUES)

    def client_bytes_into_discrete(self, nbytes):
        return int(np.clip(int(log(nbytes, 3)) - 5, 1, 9) - 1)
    
    def server_bytes_into_discrete(self, nbytes):
        return int(np.clip(int(log(nbytes, 5)) - 4, 0, 7))

    def duration_into_discrete(self, duration):
        for i, el in enumerate(self.DURATION_VALUES):
            if duration < el:
                return i
        
        return len(self.DURATION_VALUES)

    def update_state(self, state_key, action_key):
        # print(self.t, end = " ")
        # print(state_key, end = " ")
        # print(action_key)

        sample_index = self.base_i + self.t + 1

        next_class = y[sample_index]

        if action_key == 1:
            self.class_amount[self.y_used[self.to_i - 1]] += 1
            self.used += 1

        class_percent = self.class_amount[next_class] / (self.used + self.base_samples)
        
        (proba, hit) = self.get_clf_prediction(sample_index)

        res_index = np.where(self.clf.classes_ == next_class)
        if len(res_index[0]):
            proba = self.predict_proba_into_discrete(proba[res_index[0][0]])
        else:
            proba = 0

        client_bytes = self.client_bytes_into_discrete(X[sample_index][90])
        server_bytes = self.server_bytes_into_discrete(X[sample_index][91])

        duration = self.duration_into_discrete(X[sample_index][94])

        return State_key(self.class_percent_into_discrete(class_percent), 
                         proba,
                         hit,
                         duration)

    def initialize(self, cols, iters, already_used, nclasses):
        self.q_count = defaultdict(int)

        self.epsilon_greedy_rate = 0.9
        self.alpha_value = 0.4
        self.gamma_value = 0.9

        self.clf = RandomForestClassifier(max_depth=10)

        self.CLASS_PERCENT_VALUES = [0.0001, 0.01, 0.05, 0.1]
        self.PREDICT_PROBA_VALUES = [0.25, 0.50, 0.75]
        self.PERCENT_USED_VALUES  = [0.05, 0.08, 0.13, 0.21, 0.34, 0.89]
        self.DURATION_VALUES      = [0.1, 1, 29.9, 59.9, 89.9, 119.9, 299]

        self.used = 0
        self.base_samples = already_used
        self.base_i = already_used - 1
        self.to_i = already_used
        self.class_amount = {}

        for i in range(nclasses):
            self.class_amount[i] = 0

        self.X_used = np.ndarray(shape = (iters + already_used, cols))
        self.y_used = np.ndarray(shape = (iters + already_used,))
        self.last_f1 = 0
    
    def extract_possible_actions(self, state_key):
        return list({0, 1})

    def select_action(self, state_key, next_action_list):
        epsilon_greedy_flag = bool(np.random.binomial(n=1, p=self.epsilon_greedy_rate))

        if epsilon_greedy_flag is False:
            action_key = random.choice(next_action_list)
        else:
            action_key = self.predict_next_action(state_key, next_action_list)

        return action_key

    def train_clf(self, clf):
        clf.fit(self.X_used[:self.to_i], 
                self.y_used[:self.to_i])

    def test_acc(self):
        self.clf = RandomForestClassifier(max_depth=10, n_jobs=-1)

        self.train_clf(self.clf)

        predict_arr = self.clf.predict(X_test)

        return f1_score(y_test, predict_arr, average="weighted")

    def observe_reward_value(self, state_key, action_key):
        # if action_key == 0:
        #     return 0

        self.X_used[self.to_i] = X[self.base_i + self.t]
        self.y_used[self.to_i] = y[self.base_i + self.t]

        self.to_i += 1

        cur_f1 = self.test_acc()
        reward = cur_f1 - self.last_f1

        if action_key == 0:
            self.to_i -= 1
            reward = -reward
        else:
            self.last_f1 = cur_f1

        self.save_r_df(state_key, reward)

        return reward

    def learn(self, state_key, limit=1000, increased_rd = 1, decrease_alpha = 0):
        self.t = 1

        for _ in tqdm(range(1, limit + 1)):
            self.epsilon_greedy_rate = min(self.t / increased_rd, 0.9)
            self.alpha_value = max(self.alpha_value - decrease_alpha, 0.05)
            
            next_action_list = self.extract_possible_actions(state_key)
            if len(next_action_list):
                action_key = self.select_action(
                    state_key=state_key,
                    next_action_list=next_action_list
                )
                reward_value = self.observe_reward_value(state_key, action_key)

            if len(next_action_list):
                # Max-Q-Value in next action time.
                next_state_key = self.update_state(
                    state_key=state_key,
                    action_key=action_key
                )

                next_next_action_list = self.extract_possible_actions(next_state_key)
                next_action_key = self.predict_next_action(next_state_key, next_next_action_list)
                next_max_q = self.extract_q_df(next_state_key, next_action_key)

                # Update Q-Value.
                self.update_q(
                    state_key=state_key,
                    action_key=action_key,
                    reward_value=reward_value,
                    next_max_q=next_max_q
                )
                # Update State.
                state_key = next_state_key

            # Normalize.
            self.normalize_q_value()
            self.normalize_r_value()

            # Vis.
            self.visualize_learning_result(state_key)
            # Check.
            if self.check_the_end_flag(state_key) is True:
                break

            # Epsode.
            self.t += 1

    def save_q_df(self, state_key, action_key, q_value):
        '''
        Insert or update Q-Value in `self.q_df`.

        Args:
            state_key:      State.
            action_key:     Action.
            q_value:        Q-Value.

        Exceptions:
            TypeError:      If the type of `q_value` is not float.

        '''
        if isinstance(q_value, float) is False:
            raise TypeError("The type of q_value must be float.")

        new_q_df = pd.DataFrame([(state_key, action_key, q_value)], columns=["state_key", "action_key", "q_value"])
        
        if q_value != 0.0:
            self.q_count[(state_key, action_key)] += 1

        if self.q_df is not None:
            self.q_df = pd.concat([new_q_df, self.q_df])
            self.q_df = self.q_df.drop_duplicates(["state_key", "action_key"])
        else:
            self.q_df = new_q_df

(X_test, y_test) = create_balanced_test_data()

increased_rd = 100 # epsilon = min(self.t / increased_rd, 0.9)
decrease_alpha = 0.001
iters = 100
base_samples_amount = 400

nclasses = len(train_dataframe.groupby('APP'))

q = Q()
q.t = 1
q.initialize(X.shape[1], iters, base_samples_amount, nclasses)

q.X_used[:base_samples_amount] = X[:base_samples_amount]
q.y_used[:base_samples_amount] = y[:base_samples_amount]
q.last_f1 = q.test_acc()

for i in range(base_samples_amount):
    q.class_amount[y[i]] += 1

state_key = q.update_state(State_key(0, 0, 0, 0), 0)

q.learn(state_key, iters, increased_rd)



100%|██████████| 100/100 [00:22<00:00,  4.41it/s]


In [13]:
X_test = test_dataframe.drop(columns="APP").to_numpy()[:100000]
y_test = test_dataframe["APP"].to_numpy()[:100000]

print(q.to_i)

clf = RandomForestClassifier()
clf.fit(q.X_used[:q.to_i], q.y_used[:q.to_i])

predict_arr = clf.predict(X_test)

print(f"q_learning_acc: {accuracy_score(y_test, predict_arr):.4f}")


clf = RandomForestClassifier()
clf.fit(X[:1100], y[:1100])

predict_arr = clf.predict(X_test)

print(f"random_learning_acc: {accuracy_score(y_test, predict_arr):.4f}")

q_df = q.q_df
q_df = q_df.sort_values(by=["q_value"], ascending=False)
print(q_df.head())

446
q_learning_acc: 0.4194
random_learning_acc: 0.4909
                                           state_key  action_key   q_value
0  State_key(percent_of_class=2, predict_proba=0,...           0  0.007082
0  State_key(percent_of_class=3, predict_proba=1,...           0  0.006469
0  State_key(percent_of_class=3, predict_proba=1,...           0  0.005452
0  State_key(percent_of_class=2, predict_proba=1,...           1  0.004722
0  State_key(percent_of_class=2, predict_proba=1,...           0  0.004396


In [14]:
print(q_df.shape)

for index, row in q_df.iterrows():
    count = q.q_count[(row["state_key"], row["action_key"])]
    print(f"{count}: {row.to_dict()}")


(59, 3)
2: {'state_key': State_key(percent_of_class=2, predict_proba=0, correct_predict=True, duration=1), 'action_key': 0, 'q_value': 0.007081836331093865}
3: {'state_key': State_key(percent_of_class=3, predict_proba=1, correct_predict=True, duration=1), 'action_key': 0, 'q_value': 0.006469214287906824}
1: {'state_key': State_key(percent_of_class=3, predict_proba=1, correct_predict=True, duration=4), 'action_key': 0, 'q_value': 0.005452179285359425}
2: {'state_key': State_key(percent_of_class=2, predict_proba=1, correct_predict=True, duration=2), 'action_key': 1, 'q_value': 0.004721835808914636}
2: {'state_key': State_key(percent_of_class=2, predict_proba=1, correct_predict=True, duration=3), 'action_key': 0, 'q_value': 0.004396087140365295}
4: {'state_key': State_key(percent_of_class=2, predict_proba=0, correct_predict=False, duration=2), 'action_key': 1, 'q_value': 0.004276969599552089}
1: {'state_key': State_key(percent_of_class=3, predict_proba=2, correct_predict=True, duration=0)