In [1]:
from cesnet_datazoo.datasets import CESNET_QUIC22
from cesnet_datazoo.config import DatasetConfig, AppSelection, ValidationApproach

# Open the CESNET-QUIC22 dataset stored under the user's home directory (size "XS").
dataset = CESNET_QUIC22("~/datasets/CESNET-QUIC22/", size="XS")

# Keyword arguments forwarded to DatasetConfig; kept under the name
# `common_params` so the same settings can be reused for other configs.
common_params = dict(
    dataset=dataset,
    apps_selection=AppSelection.ALL_KNOWN,
    train_period_name="W-2022-44",
    val_approach=ValidationApproach.SPLIT_FROM_TRAIN,
    train_val_split_fraction=0.2,
    use_packet_histograms=True,
)

# Build the configuration and let the dataset initialise itself from it.
dataset_config = DatasetConfig(**common_params)
dataset.set_dataset_config_and_initialize(dataset_config)

# Load the three splits as dataframes (train, then validation, then test),
# each requested with flatten_ppi=True.
train_dataframe = dataset.get_train_df(flatten_ppi=True)
val_dataframe = dataset.get_val_df(flatten_ppi=True)
test_dataframe = dataset.get_test_df(flatten_ppi=True)

Loading data from dataloader


100%|██████████| 8162/8162 [00:07<00:00, 1032.81it/s]


Loading data from dataloader


100%|██████████| 192/192 [00:04<00:00, 47.77it/s]


Loading data from dataloader


100%|██████████| 1247/1247 [00:08<00:00, 148.51it/s]


In [21]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score

from pyqlearning.q_learning import QLearning

from dataclasses import dataclass
from collections import defaultdict
import random

import numpy as np
import pandas as pd
from math import log

from tqdm import tqdm

import multiprocess as mp

# X_test = test_dataframe.drop(columns="APP").to_numpy()[:10000]
# y_test = test_dataframe["APP"].to_numpy()[:10000]

from datetime import datetime
# Mark the beginning of this run in the shared append-only log file.
time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
with open("out.txt", "a") as f:
    f.write(f"\nstart({time})")

def big_test(q, iters):
    """Append a three-way accuracy comparison to ``out.txt``.

    Three default ``RandomForestClassifier`` models are trained and scored with
    ``accuracy_score`` on the module-level ``X_big_test`` / ``y_big_test``:

    * ``q_learning_acc``: trained on the first ``q.to_i`` rows of
      ``q.X_used`` / ``q.y_used``;
    * ``random_learning_acc``: trained on ``q.to_i`` rows drawn without
      replacement from the first ``base_samples_amount + iters`` rows of the
      module-level ``X`` / ``y``;
    * ``total_learning_acc``: trained on all of those
      ``base_samples_amount + iters`` rows.

    The value of ``q.to_i`` is written first and the five highest-valued rows
    of ``q.q_df`` last.

    Args:
        q: agent exposing ``to_i``, ``X_used``, ``y_used`` and ``q_df``.
        iters: number of rows beyond the base set that form the sampling pool.
    """

    def _forest_accuracy(train_X, train_y):
        # Fresh forest per measurement, always scored on the same hold-out set.
        forest = RandomForestClassifier()
        forest.fit(train_X, train_y)
        return accuracy_score(y_big_test, forest.predict(X_big_test))

    with open("out.txt", "a") as log:
        log.write(str(q.to_i) + "\n")

        # Samples kept by the agent.
        chosen_acc = _forest_accuracy(q.X_used[:q.to_i], q.y_used[:q.to_i])
        log.write(f"q_learning_acc: {chosen_acc:.4f}" + "\n")

        # Equally sized random subset of everything the agent has seen.
        indices = np.random.choice(base_samples_amount + iters, q.to_i, replace=False)
        random_acc = _forest_accuracy(X[indices], y[indices])
        log.write(f"random_learning_acc: {random_acc:.4f}" + "\n")

        # Everything the agent has seen.
        seen = base_samples_amount + iters
        everything_acc = _forest_accuracy(X[:seen], y[:seen])
        log.write(f"total_learning_acc: {everything_acc:.4f}" + "\n")

        ranked = q.q_df.sort_values(by=["q_value"], ascending=False)
        log.write(str(ranked.head()) + "\n\n")

# same amount of samples from all classes
def create_balanced_test_data(samples_per_class=100, dataframe=None):
    """Build a class-balanced evaluation set from a dataframe with an ``APP`` column.

    The rows are grouped by ``APP`` (groups come back in sorted key order) and
    the first ``samples_per_class`` rows of every group are kept.  The output is
    sized from the groups actually present, so any number of classes works and
    no uninitialised rows are ever returned.  A class with fewer than
    ``samples_per_class`` rows contributes all the rows it has.

    Args:
        samples_per_class: maximum number of rows taken from each class.
        dataframe: source dataframe; defaults to the module-level
            ``test_dataframe``.

    Returns:
        Tuple ``(X_arr, y_arr)`` of float64 arrays: the feature matrix (every
        column except ``APP``) and the matching labels.
    """
    source = test_dataframe if dataframe is None else dataframe

    feature_parts = []
    label_parts = []
    for _, group in source.groupby('APP'):
        head = group.iloc[:samples_per_class]
        # float64 keeps the dtype of the arrays this function has always returned.
        feature_parts.append(head.drop(columns="APP").to_numpy(dtype=np.float64))
        label_parts.append(head["APP"].to_numpy(dtype=np.float64))

    if not feature_parts:
        # Empty input: return correctly shaped empty arrays instead of failing.
        return (np.empty((0, source.shape[1] - 1)), np.empty((0,)))

    return (np.concatenate(feature_parts), np.concatenate(label_parts))


@dataclass
class State_key:
    """Discretised description of one candidate sample, used as a Q-table key.

    Every field is a small bucket index (or a flag).  ``__hash__`` packs the
    fields into one integer; the packing is collision-free only while
    ``percent_of_class < 5``, ``predict_proba < 4`` and ``percent_duration < 8``.
    Equality therefore compares the fields themselves rather than the hashes,
    so out-of-range values can never make two different states look identical.
    """
    percent_of_class : int  # bucket index; hash stride assumes values 0..4
    predict_proba    : int  # bucket index; hash stride assumes values 0..3
    correct_predict  : bool # True or False
    percent_duration : int  # bucket index; hash stride assumes values 0..7
    # bytes_client     : int  # log3, 9 buckets
    # bytes_server     : int  # log5, 7 buckets
    duration         : int  # bucket index; most significant component of the hash

    def _fields(self):
        """Return the field values as a tuple, with the flag normalised to int."""
        return (self.percent_of_class,
                self.predict_proba,
                int(self.correct_predict),
                self.percent_duration,
                self.duration)

    def __hash__(self):
        # Mixed-radix packing with strides 1, 5, 20, 40 and 320.
        return self.percent_of_class + \
               self.predict_proba * 5 + \
               int(self.correct_predict) * 20 + \
               self.percent_duration * 40 + \
               self.duration * 320

    def __eq__(self, other):
        if not isinstance(other, State_key):
            return False

        # Field-wise comparison: equal objects still hash equally, but a hash
        # collision is no longer mistaken for equality.
        return self._fields() == other._fields()
        
class Q(QLearning):
    """Q-learning agent that decides which candidate rows a classifier should keep.

    Each step looks at one row of the module-level ``X`` / ``y`` arrays and
    picks an action: ``1`` stores the row in ``X_used`` / ``y_used``, ``0``
    skips it.  Rewards come from the module-level ``observe_reward_value``
    function, which is dispatched to a worker pool so that ``pool_size``
    candidates are evaluated concurrently.

    ``initialize`` must be called, ``self.t`` set, and a fitted classifier
    assigned to ``self.clf`` before ``update_state`` or ``learn_parallel`` run.
    ``predict_next_action``, ``extract_q_df``, ``update_q``, ``save_r_df``,
    ``normalize_q_value``, ``normalize_r_value`` and ``q_df`` are not defined
    here; they are presumably inherited from ``QLearning``.
    """

    # Column of the feature matrix that this class reads as the flow duration.
    DURATION_COLUMN = 94

    def get_clf_prediction(self, index):
        """Return ``(class probabilities, prediction-was-correct)`` for ``X[index]``."""
        sample = X[index].reshape(1, -1)

        proba = self.clf.predict_proba(sample)[0]
        hit = (self.clf.predict(sample) == y[index])[0]

        return (proba, hit)

    def value_into_discrete(self, value, thresholds):
        """Return the index of the first threshold greater than ``value``.

        Returns ``len(thresholds)`` when ``value`` is not below any threshold.
        """
        for i, threshold in enumerate(thresholds):
            if value < threshold:
                return i
        return len(thresholds)

    def class_percent_into_discrete(self, percent):
        """Bucket a class share using ``CLASS_PERCENT_VALUES``."""
        return self.value_into_discrete(percent, self.CLASS_PERCENT_VALUES)

    def predict_proba_into_discrete(self, proba):
        """Bucket a predicted probability using ``PREDICT_PROBA_VALUES``."""
        return self.value_into_discrete(proba, self.PREDICT_PROBA_VALUES)

    def percent_used_into_discrete(self, percent):
        """Bucket a value using ``PERCENT_USED_VALUES``.

        NOTE(review): ``PERCENT_USED_VALUES`` is not set by ``initialize``, so
        this raises ``AttributeError`` unless the caller defines it first.
        """
        return self.value_into_discrete(percent, self.PERCENT_USED_VALUES)

    def duration_into_discrete(self, duration):
        """Bucket a duration using ``DURATION_VALUES``."""
        return self.value_into_discrete(duration, self.DURATION_VALUES)

    def percent_duration_into_discrete(self, duration_percent):
        """Bucket a duration-bucket share using ``DURATION_PERCENT_VALUES``."""
        return self.value_into_discrete(duration_percent, self.DURATION_PERCENT_VALUES)

    def client_bytes_into_discrete(self, nbytes):
        """Map a byte count to a bucket in 0..8 on a base-3 logarithmic scale."""
        return int(np.clip(int(log(nbytes, 3)) - 5, 1, 9) - 1)

    def server_bytes_into_discrete(self, nbytes):
        """Map a byte count to a bucket in 0..7 on a base-5 logarithmic scale."""
        return int(np.clip(int(log(nbytes, 5)) - 4, 0, 7))

    def update_state(self, state_key, action_key, offset=0):
        """Build the ``State_key`` for row ``base_i + t + 1 + offset`` of ``X``.

        When ``action_key == 1`` and ``offset == 0`` the most recently stored
        sample (``X_used[to_i - 1]``) is first added to the class and duration
        counters.  ``state_key`` itself is not read.

        Returns:
            A new ``State_key`` for the row described above.
        """
        sample_index = self.base_i + self.t + 1 + offset

        next_class = y[sample_index]

        if action_key == 1 and offset == 0:
            prev_duration = self.duration_into_discrete(
                self.X_used[self.to_i - 1][self.DURATION_COLUMN])

            self.duration_amount[prev_duration] += 1
            self.class_amount[self.y_used[self.to_i - 1]] += 1
            self.used += 1

        class_percent = self.class_amount[next_class] / (self.used + self.base_samples)

        (proba, hit) = self.get_clf_prediction(sample_index)

        # Probability the classifier assigns to the true class; bucket 0 when
        # the classifier has never seen that class.
        res_index = np.where(self.clf.classes_ == next_class)
        if len(res_index[0]):
            proba = self.predict_proba_into_discrete(proba[res_index[0][0]])
        else:
            proba = 0

        duration = self.duration_into_discrete(X[sample_index][self.DURATION_COLUMN])
        percent_duration = self.duration_amount[duration] / (self.used + self.base_samples)

        return State_key(self.class_percent_into_discrete(class_percent),
                         proba,
                         hit,
                         self.percent_duration_into_discrete(percent_duration),
                         duration)

    def initialize(self, cols, iters, already_used, nclasses):
        """Reset hyper-parameters, counters, buffers and the worker pool.

        Args:
            cols: number of feature columns in the sample buffer.
            iters: maximum number of samples that may be added during learning.
            already_used: number of base samples pre-loaded by the caller.
            nclasses: accepted for interface compatibility; not used here.
        """
        self.q_count = defaultdict(int)

        self.epsilon_greedy_rate = 0.9
        self.alpha_value = 0.2
        self.gamma_value = 0.9

        self.clf = RandomForestClassifier(max_depth=10)

        self.CLASS_PERCENT_VALUES    = [0.0001, 0.01, 0.05, 0.1]
        self.PREDICT_PROBA_VALUES    = [0.25, 0.50, 0.75]
        self.DURATION_VALUES         = [0.1, 1, 29.9, 59.9, 89.9, 119.9, 299]
        self.DURATION_PERCENT_VALUES = [0.05, 0.1, 0.2, 0.4]

        # Leave one CPU free, but never ask for fewer than one worker: a pool of
        # size 0 is rejected and would also give learn_parallel a zero range step.
        self.pool_size = max(1, mp.cpu_count() - 1)
        self.pool = mp.Pool(self.pool_size)
        self.used = 0
        self.base_samples = already_used
        self.base_i = already_used - 1
        self.to_i = already_used

        self.class_amount = defaultdict(int)
        self.duration_amount = defaultdict(int)

        # Uninitialised buffers; rows [0, to_i) are the only ones holding data.
        self.X_used = np.ndarray(shape = (iters + already_used, cols))
        self.y_used = np.ndarray(shape = (iters + already_used,))
        self.last_f1 = 0

    def extract_possible_actions(self, state_key):
        """Return the available actions: ``0`` (skip) and ``1`` (keep)."""
        return list({0, 1})

    def select_action(self, state_key, next_action_list):
        """Pick the greedy action with probability ``epsilon_greedy_rate``, else a random one."""
        epsilon_greedy_flag = bool(np.random.binomial(n=1, p=self.epsilon_greedy_rate))

        if epsilon_greedy_flag is False:
            action_key = random.choice(next_action_list)
        else:
            action_key = self.predict_next_action(state_key, next_action_list)

        return action_key

    def learn_iter(self, state_key, iter):
        """Process one candidate sample at recursion depth ``iter``.

        The reward job for this depth is submitted to the pool, the method then
        recurses to depth ``iter + 1`` so that up to ``pool_size`` jobs are in
        flight, and only afterwards collects its own result and updates the
        Q-table.

        Returns:
            The next state key, or ``None`` once ``iter`` reaches ``pool_size``.
        """
        if iter >= self.pool_size:
            return

        # Candidate row handled at this depth.  Captured before recursing because
        # deeper levels advance self.t before control returns here; the row that
        # is stored below must be the same row whose reward was measured.
        sample_index = self.base_i + self.t + iter

        # NOTE(review): update_state describes row base_i + t + 1 + iter, i.e.
        # one row ahead of sample_index - confirm whether that is intended.
        state_key = self.update_state(state_key, 0, iter)

        next_action_list = self.extract_possible_actions(state_key)
        if len(next_action_list):
            action_key = self.select_action(
                state_key=state_key,
                next_action_list=next_action_list
            )

            # The extra trailing row of the slices is overwritten with the
            # candidate inside the worker.
            result = self.pool.apply_async(observe_reward_value,
                                        args = (action_key,
                                                self.X_used[:self.to_i + 1], self.y_used[:self.to_i + 1],
                                                self.last_f1,
                                                X[sample_index],
                                                y[sample_index])
                                        )

            self.learn_iter(state_key, iter + 1)

            if action_key == 1:
                self.X_used[self.to_i] = X[sample_index]
                self.y_used[self.to_i] = y[sample_index]

                self.to_i += 1

            # get() blocks until the worker has finished.
            (clf, reward_value, cur_f1) = result.get()
            if iter == 0:
                self.clf = clf

            self.save_r_df(state_key, reward_value)
            self.last_f1 = cur_f1

            # Max-Q-Value in next action time.
            next_state_key = self.update_state(
                state_key=state_key,
                action_key=action_key
            )

            next_next_action_list = self.extract_possible_actions(next_state_key)
            next_action_key = self.predict_next_action(next_state_key, next_next_action_list)
            next_max_q = self.extract_q_df(next_state_key, next_action_key)

            # Update Q-Value.
            self.update_q(
                state_key=state_key,
                action_key=action_key,
                reward_value=reward_value,
                next_max_q=next_max_q
            )
            # Update State.
            state_key = next_state_key

            # Normalize.
            self.normalize_q_value()
            self.normalize_r_value()

            # Episode.
            self.t += 1

            return state_key

    def learn_parallel(self, state_key, limit=1000, increased_rd = 1, decrease_alpha = 0):
        """Run the learning loop in batches of ``pool_size`` candidates.

        Args:
            state_key: initial state.
            limit: total number of candidate steps to run.
            increased_rd: number of steps over which the greedy probability
                ramps up; it is ``min(t / increased_rd, 0.9)``.
            decrease_alpha: amount subtracted from ``alpha_value`` per batch
                (floored at 0.05).
        """
        self.t = 1
        last_t = 1

        for _ in tqdm(range(1, limit + 1, self.pool_size)):
            # Write a benchmark entry roughly every 100 steps.
            if self.t - last_t > 100:
                big_test(self, self.t)
                last_t = self.t

            self.epsilon_greedy_rate = min(self.t / increased_rd, 0.9)
            self.alpha_value = max(self.alpha_value - decrease_alpha, 0.05)
            state_key = self.learn_iter(state_key, 0)


    def save_q_df(self, state_key, action_key, q_value):
        """Store ``q_value`` for ``(state_key, action_key)``, replacing any older entry.

        Non-zero updates are also counted in ``q_count``.

        Raises:
            TypeError: if ``q_value`` is not a float.
        """
        if isinstance(q_value, float) is False:
            raise TypeError("The type of q_value must be float.")

        new_q_df = pd.DataFrame([(state_key, action_key, q_value)], columns=["state_key", "action_key", "q_value"])

        if q_value != 0.0:
            self.q_count[(state_key, action_key)] += 1

        if self.q_df is not None:
            # The new row goes first so drop_duplicates keeps it over the old one.
            self.q_df = pd.concat([new_q_df, self.q_df])
            self.q_df = self.q_df.drop_duplicates(["state_key", "action_key"])
        else:
            self.q_df = new_q_df

    def observe_reward_value(self, state_key, action_key):
        """Unused stub; rewards are computed by the module-level function of the same name."""
        pass

def test_acc(X_used, y_used, clf):
    """Fit ``clf`` on the given samples and score it on the balanced test set.

    Args:
        X_used: training feature rows.
        y_used: training labels.
        clf: classifier to fit; it is fitted in place.

    Returns:
        Weighted F1 score of ``clf`` on the module-level ``X_test`` / ``y_test``.
    """
    clf.fit(X_used, y_used)
    predictions = clf.predict(X_test)
    return f1_score(y_test, predictions, average="weighted")

def observe_reward_value(action_key, X_used, y_used, last_reward, X_s, y_s):
    """Score the effect of adding one candidate sample to the training set.

    The candidate ``(X_s, y_s)`` is written into the last row of ``X_used`` /
    ``y_used`` (modifying the arrays that were passed in).  Three fresh random
    forests are then trained on the result and their ``test_acc`` scores
    averaged.

    Args:
        action_key: ``0`` flips the sign of the reward; any other value keeps it.
        X_used: feature rows whose last row is a slot for the candidate.
        y_used: labels whose last entry is a slot for the candidate.
        last_reward: previous averaged score to compare against.
        X_s: candidate feature row.
        y_s: candidate label.

    Returns:
        Tuple ``(clf, reward, cur_f1)``: the last forest trained, the signed
        change in averaged score, and the new averaged score.
    """
    X_used[-1], y_used[-1] = X_s, y_s

    test_iters = 3
    score_total = 0
    for _ in range(test_iters):
        clf = RandomForestClassifier(max_depth=10, n_jobs=1)
        score_total += test_acc(X_used, y_used, clf)
    cur_f1 = score_total / test_iters

    # Improvement rewards keeping the sample and penalises skipping it.
    delta = cur_f1 - last_reward
    reward = -delta if action_key == 0 else delta

    return (clf, reward, cur_f1)

if __name__ == '__main__':
    # Training pool the agent walks through, one row per step.
    X = train_dataframe.drop(columns="APP").to_numpy()
    y = train_dataframe["APP"].to_numpy()

    # Larger hold-out set used only by big_test for periodic benchmarking.
    X_big_test = test_dataframe.drop(columns="APP").to_numpy()[:100000]
    y_big_test = test_dataframe["APP"].to_numpy()[:100000]

    # Class-balanced hold-out set used by test_acc to compute rewards.
    (X_test, y_test) = create_balanced_test_data()

    increased_rd = 500 # epsilon = min(self.t / increased_rd, 0.9)
    decrease_alpha = 0.0001
    iters = 50000
    base_samples_amount = 400

    nclasses = len(train_dataframe.groupby('APP'))

    q = Q()
    q.t = 1
    q.initialize(X.shape[1], iters, base_samples_amount, nclasses)

    # Seed the agent's training buffer with the first base_samples_amount rows.
    q.X_used[:base_samples_amount] = X[:base_samples_amount]
    q.y_used[:base_samples_amount] = y[:base_samples_amount]

    # Baseline score and fitted classifier for the first state computation.
    clf = RandomForestClassifier(max_depth=10, n_jobs=1)
    q.last_f1 = test_acc(q.X_used[:base_samples_amount], 
                        q.y_used[:base_samples_amount], clf)
    q.clf = clf

    # Count the base samples per class and per duration bucket.  The duration
    # counter must be incremented just like the class counter; indexing the
    # defaultdict alone only creates a zero entry.
    for i in range(base_samples_amount):
        q.class_amount[y[i]] += 1
        q.duration_amount[q.duration_into_discrete(X[i][94])] += 1

    state_key = q.update_state(State_key(0, 0, 0, 0, 0), 0)

    q.learn_parallel(state_key, iters, increased_rd, decrease_alpha)

  6%|▌         | 416/7143 [32:20<8:42:58,  4.66s/it] 


KeyboardInterrupt: 