In [2]:
from cesnet_datazoo.datasets import CESNET_QUIC22
from cesnet_datazoo.config import DatasetConfig, AppSelection, ValidationApproach

# Open the size="XS" variant of CESNET-QUIC22 stored under the user's datasets directory.
dataset = CESNET_QUIC22("~/datasets/CESNET-QUIC22/", size="XS")

# Shared configuration keywords, kept in a dict so they can be reused.
# Presumably: train on period "W-2022-44" and hold out a 0.2 fraction of the
# training data for validation — confirm against the cesnet_datazoo docs.
common_params = dict(
    dataset=dataset,
    apps_selection=AppSelection.ALL_KNOWN,
    train_period_name="W-2022-44",
    val_approach=ValidationApproach.SPLIT_FROM_TRAIN,
    train_val_split_fraction=0.2,
    use_packet_histograms=True,
)

dataset_config = DatasetConfig(**common_params)
dataset.set_dataset_config_and_initialize(dataset_config)

# Materialise the three splits as dataframes, requesting flattened PPI columns.
# The calls run in train -> val -> test order, matching the progress bars below.
train_dataframe, val_dataframe, test_dataframe = (
    dataset.get_train_df(flatten_ppi=True),
    dataset.get_val_df(flatten_ppi=True),
    dataset.get_test_df(flatten_ppi=True),
)

Loading data from dataloader


100%|██████████| 8162/8162 [00:07<00:00, 1026.68it/s]


Loading data from dataloader


100%|██████████| 192/192 [00:03<00:00, 49.73it/s]


Loading data from dataloader


100%|██████████| 1247/1247 [00:08<00:00, 147.73it/s]


In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score
from incremental_trees.models.classification.streaming_rfc import StreamingRFC

from dataclasses import dataclass
import random

from pyqlearning.q_learning import QLearning

import numpy as np

@dataclass
class State_key:
    """Hashable wrapper around a pair of NumPy arrays used as a Q-learning state.

    Hashing and equality are both derived from the *current contents* of the
    arrays, so modifying either array in place changes the hash of the instance.
    """

    # Array holding the feature rows that make up the state.
    X_used: np.ndarray
    # Array holding the labels that belong to the rows of ``X_used``.
    y_used: np.ndarray

    def __hash__(self):
        """Return a hash built from the raw bytes of both arrays."""
        return hash((self.X_used.tobytes(), self.y_used.tobytes()))

    def __eq__(self, other):
        """Return True only for another ``State_key`` with the same hash and equal arrays.

        The hash comparison is part of the contract, not just a shortcut: two
        states whose arrays compare equal element-wise but differ in their raw
        bytes (and therefore in their hash) are reported as unequal.
        """
        if isinstance(other, State_key) and hash(self) == hash(other):
            features_match = np.array_equal(self.X_used, other.X_used)
            return features_match and np.array_equal(self.y_used, other.y_used)
        return False

# Row caps for this experiment: the candidate pool the agent can pick from and
# the number of test rows used for scoring.
N_TRAIN_SAMPLES = 1000
N_TEST_SAMPLES = 100

# Slice the frames *before* dropping the label column and converting to NumPy.
# Converting first would copy every row of the full split just to keep the
# first N of them, and the resulting slice would keep that full array alive.
train_head = train_dataframe.iloc[:N_TRAIN_SAMPLES]
test_head = test_dataframe.iloc[:N_TEST_SAMPLES]

# Features are every column except the "APP" label column.
X = train_head.drop(columns="APP").to_numpy()
y = train_head["APP"].to_numpy()

X_test = test_head.drop(columns="APP").to_numpy()
y_test = test_head["APP"].to_numpy()

class Q(QLearning):
    """Q-learning agent whose actions are indices of training samples.

    Each action feeds one more sample (taken from the module-level ``X`` / ``y``)
    to an incrementally trained ``StreamingRFC``; the reward is the resulting
    change in weighted F1 on the module-level ``X_test`` / ``y_test``.

    NOTE(review): the reward is measured on ``X_test`` / ``y_test``. If the same
    split is used for the final report, the selection policy has already seen
    it — consider scoring the reward on a validation split instead.
    """

    def update_state(self, state_key, action_key):
        """Return the same state object; the state is mutated in place elsewhere."""
        return state_key

    def initialize(self, len, cols):
        """Reset per-episode bookkeeping.

        ``len`` is the number of candidate samples (it shadows the builtin inside
        this method only). ``cols`` is accepted but not used.
        """
        self.last_acc = 0
        self.clf = StreamingRFC()
        # Indices of samples that have not been selected yet in this episode.
        self.i_not_used = set(range(len))

    def extract_possible_actions(self, state_key):
        """Return the still-unused sample indices as a list."""
        return list(self.i_not_used)

    def select_action(self, state_key, next_action_list):
        """Pick the next sample index and mark it as used.

        With probability ``epsilon_greedy_rate`` the inherited
        ``predict_next_action`` decides; otherwise an index is drawn uniformly
        at random from ``next_action_list``.
        """
        use_prediction = bool(np.random.binomial(n=1, p=self.epsilon_greedy_rate))

        if use_prediction:
            chosen = self.predict_next_action(state_key, next_action_list)
        else:
            chosen = random.choice(next_action_list)

        # A sample can be selected at most once per episode.
        self.i_not_used.remove(chosen)

        return chosen

    def train_clf(self, state_key):
        """Feed the sample stored at row ``t - 1`` of the state to the classifier.

        ``self.t`` is presumably the 1-based step counter maintained by the base
        class — verify against pyqlearning. The first step calls ``fit``; every
        later step calls ``partial_fit``.
        """
        row = self.t - 1
        features = state_key.X_used[row].reshape(1, -1)
        label = state_key.y_used[row].ravel()

        train_step = self.clf.fit if self.t == 1 else self.clf.partial_fit
        train_step(features, label)

    def test_acc(self, state_key):
        """Train on the newest sample, then return weighted F1 on the test arrays.

        Despite the name, the value returned is an F1 score, not accuracy.
        """
        self.train_clf(state_key)
        predictions = self.clf.predict(X_test)
        return f1_score(y_test, predictions, average='weighted')

    def observe_reward_value(self, state_key, action_key):
        """Apply the chosen sample to the state and return the F1 improvement.

        The selected row of ``X`` / ``y`` is written into slot ``t - 1`` of the
        state arrays (in place), the classifier is updated and scored, and the
        difference to the previous score is stored via ``save_r_df`` and returned.
        """
        slot = self.t - 1
        state_key.X_used[slot] = X[action_key]
        state_key.y_used[slot] = y[action_key]

        score = self.test_acc(state_key)
        reward = score - self.last_acc
        self.last_acc = score

        self.save_r_df(state_key, reward)

        return reward

# Seed both RNGs that this cell draws from (`random.choice` and
# `np.random.binomial` in Q.select_action) so a Restart & Run All reproduces
# the same numbers. The classifiers presumably fall back to NumPy's global RNG
# when no random_state is given — confirm for StreamingRFC.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

q = Q()
# In Q.select_action this is the probability of following the learned policy;
# the remaining draws pick a random unused sample.
q.epsilon_greedy_rate = 0.1
# Presumably the learning rate and discount factor of the Q-update — see pyqlearning.
q.alpha_value = 0.9
q.gamma_value = 0.9

# Samples selected per episode, and number of episodes to run.
iters = 100
n_episodes = 200

for i in range(n_episodes):
    print(f"iter {i}\n")
    q.initialize(len(X), X.shape[1])
    # Start from zero-filled buffers. `np.ndarray(shape=...)` would hand back
    # uninitialised memory, and State_key hashes/compares the raw contents, so
    # the state identity would depend on garbage (possibly NaN) values.
    state_key = State_key(np.zeros(shape=(iters, X.shape[1])),
                          np.zeros(shape=(iters,)))

    q.learn(state_key, iters)
    predict_arr = q.clf.predict(X_test)
    print(f"q_learning_acc: {accuracy_score(y_test, predict_arr):.4f}")

# Baseline: a batch random forest trained on every row of X, whereas each
# Q-learning episode only feeds `iters` selected rows to its classifier.
clf = RandomForestClassifier(random_state=SEED)
clf.fit(X, y)
predict_arr = clf.predict(X_test)
print(f"base_model_acc: {accuracy_score(y_test, predict_arr):.4f}")


iter 0

625
312
685
683
577
445
199
278
852
215
47
622
604
879
459
814
555
592
529
500


  warn(


897
547
948
185
495
364
599
301
571
713
491
230
900
328
872
662
811
180
841
393
877
777
927
658
205
827
929
50
675
905
778
597
353
907
576
984
356
127
663
427
825
684
681
720
791
12
96
86
669
267
476
851
482
706
999
87
7
865
906
538
424
281
774
574
804
965
49
594
434
203
915
995
985
310
516
5
646
506
331
977
504
739
319
537
114
51
326
831
466
65
947
561
10
678
465
703
453
337
131
980
925
876
181
974
352
251
886
800
848
81
343
551
747
296
950
523
406
440
295
616
189
856
78
698
76
163
938
588
366
757
567
533
121
157
730
693
655
129
958
813
315
68
56
986
960
917
376
23
752
11
586
172
419
998
697
988
400
812
341
178
809
705
824
190
271
544
195
598
550
740
524
758
875
895
166
761
238
982
873
909
762
248
391
226
528
535
934
52
964
912
573
733
116
956
520
502
514
289
37
784
91
518
332
102
412
368
404
284
346
816
191
632
349
839
256
783
16
354
799
405
979
451
307
122
82
206
457
153
548
490
25
447
407
931
316
603
639
214
968
436
943
543
377
565
109
901
916
70
724
941
93
484
771
913
721
336
222
