In [47]:
#!pip install flaml[notebook,vw]==1.1.2

In [1]:
from flaml import AutoVW
from flaml.tune import loguniform, choice

In [2]:
import psutil
import time
import json

In [3]:
from capymoa.stream import stream_from_file
from capymoa.evaluation import RegressionEvaluator, RegressionWindowedEvaluator

In [4]:
from moa.streams.filters import NormalisationFilter
from moa.streams import FilteredStream
from capymoa.stream import Stream

In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
datasets = [
    'ailerons',
    'elevators',
    'fried',
    'hyperA',
    'abalone',
    'bike',
    'House8L',
    'MetroTraffic'
]

In [14]:
dataset_name = 'cpu_activity'

In [26]:
stream = stream_from_file(f"RDatasets/{dataset_name}.arff")

In [27]:
regressionEvaluator = RegressionEvaluator(schema=stream.get_schema())
regressionWindowedEvaluator = RegressionWindowedEvaluator(schema=stream.get_schema(),window_size=1000)

In [28]:
""" create an AutoVW instance for tuning namespace interactions and learning rate"""
# set up the search space and init config
search_space_nilr = {
    "interactions": AutoVW.AUTOMATIC,
    "learning_rate": loguniform(lower=2e-10, upper=1.0),
}
init_config_nilr = {"interactions": set(), "learning_rate": 0.5}
# create an AutoVW instance
autovw = AutoVW(
    max_live_model_num=5,
    search_space=search_space_nilr,
    init_config=init_config_nilr,
    random_seed=42,
)

In [29]:
def to_vw_format(instance):
    res = f"{instance.y_value} |"
    for idx, value in enumerate(instance.x):
        res += f" {idx}:{value}"
    return res

In [30]:
t=0
times = []
memories = []
while stream.has_more_instances():
    instance = stream.next_instance()
    vw_instance = to_vw_format(instance)
    mem_before = psutil.Process().memory_info().rss # Recording Memory
    start = time.time()  # Recording Time
    prediction = autovw.predict(vw_instance)
    #print(f"y_true: {instance.y_value}, y_pred: {prediction}")
    regressionEvaluator.update(instance.y_value, prediction)
    regressionWindowedEvaluator.update(instance.y_value, prediction)
    autovw.learn(vw_instance)
    end = time.time()
    mem_after = psutil.Process().memory_info().rss
    iteration_mem = mem_after - mem_before
    memories.append(iteration_mem)
    iteration_time = end - start
    times.append(iteration_time)
    t+=1
    if t%1000==0:
        print(f"Running Instance **{t}**")
        print(f"R2 score - {round(regressionEvaluator.R2(),3)}")
        print(f"RMSE score - {round(regressionEvaluator.RMSE(),3)}")
        print("-"*40)

Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
using no cache
Reading datafile = 
num sources = 1
Enabled reductions: gd, scorer
average  since         example        example  current  current  current
loss     last          counter         weight    label  predict features
8100.000000 8100.000000            1            1.0  90.0000   0.0000       17
Num weight bits = 18
learning rate = 1
initial_t = 0
power_t = 0.5
using no cache
Reading datafile = 
num sources = 1
Enabled reductions: gd, scorer
average  since         example        example  current  current  current
loss     last          counter         weight    label  predict features
8100.000000 8100.000000            1            1.0  90.0000   0.0000       17
8100.000000 8100.000000            2            2.0  90.0000   0.0000       17
Num weight bits = 18
learning rate = 0.0535887
initial_t = 0
power_t = 0.5
using no cache
Reading datafile = 
num sources = 1
Enabled reductions: gd, scorer
average  since

Running Instance **1000**
R2 score - -6.064
RMSE score - 49.111
----------------------------------------


7096.871394 6873.161472          512          512.0  72.0000   7.0787       21
4845.709784 4081.767401         2048         2048.0  95.0000  20.8180       21
3811.127168 2880.273162         2048         2048.0  95.0000  36.7868       15
7074.156086 6945.117343         1024         1024.0  96.0000   2.7302       21
6777.997677 6459.123960         1024         1024.0  94.0000   5.2982       14

finished run
number of examples = 1680
weighted example sum = 1680.000000
weighted label sum = 141396.000000
average loss = 6965.160620
best constant = 84.164284
total feature number = 30542
Num weight bits = 18
learning rate = 0.0535887
initial_t = 0
power_t = 0.5
using no cache
Reading datafile = 
num sources = 1
Enabled reductions: gd, scorer
average  since         example        example  current  current  current
loss     last          counter         weight    label  predict features
5041.000000 5041.000000            1            1.0  71.0000   0.0000       22
5041.000000 5041.000000        

Running Instance **2000**
R2 score - -3.481
RMSE score - 40.223
----------------------------------------


1545.636869 814.628180         4096         4096.0  88.0000  69.4337       21
6883.726797 6741.184422         1024         1024.0  91.0000   2.9147       21
6521.640810 6216.647139         1024         1024.0   0.0000   4.3576       22
3913.629514 2981.549244         4096         4096.0  59.0000  74.0537       22
2856.218328 1901.309488         4096         4096.0  79.0000  71.5396       16
6758.016385 6632.305972         2048         2048.0  95.0000   3.8337       15
6335.595462 6149.550115         2048         2048.0  98.0000   8.4420       15


Running Instance **3000**
R2 score - -2.326
RMSE score - 35.62
----------------------------------------



finished run
number of examples = 3360
weighted example sum = 3360.000000
weighted label sum = 279264.000000
average loss = 6672.394569
best constant = 83.114288
total feature number = 61064
Num weight bits = 18
learning rate = 0.0535887
initial_t = 0
power_t = 0.5
using no cache
Reading datafile = 
num sources = 1
Enabled reductions: gd, scorer
average  since         example        example  current  current  current
loss     last          counter         weight    label  predict features
6241.000000 6241.000000            1            1.0  79.0000   0.0000       17
6241.000000 6241.000000            2            2.0  79.0000   0.0000       17
6983.256592 7725.513184            4            4.0  88.0000   0.1051       17
7235.000854 7486.745117            8            8.0  88.0000   0.5610       21
6982.276001 6729.551147           16           16.0  92.0000   0.3137       15
6860.559709 6738.843416           32           32.0  90.0000   0.3595       17
6535.018776 6209.477844        

Running Instance **4000**
R2 score - -1.787
RMSE score - 32.628
----------------------------------------


1021.987187 498.337505         8192         8192.0  95.0000  80.8110       16
6775.107223 6514.294622         2048         2048.0  94.0000   4.3587       15
2968.902466 2024.175418         8192         8192.0  98.0000  46.5371       14
2007.897933 1159.577537         8192         8192.0  97.0000  66.5580       17
6332.985021 5979.830509         2048         2048.0  98.0000   7.4729       17


Running Instance **5000**
R2 score - -1.555
RMSE score - 30.354
----------------------------------------


6650.698805 6526.290387         4096         4096.0  78.0000   6.6037       16
5978.682726 5624.380431         4096         4096.0  97.0000  12.5220       15


Running Instance **6000**
R2 score - -1.358
RMSE score - 28.616
----------------------------------------



finished run
number of examples = 6720
weighted example sum = 6720.000000
weighted label sum = 564024.000000
average loss = 6449.256802
best constant = 83.932144
total feature number = 121942
Num weight bits = 18
learning rate = 0.0535887
initial_t = 0
power_t = 0.5
using no cache
Reading datafile = 
num sources = 1
Enabled reductions: gd, scorer
average  since         example        example  current  current  current
loss     last          counter         weight    label  predict features
6084.000000 6084.000000            1            1.0  78.0000   0.0000       17
6084.000000 6084.000000            2            2.0  78.0000   0.0000       17
6459.224121 6834.448242            4            4.0  83.0000   0.3293       20
6879.589233 7299.954346            8            8.0  93.0000   0.2063       20
6466.521240 6053.453247           16           16.0  69.0000   0.6828       16
6966.465576 7466.409912           32           32.0  90.0000   0.4139       20
6953.443100 6940.420624       

Running Instance **7000**
R2 score - -1.187
RMSE score - 27.374
----------------------------------------


7172.751731 7113.295775         1024         1024.0  69.0000   4.1728       22
6898.593459 6639.992338         1024         1024.0  88.0000   4.8458       16
6969.351078 6765.950424         2048         2048.0  75.0000   5.6853       20
6518.838620 6139.083781         2048         2048.0  93.0000   6.8518       15


Running Instance **8000**
R2 score - -1.035
RMSE score - 26.343
----------------------------------------


670.134155 318.281122        16384        16384.0  80.0000  90.6320       16


In [31]:
regressionEvaluator.metrics_dict()

{'classified instances': 8192.0,
 'mean absolute error': 20.410048337478656,
 'root mean squared error': 26.13808919974477,
 'relative mean absolute error': 1.9161065168785383,
 'relative root mean squared error': 1.4178075853784249,
 'coefficient of determination': -1.0101783491565999,
 'adjusted coefficient of determination': -1.0155919767342039}

In [21]:
regressionWindowedEvaluator.metrics_per_window()

Unnamed: 0,classified instances,mean absolute error,root mean squared error,relative mean absolute error,relative root mean squared error,coefficient of determination,adjusted coefficient of determination
0,1000.0,60.280526,62.974469,5.704752,3.388483,-10.481819,-10.740365
1,1000.0,41.24351,44.392456,3.669117,2.275832,-4.179413,-4.296042
2,1000.0,33.293499,36.391224,2.764178,1.769571,-2.131382,-2.201895
3,1000.0,29.055709,31.936624,2.533214,1.630984,-1.660107,-1.720007
4,1000.0,25.339237,28.474322,2.563036,1.713768,-1.937002,-2.003137
5,1000.0,23.008847,26.245399,2.389275,1.567187,-1.456076,-1.511382
6,1000.0,23.560151,26.676317,2.29851,1.503917,-1.261767,-1.312697
7,1000.0,22.634382,25.608511,2.117299,1.409272,-0.986047,-1.030769


In [15]:
# saving results in dict
save_record = {
    "model": 'CHACHA',
    "dataset": dataset_name,
    "regressionEvaluator": regressionEvaluator.metrics_dict(),
    "windows_scores": regressionWindowedEvaluator.metrics_per_window().to_dict(orient='list'),
    "time": times,
    "memory": memories
}

In [17]:
save_record.keys

<function dict.keys>

In [43]:
file_name = f"{save_record['model']}_{save_record['dataset']}.json"

In [44]:
file_name

'CHACHA_elevators.json'

In [45]:
# To store the dictionary in a JSON file
with open(f"TEMP/{file_name}", 'w') as json_file:  # change temp to  saved_results_json for final run
    json.dump(save_record, json_file)