In [1]:
from river import tree,preprocessing

In [2]:
import json
import time
from pathlib import Path

import psutil

In [3]:
from capymoa.stream import stream_from_file
from capymoa.evaluation import RegressionEvaluator, RegressionWindowedEvaluator

In [4]:
import warnings
# Silence every warning for the rest of the kernel session.
# NOTE(review): this blanket filter also hides deprecation and numerical
# warnings raised by the libraries used below — consider narrowing it.
warnings.filterwarnings("ignore")

In [5]:
# Names of the regression datasets handled by this notebook; one of them is
# picked through `dataset_name` for each run.
datasets = [
    "ailerons", "elevators", "fried", "hyperA",
    "abalone", "bike", "House8L", "MetroTraffic",
]

In [36]:
# Dataset evaluated in this run; it is used to build the input path and the
# name of the saved results file.
dataset_name = "ailerons"

In [37]:
# Open the ARFF file of the selected dataset as a stream of instances.
stream = stream_from_file("RDatasets/{}.arff".format(dataset_name))

In [38]:
# Two evaluators receive the same predictions: one over the whole stream and
# one that reports metrics per window of 1000 instances.
regressionEvaluator = RegressionEvaluator(
    schema=stream.get_schema(),
)
regressionWindowedEvaluator = RegressionWindowedEvaluator(
    schema=stream.get_schema(),
    window_size=1000,
)

In [39]:
# Model under test: min-max scaling piped into a Hoeffding Adaptive Tree
# regressor with a fixed seed.
HATR = (
    preprocessing.MinMaxScaler()
    | tree.HoeffdingAdaptiveTreeRegressor(seed=42)
)

In [40]:
# Test-then-train loop over the stream: each instance is first predicted and
# scored by both evaluators, and only afterwards used to update the model.
REPORT_EVERY = 1000  # print running scores after this many instances

# One process handle for the whole run instead of two new ones per instance.
process = psutil.Process()

t = 0
times = []     # seconds per instance: predict + both evaluator updates + learn
memories = []  # RSS after minus RSS before, per instance
while stream.has_more_instances():
    instance = stream.next_instance()
    # Turn the feature vector into an {index: value} dict before handing it to HATR.
    x = dict(enumerate(instance.x))
    mem_before = process.memory_info().rss  # Recording Memory
    # perf_counter() is monotonic and high-resolution; time.time() is a wall
    # clock that can be too coarse (or jump) for per-instance durations.
    start = time.perf_counter()  # Recording Time
    prediction = HATR.predict_one(x)
    regressionEvaluator.update(instance.y_value, prediction)
    regressionWindowedEvaluator.update(instance.y_value, prediction)
    HATR.learn_one(x, instance.y_value)
    end = time.perf_counter()
    mem_after = process.memory_info().rss
    iteration_mem = mem_after - mem_before
    memories.append(iteration_mem)
    iteration_time = end - start
    times.append(iteration_time)
    t += 1
    if t % REPORT_EVERY == 0:
        print(f"Running Instance **{t}**")
        print(f"R2 score - {round(regressionEvaluator.R2(),3)}")
        print(f"RMSE score - {round(regressionEvaluator.RMSE(),3)}")
        print("-"*40)

Running Instance **1000**
R2 score - 0.6
RMSE score - 0.056
----------------------------------------
Running Instance **2000**
R2 score - 0.685
RMSE score - 0.061
----------------------------------------
Running Instance **3000**
R2 score - 0.725
RMSE score - 0.058
----------------------------------------
Running Instance **4000**
R2 score - 0.74
RMSE score - 0.058
----------------------------------------
Running Instance **5000**
R2 score - 0.759
RMSE score - 0.057
----------------------------------------
Running Instance **6000**
R2 score - 0.773
RMSE score - 0.054
----------------------------------------
Running Instance **7000**
R2 score - 0.769
RMSE score - 0.055
----------------------------------------
Running Instance **8000**
R2 score - 0.775
RMSE score - 0.056
----------------------------------------
Running Instance **9000**
R2 score - 0.767
RMSE score - 0.056
----------------------------------------
Running Instance **10000**
R2 score - 0.769
RMSE score - 0.054
-------------

In [41]:
# Display the metrics accumulated by the non-windowed evaluator.
regressionEvaluator.metrics_dict()

{'classified instances': 13750.0,
 'mean absolute error': 0.038831812197572056,
 'root mean squared error': 0.05349872403908303,
 'relative mean absolute error': 0.4483914036858179,
 'relative root mean squared error': 0.472134555702693,
 'coefficient of determination': 0.7770889613114207,
 'adjusted coefficient of determination': 0.7764222446068517}

In [12]:
# Display the per-window metrics table of the windowed evaluator.
regressionWindowedEvaluator.metrics_per_window()

Unnamed: 0,classified instances,mean absolute error,root mean squared error,relative mean absolute error,relative root mean squared error,coefficient of determination,adjusted coefficient of determination
0,1000.0,0.001995,0.004166,0.535179,0.65714,0.568167,0.559795
1,1000.0,0.001795,0.00362,0.420919,0.546904,0.700896,0.695097
2,1000.0,0.003014,0.005864,0.490466,0.646576,0.581939,0.573834
3,1000.0,0.001641,0.003305,0.318483,0.492845,0.757103,0.752394
4,1000.0,0.002318,0.003884,0.502485,0.478946,0.77061,0.766163
5,1000.0,0.002204,0.003745,0.430003,0.506791,0.743163,0.738183
6,1000.0,0.001387,0.002063,0.423729,0.475464,0.773934,0.769551
7,1000.0,0.002091,0.003546,0.496917,0.517966,0.731712,0.72651
8,1000.0,0.002249,0.00343,0.495672,0.580899,0.662557,0.656014
9,1000.0,0.002208,0.00356,0.477463,0.572172,0.67262,0.666272


In [13]:
# Collect everything produced by this run in a single record: model and
# dataset labels, overall metrics, per-window metrics (as column -> list),
# and the per-instance time and memory measurements.
save_record = dict(
    model='HATR',
    dataset=dataset_name,
    regressionEvaluator=regressionEvaluator.metrics_dict(),
    windows_scores=regressionWindowedEvaluator.metrics_per_window().to_dict(orient='list'),
    time=times,
    memory=memories,
)

In [14]:
# Output file name: "<model>_<dataset>.json", taken from the record itself.
file_name = "{model}_{dataset}.json".format_map(save_record)

In [15]:
# Echo the file name to check model and dataset before writing.
file_name

'HATR_elevators.json'

In [None]:
# To store the dictionary in a JSON file.
# The output directory is created on demand so that a missing folder cannot
# raise FileNotFoundError and discard the results of a finished run.
output_dir = Path("TEMP")  # change temp to  saved_results_json for final run
output_dir.mkdir(parents=True, exist_ok=True)
with open(output_dir / file_name, 'w', encoding="utf-8") as json_file:
    json.dump(save_record, json_file)

# Classified instances update: replace per-window counts with cumulative totals

In [46]:
# Grab the per-window metrics table for post-processing.
# NOTE(review): if metrics_per_window() returns the evaluator's own frame
# rather than a copy, the column overwrite on `rwe` further down also changes
# the evaluator's data — confirm.
rwe = regressionWindowedEvaluator.metrics_per_window()

In [47]:
# Cumulative instance totals, one per row of `rwe`: 1000, 2000, 3000, ...
c = [1000 * (row + 1) for row in range(len(rwe['classified instances']))]
# `i` holds the last cumulative total (0 when there are no windows).
i = c[-1] if c else 0

In [48]:
# Overwrite the per-window count column with the cumulative totals held in `c`.
rwe['classified instances'] = c

In [49]:
# Display the windowed metrics table after the 'classified instances' column was rewritten.
rwe

Unnamed: 0,classified instances,mean absolute error,root mean squared error,relative mean absolute error,relative root mean squared error,coefficient of determination,adjusted coefficient of determination
0,1000,0.037106,0.056032,0.61328,0.630788,0.602107,0.585078
1,2000,0.047293,0.065434,0.537896,0.540653,0.707694,0.695184
2,3000,0.037823,0.051743,0.380549,0.430168,0.814955,0.807036
3,4000,0.043036,0.058583,0.439499,0.446557,0.800587,0.792053
4,5000,0.0364,0.051148,0.385506,0.418449,0.8249,0.817407
5,6000,0.026731,0.035376,0.351882,0.388513,0.849058,0.842598
6,7000,0.049073,0.063706,0.501329,0.483045,0.766667,0.756681
7,8000,0.040931,0.057376,0.413571,0.456762,0.791368,0.78244
8,9000,0.040363,0.054635,0.503514,0.561557,0.684654,0.671158
9,10000,0.031572,0.043237,0.437042,0.466682,0.782208,0.772887
