In [1]:
from river import ensemble, preprocessing

In [2]:
import json
import os
import time

import psutil

In [3]:
from capymoa.stream import stream_from_file
from capymoa.evaluation import RegressionEvaluator, RegressionWindowedEvaluator

In [4]:
from ASML_MOA.moa_models import ARFRegressor,MoaSOKNL

In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
# Names of the regression benchmark datasets (presumably each one maps to
# RDatasets/<name>.arff — confirm against the data folder).
# Every entry needs its own comma: adjacent string literals are silently
# concatenated by Python, which previously merged 'kin8nm' and 'abalone'
# into the single bogus entry 'kin8nmabalone'.
datasets = [
    'ailerons',
    'elevators',
    'fried',
    'hyperA',
    'kin8nm',
    'abalone',
    'bike',
    'House8L',
    'MetroTraffic',
]

In [7]:
# Dataset evaluated in this run; it selects the input .arff file and is
# embedded in the saved results record and file name.
dataset_name = "bike"

In [8]:
# Open the selected dataset's ARFF file as a stream of instances.
arff_path = f"RDatasets/{dataset_name}.arff"
stream = stream_from_file(arff_path)

In [9]:
# Two evaluators built from the same stream schema: one accumulates metrics
# over the whole stream, the other reports them per window of 1000 instances.
stream_schema = stream.get_schema()
regressionEvaluator = RegressionEvaluator(schema=stream_schema)
regressionWindowedEvaluator = RegressionWindowedEvaluator(
    schema=stream_schema,
    window_size=1000,
)

In [11]:
# Model under test: min-max feature scaling piped into River's adaptive
# random forest regressor (fixed seed for repeatable runs).
feature_scaler = preprocessing.MinMaxScaler()
forest_regressor = ensemble.AdaptiveRandomForestRegressor(seed=42)
ARFR = feature_scaler | forest_regressor
# Alternative models imported from ASML_MOA.moa_models; uncomment one to swap it in.
#ARFR = preprocessing.MinMaxScaler()  | ARFRegressor(random_seed=42,schema=stream.get_schema())
#ARFR = preprocessing.MinMaxScaler()  | MoaSOKNL(random_seed=42,schema=stream.get_schema())

In [12]:
# Show the assembled pipeline as the cell output.
ARFR

In [13]:
# Test-then-train evaluation over the stream: for every instance the model
# predicts first, both evaluators score the prediction, and only then does the
# model learn from the true target.
t = 0           # number of instances processed so far
times = []      # per-instance elapsed seconds
memories = []   # per-instance change in resident set size (RSS)
# One handle to the current process, created once instead of twice per instance.
process = psutil.Process()
while stream.has_more_instances():
    instance = stream.next_instance()
    # Key each feature value by its position so the model receives a dict.
    x = dict(enumerate(instance.x))
    mem_before = process.memory_info().rss  # Recording Memory
    # perf_counter() is monotonic and high-resolution; time.time() is a wall
    # clock that can jump and is too coarse for per-instance intervals.
    start = time.perf_counter()  # Recording Time
    prediction = ARFR.predict_one(x)
    # NOTE(review): the evaluator updates sit inside the timed region, so the
    # recorded time covers predict + scoring + learn — confirm this is intended.
    regressionEvaluator.update(instance.y_value, prediction)
    regressionWindowedEvaluator.update(instance.y_value, prediction)
    ARFR.learn_one(x, instance.y_value)
    end = time.perf_counter()
    mem_after = process.memory_info().rss
    iteration_mem = mem_after - mem_before
    memories.append(iteration_mem)
    iteration_time = end - start
    times.append(iteration_time)
    t += 1
    # Progress report with the cumulative scores every 1000 instances.
    if t % 1000 == 0:
        print(f"Running Instance **{t}**")
        print(f"R2 score - {round(regressionEvaluator.R2(),3)}")
        print(f"RMSE score - {round(regressionEvaluator.RMSE(),3)}")
        print("-"*40)

Running Instance **1000**
R2 score - 0.309
RMSE score - 42.455
----------------------------------------
Running Instance **2000**
R2 score - 0.401
RMSE score - 51.39
----------------------------------------
Running Instance **3000**
R2 score - 0.529
RMSE score - 65.843
----------------------------------------
Running Instance **4000**
R2 score - 0.569
RMSE score - 78.228
----------------------------------------
Running Instance **5000**
R2 score - 0.61
RMSE score - 80.827
----------------------------------------
Running Instance **6000**
R2 score - 0.632
RMSE score - 81.347
----------------------------------------
Running Instance **7000**
R2 score - 0.649
RMSE score - 81.224
----------------------------------------
Running Instance **8000**
R2 score - 0.655
RMSE score - 79.654
----------------------------------------
Running Instance **9000**
R2 score - 0.658
RMSE score - 77.904
----------------------------------------
Running Instance **10000**
R2 score - 0.663
RMSE score - 77.097
--

In [14]:
regressionEvaluator.metrics_dict()

{'classified instances': 17379.0,
 'mean absolute error': 68.76873839880619,
 'root mean squared error': 101.5172074339205,
 'relative mean absolute error': 0.521752534708603,
 'relative root mean squared error': 0.5596467092184207,
 'coefficient of determination': 0.6867955608609924,
 'adjusted coefficient of determination': 0.6865610858993565}

In [35]:
regressionWindowedEvaluator.metrics_per_window()

Unnamed: 0,classified instances,mean absolute error,root mean squared error,relative mean absolute error,relative root mean squared error,coefficient of determination,adjusted coefficient of determination
0,1000.0,30.857347,42.500294,0.789029,0.831632,0.308388,0.299269
1,1000.0,44.121192,59.61527,0.742809,0.770094,0.406956,0.399137
2,1000.0,71.315743,98.210087,0.746073,0.768201,0.409867,0.402086
3,1000.0,77.252283,102.847035,0.635753,0.681667,0.53533,0.529203
4,1000.0,60.640473,84.335261,0.490307,0.56202,0.684134,0.679969
5,1000.0,53.161934,76.885261,0.435801,0.514584,0.735203,0.731712
6,1000.0,63.476618,89.291674,0.528955,0.598551,0.641737,0.637014
7,1000.0,54.763025,75.27732,0.521032,0.592683,0.648727,0.644095
8,1000.0,45.401301,64.766701,0.51828,0.596472,0.644221,0.63953
9,1000.0,57.271817,83.102705,0.569805,0.634859,0.596954,0.59164


In [None]:
# Gather the run's results into the single record that is later dumped to JSON.
# NOTE: the model label is hard-coded; update it by hand if a different model
# is assigned to ARFR.
overall_metrics = regressionEvaluator.metrics_dict()
window_metrics = regressionWindowedEvaluator.metrics_per_window().to_dict(orient='list')
save_record = dict(
    model='ARFR',
    dataset=dataset_name,
    regressionEvaluator=overall_metrics,
    windows_scores=window_metrics,
    time=times,
    memory=memories,
)

In [None]:
# Results file is named "<model>_<dataset>.json".
file_name = "{model}_{dataset}.json".format(
    model=save_record["model"],
    dataset=save_record["dataset"],
)

In [None]:
# Show the generated file name as the cell output.
file_name

In [None]:
# Store the results record as a JSON file at <output_dir>/<file_name>.
output_dir = "TEMP"  # change to "saved_results_json" for the final run
# Create the target directory up front: open() raises FileNotFoundError when
# the directory is missing, which would lose the results of a long run.
os.makedirs(output_dir, exist_ok=True)
with open(os.path.join(output_dir, file_name), 'w') as json_file:
    json.dump(save_record, json_file)