In [1]:
from ASML_REG import AutoStreamRegressor

In [2]:
import psutil
import time
import json

In [3]:
from capymoa.stream import stream_from_file
from capymoa.evaluation import RegressionEvaluator, RegressionWindowedEvaluator

In [4]:
from river import metrics

In [5]:
import warnings
# Silence every Python warning for the rest of the session: the "ignore" action
# is installed with no category/module restriction, so nothing is shown.
warnings.filterwarnings("ignore")

In [6]:
# Names of the regression datasets used in the experiments.
# NOTE(review): presumably each name maps to "RDatasets/<name>.arff" -- confirm.
datasets = [
    'ailerons', 'elevators', 'fried', 'hyperA',
    'FriedmanGsg', 'FriedmanGra', 'FriedmanLea',
    'kin8nm', 'abalone', 'bike', 'House8L',
    'MetroTraffic', 'cpu_activity', 'white_wine',
]

In [7]:
# Dataset evaluated in this run; used to build the input path and the output file name.
dataset_name = "abalone"

In [8]:
# Open the selected dataset's ARFF file as a CapyMOA stream.
arff_path = f"RDatasets/{dataset_name}.arff"
stream = stream_from_file(arff_path)

In [9]:
# Two evaluators built from the stream's schema: one accumulates metrics over
# the whole run, the other reports them per window of 1000 instances.
schema = stream.get_schema()
regressionEvaluator = RegressionEvaluator(schema=schema)
regressionWindowedEvaluator = RegressionWindowedEvaluator(schema=schema, window_size=1000)

In [10]:
# Build the AutoStreamRegressor used for the whole run.
ASR = AutoStreamRegressor(
    config_dict=None,            # no custom config_dict is supplied
    exploration_window=1000,     # window size
    prediction_mode="ensemble",  # "ensemble" or "best"; use "best" for the best model's prediction
    budget=10,                   # how many pipelines run concurrently
    ensemble_size=5,             # ensemble size
    metric=metrics.RMSE(),       # online metric
    feature_selection=True,
    aggregation_method='mean',   # 'mean' or 'median'
    verbose=False,
    seed=42,                     # random/fixed seed
)

In [11]:
# Number of entries in ASR.pipeline_list right after construction.
len(ASR.pipeline_list)

16

In [12]:
%%time
t=0
times = []
memories = []
#track_pipes = []
while stream.has_more_instances():
    instance = stream.next_instance()
    x = dict(enumerate(instance.x))
    mem_before = psutil.Process().memory_info().rss # Recording Memory
    start = time.time()  # Recording Time
    #print(f"true: {instance.y_value}")
    prediction = ASR.predict_one(x)
    #print(f"y_true: {instance.y_value}, y_pred: {prediction}")
    regressionEvaluator.update(instance.y_value, prediction)
    regressionWindowedEvaluator.update(instance.y_value, prediction)
    ASR.learn_one(x, instance.y_value)
    end = time.time()
    mem_after = psutil.Process().memory_info().rss
    iteration_mem = mem_after - mem_before
    memories.append(iteration_mem)
    iteration_time = end - start
    times.append(iteration_time)
    t+=1
    print(f"Running Instance....{t}",end='\r')
    if t%1000==0:
        print(f"Running Instance **{t}**")
        print(f"R2 score - {round(regressionEvaluator.R2(),5)}")
        print(f"RMSE score - {round(regressionEvaluator.RMSE(),5)}")
        print("-"*40)

Running Instance **1000**
R2 score - 0.75645
RMSE score - 1.86395
----------------------------------------
Running Instance **2000**
R2 score - 0.64079
RMSE score - 2.22304
----------------------------------------
Running Instance **3000**
R2 score - 0.62416
RMSE score - 2.14489
----------------------------------------
Running Instance **4000**
R2 score - 0.61431
RMSE score - 2.10512
----------------------------------------
CPU times: user 4min 22s, sys: 3.2 s, total: 4min 25s
Wall time: 5min 30s


In [13]:
# Metrics accumulated by regressionEvaluator over all update() calls in the loop above.
regressionEvaluator.metrics_dict()

{'classified instances': 4977.0,
 'mean absolute error': 1.4723384211230481,
 'root mean squared error': 2.1048947925747306,
 'relative mean absolute error': 0.5802732355395288,
 'relative root mean squared error': 0.6310772782503957,
 'coefficient of determination': 0.6017414688760725,
 'adjusted coefficient of determination': 0.6010198407745796}

In [14]:
# Metrics per evaluation window (the evaluator was created with window_size=1000).
regressionWindowedEvaluator.metrics_per_window()

Unnamed: 0,classified instances,mean absolute error,root mean squared error,relative mean absolute error,relative root mean squared error,coefficient of determination,adjusted coefficient of determination
0,1000.0,1.280362,1.863951,0.43528,0.493997,0.755967,0.753749
1,1000.0,1.830959,2.531704,0.650878,0.699065,0.511309,0.506866
2,1000.0,1.373937,1.979341,0.627352,0.646168,0.582466,0.578671
3,1000.0,1.424874,1.981018,0.628206,0.648783,0.579081,0.575254


In [15]:
# saving results in dict
save_record = {
    "model": 'ASML_REG',
    "dataset": dataset_name,
    "regressionEvaluator": regressionEvaluator.metrics_dict(),
    "windows_scores": regressionWindowedEvaluator.metrics_per_window().to_dict(orient='list'),
    "time": times,
    "memory": memories
}

In [16]:
# Output file name of the form "<model>_<dataset>.json", built from save_record's fields.
file_name = f"{save_record['model']}_{save_record['dataset']}.json"

In [17]:
# Show the generated file name for a quick visual check before writing.
file_name

'ASML_REG_abalone.json'

In [56]:
# Store the result record in a JSON file.
# The output directory is created first: without it, open() raises
# FileNotFoundError and the results of the (minutes-long) run are not saved.
from pathlib import Path

output_dir = Path("TEMP")  # change TEMP to saved_results_json for the final run
output_dir.mkdir(parents=True, exist_ok=True)

with open(output_dir / file_name, 'w') as json_file:
    json.dump(save_record, json_file)