In [1]:
from ASML_REG_BASE import AutoStreamRegressorBase

In [2]:
import psutil
import time
import json

In [3]:
from capymoa.stream import stream_from_file
from capymoa.evaluation import RegressionEvaluator, RegressionWindowedEvaluator

In [4]:
from river import metrics

In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
# Names of the regression datasets available for this experiment. Each name is
# used to build an ARFF path of the form "RDatasets/<name>.arff".
datasets = [
    "ailerons", "elevators", "fried", "hyperA",
    "FriedmanGsg", "FriedmanGra", "FriedmanLea",
    "kin8nm", "abalone", "bike", "House8L",
    "MetroTraffic", "cpu_activity", "white_wine",
]

In [7]:
# Dataset evaluated in this run; change this single value to switch datasets.
dataset_name = "bike"

In [8]:
# Open the selected dataset's ARFF file as a stream of instances.
arff_path = f"RDatasets/{dataset_name}.arff"
stream = stream_from_file(arff_path)

In [9]:
# Both evaluators are built from the stream's schema: one tracks metrics over
# the whole run, the other reports them per window of 1000 instances.
stream_schema = stream.get_schema()
evaluation_window = 1000
regressionEvaluator = RegressionEvaluator(schema=stream_schema)
regressionWindowedEvaluator = RegressionWindowedEvaluator(
    schema=stream_schema,
    window_size=evaluation_window,
)

In [10]:
# Settings for the AutoML stream regressor, gathered in one place so they are
# easy to tweak between runs.
asr_settings = {
    "config_dict": None,            # config_dict
    "exploration_window": 1000,     # window size
    "prediction_mode": "ensemble",  # 'best' or 'ensemble'; use 'best' for best-model prediction
    "budget": 10,                   # how many pipelines run concurrently
    "ensemble_size": 3,             # ensemble size
    "metric": metrics.RMSE(),       # online metric
    "feature_selection": True,
    "verbose": False,
    "seed": 42,                     # random/fixed seed
}
ASR = AutoStreamRegressorBase(**asr_settings)

In [11]:
# Show how many entries ASR.pipeline_list holds right after construction.
len(ASR.pipeline_list)

16

In [12]:
%%time
t=0
times = []
memories = []
#track_pipes = []
while stream.has_more_instances():
    instance = stream.next_instance()
    x = dict(enumerate(instance.x))
    mem_before = psutil.Process().memory_info().rss # Recording Memory
    start = time.time()  # Recording Time
    #print(f"true: {instance.y_value}")
    prediction = ASR.predict_one(x)
    #print(f"y_true: {instance.y_value}, y_pred: {prediction}")
    regressionEvaluator.update(instance.y_value, prediction)
    regressionWindowedEvaluator.update(instance.y_value, prediction)
    ASR.learn_one(x, instance.y_value)
    end = time.time()
    mem_after = psutil.Process().memory_info().rss
    iteration_mem = mem_after - mem_before
    memories.append(iteration_mem)
    iteration_time = end - start
    times.append(iteration_time)
    t+=1
    print(f"Running Instance....{t}",end='\r')
    if t%1000==0:
        print(f"Running Instance **{t}**")
        print(f"R2 score - {round(regressionEvaluator.R2(),5)}")
        print(f"RMSE score - {round(regressionEvaluator.RMSE(),5)}")
        print("-"*40)

Running Instance **1000**
R2 score - 0.44975
RMSE score - 37.8863
----------------------------------------
Running Instance **2000**
R2 score - 0.65053
RMSE score - 39.24978
----------------------------------------
Running Instance **3000**
R2 score - 0.74378
RMSE score - 48.57044
----------------------------------------
Running Instance **4000**
R2 score - 0.81092
RMSE score - 51.7936
----------------------------------------
Running Instance **5000**
R2 score - 0.8433
RMSE score - 51.24449
----------------------------------------
Running Instance **6000**
R2 score - 0.83994
RMSE score - 53.67395
----------------------------------------
Running Instance **7000**
R2 score - 0.83999
RMSE score - 54.8196
----------------------------------------
Running Instance **8000**
R2 score - 0.83425
RMSE score - 55.22937
----------------------------------------
Running Instance **9000**
R2 score - 0.82698
RMSE score - 55.39695
----------------------------------------
Running Instance **10000**
R2 sc

In [13]:
# Display the metrics accumulated by regressionEvaluator during the loop above.
regressionEvaluator.metrics_dict()

{'classified instances': 17379.0,
 'mean absolute error': 54.36309123247082,
 'root mean squared error': 86.803026158296,
 'relative mean absolute error': 0.4124560273397291,
 'relative root mean squared error': 0.47852998686268905,
 'coefficient of determination': 0.7710090516731947,
 'adjusted coefficient of determination': 0.7708376216514125}

In [14]:
# Display the per-window metrics table (the evaluator was created with
# window_size=1000).
regressionWindowedEvaluator.metrics_per_window()

Unnamed: 0,classified instances,mean absolute error,root mean squared error,relative mean absolute error,relative root mean squared error,coefficient of determination,adjusted coefficient of determination
0,1000.0,25.740442,37.886304,0.658189,0.741347,0.450404,0.443158
1,1000.0,27.14474,40.567446,0.457,0.524039,0.725383,0.721762
2,1000.0,42.522939,63.215302,0.444856,0.494471,0.755498,0.752274
3,1000.0,42.479638,60.44043,0.349589,0.400597,0.839522,0.837406
4,1000.0,33.684463,48.986514,0.272355,0.326452,0.893429,0.892024
5,1000.0,41.505354,64.462177,0.340245,0.431438,0.813862,0.811407
6,1000.0,41.105666,61.245046,0.342536,0.410545,0.831453,0.82923
7,1000.0,39.816679,58.016786,0.378828,0.456785,0.791347,0.788596
8,1000.0,37.867604,56.719777,0.432279,0.522363,0.727136,0.723539
9,1000.0,56.760001,81.657479,0.564713,0.623819,0.61085,0.60572


In [15]:
# Collect everything produced by this run into a single dictionary: the model
# and dataset labels, the overall metrics, the per-window metrics (as
# column -> list of values), and the per-instance time/memory measurements.
overall_scores = regressionEvaluator.metrics_dict()
window_scores = regressionWindowedEvaluator.metrics_per_window().to_dict(orient='list')

save_record = {}
save_record["model"] = 'ASML_REG_BASE'
save_record["dataset"] = dataset_name
save_record["regressionEvaluator"] = overall_scores
save_record["windows_scores"] = window_scores
save_record["time"] = times
save_record["memory"] = memories

In [16]:
# Build the output file name from the model and dataset labels in save_record,
# i.e. "<model>_<dataset>.json".
file_name = f"{save_record['model']}_{save_record['dataset']}.json"

In [55]:
# Display the generated file name as a sanity check.
file_name

'ASML_REG_BASE_bike.json'