In [1]:
from capymoa.regressor import SOKNL

In [9]:
from moa.streams.filters import StandardisationFilter
from capymoa.stream.preprocessing import RegressorPipeline
from capymoa.stream.preprocessing import MOATransformer

In [10]:
import json
import os
import time

import psutil

In [11]:
from capymoa.stream import stream_from_file
from capymoa.evaluation import RegressionEvaluator, RegressionWindowedEvaluator

In [12]:
import warnings
# Installs a blanket "ignore" filter: no category or module is given, so every
# Python warning raised after this cell is hidden from the notebook output.
# NOTE(review): this also hides deprecation/runtime warnings from the libraries
# in use — consider narrowing the filter once the noisy source is known.
warnings.filterwarnings("ignore")

In [13]:
# Names of the regression datasets available for this experiment.
datasets = [
    "ailerons",
    "elevators",
    "fried",
    "hyperA",
    "abalone",
    "bike",
    "House8L",
    "MetroTraffic",
]

In [24]:
# Dataset evaluated in this run; it is used to build the input path and the output file name.
dataset_name = "elevators"

In [25]:
# Open the selected dataset's ARFF file from the RDatasets/ folder as a stream.
stream = stream_from_file(f"RDatasets/{dataset_name}.arff")

In [26]:
# Wrap MOA's StandardisationFilter in a MOATransformer bound to the stream's schema.
normalisation_transformer = MOATransformer(
    schema=stream.get_schema(),
    moa_filter=StandardisationFilter(),
)

In [27]:
# Two evaluators built on the stream's schema: a plain one and a windowed one
# configured with window_size=1000.
regressionEvaluator = RegressionEvaluator(
    schema=stream.get_schema(),
)
regressionWindowedEvaluator = RegressionWindowedEvaluator(
    schema=stream.get_schema(),
    window_size=1000,
)

In [28]:
# Instantiate the SOKNL regressor with the stream's schema (all other settings left at their defaults).
soknl = SOKNL(schema=stream.get_schema())

In [29]:
# Assemble the regression pipeline from the standardisation transformer and the SOKNL learner.
pipeline_soknl = RegressorPipeline(
    transformers=[normalisation_transformer],
    learner=soknl,
)

In [30]:
# Test-then-train loop: every instance is first predicted and scored by both
# evaluators, and only afterwards used to train the pipeline.
# Per-instance wall-clock time and RSS change are recorded for the saved results.
process = psutil.Process()  # one handle for this process, reused for every RSS reading
t = 0
times = []     # seconds spent on predict + evaluate + train, per instance
memories = []  # RSS delta in bytes per instance (negative when RSS shrinks)
while stream.has_more_instances():
    instance = stream.next_instance()
    mem_before = process.memory_info().rss  # Recording Memory
    # perf_counter is monotonic and high-resolution; time.time() can jump with
    # system clock adjustments and is too coarse for sub-millisecond steps.
    start = time.perf_counter()  # Recording Time
    prediction = pipeline_soknl.predict(instance)
    regressionEvaluator.update(instance.y_value, prediction)
    regressionWindowedEvaluator.update(instance.y_value, prediction)
    pipeline_soknl.train(instance)
    end = time.perf_counter()
    mem_after = process.memory_info().rss
    iteration_mem = mem_after - mem_before
    memories.append(iteration_mem)
    iteration_time = end - start
    times.append(iteration_time)
    t += 1
    # Progress report every 1000 instances using the cumulative evaluator.
    if t % 1000 == 0:
        print(f"Running Instance **{t}**")
        print(f"R2 score - {round(regressionEvaluator.R2(),3)}")
        print(f"RMSE score - {round(regressionEvaluator.RMSE(),3)}")
        print("-"*40)

Running Instance **1000**
R2 score - 0.601
RMSE score - 0.004
----------------------------------------
Running Instance **2000**
R2 score - 0.57
RMSE score - 0.004
----------------------------------------
Running Instance **3000**
R2 score - 0.541
RMSE score - 0.005
----------------------------------------
Running Instance **4000**
R2 score - 0.521
RMSE score - 0.005
----------------------------------------
Running Instance **5000**
R2 score - 0.553
RMSE score - 0.005
----------------------------------------
Running Instance **6000**
R2 score - 0.575
RMSE score - 0.005
----------------------------------------
Running Instance **7000**
R2 score - 0.585
RMSE score - 0.005
----------------------------------------
Running Instance **8000**
R2 score - 0.59
RMSE score - 0.005
----------------------------------------
Running Instance **9000**
R2 score - 0.583
RMSE score - 0.004
----------------------------------------
Running Instance **10000**
R2 score - 0.581
RMSE score - 0.004
------------

In [31]:
# Display the metrics of the non-windowed evaluator as a dict.
regressionEvaluator.metrics_dict()

{'classified instances': 16599.0,
 'mean absolute error': 0.0026851731020402113,
 'root mean squared error': 0.004390520136766927,
 'relative mean absolute error': 0.5908083888101199,
 'relative root mean squared error': 0.6531232749149068,
 'coefficient of determination': 0.5734299877644271,
 'adjusted coefficient of determination': 0.5729411265404405}

In [32]:
# Display the windowed evaluator's metrics as a table, one row per window.
regressionWindowedEvaluator.metrics_per_window()

Unnamed: 0,classified instances,mean absolute error,root mean squared error,relative mean absolute error,relative root mean squared error,coefficient of determination,adjusted coefficient of determination
0,1000.0,0.002374,0.004001,0.636721,0.6312,0.601587,0.593862
1,1000.0,0.002642,0.004425,0.619526,0.668427,0.553206,0.544544
2,1000.0,0.003769,0.006613,0.613253,0.729165,0.468319,0.458011
3,1000.0,0.002741,0.004699,0.53198,0.700744,0.508957,0.499437
4,1000.0,0.002808,0.004665,0.608634,0.575278,0.669056,0.662639
5,1000.0,0.002705,0.004189,0.52765,0.566991,0.678522,0.672289
6,1000.0,0.001735,0.00243,0.530049,0.560024,0.686373,0.680292
7,1000.0,0.002637,0.004135,0.6267,0.60397,0.635221,0.628149
8,1000.0,0.002783,0.004159,0.613517,0.704302,0.503959,0.494341
9,1000.0,0.002691,0.004124,0.58186,0.662758,0.560751,0.552235


In [33]:
# Gather everything to persist for this run into a single dict.
save_record = {
    "model": "SOKNL",
    "dataset": dataset_name,
    # metrics from the non-windowed evaluator
    "regressionEvaluator": regressionEvaluator.metrics_dict(),
    # per-window metrics table converted to {column: [values...]}
    "windows_scores": (
        regressionWindowedEvaluator
        .metrics_per_window()
        .to_dict(orient="list")
    ),
    # per-instance measurements collected in the evaluation loop
    "time": times,
    "memory": memories,
}

In [22]:
# Output file name follows the pattern "<model>_<dataset>.json".
file_name = f"{save_record['model']}_{save_record['dataset']}.json"

In [23]:
# Echo the generated file name to check it before writing.
file_name

'SOKNL_elevators.json'

In [None]:
# To store the dictionary in a JSON file
output_dir = "TEMP"  # change temp to  saved_results_json for final run
# Create the target folder first: without it open() raises FileNotFoundError
# and the results of the whole evaluation run would be lost.
os.makedirs(output_dir, exist_ok=True)
with open(f"{output_dir}/{file_name}", 'w') as json_file:
    json.dump(save_record, json_file)