# Confronto tra progressive_val_score e implementazione personalizzata

In [2]:
import pandas as pd
import numpy as np
from river import datasets
from river import stream
from river import metrics
from river import linear_model
from river.evaluate import progressive_val_score
from river import preprocessing
import logging
from river import forest,drift,tree
from Oespl import OESPL



logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

## Preparazione dei dati

In [3]:
import pandas as pd
b = pd.read_csv(r"C:\Users\franc\Desktop\TESI\SML_thesis_line_follower_robot\e_puck\data\data\sensors_data\train_data.csv")

In [4]:
X = b.drop(columns=['target'])
y = b['target']


## Implementazione personalizzata di progressive_val_score

In [5]:
def custom_progressive_val_score(dataset, model, metric, print_every=None):
    for i, (x, y) in enumerate(dataset):
        x = dict(x) if isinstance(x, pd.Series) else x  # Ensure x is a dict
        y_pred = model.predict_one(x)
        if y_pred is not None:
            old_metric = metric # Convert to float to avoid references
            metric.update(y_true=y, y_pred=y_pred)
            #logger.debug(f"Step {i}: y_true={y}, y_pred={y_pred:.4f}, metric before={old_metric.get():.4f}, metric after={metric.get():.4f}")
        model.learn_one(x, y)

        if print_every and i % print_every == 0:
            print(f"[{i}] {metric}")

    return metric.get()

## Confronto tra i due metodi

In [8]:

model = (preprocessing.StandardScaler() | OESPL(
            base_estimator=tree.HoeffdingTreeRegressor(),
            ensemble_size=3,
            lambda_fixed=6.0,
            seed=42,
            drift_detector=drift.ADWIN(),
            patience=1000,
            awakening=500,
            reset_model=True
        ))
metric = metrics.MAE()
# Valutazione con progressive_val_score standard
streams1 = stream.iter_pandas(X, y, shuffle=True, seed=42)
result1 = progressive_val_score(dataset=streams1, model=model, metric=metric, print_every=1000)
print(f'Progressive validation score: {result1}')

metric = metrics.MAE()
model = (preprocessing.StandardScaler() | OESPL(
            base_estimator=tree.HoeffdingTreeRegressor(),
            ensemble_size=3,
            lambda_fixed=6.0,
            seed=42,
            drift_detector=drift.ADWIN(),
            patience=1000,
            awakening=500,
            reset_model=True
        ))
# Valutazione con implementazione personalizzata
streams2 = stream.iter_pandas(X, y, shuffle=True, seed=42)
result2 = custom_progressive_val_score(dataset=streams2, model=model, metric=metric, print_every=1000)
print(f'Custom progressive validation score: {result2}')

# Confronto dei risultati
print(f'Differenza tra i risultati: {abs(result1.get() - result2)}')



[1,000] MAE: 17.670057
[2,000] MAE: 17.645193
[3,000] MAE: 17.201596
[4,000] MAE: 16.912333
[5,000] MAE: 17.062971
[6,000] MAE: 16.956905
[7,000] MAE: 17.095356
[8,000] MAE: 16.610703
[9,000] MAE: 16.632725
[10,000] MAE: 16.269243
[11,000] MAE: 16.267041
[11,036] MAE: 16.229564
Progressive validation score: MAE: 16.229564
[0] MAE: 199.998847
[1000] MAE: 17.654681
[2000] MAE: 17.64179
[3000] MAE: 17.198109
[4000] MAE: 16.908222
[5000] MAE: 17.065489
[6000] MAE: 16.956195
[7000] MAE: 17.093223
[8000] MAE: 16.613387
[9000] MAE: 16.632654
[10000] MAE: 16.269339
[11000] MAE: 16.265683
Custom progressive validation score: 16.22956419628686
Differenza tra i risultati: 0.0


## Analisi delle differenze