# **Friedman synthetic dataset with concept drifts.**

### Import libraries

In [2]:
import river
from river import datasets
from river import linear_model, tree
from river import drift, metrics
from river.datasets import synth
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from frouros.detectors.concept_drift.streaming.window_based.adwin import ADWIN
from frouros.detectors.concept_drift.streaming.statistical_process_control.ddm import DDM
from frouros.detectors.concept_drift.streaming.statistical_process_control.eddm import EDDM
from frouros.detectors.concept_drift.streaming.change_detection.page_hinkley import PageHinkley


### Define the datasets


##### **'lea': Local Expanding Abrupt drift.**
The concept drift appears in two distinct regions of the instance space, while the remaining regions are left unaltered. There are three points of abrupt change in the training dataset. At every consecutive change the regions of drift are expanded.

In [3]:
dataset = synth.FriedmanDrift(
    drift_type='lea',
    position=(1, 2, 3),
    seed=42
)
     

In [4]:
# Print the first 5 samples from the dataset
for x, y in dataset.take(5):
    print(list(x.values()), y)

[0.6394267984578837, 0.025010755222666936, 0.27502931836911926, 0.22321073814882275, 0.7364712141640124, 0.6766994874229113, 0.8921795677048454, 0.08693883262941615, 0.4219218196852704, 0.029797219438070344] 7.6612066799391085
[0.026535969683863625, 0.1988376506866485, 0.6498844377795232, 0.5449414806032167, 0.2204406220406967, 0.5892656838759087, 0.8094304566778266, 0.006498759678061017, 0.8058192518328079, 0.6981393949882269] 8.330237325619887
[0.3402505165179919, 0.15547949981178155, 0.9572130722067812, 0.33659454511262676, 0.09274584338014791, 0.09671637683346401, 0.8474943663474598, 0.6037260313668911, 0.8071282732743802, 0.7297317866938179] 7.044929465589236
[0.3785343772083535, 0.552040631273227, 0.8294046642529949, 0.6185197523642461, 0.8617069003107772, 0.577352145256762, 0.7045718362149235, 0.045824383655662215, 0.22789827565154686, 0.28938796360210717] 18.160369621840488
[0.0797919769236275, 0.23279088636103018, 0.10100142940972912, 0.2779736031100921, 0.6356844442644002, 0.

In [5]:
# Initialize the data containers
data = []
for i, (x, y) in enumerate(dataset):
    x_values = list(x.values())
    data.append(x_values + [y])
    if i >= 5000:  # Limiting to 5000 samples for simplicity
        break

In [6]:
# Define the column names
column_names = [f'x{i}' for i in range(1, len(x_values) + 1)] + ['y']


### Convert to DataFrame

In [7]:
# Create the DataFrame
df = pd.DataFrame(data, columns=column_names)

In [8]:
# Split the DataFrame into train and test sets
df_train = df.iloc[:2500]
df_test = df.iloc[2500:]

print("Train DataFrame:")
print(df_train.head())

print("\nTest DataFrame:")
print(df_test.head())
     

Train DataFrame:
         x1        x2        x3        x4        x5        x6        x7  \
0  0.639427  0.025011  0.275029  0.223211  0.736471  0.676699  0.892180   
1  0.026536  0.198838  0.649884  0.544941  0.220441  0.589266  0.809430   
2  0.340251  0.155479  0.957213  0.336595  0.092746  0.096716  0.847494   
3  0.378534  0.552041  0.829405  0.618520  0.861707  0.577352  0.704572   
4  0.079792  0.232791  0.101001  0.277974  0.635684  0.364832  0.370181   

         x8        x9       x10          y  
0  0.086939  0.421922  0.029797   7.661207  
1  0.006499  0.805819  0.698139   8.330237  
2  0.603726  0.807128  0.729732   7.044929  
3  0.045824  0.227898  0.289388  18.160370  
4  0.209507  0.266978  0.936655  -2.655357  

Test DataFrame:
            x1        x2        x3        x4        x5        x6        x7  \
2500  0.364768  0.555611  0.730283  0.160368  0.207994  0.796483  0.543040   
2501  0.578037  0.166128  0.252746  0.637494  0.139315  0.466338  0.961549   
2502  0.887

### Train-Test Split

In [11]:
# Split features and target
X_train = df_train.drop(columns='y').values
y_train = df_train['y'].values
X_test = df_test.drop(columns='y').values
y_test = df_test['y'].values



### Define machine learning algorithms

In [12]:
model = SVR()

### Train the Model

In [13]:
model.fit(X_train, y_train)

### Evaluate the Model

In [14]:
y_pred = model.predict(X_test)
# Calculate the initial error
initial_mse = mean_squared_error(y_test, y_pred)

In [15]:
print("Initial Mean Squared Error:", initial_mse)

Initial Mean Squared Error: 10.684625230622444


**MSE indicates the average squared difference between the model's predictions and the actual target values.
R² Score of indicates how much of the variation of a dependent variable is explained by an independent variable in a regression model.**

### Define drift detection algorithms

In [16]:
adwin = ADWIN()

In [17]:
ddm = DDM()

In [18]:
eddm = EDDM()

In [19]:
ph = PageHinkley()

### Function to check for drift 

In [22]:
def check_drift(y_true, y_pred, dd):
    drift_detected = False
    for yt, yp in zip(y_true, y_pred):
        error = (yt - yp) ** 2
        dd.update(error)
        if dd.drift:
            drift_detected = True
            break
    return drift_detected

### Check drift for ADWIN

In [23]:
# Check for drift
drift_detected = check_drift(y_test, y_pred, adwin)

print(f"Initial MSE: {initial_mse}")
print(f"Drift Detected: {drift_detected}")

if drift_detected:
    print("Concept drift detected.")
else:
    print("No concept drift detected.")

Initial MSE: 10.684625230622444
Drift Detected: False
No concept drift detected.


### Check drift for DDM

In [24]:
# Check for drift
drift_detected = check_drift(y_test, y_pred, ddm)

print(f"Initial MSE: {initial_mse}")
print(f"Drift Detected: {drift_detected}")

if drift_detected:
    print("Concept drift detected.")
else:
    print("No concept drift detected.")

Initial MSE: 10.684625230622444
Drift Detected: False
No concept drift detected.


  std = np.sqrt(


### Check drift for EDDM

In [26]:
# Check for drift
drift_detected = check_drift(y_test, y_pred, eddm)

print(f"Initial MSE: {initial_mse}")
print(f"Drift Detected: {drift_detected}")

if drift_detected:
    print("Concept drift detected.")
else:
    print("No concept drift detected.")

Initial MSE: 10.684625230622444
Drift Detected: False
No concept drift detected.


### Check drift for PageHinkley

In [27]:
# Check for drift
drift_detected = check_drift(y_test, y_pred, ph)

print(f"Initial MSE: {initial_mse}")
print(f"Drift Detected: {drift_detected}")

if drift_detected:
    print("Concept drift detected.")
else:
    print("No concept drift detected.")

Initial MSE: 10.684625230622444
Drift Detected: True
Concept drift detected.
