In [1]:
# Import necessary libraries
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from frouros.detectors.concept_drift import DDM, DDMConfig
from frouros.metrics import PrequentialError

In [2]:

# Seed NumPy's global random state so any NumPy-based randomness is repeatable.
np.random.seed(31)

# Iris features and labels, returned as an (X, y) pair.
X, y = load_iris(return_X_y=True)

# Keep 70% of the rows for training; the remainder becomes the test split.
# random_state pins the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=31,
)


In [3]:

# Two-stage model: features are standardized first, then passed to a
# logistic regression classifier.
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("model", LogisticRegression()),
    ]
)

# Train both stages on the training split.
pipeline.fit(X_train, y_train)


In [4]:

# Detector settings: warn at level 2.0, flag drift at level 3.0, and require
# at least 25 instances before concept drift is checked.
config = DDMConfig(
    min_num_instances=25,
    drift_level=3.0,
    warning_level=2.0,
)

# Running error metric for the stream.
# NOTE(review): the original note says alpha=1.0 is "equivalent to normal
# accuracy"; the class is named PrequentialError, so accuracy is presumably
# 1 - metric value -- confirm against the frouros documentation.
metric = PrequentialError(alpha=1.0)

# Drift detector built from the settings above.
# `DDM` must be the frouros class imported in the first cell here: a later cell
# imports the same name from skmultiflow, so run this cell before that one.
detector = DDM(config=config)

### Testing the DDM Detector on Power Supply Data

In [5]:
from scipy.io import arff
from skmultiflow.drift_detection import DDM
from sklearn.metrics import accuracy_score
import pandas as pd

INFO:numexpr.utils:NumExpr defaulting to 8 threads.


TypeError: argument of type 'builtin_function_or_method' is not iterable

In [None]:

import os

# Location of the Power Supply ARFF file. Set the POWERSUPPLY_ARFF environment
# variable to point at a copy elsewhere; the fallback is the original
# machine-specific path so existing setups behave exactly as before.
POWERSUPPLY_ARFF = os.environ.get(
    'POWERSUPPLY_ARFF',
    '/Users/hirushau/Code/Model-Monitoring/datasets/real_datasets/powersupply.arff',
)

# Load the .arff file; only the first element of the loadarff result (the
# records) is used to build the frame.
data = arff.loadarff(POWERSUPPLY_ARFF)
df = pd.DataFrame(data[0])

# Convert class labels to integers. The column is decoded from UTF-8 bytes
# first, since the labels arrive as byte strings rather than numbers.
df['class'] = df['class'].str.decode('utf-8').astype(int)


In [None]:
# Initialize the scikit-multiflow drift detector with its default settings.
# The class is imported under an explicit alias because the bare name `DDM` is
# bound twice in this notebook (frouros in the first cell, skmultiflow later);
# without the alias, which class `DDM()` builds depends on which import cell
# ran last.
from skmultiflow.drift_detection import DDM as SkmultiflowDDM

drift_detector = SkmultiflowDDM()


In [None]:

# Parallel records, one entry per stream position:
#   y_true -- the class label read from the stream
#   y_pred -- 1 if the detector reported a change after that label, else 0
y_true = []
y_pred = []

# Process the stream.
# The labels are read straight from the 'class' column as plain Python ints.
# This avoids building a full row Series per step (df.iloc[i]) and keeps the
# label from being upcast alongside the other columns. The loop variable is
# deliberately not called `y`, so the Iris labels loaded earlier stay intact.
#
# NOTE(review): the raw class label is what gets passed to add_element. DDM is
# normally driven by a 0/1 "was this prediction wrong" signal, not by label
# values; labels above 1 would make miss_prob * (1 - miss_prob) negative inside
# the detector, which looks like the source of the sqrt warning this cell
# emits. Confirm whether a classifier's per-sample error should be fed here.
for label in df['class'].tolist():
    drift_detector.add_element(label)

    # Record whether the detector signals a drift at this position
    y_true.append(label)
    y_pred.append(int(drift_detector.detected_change()))
    


  self.miss_std = np.sqrt(self.miss_prob * (1 - self.miss_prob) / float(self.sample_count))


In [None]:
# Fraction of stream positions where the stored class label equals the
# detector's 0/1 drift flag.
# NOTE(review): y_true holds class labels, not known drift points, so this
# number may not measure detection quality -- confirm the intended metric.
accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
print(f'Detection Accuracy: {accuracy}')


Detection Accuracy: 0.041666666666666664
