In [2]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import PassiveAggressiveClassifier, SGDClassifier
from sklearn.metrics import accuracy_score
from skmultiflow.trees import HoeffdingTree, HoeffdingTreeClassifier


In [3]:

# Generate synthetic data for training
num_samples = 1000
X_train_synthetic = np.random.randn(num_samples, 10)
y_train_synthetic = np.random.randint(0, 2, num_samples)

# Initialize classifiers
pac = PassiveAggressiveClassifier(random_state=42)
sgd = SGDClassifier(random_state=42)
gb = GradientBoostingClassifier(random_state=42)
ht = HoeffdingTree()

classifiers = [pac, sgd, gb, ht]
classifier_names = ['PAC', 'SDG', 'GradientBoosting', 'HoeffdingTree']


The old name will be removed in v0.7.0


In [4]:

# Track accuracy during training
accuracies = {name: [] for name in classifier_names}
ensemble_accuracies = []

# Train classifiers incrementally and track accuracy
for i in range(num_samples):
    for clf, name in zip(classifiers, classifier_names):
        clf.partial_fit(X_train_synthetic[i:i+1], y_train_synthetic[i:i+1], classes=np.unique(y_train_synthetic))
        y_pred = clf.predict(X_train_synthetic[:i+1])
        acc = accuracy_score(y_train_synthetic[:i+1], y_pred)
        accuracies[name].append(acc)
    
    # Ensemble prediction
    ensemble_pred = np.mean([clf.predict(X_train_synthetic[i:i+1]) for clf in classifiers])
    ensemble_acc = accuracy_score(y_train_synthetic[:i+1], (ensemble_pred >= 0.5).astype(int))
    ensemble_accuracies.append(ensemble_acc)


AttributeError: 'GradientBoostingClassifier' object has no attribute 'partial_fit'

In [None]:

# Plot learning curves
plt.figure(figsize=(10, 6))
for name in classifier_names:
    plt.plot(np.arange(num_samples), accuracies[name], label=name)

plt.plot(np.arange(num_samples), ensemble_accuracies, label='Ensemble', linestyle='--', color='black')

# Plot target accuracy points
plt.axhline(y=0.9935, color='r', linestyle='-.', label='PAC Target')
plt.axhline(y=0.9778, color='g', linestyle='-.', label='SDG Target')
plt.axhline(y=0.9918, color='b', linestyle='-.', label='Hoeffding Target')
plt.axhline(y=0.9953, color='m', linestyle='-.', label='Ensemble Target')

plt.title('Learning Curve: Accuracy vs. Training Samples')
plt.xlabel('Number of Training Samples')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()