# In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Build a reproducible synthetic classification dataset:
# 1000 samples with 20 features each, generated from a fixed seed.
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)

# Hold out 30% of the samples. The held-out portion is not scored in one go;
# it is later cut into smaller batches that stand in for data arriving over time.
X_train, X_initial_test, y_train, y_initial_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Create the random forest with a fixed seed and fit it once on the training
# portion (fit() hands back the fitted estimator, so the two steps chain).
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Stand in for "new data arriving over time": cut the held-out features and
# labels into the same number of consecutive chunks so that chunk k of the
# features lines up with chunk k of the labels.
n_batches = 5
test_batches = np.array_split(X_initial_test, n_batches)
label_batches = np.array_split(y_initial_test, n_batches)

# Per-batch accuracy, kept in batch order; the spread of these values is what
# the stability measure is computed from.
accuracies = []

for i, (batch_features, batch_labels) in enumerate(zip(test_batches, label_batches)):
    # Score the already-trained model on this batch only (no refitting).
    accuracy = accuracy_score(batch_labels, model.predict(batch_features))
    accuracies.append(accuracy)
    print(f"Accuracy for batch {i+1}: {accuracy:.2f}")

# Learning Stability Index (LSI): the variance of the per-batch accuracies.
# np.var is called with its defaults, i.e. the population variance (ddof=0).
lsi = np.var(accuracies)
print(f"\nLearning Stability Index (LSI): {lsi:.4f}")

# Tabulate accuracy per batch, numbering batches from 1 so the table matches
# the per-batch lines printed during evaluation.
results_df = pd.DataFrame({
    'Batch': np.arange(1, len(accuracies) + 1),
    'Accuracy': accuracies
})
# sep="" stops print() from inserting its default single-space separator in
# front of the table, which would push the header row one column out of line
# with the data rows beneath it.
print("\nBatch-wise Accuracy Results:\n", results_df, sep="")

# Explanation
print("\nExplanation of Learning Stability Index (LSI):")
print("The LSI measures how consistent the model's performance is across different data batches. Lower variance in accuracy across batches indicates higher stability in learning.")
