In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Step 1: Build a noisy synthetic binary-classification dataset.
# It has 4 features in total: 3 informative ones plus 1 redundant one.
dataset_options = dict(
    n_samples=500,
    n_features=4,
    n_informative=3,
    n_redundant=1,
    n_classes=2,
    flip_y=0.1,        # label noise: 10% of samples get a randomly assigned class
    random_state=42,   # fixed seed so the generated data is reproducible
)
X, y = make_classification(**dataset_options)

# Step 2: Hold out 20% of the samples for testing and train on the other 80%.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Step 3: Logistic regression classifier with a high cap on solver iterations.
model = LogisticRegression(max_iter=10000)

# Step 4: Per-batch bookkeeping (one entry is appended per training round)
batch_accuracies = []  # test-set accuracy after each round
sample_sizes = []      # number of training samples used in each round
improvement = []       # accuracy change vs. the previous round ('N/A' for the first)

# Step 5: Train the model on progressively larger prefixes of the training set
n_batches = 5  # number of incremental training rounds
batch_size = len(X_train) // n_batches

for i in range(1, n_batches + 1):
    # Use the first i batches. The final round takes the whole training set so
    # that any remainder left over by the integer division is not dropped.
    n_seen = len(X_train) if i == n_batches else i * batch_size
    X_train_batch = X_train[:n_seen]
    y_train_batch = y_train[:n_seen]

    # Refit the model on the enlarged prefix
    model.fit(X_train_batch, y_train_batch)

    # Evaluate on the fixed held-out test set
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    # Accuracy change relative to the previous round; the first round has
    # nothing to compare against, so it gets the 'N/A' placeholder.
    if batch_accuracies:
        improvement.append(acc - batch_accuracies[-1])
    else:
        improvement.append('N/A')

    batch_accuracies.append(acc)
    sample_sizes.append(n_seen)

# Steps 6-7: Adaptation Efficiency Score (AES) = a batch's accuracy improvement
# divided by the number of training samples seen so far.
# The first batch has no previous accuracy to compare against, so the list is
# seeded with the 'N/A' placeholder; this also keeps AES the same length as the
# other per-batch lists used to build the DataFrame.
AES = ['N/A']
for gain, seen in zip(improvement[1:], sample_sizes[1:]):
    if gain == 'N/A':
        AES.append('N/A')
        continue
    improvement_rate = gain / max(seen, 1)  # max(..., 1) avoids dividing by zero
    AES.append(improvement_rate)

# Step 8: Final accuracy (traditional metric) is the test accuracy measured
# after the last training round.
final_accuracy = batch_accuracies[-1]

# Mean AES over the rounds that have a numeric score (placeholders are skipped).
mean_aes = np.mean([score for score in AES if score != 'N/A'])

# Collect the per-batch results in one table. The batch numbers are derived
# from the recorded results instead of being hard-coded, so the column lengths
# always agree even if the number of training rounds changes.
results_df = pd.DataFrame({
    'Batch': range(1, len(batch_accuracies) + 1),
    'Accuracy': batch_accuracies,
    'Improvement': improvement,
    'Samples Seen': sample_sizes,
    'AES': AES,
})

# Summary of the model's performance
print(f"Adaptation Efficiency Score (AES): {mean_aes:.4f}")
print(f"Final Test Accuracy (Traditional Metric): {final_accuracy:.4f}")

# Per-batch table
print("\nAccuracy, Improvement, and AES per Batch:")
print(results_df)

# Plot the learning curve and the AES curve side by side.
# The x-axis values are derived from the recorded results rather than being
# hard-coded, so the plots stay valid if the number of training rounds changes.
batch_numbers = range(1, len(batch_accuracies) + 1)

# 'N/A' placeholders become NaN so matplotlib skips those points.
AES_numeric = [np.nan if a == 'N/A' else a for a in AES]

plt.figure(figsize=(10, 5))

# Left panel: test accuracy per batch number
plt.subplot(1, 2, 1)
plt.plot(batch_numbers, batch_accuracies, marker='o', color='b', label="Accuracy")
plt.xlabel('Batch number')
plt.ylabel('Accuracy')
plt.title('Model Performance - Accuracy')
plt.xticks(batch_numbers)
plt.grid(True)
plt.legend()

# Right panel: AES per batch number
plt.subplot(1, 2, 2)
plt.plot(batch_numbers, AES_numeric, marker='o', color='r', label="AES")
plt.xlabel('Batch number')
plt.ylabel('Adaptation Efficiency Score (AES)')
plt.title('Model Adaptation Efficiency')
plt.xticks(batch_numbers)
plt.grid(True)
plt.legend()

# Render both panels
plt.tight_layout()
plt.show()

# Interpretation of AES: compare the mean numeric score with a fixed cut-off.
# NOTE(review): AES is an accuracy change divided by a cumulative sample count
# (160-400 samples here), so it is far below 0.02 and the "efficient" branch
# looks unreachable — confirm the intended threshold or scale.
numeric_aes = [score for score in AES if score != 'N/A']
adapts_efficiently = np.mean(numeric_aes) > 0.02
print(
    "The model shows efficient adaptation to new data."
    if adapts_efficiently
    else "The model shows slow adaptation to new data."
)
