# Test-Time Scaling Experiments

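This notebook implements and evaluates different test-time scaling strategies. Each strategy rescales the test features before they are passed to the pre-trained baseline model, and the resulting accuracies are compared: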
1. Standard scaling (baseline)
2. Quantile transformation
3. Robust scaling
4. Test-time Z-score
5. MinMax scaling

In [None]:
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
from utils.preprocessing import ScalingManager
import joblib
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

## 1. Load Data and Model

In [None]:
# Directory holding the preprocessed arrays and the trained baseline model.
# Defined once so the location can be changed in a single place.
DATA_DIR = '../data/processed'

# Load the train/test features and labels
X_train = np.load(f'{DATA_DIR}/X_train.npy')
X_test = np.load(f'{DATA_DIR}/X_test.npy')
y_train = np.load(f'{DATA_DIR}/y_train.npy')
y_test = np.load(f'{DATA_DIR}/y_test.npy')

# Fail fast on misaligned artifacts instead of surfacing a confusing error
# later, inside a scaler or a metric call.
assert len(X_train) == len(y_train), (
    f"X_train has {len(X_train)} samples but y_train has {len(y_train)}"
)
assert len(X_test) == len(y_test), (
    f"X_test has {len(X_test)} samples but y_test has {len(y_test)}"
)
assert X_train.shape[1:] == X_test.shape[1:], (
    f"Feature shape mismatch: train {X_train.shape[1:]} vs test {X_test.shape[1:]}"
)

# Load the trained model
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load model files that come from a trusted source.
model = joblib.load(f'{DATA_DIR}/baseline_model.pkl')

## 2. Initialize Scaling Manager

In [None]:
# One manager owns every scaler used in the experiments below.
scaling_manager = ScalingManager()

# Each named scaler is fitted on the training features only; the test
# features are transformed with these fitted scalers later on.
train_fitted_scalers = ('standard', 'quantile', 'robust', 'minmax')
for scaler_name in train_fitted_scalers:
    scaling_manager.fit_scaler(X_train, scaler_name)

## 3. Evaluate Different Scaling Methods

In [None]:
def evaluate_scaling(X_test_scaled, name, y_true=None, estimator=None):
    """Score a model on already-scaled test features and print a report.

    Args:
        X_test_scaled: Scaled test features, passed unchanged to
            ``estimator.predict``.
        name: Label of the scaling method; only used in the printed header.
        y_true: Ground-truth labels to score against. Defaults to the
            notebook-level ``y_test``.
        estimator: Object exposing ``predict``. Defaults to the
            notebook-level ``model``.

    Returns:
        The value returned by ``accuracy_score(y_true, y_pred)``.
    """
    # Fall back to the notebook globals so existing two-argument calls keep
    # working; explicit arguments make the function usable without them.
    if y_true is None:
        y_true = y_test
    if estimator is None:
        estimator = model

    y_pred = estimator.predict(X_test_scaled)
    accuracy = accuracy_score(y_true, y_pred)
    print(f"\nResults for {name} scaling:")
    print(f"Accuracy: {accuracy:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    return accuracy

# Accuracy of the model under each scaling strategy, keyed by strategy name.
results = {}

# Scalers fitted on the training data: transform the test features with each
# one and record the resulting accuracy.
for scaler_name in ('standard', 'quantile', 'robust', 'minmax'):
    X_test_scaled = scaling_manager.transform(X_test, scaler_name)
    results.update({scaler_name: evaluate_scaling(X_test_scaled, scaler_name)})

# Test-time z-score only receives X_test.
# NOTE(review): presumably it standardizes with statistics of the test set
# itself - confirm in ScalingManager.test_time_zscore.
X_test_zscore = scaling_manager.test_time_zscore(X_test)
results.update({'test_time_zscore': evaluate_scaling(X_test_zscore, 'test-time z-score')})

## 4. Visualize Results

In [None]:
# Bar chart of the accuracy obtained with each scaling method,
# in the order the methods were evaluated.
methods = list(results)
accuracies = [results[method] for method in methods]

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(methods, accuracies)
ax.set_title('Accuracy Comparison of Different Scaling Methods')
ax.set_xlabel('Scaling Method')
ax.set_ylabel('Accuracy')
# Tilt the method names so they stay readable.
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

## 5. Distribution Analysis

In [None]:
def plot_feature_distributions(feature_idx=0):
    """Plot one test-set feature before and after scaling, side by side.

    Draws three histograms with KDE overlays for column ``feature_idx``:
    the unscaled ``X_test``, ``X_test`` passed through
    ``scaling_manager.transform(..., 'standard')``, and the notebook-level
    ``X_test_zscore`` array.

    Args:
        feature_idx: Column index of the feature to plot (default 0).
    """
    # (panel title, feature matrix) pairs, in left-to-right order.
    panels = (
        ('Original Distribution', X_test),
        ('Standard Scaling', scaling_manager.transform(X_test, 'standard')),
        ('Test-time Z-score', X_test_zscore),
    )

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    for ax, (title, features) in zip(axes, panels):
        sns.histplot(features[:, feature_idx], kde=True, ax=ax)
        ax.set_title(title)

    fig.tight_layout()
    plt.show()

# Show the three distribution panels for the first feature (column 0)
plot_feature_distributions(feature_idx=0)