In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from datetime import datetime
np.random.seed(42)

from sktime.regression.kernel_based import RocketRegressor
import pickle

from apply_rocket import apply_rocket
from sklearn.metrics import mean_absolute_error, root_mean_squared_error

In [None]:
def _write_rocket_summary(records, results_dir):
    """Write every result record collected so far to CSV and pickle and return them as a DataFrame."""
    summary_df = pd.DataFrame(records)
    summary_df.to_csv(f'{results_dir}/rocket_results_summary.csv', index=False)
    summary_df.to_pickle(f'{results_dir}/rocket_results_summary.pkl')
    return summary_df


def run_rocket_experiments_incremental(train_dict_path: str, val_dict_path: str) -> pd.DataFrame:
    """Run every (rocket model, position) experiment and save results incrementally.

    For each of the models ``'rocket'`` and ``'minirocket'`` and each of the
    positions ``'FWD'``, ``'MID'``, ``'DEF'`` and ``'GK'`` this calls
    ``apply_rocket`` on the unpickled training/validation objects, scores the
    returned predictions with MAE and RMSE, writes the predictions to
    ``predictions_<model>_<position>.pkl`` and rewrites the run summary
    (``rocket_results_summary.csv`` / ``.pkl``). All files go into a fresh
    ``../outputs/rocket_experiments/<YYYYmmdd_HHMMSS>`` directory, so a run
    that is interrupted part-way still leaves the finished experiments on disk.

    A failing experiment does not abort the run: the exception is printed and
    recorded as a row with ``status == 'error'`` and an ``error_message``.

    Parameters
    ----------
    train_dict_path : str
        Path of the pickled training object handed to ``apply_rocket``.
    val_dict_path : str
        Path of the pickled validation object handed to ``apply_rocket``.
        NOTE(review): both are presumably dictionaries, judging by the
        variable names only; their schema is defined by ``apply_rocket``.

    Returns
    -------
    pandas.DataFrame
        One row per experiment with the columns ``train_dict_path``,
        ``val_dict_path``, ``model``, ``position``, ``mae``, ``rmse``,
        ``n_samples``, ``timestamp`` and ``status``; ``error_message`` is
        present only when at least one experiment failed.
    """
    positions = ['FWD', 'MID', 'DEF', 'GK']
    rocket_models = ['rocket', 'minirocket']

    # Every run writes into its own timestamped directory.
    results_dir = f"../outputs/rocket_experiments/{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    os.makedirs(results_dir, exist_ok=True)

    # Result rows are collected as plain dicts and turned into a DataFrame once
    # per experiment; this avoids concatenating onto an empty frame in a loop.
    records = []
    results_df = pd.DataFrame()
    print("Starting fresh experiments")
    print(f"Results directory: {results_dir}")

    # Load data once.
    # SECURITY: pickle.load can execute arbitrary code; only pass trusted files.
    with open(train_dict_path, 'rb') as f:
        train_dict = pickle.load(f)
    with open(val_dict_path, 'rb') as f:
        val_dict = pickle.load(f)

    for model in rocket_models:
        print(f"\n=== Running {model.upper()} ===")

        for position in positions:
            print(f"  Processing {position}...")

            try:
                # Run the experiment
                y_pred, y_val = apply_rocket(train_dict, val_dict,
                                             position=position,
                                             rocket_model=model)

                mae = mean_absolute_error(y_val, y_pred)
                rmse = root_mean_squared_error(y_val, y_pred)

                # Save detailed predictions first, so that a row is marked
                # 'completed' only if its prediction file really exists.
                pred_file = f'{results_dir}/predictions_{model}_{position}.pkl'
                with open(pred_file, 'wb') as f:
                    pickle.dump({
                        'predictions': y_pred,
                        'actual': y_val,
                        'model': model,
                        'position': position,
                        'train_dict_path': train_dict_path,
                        'val_dict_path': val_dict_path,
                        'timestamp': datetime.now().isoformat()
                    }, f)

                record = {
                    'train_dict_path': train_dict_path,
                    'val_dict_path': val_dict_path,
                    'model': model,
                    'position': position,
                    'mae': mae,
                    'rmse': rmse,
                    'n_samples': len(y_val),
                    'timestamp': datetime.now().isoformat(),
                    'status': 'completed'
                }

            except Exception as e:
                # Deliberate best effort: one failing experiment must not
                # abort the remaining ones.
                print(f"    Error: {e}")

                # Error rows carry the same identifying columns as successes.
                record = {
                    'train_dict_path': train_dict_path,
                    'val_dict_path': val_dict_path,
                    'model': model,
                    'position': position,
                    'mae': None,
                    'rmse': None,
                    'n_samples': None,
                    'timestamp': datetime.now().isoformat(),
                    'status': 'error',
                    'error_message': str(e)
                }

            # Exactly one row per experiment; persist the summary immediately.
            records.append(record)
            results_df = _write_rocket_summary(records, results_dir)

            if record['status'] == 'completed':
                print(f"    RMSE: {record['rmse']:.3f}, MAE: {record['mae']:.3f} - SAVED")

    return results_df

In [None]:
# Run experiments
# Alternative dataset pair, kept for quick switching:
# training_dictionary_file = '../datasets/training_dictionary.pkl'
# validation_dictionary_file = '../datasets/validation_dictionary.pkl'
training_dictionary_file = '../datasets/training_dictionary_20_weeks.pkl'
validation_dictionary_file = '../datasets/validation_dictionary_20_weeks.pkl'

results_df = run_rocket_experiments_incremental(
    train_dict_path=training_dictionary_file,
    val_dict_path=validation_dictionary_file,
)

# Final summary banner: blank line, rule, title, rule
print("\n" + "=" * 50, "FINAL RESULTS SUMMARY", "=" * 50, sep="\n")

# Only experiments that finished without an error go into the pivot tables
completed_results = results_df.loc[results_df['status'] == 'completed']
if completed_results.empty:
    print("No completed results found")
else:
    # One table per metric: positions as rows, rocket variants as columns
    print("\nRMSE by Position and Model:")
    rmse_pivot = completed_results.pivot(
        index='position', columns='model', values='rmse'
    )
    print(rmse_pivot.round(3))

    print("\nMAE by Position and Model:")
    mae_pivot = completed_results.pivot(
        index='position', columns='model', values='mae'
    )
    print(mae_pivot.round(3))

print("\nResults saved to: ../outputs/rocket_experiments/")
print("Summary file: rocket_results_summary.csv")

In [None]:
# Re-open the artefacts of one finished run for further analysis.
# NOTE: the run folder is hard-coded; it must exist on disk for this cell to work.
results_path = '../outputs/rocket_experiments/20250612_120426'
predictions_file = f'{results_path}/predictions_rocket_FWD.pkl'
summary_file = f'{results_path}/rocket_results_summary.csv'

# Predictions of the 'rocket' model for the FWD position
with open(predictions_file, 'rb') as f:
    predictions_data = pickle.load(f)
print("\nLoaded predictions data:")

# Turn predictions into DataFrame for easier handling: the scalar entries
# (model, position, timestamp) are repeated on every prediction row
predictions_df = pd.DataFrame({
    field: predictions_data[field]
    for field in ('predictions', 'actual', 'model', 'position', 'timestamp')
})

# Per-experiment metrics table written during the run
summary_df = pd.read_csv(summary_file)