# In [None]:
import pandas as pd
import numpy as np
from autogluon.timeseries import TimeSeriesPredictor
import os

# Registry of saved predictors: maps each cluster ID to the list of model
# directories belonging to it (one model per time series).
cluster_models = {
    'cluster_1': ['models/cluster_1_model1', 'models/cluster_1_model2'],
    'cluster_2': ['models/cluster_2_model1', 'models/cluster_2_model2'],
    # add additional clusters and models as needed
}

# File paths (update these as appropriate for your environment)
ACTUALS_PATH = 'data/weekly_actuals.csv'        # CSV with columns: cluster_id, timestamp, value
PREDICTIONS_PATH = 'data/weekly_predictions.csv'  # CSV with columns: cluster_id, timestamp, value, model_dir (process_model filters on model_dir)
ALL_DATA_PATH = 'data/all_cluster_data.csv'       # Historical data used for fine-tuning; must include a cluster_id column

# Error threshold expressed as a fraction, matching calculate_mape's output
# (0.10 means a model is fine-tuned when its MAPE exceeds 10%).
ERROR_THRESHOLD = 0.10

def calculate_mape(actual, predicted):
    """Calculate the Mean Absolute Percentage Error (MAPE) as a fraction.

    Args:
        actual: Array-like of observed values.
        predicted: Array-like of forecast values with the same shape as
            ``actual``.

    Returns:
        The mean of ``|actual - predicted| / |actual|`` (0.10 means 10%).
        Zero actuals are divided by 1e-8 instead of 0, so a non-zero
        prediction against a zero actual yields a very large (finite) error
        while a zero prediction against a zero actual contributes 0.
        NaN is returned when the inputs are empty.

    Raises:
        ValueError: If ``actual`` and ``predicted`` have different shapes.
    """
    # Convert up front so list inputs get the same element-wise zero handling
    # as arrays (``[0, 1] == 0`` on a plain list is a single False).
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    # Refuse silent broadcasting: a length-1 array against a length-n array
    # would otherwise produce a plausible-looking but meaningless score.
    if actual.shape != predicted.shape:
        raise ValueError(
            f"actual and predicted must have the same shape, "
            f"got {actual.shape} and {predicted.shape}"
        )

    if actual.size == 0:
        return np.float64("nan")

    # Only the denominator is patched; the numerator keeps the true actuals so
    # an exact prediction of a zero actual is scored as zero error.
    denominator = np.where(actual == 0, 1e-8, actual)
    return np.mean(np.abs((actual - predicted) / denominator))

def load_data(path):
    """Read the CSV file at ``path`` and return its contents as a DataFrame."""
    frame = pd.read_csv(path)
    return frame

def fine_tune_model(model_dir, training_data, time_limit=600):
    """
    Re-train the AutoGluon TimeSeries model stored in model_dir with Chronos
    fine-tuning enabled.

    A TimeSeriesPredictor that has already been fit refuses a second call to
    fit(), so the saved predictor is loaded only to recover its configuration.
    A fresh predictor with the same settings is then fit at the same path,
    which replaces the previously saved predictor in model_dir.

    Args:
        model_dir: Directory holding the previously saved predictor; the
            re-trained predictor is written back to this directory.
        training_data: Historical data including the new week's actual values.
            NOTE(review): this is passed to fit() unchanged, so it must already
            be in a layout fit() accepts (a TimeSeriesDataFrame, or a
            long-format DataFrame using the predictor's expected id/timestamp/
            target column names) — confirm against the caller's CSV schema.
        time_limit: Training time budget in seconds handed to fit().

    Returns:
        The newly fitted TimeSeriesPredictor.
    """
    # Load the existing predictor purely to read back how it was configured.
    previous = TimeSeriesPredictor.load(model_dir)

    # Build an unfitted predictor mirroring the saved one. getattr() guards the
    # attributes that are not present on every AutoGluon release.
    predictor = TimeSeriesPredictor(
        prediction_length=previous.prediction_length,
        target=previous.target,
        eval_metric=previous.eval_metric,
        quantile_levels=previous.quantile_levels,
        freq=getattr(previous, "freq", None),
        known_covariates_names=getattr(previous, "known_covariates_names", None),
        path=model_dir,
    )

    # Fine tune using the new data with the fine_tune flag enabled.
    predictor.fit(
        training_data,
        hyperparameters={"Chronos": {"fine_tune": True}},
        time_limit=time_limit,
    )

    # save() takes no arguments; it writes to the predictor's own path.
    predictor.save()
    return predictor

def process_model(cluster_id, model_dir, actual_df, pred_df, all_data_df):
    """
    Process a single model:
      - Compute error by comparing last week's prediction with actual values.
      - Fine tune if error exceeds the threshold.

    Args:
        cluster_id: Value of the 'cluster_id' column identifying the cluster.
        model_dir: Directory of the model under evaluation; also used to select
            this model's rows through the predictions' 'model_dir' column.
        actual_df: DataFrame with 'cluster_id', 'timestamp' and 'value' columns.
        pred_df: DataFrame with 'cluster_id', 'timestamp' and 'value' columns
            and, ideally, a 'model_dir' column. Without that column every
            prediction of the cluster is used.
        all_data_df: DataFrame with a 'cluster_id' column; the rows of this
            cluster are handed to fine_tune_model when fine-tuning is needed.

    Returns:
        None. Progress and results are reported with print().
    """
    # Filter the actual and prediction data for the given cluster (and model).
    cluster_actual = actual_df[actual_df['cluster_id'] == cluster_id]
    pred_mask = pred_df['cluster_id'] == cluster_id
    if 'model_dir' in pred_df.columns:
        pred_mask = pred_mask & (pred_df['model_dir'] == model_dir)
    else:
        print(f"Predictions have no 'model_dir' column; using all predictions of cluster {cluster_id} for model {model_dir}.")
    cluster_pred = pred_df[pred_mask]

    # Pair values by timestamp rather than by row position, so a missing or
    # extra row on either side cannot shift every later comparison. Timestamps
    # are parsed first so that differing text formats still match.
    actual_values = cluster_actual[['timestamp', 'value']].copy()
    actual_values['timestamp'] = pd.to_datetime(actual_values['timestamp'])
    pred_values = cluster_pred[['timestamp', 'value']].copy()
    pred_values['timestamp'] = pd.to_datetime(pred_values['timestamp'])

    aligned = actual_values.merge(pred_values, on='timestamp', suffixes=('_actual', '_pred'))
    aligned = aligned.dropna(subset=['value_actual', 'value_pred']).sort_values('timestamp')

    # With nothing to compare the error would be NaN, which would otherwise be
    # reported as "within acceptable error limits".
    if aligned.empty:
        print(f"No overlapping actual/predicted values for model {model_dir} in cluster {cluster_id}; skipping.")
        return

    # Calculate error metric (MAPE)
    error = calculate_mape(aligned['value_actual'].to_numpy(), aligned['value_pred'].to_numpy())
    print(f"Model {model_dir} for cluster {cluster_id} MAPE: {error:.2%}")

    # Only fine tune if the error exceeds the threshold
    if error > ERROR_THRESHOLD:
        print(f"Error exceeds threshold for model {model_dir} in cluster {cluster_id}. Fine tuning...")
        # Gather training data for this cluster
        cluster_training_data = all_data_df[all_data_df['cluster_id'] == cluster_id]
        # Fine tune the model
        fine_tune_model(model_dir, cluster_training_data)
        print(f"Model {model_dir} for cluster {cluster_id} has been fine tuned and updated.")
    else:
        print(f"Model {model_dir} for cluster {cluster_id} is within acceptable error limits.")

def main():
    """Run the weekly check for every model registered in cluster_models.

    Reads the actuals, predictions and historical CSV files, then hands each
    (cluster, model directory) pair to process_model. process_model selects a
    model's predictions through a 'model_dir' column, so the predictions CSV
    should record which model produced each row (for example by saving the
    model identifier at prediction time).
    """
    # Load the three inputs in a fixed order: actuals, predictions, history.
    actual_df, pred_df, all_data_df = (
        load_data(csv_path)
        for csv_path in (ACTUALS_PATH, PREDICTIONS_PATH, ALL_DATA_PATH)
    )

    # Walk the registry lazily as flat (cluster, model) pairs.
    jobs = (
        (cluster_key, directory)
        for cluster_key, directories in cluster_models.items()
        for directory in directories
    )
    for cluster_key, directory in jobs:
        process_model(cluster_key, directory, actual_df, pred_df, all_data_df)


if __name__ == "__main__":
    main()
