In [2]:
import pandas as pd
import pickle
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import warnings


# Suppress every warning category for the rest of the process so the training
# log stays readable.
# NOTE(review): this also hides any warnings emitted while fitting the models
# below -- consider narrowing the filter when debugging poor forecasts.
warnings.filterwarnings("ignore")

def clean_and_prepare_data(data_filepath, lookup_filepath):
    """Read the raw data and the KPI lookup, then return one cleaned frame.

    Steps, in order:
      1. Left-join the raw rows with the lookup table on ``english_name``.
      2. Discard rows where ``year`` or ``month`` is missing.
      3. Add a ``date`` column parsed from the ``"<year>-<month>"`` string.
      4. Replace negative ``monthly_value`` entries with 0 for rows whose
         ``category`` is ``'Currency:Revenue/Sales'``.

    Args:
        data_filepath: Path of the CSV holding the raw rows.
        lookup_filepath: Path of the CSV joined in on ``english_name``.

    Returns:
        The merged, filtered DataFrame with the extra ``date`` column.
    """
    raw_rows = pd.read_csv(data_filepath)
    kpi_lookup = pd.read_csv(lookup_filepath)

    # Left join: raw rows whose name is absent from the lookup are kept.
    merged = raw_rows.merge(kpi_lookup, how='left', on='english_name')
    dated_rows = merged.dropna(subset=['year', 'month'])

    # Cast through int first so the text carries no float suffix (e.g. ".0").
    year_text = dated_rows['year'].astype(int).astype(str)
    month_text = dated_rows['month'].astype(int).astype(str)
    prepared = dated_rows.assign(date=pd.to_datetime(year_text + '-' + month_text))

    # Negative values in the sales category are treated as errors and zeroed.
    is_negative = prepared['monthly_value'].lt(0)
    is_sales = prepared['category'].eq('Currency:Revenue/Sales')
    prepared.loc[is_sales & is_negative, 'monthly_value'] = 0
    return prepared

def main(training_data_file: str = 'FS-data-80475.csv',
         lookup_file: str = 'categorized_kpis.csv',
         output_model_file: str = 'trained_financial_models.pkl') -> None:
    """
    Trains forecasting models for the main financial categories and saves them.

    One ExponentialSmoothing model is fitted per financial category on that
    category's monthly totals; the resulting ``{category: fitted_model}``
    dict is pickled to ``output_model_file``.

    Args:
        training_data_file: CSV with the raw rows, passed to
            ``clean_and_prepare_data``.
        lookup_file: CSV with the category lookup, passed to
            ``clean_and_prepare_data``.
        output_model_file: Destination path of the pickle file.

    Calling ``main()`` without arguments uses the original hard-coded paths.
    """
    print("Starting financial model training process...")

    # --- 1. PREPARE DATA ---
    df_full = clean_and_prepare_data(training_data_file, lookup_file)

    # --- 2. TRAIN MODELS FOR FINANCIAL CATEGORIES ---
    trained_models = {}

    financial_categories = ['Currency:Revenue/Sales', 'Currency:Expense', 'Currency:Payroll/Compensation', 'Currency:Warranty']
    df_financials = df_full[df_full['category'].isin(financial_categories)]
    # Collapse all rows of a category within the same month into one total.
    df_aggregated = df_financials.groupby(['date', 'category'])['monthly_value'].sum().reset_index()

    for category in financial_categories:
        ts_data = df_aggregated[df_aggregated['category'] == category].set_index('date')['monthly_value']

        # A category with no rows cannot be fitted; skip it rather than let
        # one missing category abort training of all the others.
        if ts_data.empty:
            print(f"Skipping {category}: no data available after cleaning.")
            continue

        print(f"Training model for {category}...")

        # Only fit the 12-month seasonal component when at least two full
        # seasonal cycles (24 observations) are present; otherwise fall back
        # to a trend-only model.
        if len(ts_data) >= 24:
            model = ExponentialSmoothing(ts_data, trend='add', seasonal='add', seasonal_periods=12).fit()
        else:
            model = ExponentialSmoothing(ts_data, trend='add').fit()
        trained_models[category] = model

    # --- 3. SAVE TRAINED MODELS ---
    # NOTE: pickle files execute code on load; only unpickle this file from a
    # trusted location.
    with open(output_model_file, 'wb') as f:
        pickle.dump(trained_models, f)

    print(f"\nTraining complete. Financial models have been saved to '{output_model_file}'.")

# Run the training pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


Starting financial model training process...
Training model for Currency:Revenue/Sales...
Training model for Currency:Expense...
Training model for Currency:Payroll/Compensation...
Training model for Currency:Warranty...

Training complete. Financial models have been saved to 'trained_financial_models.pkl'.
