# In [None]:  (Jupyter cell prompt left over from the notebook export)
import pandas as pd
import pyodbc
import numpy as np
import joblib
import warnings
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error

warnings.filterwarnings("ignore")

# ---------------------------
# Load Data from SQL Server
# ---------------------------
def load_growth_data(*, csv_path=None, sql=None, connection=None):
    """Load raw usage rows and return a gap-free monthly usage series.

    The raw data must provide the columns ``yr`` (year), ``mn`` (month) and
    ``per`` (the value being averaged per month).

    Args:
        csv_path: Optional path or file-like object accepted by
            ``pandas.read_csv``. When given (and not an empty string) the
            rows are read from it and no database access happens.
        sql: Optional SQL query. Defaults to the module-level ``query``,
            which is not defined in this part of the file and must be
            provided elsewhere.
        connection: Optional open DB connection. Defaults to the
            module-level ``conn`` (same caveat as ``query``). The connection
            used for the read is always closed before returning, even when
            the read fails.

    Returns:
        A DataFrame indexed by month-start ``Date`` (frequency ``"MS"``) with
        a single ``per`` column holding the monthly mean; months missing from
        the input are forward-filled from the previous month.
    """
    if csv_path not in (None, ""):
        df = pd.read_csv(csv_path)
    else:
        # Fall back to module-level names only when nothing was passed in.
        if sql is None:
            sql = query
        if connection is None:
            connection = conn
        try:
            df = pd.read_sql(sql, connection)
        finally:
            # Release the connection even if the query raises.
            connection.close()

    # Build an ISO "YYYY-MM-01" string; zero-padding the month keeps the
    # format unambiguous for the datetime parser.
    df["Date"] = pd.to_datetime(
        df["yr"].astype(str) + "-" + df["mn"].astype(str).str.zfill(2) + "-01"
    )

    # Aggregate (server+db usage % over time). groupby sorts by Date, so the
    # result is already in chronological order.
    group = df.groupby("Date")["per"].mean().to_frame()
    group = group.asfreq("MS")  # monthly series; absent months become NaN

    # fillna(method="ffill") is deprecated in pandas 2.x; ffill() is the
    # supported spelling with the same result.
    group["per"] = group["per"].ffill()

    return group


# ---------------------------
# Model Selection (ARIMA)
# ---------------------------
def select_best_arima(series, p_range=(0, 5), d_range=(0, 3), q_range=(0, 5)):
    """Grid-search ARIMA orders and keep the fit with the lowest AIC.

    Args:
        series: Time series passed unchanged to ``ARIMA``.
        p_range: Arguments for ``range()`` giving the AR orders to try. The
            upper bound is exclusive, so ``(0, 5)`` tries 0..4.
        d_range: Same, for the differencing order.
        q_range: Same, for the MA order.

    Returns:
        A tuple ``(best_model, best_order, best_aic)`` where ``best_model``
        is the fitted results object, ``best_order`` is its ``(p, d, q)`` and
        ``best_aic`` its AIC. If no candidate could be fitted (or the grid is
        empty) the tuple is ``(None, None, float("inf"))``.
    """
    best_aic = float("inf")
    best_order = None
    best_model = None

    for p in range(*p_range):
        for d in range(*d_range):
            for q in range(*q_range):
                try:
                    model = ARIMA(series, order=(p, d, q)).fit()
                except Exception:
                    # A failed candidate is skipped. Catching Exception
                    # rather than using a bare except lets
                    # KeyboardInterrupt/SystemExit stop a long search.
                    continue
                if model.aic < best_aic:
                    best_aic = model.aic
                    best_order = (p, d, q)
                    best_model = model

    return best_model, best_order, best_aic


# ---------------------------
# Save Only Model Params + Metadata
# ---------------------------
def save_model_metadata(model, order, aic, mse, mae, filename="best_model_meta.pkl"):
    """Persist a compact summary of a fitted model with joblib.

    Only the coefficients (``model.params.to_dict()``) are stored together
    with the order and the supplied scores; the fitted model object itself is
    not written.

    Args:
        model: Fitted model whose ``params`` attribute offers ``to_dict()``.
        order: Model order to record.
        aic: AIC value to record.
        mse: Mean squared error to record.
        mae: Mean absolute error to record.
        filename: Destination file for ``joblib.dump``.
    """
    coefficients = model.params.to_dict()  # coefficients only
    payload = dict(order=order, aic=aic, mse=mse, mae=mae, params=coefficients)
    joblib.dump(payload, filename)
    print(f"Model metadata saved as {filename}")


# ---------------------------
# Main Execution
# ---------------------------
if __name__ == "__main__":
    # Load the monthly usage series
    df = load_growth_data()
    series = df["per"]

    # Chronological split: first 80% of the months for training, the
    # remaining 20% held out for testing.
    train_size = int(len(series) * 0.8)
    train, test = series.iloc[:train_size], series.iloc[train_size:]
    if train.empty or test.empty:
        raise RuntimeError(
            f"Not enough data for a train/test split: {len(series)} observation(s)"
        )

    # Run ARIMA selection on the training part only
    best_model, best_order, best_aic = select_best_arima(
        train, p_range=(0, 6), d_range=(0, 3), q_range=(0, 6)
    )
    if best_model is None:
        # The search returns None when every candidate order fails to fit;
        # stop here with a clear message instead of failing on .forecast().
        raise RuntimeError("No ARIMA model could be fitted to the training data")
    print(f"Best ARIMA order={best_order}, AIC={best_aic}")

    # Forecast as many steps as the held-out set has and score the forecast
    forecast = best_model.forecast(steps=len(test))
    mse = mean_squared_error(test, forecast)
    mae = mean_absolute_error(test, forecast)
    print(f"Test MSE={mse}, MAE={mae}")

    # Save only metadata (not full training data)
    save_model_metadata(best_model, best_order, best_aic, mse, mae, "best_arima_meta.pkl")
