In [None]:
# src/model_training.py

import pandas as pd
import pickle
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

def train_model(data_path, model_path):
    """Train an XGBoost sales regressor on a feature CSV and pickle it.

    The CSV at ``data_path`` is loaded, the trailing 20% of its rows (file
    order, no shuffling) are held out, the model is fitted on the remaining
    rows, the hold-out mean absolute error is printed, and the fitted model
    is pickled to ``model_path``.

    Args:
        data_path: Path of the CSV containing the feature columns and the
            ``sales`` target column.
        model_path: Destination file for the pickled model. Missing parent
            directories are created.

    Raises:
        KeyError: If any required feature column or the ``sales`` target is
            absent from the CSV.
    """
    import os

    df = pd.read_csv(data_path)

    # Feature Selection
    feature_cols = [
        'lag_1',
        'lag_7',
        'rolling_mean_7',
        'Price',
        'Discount',
        'inventory_level',
        'holiday',
        'competitor_price'
    ]
    target_col = 'sales'

    # Fail fast with one message naming every absent column (features and
    # target alike) instead of discovering them one lookup at a time.
    missing = [
        col for col in [*feature_cols, target_col] if col not in df.columns
    ]
    if missing:
        raise KeyError(
            f"Missing required column(s) in {data_path!r}: {missing}"
        )

    # Add seasonality dummies
    feature_cols += [col for col in df.columns if col.startswith('Seasonality_')]

    X = df[feature_cols]
    y = df[target_col]

    # Time-series split: shuffle=False keeps file order, so the test set is
    # the last 20% of rows.
    # NOTE(review): assumes the CSV rows are already in chronological
    # order — confirm against the feature-engineering step.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    model = XGBRegressor(
        n_estimators=400,
        learning_rate=0.05,
        max_depth=6,
        subsample=0.8,
        random_state=42
    )

    model.fit(X_train, y_train)

    predictions = model.predict(X_test)
    mae = mean_absolute_error(y_test, predictions)

    # Emoji are written as escapes so the message survives a source file
    # being re-saved in a non-UTF-8 encoding (the cause of the old mojibake).
    print(f"\U0001F4C9 Mean Absolute Error (MAE): {mae:.2f}")

    # Make sure the destination directory exists so a finished training run
    # is not lost to a FileNotFoundError at save time.
    model_dir = os.path.dirname(os.path.abspath(model_path))
    os.makedirs(model_dir, exist_ok=True)

    with open(model_path, "wb") as f:
        pickle.dump(model, f)

    print("\u2705 Model Training & Saving Completed")

if __name__ == "__main__":
    # Script entry point: train on the processed feature CSV and store the
    # pickled model at the fixed project locations below.
    features_csv = r"F:\Career Launchpad\sales_forecasting week-1\data\processed\sales_features.csv"
    model_pickle = r"F:\Career Launchpad\sales_forecasting week-1\model\sales_model.pkl"
    train_model(data_path=features_csv, model_path=model_pickle)


✅ Loaded features_data.pkl from f:\Career Launchpad\sales_forecasting week-1\notebooks\sales_forecasting_week_1\model\features_data.pkl
Mean Squared Error: 5.81
R^2 Score: nan
✅ Model trained and saved at f:\Career Launchpad\sales_forecasting week-1\notebooks\sales_forecasting_week_1\model\sales_model.pkl
