# In [None]:
# Step 1: Import necessary libraries
import pandas as pd
from prophet import Prophet
import joblib
from pathlib import Path
import warnings

# Silence every warning category so the cell output stays readable.
# NOTE: this is process-wide and also hides deprecation notices from
# pandas / Prophet while the filter is installed.
warnings.simplefilter('ignore')

print("Libraries imported successfully.")

# --- Data Loading and Preparation ---
# Pipeline overview: load the three CSV files, merge them, keep the rows for
# Store 1 / Dept 1, fit a Prophet model on the weekly sales, and persist the
# fitted model under '<project root>/models/prophet_model.pkl'.
print("Loading datasets...")
# The project root is taken to be the parent of the current working directory
# (i.e. the notebook is expected to run from a sub-folder of the project).
PROJECT_ROOT = Path.cwd().parent

try:
    # Only the CSV reads live inside the ``try`` block: a FileNotFoundError
    # raised later (for example while writing the model file) must not be
    # misreported as a missing data file.
    train_df = pd.read_csv(PROJECT_ROOT / 'data' / 'train.csv')
    features_df = pd.read_csv(PROJECT_ROOT / 'data' / 'features.csv')
    stores_df = pd.read_csv(PROJECT_ROOT / 'data' / 'stores.csv')
except FileNotFoundError as e:
    print(f"ERROR: Could not find a data file. {e}. Make sure the Kaggle CSV files are in the 'data' directory.")
else:
    # Merge the datasets into one master dataframe. Inner joins drop any
    # training row that has no matching features / store record.
    df = train_df.merge(features_df, on=['Store', 'Date', 'IsHoliday'], how='inner')
    df = df.merge(stores_df, on='Store', how='inner')
    print("Datasets loaded and merged.")

    # --- Data Cleaning and Feature Engineering ---
    # Convert 'Date' column to datetime objects for time-series analysis
    df['Date'] = pd.to_datetime(df['Date'])

    # For our first model, we focus on a single store and department.
    # This keeps training fast and simple for our MVP.
    # ``.copy()`` detaches the slice from ``df`` so later edits do not
    # trigger pandas' SettingWithCopy behaviour.
    df_filtered = df[(df['Store'] == 1) & (df['Dept'] == 1)].copy()
    print(f"Filtered data for Store 1, Dept 1. We have {len(df_filtered)} data points.")

    # Fail early with a clear message instead of an opaque error from inside
    # Prophet when the filter matches too few rows to fit a model.
    if len(df_filtered) < 2:
        raise ValueError(
            "Not enough rows for Store 1, Dept 1 to train a model "
            f"(found {len(df_filtered)}, need at least 2)."
        )

    # Prophet requires specific column names: 'ds' for the date and 'y' for the value
    df_prophet = df_filtered[['Date', 'Weekly_Sales']].rename(columns={'Date': 'ds', 'Weekly_Sales': 'y'})

    # --- Model Training ---
    print("Training Prophet model... This may take a minute.")
    # Initialize with Prophet's default settings and fit on the weekly sales
    model = Prophet()
    model.fit(df_prophet)
    print("Model training complete.")

    # --- Save the Model ---
    # Define the path to save our trained model
    MODELS_DIR = PROJECT_ROOT / 'models'
    # ``parents=True`` also creates missing intermediate directories;
    # ``exist_ok=True`` makes re-running the cell harmless.
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    model_path = MODELS_DIR / 'prophet_model.pkl'

    # Use joblib to save the model object to a file for later use in our app.
    # NOTE(review): Prophet's documentation recommends its own JSON
    # serialization (prophet.serialize.model_to_json) over pickling the model
    # object. The pickle format is kept here because the consumer of this file
    # presumably loads it with joblib - confirm before switching formats.
    joblib.dump(model, model_path)
    print(f"SUCCESS: Model has been saved to: {model_path}")

# :