In [None]:
import os
import pickle

import numpy as np
import pandas as pd

In [None]:
# Load the test set and add one 0/1 indicator column per holiday: a row gets 1
# when its 'Date' string is one of the dates listed for that column, else 0.
test_df = pd.read_csv('test.csv')

holiday_dates = {
    'Is_SuperBowl': ['2010-02-12', '2011-02-11', '2012-02-10', '2013-02-08'],
    'Is_LaborDay': ['2010-09-10', '2011-09-09', '2012-09-07', '2013-09-06'],
    'Is_Thanksgiving': ['2010-11-26', '2011-11-25', '2012-11-23', '2013-11-29'],
    'Is_Christmas': ['2010-12-31', '2011-12-30', '2012-12-28', '2013-12-27'],
}
for flag_column, flagged_dates in holiday_dates.items():
    test_df[flag_column] = np.where(test_df['Date'].isin(flagged_dates), 1, 0)

In [None]:
import mlflow

# Download into a fixed directory instead of MLflow's default (a new, randomly
# named temp directory on every call). The model-loading cell opens
# /tmp/tmppd3p4ti0/all_models.pkl, so pinning dst_path makes the notebook work
# on a fresh kernel rather than only in the session that produced that path.
MODELS_DIR = "/tmp/tmppd3p4ti0"
os.makedirs(MODELS_DIR, exist_ok=True)  # older MLflow versions require dst_path to exist

# The returned local path is the cell output, as before.
mlflow.artifacts.download_artifacts(
    artifact_uri="mlflow-artifacts:/41361edef7c6479694d4921d6dd5803e/80248f20b4ff4c21b2b7fb0ba3989db9/artifacts/all_models.pkl",
    dst_path=MODELS_DIR,
)

In [None]:
# Local copy of the pickled model dictionary fetched by the MLflow download cell.
# NOTE(review): this directory name looks like a one-off temp directory copied
# from an earlier download output; it is only valid while that file exists.
loaded_models_path = "/tmp/tmppd3p4ti0/all_models.pkl"
if not os.path.isfile(loaded_models_path):
    raise FileNotFoundError(
        f"Model file not found at {loaded_models_path!r}; re-run the MLflow "
        "download cell and point loaded_models_path at the path it returns."
    )

# SECURITY: pickle.load can execute arbitrary code during deserialization, so
# only load artifacts that come from a trusted MLflow run.
with open(loaded_models_path, 'rb') as models_file:
    loaded_models = pickle.load(models_file)

In [None]:

def _training_end_dates(model_keys):
    """Map each 'store_<S>_dept_<D>' model key to the last date of its training split.

    The split is the first ``int(n * TRAIN_RATIO)`` rows (clamped to 1..n) of the
    store/department's rows in the notebook-level ``df``, ordered by date. Keys
    with no rows in ``df`` are reported and left out of the result.
    """
    end_dates = {}
    for model_key in model_keys:
        key_parts = model_key.split('_')
        store, dept = int(key_parts[1]), int(key_parts[3])

        combo_dates = pd.to_datetime(
            df.loc[(df['Store'] == store) & (df['Dept'] == dept), 'Date']
        ).sort_values()

        if len(combo_dates) == 0:
            print(f"Warning: No training data found for Store {store}, Dept {dept}")
            continue

        # Clamp so tiny series still yield a valid (non-wrapping) position.
        train_size = min(max(int(len(combo_dates) * TRAIN_RATIO), 1), len(combo_dates))
        end_dates[model_key] = combo_dates.iloc[train_size - 1]

    return end_dates


def predict_test_data(test_df):
    """
    Predict sales for test data containing Store, Dept, Date columns.

    Relies on the notebook-level globals ``loaded_models`` (dict keyed by
    'store_<S>_dept_<D>'), ``df`` (training rows with Store, Dept, Date) and
    ``TRAIN_RATIO``.

    For each row the matching model forecasts ``max(days_ahead, 1)`` steps,
    where ``days_ahead`` is the number of days between the row's date and the
    end of that model's training split; the last forecast value is used.
    NOTE(review): the horizon is counted in calendar days; if the models were
    fitted on weekly observations the step count should presumably be in
    weeks - confirm against the training notebook.

    Rows with no model, no training data, or a failing forecast get 0.

    Returns a copy of test_df with 'Date' converted to datetime and a float
    'Predicted_Sales' column added; the input frame is not modified.
    """
    test_df = test_df.copy()
    test_df['Date'] = pd.to_datetime(test_df['Date'])

    print(f"Making predictions for {len(test_df)} test records...")

    training_end_dates = _training_end_dates(loaded_models.keys())

    # Zero is the fallback for every row that cannot be predicted.
    predicted_sales = np.zeros(len(test_df), dtype=float)
    successful_predictions = 0
    failed_predictions = 0

    test_rows = zip(test_df['Store'], test_df['Dept'], test_df['Date'])
    for position, (store, dept, test_date) in enumerate(test_rows):
        model_key = f"store_{store}_dept_{dept}"

        if model_key not in loaded_models or model_key not in training_end_dates:
            failed_predictions += 1
            continue

        try:
            days_ahead = (test_date - training_end_dates[model_key]).days
            # Dates at or before the training end fall back to a one-step forecast.
            steps = max(days_ahead, 1)
            forecast = loaded_models[model_key].forecast(steps=steps)
            # forecast() may return a pandas Series whose index does not start
            # at 0; forecast[-1] would then be a label lookup that raises and
            # is silently turned into 0 below. Index by position instead.
            predicted_sales[position] = np.asarray(forecast)[-1]
            successful_predictions += 1
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going.
            print(f"Error predicting for Store {store}, Dept {dept}: {e}")
            predicted_sales[position] = 0
            failed_predictions += 1

    test_df['Predicted_Sales'] = predicted_sales

    print(f"Successful predictions: {successful_predictions}")
    print(f"Failed predictions: {failed_predictions}")

    return test_df

def predict_for_date(store, dept, target_date):
    """Predict sales for a specific store-department on a specific date.

    Relies on the notebook-level globals ``loaded_models``, ``df`` and
    ``TRAIN_RATIO``. The forecast horizon is the number of days between the
    end of the store/department's training split and ``target_date``.
    NOTE(review): the horizon is counted in calendar days; if the models were
    fitted on weekly observations the step count should presumably be in
    weeks - confirm against the training notebook.

    Args:
        store: Store identifier, as used in the 'store_<S>_dept_<D>' model key.
        dept: Department identifier, as used in the model key.
        target_date: Anything ``pd.to_datetime`` accepts.

    Returns:
        The last value of the forecast, or None (after printing the reason)
        when there is no model, no training data, or the target date is not
        after the training end date.
    """
    model_key = f"store_{store}_dept_{dept}"

    if model_key not in loaded_models:
        print(f"No model found for Store {store}, Dept {dept}")
        return None

    model = loaded_models[model_key]

    combo_dates = pd.to_datetime(
        df.loc[(df['Store'] == store) & (df['Dept'] == dept), 'Date']
    ).sort_values()
    if len(combo_dates) == 0:
        # Without this guard iloc would raise IndexError on the empty selection.
        print(f"Warning: No training data found for Store {store}, Dept {dept}")
        return None

    # Clamp to 1..n, matching predict_test_data: an unclamped size of 0 would
    # make iloc[-1] wrap around and pick the *last* date instead of the first.
    train_size = min(max(int(len(combo_dates) * TRAIN_RATIO), 1), len(combo_dates))
    train_end_date = combo_dates.iloc[train_size - 1]

    target_date = pd.to_datetime(target_date)
    days_ahead = (target_date - train_end_date).days

    if days_ahead <= 0:
        print(f"Target date {target_date} is before/same as training end date {train_end_date}")
        return None

    forecast = model.forecast(steps=days_ahead)
    # Positional access: forecast may be a pandas Series with a non-zero-based
    # index, for which forecast[-1] is a label lookup that raises KeyError.
    return np.asarray(forecast)[-1]

In [None]:
# Score every test row with its store/department model. predict_test_data works
# on a copy, so test_df itself is left unchanged; rows it cannot predict come
# back with Predicted_Sales set to 0.
predictions_df = predict_test_data(test_df)

In [None]:
# Build the row identifier "<Store>_<Dept>_<Date>" from the string form of each column.
predictions_df["Id"] = [
    "_".join(id_parts)
    for id_parts in zip(
        predictions_df["Store"].astype(str),
        predictions_df["Dept"].astype(str),
        predictions_df["Date"].astype(str),
    )
]

In [None]:
# Write the two-column file (Id, Weekly_Sales) without the index column;
# Predicted_Sales is exported under the name Weekly_Sales.
(
    predictions_df
    .rename(columns={'Predicted_Sales': 'Weekly_Sales'})
    .loc[:, ['Id', 'Weekly_Sales']]
    .to_csv('predictions_kaggle.csv', index=False)
)