# In [None]:
# RETAIL SALES FORECASTING: A COMPREHENSIVE CASE STUDY
# ===================================================
# This case study demonstrates a complete machine learning pipeline for forecasting
# daily sales for a retail chain across multiple stores and product categories.

import warnings
from datetime import datetime, timedelta

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as XGBRegressor
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, TimeSeriesSplit, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose

# Suppress warnings for cleaner output.
# NOTE(review): with no category or module argument this filter applies to
# every warning raised after this point, not only to cosmetic ones.
warnings.filterwarnings('ignore')

# Set seed for reproducibility: seeds NumPy's global RNG, which is the source
# of randomness for every np.random.* call made later in this script.
np.random.seed(42)

# 1. DATA GENERATION
# -----------------
# In a real case study, you would load your data from files/database
# For this example, we'll generate realistic retail sales data

def generate_retail_sales_data(start_date='2020-01-01', periods=730):
    """Build a synthetic daily sales panel and a table of store attributes.

    One record is produced for every (store, category, day) combination of
    five stores and five categories. Each daily value starts from a random
    per-series base level and is then shaped, in this order, by a store
    factor, a linear per-day trend, weekend / start-of-month / Q4 /
    holiday-season multipliers, a random promotion flag, a random
    bad-weather flag and multiplicative Gaussian noise.

    All randomness comes from NumPy's global RNG, so seed it beforehand for
    reproducible output.

    Args:
        start_date: First calendar day of the generated range.
        periods: Number of consecutive days to generate per series.

    Returns:
        A ``(df, store_attributes)`` tuple. ``df`` has the columns ``date``,
        ``store``, ``category``, ``sales``, ``promo`` and ``bad_weather``;
        ``store_attributes`` has one row per store with ``store``,
        ``size_sqm``, ``years_open`` and ``city``.
    """
    calendar = pd.date_range(start=start_date, periods=periods)
    stores = [f'Store_{number}' for number in range(1, 6)]  # 5 stores
    categories = ['Electronics', 'Clothing', 'Groceries', 'Home', 'Beauty']

    # Units added per elapsed day; categories not listed have a flat trend.
    daily_growth = {'Electronics': 20, 'Clothing': 10, 'Groceries': 5}
    # Best and worst performing stores; every other store is average (1.0).
    store_multiplier = {'Store_1': 1.2, 'Store_5': 0.8}

    records = []
    for store in stores:
        store_factor = store_multiplier.get(store, 1.0)

        for category in categories:
            # One random base level per (store, category) series.
            base_level = np.random.randint(5000, 15000)
            trend_factor = daily_growth.get(category, 0)

            for day_index, day in enumerate(calendar):
                # Level plus linear trend.
                sales = base_level * store_factor
                sales += day_index * trend_factor

                # Calendar effects, applied multiplicatively.
                if day.dayofweek >= 5:      # weekend uplift
                    sales *= 1.3
                if day.day <= 5:            # start-of-month uplift
                    sales *= 1.1
                if day.quarter == 4:        # Q4 uplift
                    sales *= 1.4
                if day.month in [11, 12]:   # holiday season
                    sales *= 1.5
                elif day.month in [1, 2]:   # post-holiday slump
                    sales *= 0.8

                # Random promotion on roughly 10% of days (+20%).
                promo = np.random.choice([0, 1], p=[0.9, 0.1])
                if promo:
                    sales *= 1.2

                # Random bad weather on roughly 15% of days (-10%).
                weather_bad = np.random.choice([0, 1], p=[0.85, 0.15])
                if weather_bad:
                    sales *= 0.9

                # Multiplicative noise centred on 1 with 5% spread.
                sales *= np.random.normal(1, 0.05)

                records.append({
                    'date': day,
                    'store': store,
                    'category': category,
                    'sales': max(0, round(sales, 2)),
                    'promo': promo,
                    'bad_weather': weather_bad
                })

    df = pd.DataFrame(records)

    # Static, store-level attributes keyed by the same store names.
    store_attributes = pd.DataFrame({
        'store': stores,
        'size_sqm': [5000, 3500, 7000, 4500, 2500],
        'years_open': [10, 5, 7, 3, 2],
        'city': ['New York', 'Chicago', 'Los Angeles', 'Houston', 'Phoenix']
    })

    return df, store_attributes

# Build the working dataset: 730 daily observations (2 years) per series,
# plus the table of per-store attributes.
sales_df, store_info = generate_retail_sales_data(periods=730)

# Quick look at both tables.
print("Sample of generated sales data:", sales_df.head(), sep="\n")
print("\nStore information:", store_info, sep="\n")

# 2. EXPLORATORY DATA ANALYSIS
# ---------------------------

print("\n== EXPLORATORY DATA ANALYSIS ==")

# Summary statistics of the raw sales table
print("\nBasic statistics:", sales_df.describe(), sep="\n")

# Number of missing entries per column
print("\nMissing values:", sales_df.isna().sum(), sep="\n")

# Attach the store attributes to every sales row
sales_df = pd.merge(sales_df, store_info, on='store', how='left')

# Level and spread of sales per store
store_sales = sales_df.groupby('store')['sales'].agg(['mean', 'median', 'std', 'sum'])
print("\nSales by store:", store_sales, sep="\n")

# Level and spread of sales per category
category_sales = sales_df.groupby('category')['sales'].agg(['mean', 'median', 'std', 'sum'])
print("\nSales by category:", category_sales, sep="\n")

# Calendar breakdown columns derived from the date
sales_df = sales_df.assign(
    year=sales_df['date'].dt.year,
    month=sales_df['date'].dt.month,
    day_of_week=sales_df['date'].dt.dayofweek,
    day_of_month=sales_df['date'].dt.day,
    # 1 for Saturday/Sunday (dayofweek 5 or 6), 0 otherwise
    is_weekend=lambda frame: (frame['day_of_week'] >= 5).astype('int64'),
    quarter=sales_df['date'].dt.quarter,
)

# Total sales per calendar month
monthly_sales = sales_df.groupby(['year', 'month'], as_index=False)['sales'].sum()
print("\nMonthly sales trend:", monthly_sales.head(), sep="\n")

# Mean sales per weekday
dow_sales = sales_df.groupby('day_of_week', as_index=False)['sales'].mean()
print("\nAverage sales by day of week:", dow_sales, sep="\n")

# Sales with vs. without a promotion
promo_effect = sales_df.groupby('promo')['sales'].agg(['mean', 'median', 'count'])
print("\nImpact of promotions:", promo_effect, sep="\n")

# Sales on bad-weather vs. normal days
weather_effect = sales_df.groupby('bad_weather')['sales'].agg(['mean', 'median', 'count'])
print("\nImpact of bad weather:", weather_effect, sep="\n")

# 3. VISUALIZATIONS
# ----------------

# Four-panel overview drawn on one explicitly created figure.
eda_fig, eda_axes = plt.subplots(2, 2, figsize=(15, 10))

# Plot 1: Overall sales trend
sales_ts = sales_df.groupby('date')['sales'].sum()
sales_ts.plot(title='Daily Total Sales', ax=eda_axes[0, 0])
eda_axes[0, 0].grid(True)

# Plot 2: Sales by store
store_totals = sales_df.groupby('store')['sales'].sum()
store_totals.plot(kind='bar', title='Total Sales by Store', ax=eda_axes[0, 1])
eda_axes[0, 1].grid(True)

# Plot 3: Sales by category
cat_totals = sales_df.groupby('category')['sales'].sum()
cat_totals.plot(kind='bar', title='Total Sales by Category', ax=eda_axes[1, 0])
eda_axes[1, 0].grid(True)

# Plot 4: Avg sales by day of week
dow_sales.plot(kind='bar', x='day_of_week', y='sales',
               title='Average Sales by Day of Week', ax=eda_axes[1, 1])
eda_axes[1, 1].grid(True)

eda_fig.tight_layout()
eda_fig.savefig('eda_plots.png')

# Time series decomposition for a specific store/category
single_series = sales_df[(sales_df['store'] == 'Store_1') &
                         (sales_df['category'] == 'Electronics')]
daily_sales = single_series.set_index('date')['sales']

# Perform seasonal decomposition with a weekly (7-day) period
decomposition = seasonal_decompose(daily_sales, model='additive', period=7)

# DecomposeResult.plot() creates and returns its own figure, so a preceding
# plt.figure(figsize=...) would only leave an empty figure behind and the
# requested size would be ignored. Resize the returned figure instead.
decomposition_fig = decomposition.plot()
decomposition_fig.set_size_inches(12, 10)
decomposition_fig.tight_layout()
decomposition_fig.savefig('seasonal_decomposition.png')

# 4. FEATURE ENGINEERING
# ---------------------

print("\n== FEATURE ENGINEERING ==")

# Create features for forecasting
def create_features(df):
    """Add calendar, cyclical and per-series lag features for forecasting.

    The input is copied, never modified. Lag and rolling-mean features are
    computed separately for every (store, category) series in chronological
    order, regardless of the physical row order of ``df``, and only use
    values from earlier days. The returned rows keep the input's order.

    Args:
        df: DataFrame with at least the columns ``date`` (datetime),
            ``store``, ``category`` and ``sales``.

    Returns:
        A new DataFrame with the added columns ``dayofweek``, ``month``,
        ``year``, ``dayofyear``, ``dayofmonth``, ``weekofyear``,
        ``is_weekend``, ``is_holiday``, ``month_sin``, ``month_cos``,
        ``dow_sin``, ``dow_cos``, ``sales_lag7``, ``sales_lag14``,
        ``sales_lag28``, ``sales_mean7`` and ``sales_mean28``. Rows that
        contain a NaN in any column are dropped, which removes the first 28
        days of every series (their lag features are undefined).
    """
    df = df.copy()

    # Calendar features
    dates = df['date'].dt
    df['dayofweek'] = dates.dayofweek
    df['month'] = dates.month
    df['year'] = dates.year
    df['dayofyear'] = dates.dayofyear
    df['dayofmonth'] = dates.day
    df['weekofyear'] = dates.isocalendar().week

    # Weekend flag: 1 for Saturday/Sunday, 0 otherwise
    df['is_weekend'] = (df['dayofweek'] >= 5).astype(int)

    # Holiday flag (simplified): Dec 25, Jan 1 and Jul 4 only
    month, day = df['month'], df['dayofmonth']
    df['is_holiday'] = ((month == 12) & (day == 25)) | \
                       ((month == 1) & (day == 1)) | \
                       ((month == 7) & (day == 4))

    # Cyclical encodings so that December/January and Sunday/Monday are close
    df['month_sin'] = np.sin(2 * np.pi * df['month']/12)
    df['month_cos'] = np.cos(2 * np.pi * df['month']/12)
    df['dow_sin'] = np.sin(2 * np.pi * df['dayofweek']/7)
    df['dow_cos'] = np.cos(2 * np.pi * df['dayofweek']/7)

    # Lag features. Shifting is only meaningful on date-ordered data, so work
    # on a chronologically sorted view and scatter the results back to the
    # original row positions (positional, so it does not depend on the index).
    order = np.argsort(df['date'].to_numpy(), kind='stable')
    per_series = df.iloc[order].groupby(['store', 'category'], sort=False)['sales']

    def _to_input_order(values):
        restored = np.empty(len(df), dtype=float)
        restored[order] = values.to_numpy(dtype=float)
        return restored

    for lag in (7, 14, 28):
        df[f'sales_lag{lag}'] = _to_input_order(per_series.shift(lag))

    for window in (7, 28):
        # shift() first so the mean covers the previous `window` days only
        # and never includes the current day's sales.
        rolled = per_series.transform(
            lambda s, w=window: s.shift().rolling(window=w).mean())
        df[f'sales_mean{window}'] = _to_input_order(rolled)

    # Drop rows with NaN (lag features will create NaN for first rows)
    df = df.dropna()

    return df

# Build the modelling table from the enriched sales data
model_df = create_features(sales_df)

# Show which columns exist now and what the first rows look like
print("Dataframe with new features:", list(model_df.columns), model_df.head(), sep="\n")

# 5. PREPARING FOR MODELING
# ------------------------

# Split data temporally: roughly the first 80% of the calendar days for
# training, the next 10% for validation and the last 10% for test.
# The cut points are whole dates rather than row counts, so all rows of one
# day land in the same split. The multi-key sort makes row order deterministic.
model_df = model_df.sort_values(['date', 'store', 'category'])

_split_days = model_df['date'].drop_duplicates().reset_index(drop=True)
_val_start = _split_days.iloc[int(len(_split_days) * 0.8)]
_test_start = _split_days.iloc[int(len(_split_days) * 0.9)]

# Independent copies, so later column assignments on a split never write
# into a slice of model_df.
train_df = model_df[model_df['date'] < _val_start].copy()
val_df = model_df[(model_df['date'] >= _val_start) &
                  (model_df['date'] < _test_start)].copy()
test_df = model_df[model_df['date'] >= _test_start].copy()

# Row positions of the split boundaries within the sorted model_df
train_cutoff = len(train_df)
val_cutoff = train_cutoff + len(val_df)

print(f"\nData splits: Train: {len(train_df)}, Validation: {len(val_df)}, Test: {len(test_df)}")

# Define features and target
cat_features = ['store', 'category', 'city']
num_features = ['size_sqm', 'years_open', 'promo', 'bad_weather',
                'dayofweek', 'month', 'year', 'is_weekend', 'is_holiday',
                'month_sin', 'month_cos', 'dow_sin', 'dow_cos',
                'sales_lag7', 'sales_lag14', 'sales_lag28',
                'sales_mean7', 'sales_mean28']

features = cat_features + num_features
target = 'sales'

# Create preprocessing pipeline: standardise the numeric columns and one-hot
# encode the categorical ones (categories unseen at fit time encode as zeros).
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_features)
    ])

# 6. MODEL TRAINING AND EVALUATION
# -------------------------------

print("\n== MODEL TRAINING ==")

# Define models to try
models = {
    'LinearRegression': LinearRegression(),
    'RandomForest': RandomForestRegressor(n_estimators=100, random_state=42),
    'GradientBoosting': GradientBoostingRegressor(random_state=42),
    'XGBoost': XGBRegressor.XGBRegressor(random_state=42)
}

# Train and evaluate each model; results maps model name -> fitted pipeline
# plus its validation metrics.
results = {}

for name, model in models.items():
    print(f"\nTraining {name}...")

    # Each pipeline gets its own copy of the preprocessor. Pipeline.fit fits
    # its steps in place, so sharing the single `preprocessor` instance would
    # make every stored pipeline point at the same transformer object.
    pipeline = Pipeline([
        ('preprocessor', clone(preprocessor)),
        ('model', model)
    ])

    # Fit on the training split only
    pipeline.fit(train_df[features], train_df[target])

    # Evaluate on validation set
    val_preds = pipeline.predict(val_df[features])
    val_mae = mean_absolute_error(val_df[target], val_preds)
    val_rmse = np.sqrt(mean_squared_error(val_df[target], val_preds))
    val_r2 = r2_score(val_df[target], val_preds)

    print(f"  Validation - MAE: {val_mae:.2f}, RMSE: {val_rmse:.2f}, R²: {val_r2:.4f}")

    # Store results
    results[name] = {
        'model': pipeline,
        'val_mae': val_mae,
        'val_rmse': val_rmse,
        'val_r2': val_r2
    }

# Find the best model: lowest validation RMSE wins
best_model_name = min(results, key=lambda x: results[x]['val_rmse'])
best_model = results[best_model_name]['model']

print(f"\nBest model: {best_model_name} with RMSE: {results[best_model_name]['val_rmse']:.2f}")

# 7. HYPERPARAMETER TUNING
# -----------------------

print("\n== HYPERPARAMETER TUNING ==")

# Search space per model name. The "model__" prefix routes each parameter to
# the pipeline step named 'model'. Both boosted-tree models share one grid.
_boosted_tree_grid = {
    'model__n_estimators': [100, 200],
    'model__learning_rate': [0.01, 0.1],
    'model__max_depth': [3, 5, 7]
}
_tuning_plans = {
    'RandomForest': ("Tuning RandomForest...", {
        'model__n_estimators': [50, 100, 200],
        'model__max_depth': [None, 10, 20, 30],
        'model__min_samples_split': [2, 5, 10]
    }),
    'GradientBoosting': ("Tuning GradientBoosting...", dict(_boosted_tree_grid)),
    'XGBoost': ("Tuning XGBoost...", dict(_boosted_tree_grid)),
}

# Any model without a plan (i.e. LinearRegression) gets an empty grid and is
# left untuned.
_tuning_message, param_grid = _tuning_plans.get(
    best_model_name,
    ("Linear Regression doesn't require hyper-parameter tuning. Moving on...", {})
)
print(_tuning_message)

if param_grid:
    # Time-ordered folds on the training split
    tscv = TimeSeriesSplit(n_splits=3)

    # Exhaustive search over the grid, scored by (negated) RMSE
    grid_search = GridSearchCV(
        best_model,
        param_grid,
        cv=tscv,
        scoring='neg_root_mean_squared_error',
        n_jobs=-1,
        verbose=1
    )

    grid_search.fit(train_df[features], train_df[target])

    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Best cross-validation score: {-grid_search.best_score_:.2f} RMSE")

    # Continue with the best pipeline found by the search
    best_model = grid_search.best_estimator_

# 8. FINAL MODEL EVALUATION
# -----------------------

print("\n== FINAL MODEL EVALUATION ==")

# Make predictions on test set
test_preds = best_model.predict(test_df[features])

# Calculate metrics
test_mae = mean_absolute_error(test_df[target], test_preds)
test_rmse = np.sqrt(mean_squared_error(test_df[target], test_preds))
test_r2 = r2_score(test_df[target], test_preds)

print(f"Test - MAE: {test_mae:.2f}, RMSE: {test_rmse:.2f}, R²: {test_r2:.4f}")

# Plot actual vs predicted for a single store/category.
# test_df was produced by slicing model_df; take an independent copy before
# adding a column so the assignment is not a chained assignment on a slice.
test_df = test_df.copy()
test_df['predicted'] = test_preds

single_test = test_df[(test_df['store'] == 'Store_1') &
                      (test_df['category'] == 'Electronics')]

plt.figure(figsize=(12, 6))
plt.plot(single_test['date'], single_test['sales'], label='Actual')
plt.plot(single_test['date'], single_test['predicted'], label='Predicted')
plt.title('Actual vs Predicted Sales: Store_1, Electronics')
plt.xlabel('Date')
plt.ylabel('Sales')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig('actual_vs_predicted.png')

# 9. FEATURE IMPORTANCE ANALYSIS
# -----------------------------

print("\n== FEATURE IMPORTANCE ANALYSIS ==")

# Only estimators that expose feature_importances_ can be analysed here
_final_estimator = best_model.named_steps['model']

if hasattr(_final_estimator, 'feature_importances_'):
    # Rebuild the transformed column names: the numeric columns come first,
    # followed by the one-hot columns produced by the 'cat' encoder.
    _fitted_encoder = best_model.named_steps['preprocessor'].named_transformers_['cat']
    cat_features_transformed = _fitted_encoder.get_feature_names_out(cat_features)
    feature_names = np.array(num_features + cat_features_transformed.tolist())

    # Importances, and the ordering that ranks them from highest to lowest
    importances = _final_estimator.feature_importances_
    indices = np.argsort(importances)[::-1]

    # At most 20 features are reported and plotted
    _shown = min(20, len(feature_names))

    print("\nTop 20 most important features:")
    for _position in indices[:_shown]:
        print(f"{feature_names[_position]}: {importances[_position]:.4f}")

    # Bar chart of the same ranking
    plt.figure(figsize=(12, 8))
    plt.title('Feature Importances')
    plt.bar(range(_shown), importances[indices[:20]], align='center')
    plt.xticks(range(_shown), feature_names[indices[:20]], rotation=90)
    plt.tight_layout()
    plt.savefig('feature_importances.png')

# 10. FUTURE SALES FORECASTING
# ---------------------------

print("\n== FUTURE SALES FORECASTING ==")

def _lag_features_from_history(history):
    """Return lag and rolling-mean feature values implied by a sales history.

    ``history`` is a chronologically ordered list whose last element is the
    day immediately before the day being forecast. When fewer values exist
    than a lag needs, the oldest available value (or the mean of everything
    available) is used; with no history at all every feature is 0.
    """
    if not history:
        return {'sales_lag7': 0, 'sales_lag14': 0, 'sales_lag28': 0,
                'sales_mean7': 0, 'sales_mean28': 0}

    depth = len(history)
    return {
        'sales_lag7': history[-min(7, depth)],
        'sales_lag14': history[-min(14, depth)],
        'sales_lag28': history[-min(28, depth)],
        'sales_mean7': np.mean(history[-min(7, depth):]),
        'sales_mean28': np.mean(history[-min(28, depth):]),
    }


def _future_calendar_row(future_date, store, category, store_attrs):
    """Build the non-lag feature values for one store/category on one future day."""
    return {
        'date': future_date,
        'store': store,
        'category': category,
        'promo': np.random.choice([0, 1], p=[0.9, 0.1]),  # Random promotion
        'bad_weather': np.random.choice([0, 1], p=[0.85, 0.15]),  # Random weather
        'size_sqm': store_attrs['size_sqm'],
        'years_open': store_attrs['years_open'],
        'city': store_attrs['city'],
        'dayofweek': future_date.dayofweek,
        'month': future_date.month,
        'year': future_date.year,
        'dayofyear': future_date.timetuple().tm_yday,
        'dayofmonth': future_date.day,
        'weekofyear': future_date.isocalendar()[1],
        'is_weekend': 1 if future_date.dayofweek >= 5 else 0,
        'is_holiday': 1 if ((future_date.month == 12 and future_date.day == 25) or
                            (future_date.month == 1 and future_date.day == 1) or
                            (future_date.month == 7 and future_date.day == 4)) else 0,
        'month_sin': np.sin(2 * np.pi * future_date.month/12),
        'month_cos': np.cos(2 * np.pi * future_date.month/12),
        'dow_sin': np.sin(2 * np.pi * future_date.dayofweek/7),
        'dow_cos': np.cos(2 * np.pi * future_date.dayofweek/7)
    }


def forecast_future_sales(model, last_data, features, n_days=30):
    """Forecast sales for n days into the future for all store/category combinations.

    Forecasting is recursive: the horizon is walked one day at a time and the
    lag / rolling-mean features of each day are computed from the observed
    history extended with the model's own predictions for the earlier
    forecast days. That way the features of day h refer to the days just
    before day h, instead of being frozen at the last observed values.

    Promotion and bad-weather flags for future days are unknown and are
    drawn at random from NumPy's global RNG.

    Args:
        model: Fitted object with a ``predict(DataFrame)`` method.
        last_data: Recent history with the columns ``date``, ``store``,
            ``category``, ``sales``, ``size_sqm``, ``years_open`` and
            ``city``. NOTE(review): every series is assumed to be daily and
            to end on the latest date in ``last_data`` - confirm for data
            with gaps.
        features: Column names passed to ``model.predict``.
        n_days: Number of days to forecast after the last date in
            ``last_data``.

    Returns:
        DataFrame with one row per (store, category, future day), ordered by
        store, then category, then date. It holds the generated feature
        columns plus ``predicted_sales``, and is empty when ``last_data``
        has no rows or ``n_days`` is not positive.
    """
    # Get unique stores and categories
    stores = last_data['store'].unique()
    categories = last_data['category'].unique()

    if n_days <= 0 or len(stores) == 0 or len(categories) == 0:
        empty_columns = dict.fromkeys(
            ['date', 'store', 'category', *features, 'predicted_sales'])
        return pd.DataFrame(columns=list(empty_columns))

    # Future dates start the day after the last observed date
    last_date = last_data['date'].max()
    future_dates = [last_date + timedelta(days=offset + 1) for offset in range(n_days)]

    # One entry per (store, category) series: its future rows and the running
    # sales history that the lag features are derived from.
    series_rows = []
    histories = []
    for store in stores:
        # Store attributes are identical for every category of the store
        store_attrs = last_data[last_data['store'] == store][
            ['size_sqm', 'years_open', 'city']].iloc[0].to_dict()

        for category in categories:
            observed = last_data[(last_data['store'] == store) &
                                 (last_data['category'] == category)].sort_values('date')
            histories.append(observed['sales'].tolist())
            series_rows.append([
                _future_calendar_row(future_date, store, category, store_attrs)
                for future_date in future_dates
            ])

    # Walk the horizon day by day so each prediction can feed later lags
    predictions = [[] for _ in series_rows]
    for step in range(n_days):
        step_rows = []
        for rows, history in zip(series_rows, histories):
            rows[step].update(_lag_features_from_history(history))
            step_rows.append(rows[step])

        step_frame = pd.DataFrame(step_rows)
        step_preds = np.asarray(model.predict(step_frame[features])).ravel()

        for history, series_preds, value in zip(histories, predictions, step_preds):
            value = float(value)
            history.append(value)
            series_preds.append(value)

    # Convert to dataframe (store-major, then category, then date)
    forecast_df = pd.DataFrame([row for rows in series_rows for row in rows])
    forecast_df['predicted_sales'] = [
        value for series_preds in predictions for value in series_preds]

    return forecast_df

# Forecast the 30 days that follow the test period
forecast_df = forecast_future_sales(best_model, test_df, features, n_days=30)

# Preview of the forecast table
print("\nSample forecasts for next 30 days:")
print(forecast_df.loc[:, ['date', 'store', 'category', 'predicted_sales']].head(15))

# Forecast rows of one series, used for the chart below
single_forecast = forecast_df.loc[
    forecast_df['store'].eq('Store_1') & forecast_df['category'].eq('Electronics')
]

# Observed test-period sales of the same series
hist_data = test_df.loc[
    test_df['store'].eq('Store_1') & test_df['category'].eq('Electronics')
]

# History (solid blue) followed by the forecast (dashed red)
_forecast_fig, _forecast_ax = plt.subplots(figsize=(14, 7))
_forecast_ax.plot(hist_data['date'], hist_data['sales'], label='Historical', color='blue')
_forecast_ax.plot(single_forecast['date'], single_forecast['predicted_sales'],
                  label='Forecast', color='red', linestyle='--')
_forecast_ax.set_title('Sales Forecast: Store_1, Electronics')
_forecast_ax.set_xlabel('Date')
_forecast_ax.set_ylabel('Sales')
_forecast_ax.legend()
_forecast_ax.grid(True)
_forecast_fig.tight_layout()
_forecast_fig.savefig('forecast.png')

# 11. MODEL DEPLOYMENT
# ------------------

print("\n== MODEL DEPLOYMENT ==")

# Save the model: best_model (the selected, possibly tuned, pipeline object)
# is written to a file in the current working directory.
# NOTE(review): joblib files are presumably pickle-based - only load this
# file back from a trusted location; confirm against the joblib docs.
model_filename = 'retail_sales_forecast_model.joblib'
joblib.dump(best_model, model_filename)
print(f"Model saved to {model_filename}")

# Create a simple function for making predictions
def predict_sales(model, store, category, date, promo=0, bad_weather=0):
    """Predict sales for one store/category on one date.

    Builds a single-row feature frame and passes it to ``model.predict``.
    Store attributes come from the module-level ``store_info`` table, the lag
    features are taken from the most recent sales of that series in the
    module-level ``test_df``, and the columns sent to the model are those in
    the module-level ``features`` list.

    Args:
        model: Fitted object with a ``predict(DataFrame)`` method.
        store: Store name; must exist in ``store_info``.
        category: Product category name.
        date: Target date, anything ``pd.to_datetime`` accepts.
        promo: 1 if a promotion runs on that day, else 0.
        bad_weather: 1 if bad weather is expected on that day, else 0.

    Returns:
        The first (only) element of the model's prediction.

    Raises:
        IndexError: If ``store`` has no row in ``store_info``.
    """
    # Static attributes of the requested store
    attrs = store_info.loc[store_info['store'] == store].iloc[0]

    # Single-row frame holding the inputs supplied by the caller
    df = pd.DataFrame([{
        'date': pd.to_datetime(date),
        'store': store,
        'category': category,
        'promo': promo,
        'bad_weather': bad_weather,
        'size_sqm': attrs['size_sqm'],
        'years_open': attrs['years_open'],
        'city': attrs['city']
    }])

    # Calendar features derived from the date
    when = df['date'].dt
    df['dayofweek'] = when.dayofweek
    df['month'] = when.month
    df['year'] = when.year
    df['dayofyear'] = when.dayofyear
    df['dayofmonth'] = when.day
    df['weekofyear'] = when.isocalendar().week
    df['is_weekend'] = (df['dayofweek'] >= 5).astype('int64')

    # Simplified holiday flag: Dec 25, Jan 1 and Jul 4
    month, day = df['month'], df['dayofmonth']
    df['is_holiday'] = (
        (month.eq(12) & day.eq(25))
        | (month.eq(1) & day.eq(1))
        | (month.eq(7) & day.eq(4))
    )

    # Cyclical encodings of month and weekday
    df['month_sin'] = np.sin(2 * np.pi * df['month']/12)
    df['month_cos'] = np.cos(2 * np.pi * df['month']/12)
    df['dow_sin'] = np.sin(2 * np.pi * df['dayofweek']/7)
    df['dow_cos'] = np.cos(2 * np.pi * df['dayofweek']/7)

    # Lag features.
    # In a real deployment these would come from a store of historical sales;
    # for this demo the latest values in the test set act as placeholders.
    history = test_df.loc[
        (test_df['store'] == store) & (test_df['category'] == category)
    ].sort_values('date')['sales'].tolist()

    if history:
        depth = len(history)
        # With fewer values than a lag needs, fall back to the oldest one
        for lag in (7, 14, 28):
            df[f'sales_lag{lag}'] = history[-min(lag, depth)]
        for window in (7, 28):
            df[f'sales_mean{window}'] = np.mean(history[-min(window, depth):])
    else:
        # No history for this series: every lag feature defaults to 0
        for column in ('sales_lag7', 'sales_lag14', 'sales_lag28',
                       'sales_mean7', 'sales_mean28'):
            df[column] = 0

    return model.predict(df[features])[0]

# Example call: one week after the last test date, with a promotion running
future_date = f"{test_df['date'].max() + timedelta(weeks=1):%Y-%m-%d}"
prediction = predict_sales(best_model, 'Store_1', 'Electronics', future_date, promo=1)
print(f"\nPredicted sales for Store_1, Electronics on {future_date} with promotion: ${prediction:.2f}")

# 12. MONITORING AND EVALUATION FRAMEWORK
# -------------------------------------

print("\n== MONITORING FRAMEWORK ==")

def calculate_deviation(actual, predicted):
    """Return the signed error of ``predicted`` as a percentage of ``actual``.

    Positive values mean the prediction was below the actual value. The
    error is divided by ``actual``, so an actual value of zero is not
    handled here.
    """
    shortfall = actual - predicted
    return shortfall / actual * 100

def monitor_model_performance(model, new_data):
    """Simulate monitoring model performance as new data arrives."""
    # Make predictions
    predictions = model.predict(new_data[features])

    # Calculate metrics
    mae = mean_absolute_error