### ARIMA + Splitting into micro-models approach

In [76]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [89]:
def sarimax_predict(channel_params):
    """
    Predicts next-period revenue using a SARIMAX model with per-channel spend
    as exogenous regressors.

    On every call the function reloads ``final_mock_data.csv``, spreads
    ``ad_spend`` into one column per value of the ``channel`` column,
    aggregates to month-end totals, fits an ARIMA(1, 1, 1) model (no seasonal
    component) and forecasts one step ahead.

    Parameters:
    - channel_params (dict): Dictionary with channel names as keys (as they
      appear in the CSV ``channel`` column, without the ``channel_`` prefix)
      and spend values as values. Key order does not matter; channels that are
      left out are treated as a spend of 0.

    Returns:
    - float: Predicted revenue. 0.0 is returned when the model predicts a
      negative value, and also (after printing the error) when the forecast
      step fails, e.g. because of an unknown channel name.
    """
    # Load and prepare data
    df = pd.read_csv("final_mock_data.csv")
    df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
    df = df.sort_values("Date")

    # One hot encode the channel column. The encoder returns a bare array in
    # df's row order, so the frame must reuse df's index: after sort_values
    # the index is no longer 0..n-1, and pd.concat(axis=1) aligns on index
    # labels, which would otherwise attach indicators to the wrong rows.
    channel_encoder = OneHotEncoder(sparse_output=False)
    channel_encoded = channel_encoder.fit_transform(df[["channel"]])
    channel_cols = list(channel_encoder.get_feature_names_out(["channel"]))
    channel_encoded_df = pd.DataFrame(
        channel_encoded, columns=channel_cols, index=df.index
    )
    df = pd.concat([df, channel_encoded_df], axis=1)

    # Turn each 0/1 indicator into the spend attributed to that channel
    for col in channel_cols:
        df[col] = df[col] * df["ad_spend"]

    # Drop the original ad_spend column (now represented per channel)
    df = df.drop("ad_spend", axis=1)

    # The same "sum everything" spec is used for both aggregation levels
    sum_spec = {
        "views": "sum",
        "leads": "sum",
        "new_accounts": "sum",
        "revenue": "sum",
        **{col: "sum" for col in channel_cols},
    }

    # Group by date and sum all numeric columns
    df = df.groupby("Date").agg(sum_spec).reset_index()

    # Create monthly (month-end) aggregated data
    monthly_df = (
        df.groupby(pd.Grouper(key="Date", freq="ME")).agg(sum_spec).reset_index()
    )

    # Drop any rows with NaN values
    monthly_df = monthly_df.dropna()

    # Prepare model inputs
    y = monthly_df["revenue"]
    X = monthly_df[channel_cols]

    # Fit SARIMAX model
    model = SARIMAX(
        y,
        exog=X,
        order=(1, 1, 1),
        seasonal_order=(0, 0, 0, 0),
        enforce_stationarity=False,
        enforce_invertibility=False,
    )

    results = model.fit(disp=False)

    try:
        # Build a single exog row in exactly the training column order; the
        # forecast consumes exog positionally, so relying on the caller's
        # dict order would silently map spend to the wrong channel.
        spend_row = {col: 0 for col in channel_cols}
        for channel, spend in channel_params.items():
            col = f"channel_{channel}"
            if col not in spend_row:
                raise ValueError(f"Unknown channel: {channel!r}")
            spend_row[col] = spend
        channel_df = pd.DataFrame([spend_row], columns=channel_cols)

        # Get the forecast
        forecast = results.forecast(steps=1, exog=channel_df)
        predicted = float(forecast.iloc[0])

        # Ensure non-negative revenue. Compare the value itself:
        # `forecast.all() < 0` compares a bool with 0 and is never true.
        if predicted < 0:
            print("Warning: Model predicted negative revenue. Setting to 0.")
            return 0.0

        return predicted

    except Exception as e:
        # Best-effort contract: report the problem and fall back to 0
        print(f"Error in prediction: {str(e)}")
        return 0.0

In [None]:
# Example scenario: every channel starts at a spend of 0 and only the funded
# channels are overridden. Key order is kept exactly as listed here.
channel_params = dict.fromkeys(
    [
        "Email",
        "Facebook ads",
        "Google banner ads",
        "Influencer",
        "Instagram Ads",
        "LinkedIn",
        "Newspaper ads",
        "Radio ads",
        "Sponsored search ads",
        "TV ads",
        "TikTok ads",
    ],
    0,
)
channel_params["Instagram Ads"] = 100
channel_params["LinkedIn"] = 10

# Run the prediction for this scenario and show the result
predicted_revenue = sarimax_predict(channel_params)
print(predicted_revenue)

107531.43643916574




### Archive: Finding the best parameters for the ARIMAX model

In [None]:
def evaluate_arimax(params, X, y):
    """
    Fit a SARIMAX model for one parameter combination and score it by AIC.

    Parameters:
    - params (tuple): ``(order, seasonal_order)`` pair forwarded to SARIMAX
    - X: Exogenous regressors, passed as ``exog``
    - y: Endogenous series to model

    Returns:
    - tuple: ``(aic, fitted_results)``, or ``(float("inf"), None)`` when
      building or fitting the model raises an error
    """
    order, seasonal_order = params
    try:
        model = SARIMAX(
            y,
            exog=X,
            order=order,
            seasonal_order=seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False,
        )
        results = model.fit(disp=False)
        return results.aic, results
    except Exception:
        # Catch ordinary failures only: a bare `except:` would also swallow
        # KeyboardInterrupt/SystemExit and make a long grid search impossible
        # to interrupt.
        return float("inf"), None


# Candidate values for each model hyper-parameter
p = d = q = range(3)  # non-seasonal components
P = D = Q = range(2)  # seasonal components
s = [0, 3, 6, 12]  # seasonal period (12 for monthly data)

# Before searching seasonal orders, inspect the series for seasonality
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt

# Split the series into trend / seasonal / residual parts (12-period cycle)
decomposition = seasonal_decompose(y_arimax, period=12, extrapolate_trend="freq")

# One stacked panel per component, drawn top to bottom
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(15, 12))
panels = [
    (ax1, decomposition.observed, "Original"),
    (ax2, decomposition.trend, "Trend"),
    (ax3, decomposition.seasonal, "Seasonal"),
    (ax4, decomposition.resid, "Residual"),
]
for axis, component, title in panels:
    component.plot(ax=axis)
    axis.set_title(title)
plt.tight_layout()
plt.show()

# Build every (order, seasonal_order) candidate. For each non-seasonal order
# the plain model comes first, followed by all seasonal variants; s=0 is
# skipped there because it is already covered by the non-seasonal entry.
from itertools import product

seasonal_periods = [s_val for s_val in s if s_val != 0]
params_list = []
for order in product(p, d, q):
    # Non-seasonal model
    params_list.append((order, (0, 0, 0, 0)))
    # Seasonal models
    params_list.extend(
        (order, seasonal_order)
        for seasonal_order in product(P, D, Q, seasonal_periods)
    )

# Perform grid search, keeping the candidate with the lowest AIC
best_aic = float("inf")
best_params = None
best_model = None

print("Starting grid search...")
for params in params_list:
    aic, model = evaluate_arimax(params, X_arimax, y_arimax)

    # Failed fits come back as AIC=inf and can never win this comparison
    if aic < best_aic:
        best_aic = aic
        best_params = params
        best_model = model

# Fail with a clear message instead of a TypeError on `best_params[0]` when
# not a single candidate could be fitted.
if best_model is None:
    raise RuntimeError(
        "Grid search failed: no ARIMAX parameter combination could be fitted."
    )

print("\nBest model parameters:")
print(f"Order: {best_params[0]}")
print(f"Seasonal Order: {best_params[1]}")
print(f"AIC: {best_aic}")

# Update your arimax_results with the best model
arimax_results = best_model

# Print summary of the best model
print("\nBest Model Summary:")
print(arimax_results.summary())