In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pmdarima import auto_arima
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [None]:
# Choices: the single store / product-family series this cell models.
store_number = 1
family = "Beverages"

# Load and preprocess data
# NOTE(review): `train_df` and `test_df` are not created in any visible cell;
# they must already exist in the kernel -- confirm a loading cell runs first.
train_df_filtered = train_df[(train_df['Store number'] == store_number) & (train_df['family'] == family)]
test_df_filtered = test_df[(test_df['Store number'] == store_number) & (test_df['family'] == family)]

# Fail fast with a readable message when the filter matches nothing (for
# example a family label with different casing), instead of letting the
# model-fitting code further down fail on an empty series.
if train_df_filtered.empty or test_df_filtered.empty:
    raise ValueError(
        f"No rows for store {store_number!r} / family {family!r}: "
        f"{len(train_df_filtered)} train rows, {len(test_df_filtered)} test rows"
    )

# Move 'date' into the index; from here on it is no longer a regular column.
train_df_filtered = train_df_filtered.set_index('date')
test_df_filtered = test_df_filtered.set_index('date')

# Normalize sales data for neural network
# NOTE(review): `scaler` and 'sales_scaled' are not read again in this cell;
# kept in case a later cell uses them.
scaler = MinMaxScaler()
train_df_filtered['sales_scaled'] = scaler.fit_transform(train_df_filtered[['sales']])

# Auto-ARIMA to find best (p, d, q); seasonal search with a period of m=7.
arima_model = auto_arima(train_df_filtered['sales'], seasonal=True, m=7, trace=True, suppress_warnings=True)
print(f"Selected ARIMA Order: {arima_model.order}")

# auto_arima already returns the selected model fitted on this series, so it
# is reused directly instead of being fitted a second time. `arima_fit` stays
# the same object it was before (ARIMA.fit returns the estimator itself).
arima_fit = arima_model

# np.asarray makes the assignment positional: newer pmdarima returns a pandas
# Series, and column assignment would otherwise align on its index.
train_df_filtered['arima_pred'] = np.asarray(arima_fit.predict_in_sample())
train_df_filtered['residuals'] = train_df_filtered['sales'] - train_df_filtered['arima_pred']

# Prepare training data for Neural Network: the single feature is the
# 'on promotion' column and the target is the ARIMA residual, scaled to [0, 1]
# with its own scaler so predictions can be mapped back later.
X_train = train_df_filtered[['on promotion']].values
y_train = train_df_filtered['residuals'].values.reshape(-1, 1)
scaler_y = MinMaxScaler()
y_train_scaled = scaler_y.fit_transform(y_train)

# Train complex Neural Network
# Seed NumPy and TensorFlow before building the model so weight
# initialisation, dropout masks and batch shuffling are repeatable between
# runs (as far as the backend allows).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

model = Sequential([
    # Explicit input spec; passing `input_dim=` to the first Dense layer is
    # deprecated in current Keras releases.
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer: one scaled residual per input row
])

# Regress the scaled ARIMA residuals with mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train_scaled, epochs=50, batch_size=16, verbose=1)

# Forecast Future Sales on Test Data
future_dates = test_df_filtered.index
future_df = test_df_filtered.copy()

# ARIMA Prediction
# Assign positionally: predict() may return a pandas Series whose index is
# not the test date index, and index alignment on assignment would then fill
# the whole column with NaN.
future_df['arima_pred'] = np.asarray(arima_fit.predict(n_periods=len(future_dates)))

# Neural Net Prediction (Residuals Correction)
X_test = future_df[['on promotion']].values
future_residuals_scaled = model.predict(X_test)
# Undo the [0, 1] scaling applied to the training residuals.
future_residuals = scaler_y.inverse_transform(future_residuals_scaled)
# The inverse-transformed array is 2-D with one column; flatten it so it is
# stored as an ordinary 1-D column.
future_df['residuals_pred'] = future_residuals.ravel()

# Final Sales Forecast: ARIMA forecast plus the predicted residual correction.
future_df['final_sales_pred'] = future_df['arima_pred'] + future_df['residuals_pred']

# Plot Results
fig, ax = plt.subplots(figsize=(12, 5))
# Draw the actuals only when the test frame carries them, so a forecast-only
# test set still produces the two forecast curves.
if 'sales' in test_df_filtered.columns:
    ax.plot(test_df_filtered.index, test_df_filtered['sales'], label='Actual Sales', color='blue')
ax.plot(future_df.index, future_df['final_sales_pred'], label='Hybrid Model Forecast', color='red', linestyle='dashed')
ax.plot(future_df.index, future_df['arima_pred'], label='ARIMA Only Forecast', color='green', linestyle='dotted')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.legend()
ax.set_title('Sales Forecast Comparison: Actual vs ARIMA vs Hybrid Model')
plt.show()

# 'date' is the index of future_df (set_index('date') earlier in this cell),
# not a column, so selecting it by column name raises KeyError. The dates are
# still printed as the row labels.
print(future_df[['final_sales_pred']])