In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Bidirectional, LSTM, Dropout, BatchNormalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

In [2]:
# Load the raw sales export and keep only the two columns this analysis uses,
# parsing the order date into a real datetime so it can be grouped/resampled.
df = (
    pd.read_csv("sales_5000000.csv")
    [['Order Date', 'Total Profit']]
    .assign(**{'Order Date': lambda frame: pd.to_datetime(frame['Order Date'])})
)

# Total profit per calendar day (one row per distinct order date).
daily_df = df.groupby('Order Date', as_index=False)['Total Profit'].sum()

# Roll the daily totals up to month-end buckets and index the result by date.
monthly_df = (
    daily_df
    .resample('ME', on='Order Date')['Total Profit']
    .sum()
    .reset_index()
    .set_index('Order Date')
)

In [3]:
# Tukey IQR fences for the monthly profit totals: anything more than
# 1.5 * IQR below the first quartile or above the third is treated as an outlier.
Q1 = monthly_df['Total Profit'].quantile(0.25)
Q3 = monthly_df['Total Profit'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Cap outlying months at the fences instead of deleting their rows.
# Deleting rows leaves holes in the monthly index, so time-series models
# fitted on this frame would no longer see one observation per month
# (a seasonal lag of 12 rows would stop meaning "12 months").
# Clipping keeps every month and is idempotent: values inside the fences are
# untouched, so re-running this cell yields the same quartiles and result.
# NOTE(review): the fences are computed on the full series, including the
# months later held out for testing - confirm this is acceptable.
monthly_df = monthly_df.assign(
    **{
        'Total Profit': monthly_df['Total Profit'].clip(
            lower=lower_bound, upper=upper_bound
        )
    }
)

In [4]:
# Chronological hold-out: with shuffle=False the rows keep their original
# order, so the final 20% of months form the test set.
train_data, test_data = train_test_split(
    monthly_df,
    test_size=0.2,
    shuffle=False,
)

In [5]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the monthly profit series.
# The first two entries of the returned tuple are the test statistic
# and its p-value.
result = adfuller(monthly_df['Total Profit'])
adf_statistic, adf_p_value = result[0], result[1]

print(f'ADF Statistic: {adf_statistic}')
print(f'p-value: {adf_p_value}')

# Interpretation: the null hypothesis is that the series has a unit root.
# A p-value above 0.05 means we cannot reject it, so treat the series as
# non-stationary and difference it (use d > 0 in ARIMA/SARIMA).


ModuleNotFoundError: No module named 'statsmodels'

In [6]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_percentage_error

# Chronological 80/20 split: the first 80% of rows train the model,
# the remaining rows are held out for evaluation.
train_size = int(len(monthly_df) * 0.8)
train = monthly_df.iloc[:train_size]
test = monthly_df.iloc[train_size:]

# ARIMA with order (p, d, q) = (2, 1, 2); d=1 applies one round of
# differencing for a non-stationary series. Tune the order as needed.
arima_model = ARIMA(train['Total Profit'], order=(2, 1, 2))
arima_result = arima_model.fit()

# Forecast one step per held-out row, then score against the actuals.
forecast_arima = arima_result.forecast(steps=len(test))
mape_arima = mean_absolute_percentage_error(test['Total Profit'], forecast_arima)
# "MAPA" is reported as (1 - MAPE) in percent; it goes negative if MAPE > 1.
mapa_arima = (1 - mape_arima) * 100
print(f"ARIMA MAPA (Accuracy): {mapa_arima:.2f}%")

# Training history, held-out actuals and the forecast on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train.index, train['Total Profit'], label='Train')
ax.plot(test.index, test['Total Profit'], label='Actual')
ax.plot(test.index, forecast_arima, label='Forecast (ARIMA)')
ax.set_title('ARIMA Forecast')
ax.legend()
ax.grid(True)
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()


array([[0.87638014],
       [0.87273679],
       [0.46494252],
       [0.89964861],
       [0.42641841],
       [0.8807608 ],
       [0.8668039 ],
       [0.46434173],
       [0.90221967],
       [0.40806572],
       [0.92247193],
       [0.98900127],
       [0.89434053],
       [0.43766566],
       [0.8225371 ],
       [0.48824541],
       [0.86426116],
       [0.89511126],
       [0.51743866],
       [0.87828209],
       [0.45746252],
       [0.922182  ],
       [0.88653958],
       [0.0690686 ],
       [0.88785146],
       [0.40681191],
       [0.87997365],
       [0.40239891],
       [0.94090423],
       [0.87327799],
       [0.36491614],
       [0.88634366],
       [0.50145826],
       [0.87992954],
       [0.88729827],
       [0.92594218],
       [0.39105671],
       [0.92407528],
       [0.44818019],
       [0.93258058],
       [0.84697928],
       [0.43334755],
       [0.90060935],
       [0.43024151],
       [0.86396627],
       [0.9038795 ],
       [0.8503903 ],
       [0.410

In [7]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

# SARIMA(p, d, q)(P, D, Q, s): non-seasonal order (1, 1, 1) plus a seasonal
# component (1, 1, 1) with period s = 12, i.e. yearly seasonality is assumed
# for monthly data.
sarima_model = SARIMAX(
    train['Total Profit'],
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
)
sarima_result = sarima_model.fit()

# Forecast one step per held-out row, then score against the actuals.
forecast_sarima = sarima_result.forecast(steps=len(test))
mape_sarima = mean_absolute_percentage_error(test['Total Profit'], forecast_sarima)
# "MAPA" is reported as (1 - MAPE) in percent; it goes negative if MAPE > 1.
mapa_sarima = (1 - mape_sarima) * 100
print(f"SARIMA MAPA (Accuracy): {mapa_sarima:.2f}%")

# Training history, held-out actuals and the (dashed) forecast on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train.index, train['Total Profit'], label='Train')
ax.plot(test.index, test['Total Profit'], label='Actual')
ax.plot(test.index, forecast_sarima, label='Forecast (SARIMA)', linestyle='--')
ax.set_title('SARIMA Forecast')
ax.legend()
ax.grid(True)
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()
