In [2]:
import pandas as pd

# Load the Superstore export; both date columns are parsed as day-first.
df = pd.read_csv('Superstore_sales.csv')

df['Order Date'] = pd.to_datetime(df['Order Date'], dayfirst=True)
df['Ship Date'] = pd.to_datetime(df['Ship Date'], dayfirst=True)

# Index by order date so later cells can read calendar fields off df.index.
df.set_index('Order Date', inplace=True)

# Count gaps before filling so the printed numbers describe the raw file.
missing_values = df.isnull().sum()
print(f"Missing values in each column:\n{missing_values}")

# DataFrame.ffill replaces fillna(method='ffill'), whose `method` argument is
# deprecated in recent pandas releases.
df.ffill(inplace=True)

# Preview goes last: only a cell's final expression is rendered.
df.head()



ModuleNotFoundError: No module named 'pandas'

In [None]:
# Per-column count of missing entries; the bare name on the last line renders it.
missing_values = df.isna().sum()
missing_values


In [None]:
# Show the column labels currently present on df.
df.columns

In [None]:
import pandas as pd
import numpy as np

# Calendar features read from the datetime index.
df['Year'] = df.index.year
df['Month'] = df.index.month
df['Day'] = df.index.day
df['Day_of_Week'] = df.index.dayofweek

# Holiday flags. Every comparison is parenthesised because `&` binds tighter
# than `==`: written as `month == 12 & (day == 25)`, Python evaluates
# `12 & (...)` first (always 0) and the flag can never be True.
df['Is_Christmas'] = (df.index.month == 12) & (df.index.day == 25)
df['Is_NewYear'] = (df.index.month == 1) & (df.index.day == 1)

# Lagged copies of Sales, shifted by 1, 7 and 30 rows.
# NOTE(review): shift/rolling operate on row position, not calendar distance;
# confirm the rows are in chronological order before reading these as
# "previous day/week/month" features.
df['Sales_Lag_1'] = df['Sales'].shift(1)
df['Sales_Lag_7'] = df['Sales'].shift(7)
df['Sales_Lag_30'] = df['Sales'].shift(30)

# Trailing means over the last 7 and 30 rows.
df['Sales_Rolling_Avg_7'] = df['Sales'].rolling(window=7).mean()
df['Sales_Rolling_Avg_30'] = df['Sales'].rolling(window=30).mean()

# The lag/rolling columns are NaN for the leading rows; drop every row that
# still contains a missing value.
df.dropna(inplace=True)

print(df.head())


In [None]:
# Outlier filter on Sales: keep rows lying within 1.5 * IQR of the quartiles.
Q1, Q3 = df['Sales'].quantile([0.25, 0.75])
IQR = Q3 - Q1

lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

# between() is inclusive on both ends, i.e. lower_bound <= Sales <= upper_bound.
within_fences = df['Sales'].between(lower_bound, upper_bound)
df = df[within_fences]

print(df.shape)


In [None]:
from statsmodels.tsa.stattools import adfuller

# Run the augmented Dickey-Fuller test on the non-missing Sales values and
# report the first two entries of the result tuple.
sales_without_gaps = df['Sales'].dropna()
result = adfuller(sales_without_gaps)

adf_statistic, adf_p_value = result[0], result[1]
print('ADF Statistic:', adf_statistic)
print('p-value:', adf_p_value)



In [None]:
# Positional 80/20 split: the first 80% of rows train, the remainder tests.
split_at = int(0.8 * len(df))
train, test = df.iloc[:split_at], df.iloc[split_at:]

print(f'Training data size: {len(train)}')
print(f'Test data size: {len(test)}')


In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Fit ARIMA(1, 1, 1) on the training Sales and forecast one step per test row.
arima_order = (1, 1, 1)
model_arima = ARIMA(train['Sales'], order=arima_order)
model_arima_fit = model_arima.fit()

forecast_horizon = len(test)
forecast_arima = model_arima_fit.forecast(steps=forecast_horizon)

import matplotlib.pyplot as plt

# Overlay training data, held-out data and the ARIMA forecast on one axis.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(train['Sales'], label='Training Data')
ax.plot(test['Sales'], label='Test Data')
ax.plot(test.index, forecast_arima, label='ARIMA Forecast', color='red')
ax.legend()
ax.set_title("ARIMA Model Forecast vs Actual Sales")
plt.show()


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Score the ARIMA forecast against the held-out Sales values.
actual_sales = test['Sales']
mae_arima = mean_absolute_error(actual_sales, forecast_arima)
mse_arima = mean_squared_error(actual_sales, forecast_arima)

print(f'MAE (ARIMA): {mae_arima}')
print(f'MSE (ARIMA): {mse_arima}')


In [10]:
# Prophet

In [None]:
from prophet import Prophet

# Prophet expects the date column to be called 'ds' and the target 'y'.
prophet_columns = {'Ship Date': 'ds', 'Sales': 'y'}
df_prophet = df[list(prophet_columns)].rename(columns=prophet_columns)

model_prophet = Prophet(yearly_seasonality=True)
model_prophet.fit(df_prophet)

# Extend the fitted dates by 30 periods and predict over the whole range.
future = model_prophet.make_future_dataframe(periods=30)
forecast_prophet = model_prophet.predict(future)

model_prophet.plot(forecast_prophet)
plt.title("Prophet Forecast for Sales")
plt.show()


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Pair predictions with test rows by date, not by position. Taking only the
# last len(test) forecast rows and left-merging can leave test dates without a
# prediction; the resulting NaNs make the sklearn metrics raise.
forecast_prophet_test = forecast_prophet[['ds', 'yhat']].rename(
    columns={'ds': 'Ship Date', 'yhat': 'Predicted Sales'}
)

# Inner join: only test rows that have a prediction for their ship date are scored.
merged_test = pd.merge(
    test[['Ship Date', 'Sales']],
    forecast_prophet_test,
    on='Ship Date',
    how='inner',
)

if merged_test.empty:
    raise ValueError("No Prophet predictions match the test set's 'Ship Date' values")

mae_prophet = mean_absolute_error(merged_test['Sales'], merged_test['Predicted Sales'])
mse_prophet = mean_squared_error(merged_test['Sales'], merged_test['Predicted Sales'])

print(f'MAE (Prophet): {mae_prophet}')
print(f'MSE (Prophet): {mse_prophet}')


In [13]:
# Deep Learning with LSTM/GRU

In [14]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Two-column frame (date, sales) ordered by ship date.
data = (
    df[['Ship Date', 'Sales']]
    .rename(columns={'Ship Date': 'ds', 'Sales': 'y'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
    .sort_values('ds')
)

# Rescale sales into [0, 1]; the scaler is kept so predictions can be mapped
# back to the original units later.
scaler = MinMaxScaler(feature_range=(0, 1))
sales_column = data['y'].values.reshape(-1, 1)
data['y'] = scaler.fit_transform(sales_column)

def create_sequences(data, time_step=60):
    """Turn the second column of ``data`` into sliding-window samples.

    Each sample pairs ``time_step`` consecutive values with the value that
    immediately follows them.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose column at position 1 holds the series to window.
    time_step : int, default 60
        Number of consecutive values in each input window.

    Returns
    -------
    X : numpy.ndarray, shape (n_windows, time_step)
        Input windows.
    y : numpy.ndarray, shape (n_windows,)
        Value following each window.

    ``n_windows`` is ``len(data) - time_step``, so the final observation is
    used as a target too. When the frame is too short for a single window,
    correctly shaped empty arrays are returned so that a later reshape of
    ``X`` to 3-D still works.
    """
    series = data.iloc[:, 1].to_numpy()
    n_windows = len(series) - time_step
    if n_windows <= 0:
        return np.empty((0, time_step)), np.empty((0,))

    X = np.array([series[start:start + time_step] for start in range(n_windows)])
    # Copy so the targets do not alias the frame's underlying buffer.
    y = series[time_step:].copy()
    return X, y

X, y = create_sequences(data, time_step=60)

# Positional 80/20 split of the windowed samples.
train_size = int(len(X) * 0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Append a trailing feature axis of size 1: (samples, time_step) -> (samples, time_step, 1).
X_train = X_train[:, :, np.newaxis]
X_test = X_test[:, :, np.newaxis]


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Two stacked 50-unit LSTM layers, each followed by 20% dropout, feeding a
# single-unit dense output.
window_length = X_train.shape[1]

model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(window_length, 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=False),
    Dropout(0.2),
    Dense(units=1),
])

model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)


In [None]:
# Predict in scaled space, then map predictions and targets back to sales units.
# Note: y_test is overwritten with its inverse-transformed values, so running
# this cell a second time would apply the inverse transform twice.
scaled_predictions = model.predict(X_test)

y_pred = scaler.inverse_transform(scaled_predictions)
y_test = scaler.inverse_transform(y_test.reshape(-1, 1))

import matplotlib.pyplot as plt

# Actual vs. predicted sales for the held-out windows.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_test, label="Actual Sales")
ax.plot(y_pred, label="LSTM Predicted Sales", color='red')
ax.legend()
ax.set_title("LSTM Sales Forecast")
plt.show()


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Error metrics for the LSTM predictions, both arrays in original sales units.
mae_lstm, mse_lstm = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
)

print(f'MAE (LSTM): {mae_lstm}')
print(f'MSE (LSTM): {mse_lstm}')


In [None]:
from collections.abc import MutableSequence

import pandas as pd

# Side-by-side error table for the three models. The metrics are the values
# computed by the ARIMA, Prophet and LSTM evaluation cells of this run; they
# are deliberately not re-assigned from pasted constants here, because stale
# numbers would silently mask the results of the current run.
comparison_df = pd.DataFrame({
    'Model': ['ARIMA', 'Prophet', 'LSTM'],
    'MAE': [mae_arima, mae_prophet, mae_lstm],
    'MSE': [mse_arima, mse_prophet, mse_lstm]
})

print(comparison_df)


In [19]:
# Visualization

In [None]:
import dash
from dash import dcc, html
import plotly.graph_objs as go
from pyngrok import ngrok
import pandas as pd
import numpy as np
from prophet import Prophet

import getpass
import os

# Credentials must not live in the notebook source. Read the ngrok token from
# the NGROK_AUTH_TOKEN environment variable, or prompt for it without echo.
# Any token that has ever appeared in a shared notebook should be revoked.
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass.getpass("ngrok auth token: ")
if not ngrok_auth_token:
    raise RuntimeError("An ngrok auth token is required to open the tunnel")
ngrok.set_auth_token(ngrok_auth_token)

# Colab-only helper: opens a file picker so the CSV can be uploaded to the runtime.
from google.colab import files
uploaded = files.upload()

# Reload the raw CSV for the dashboard, parse ship dates as day-first and
# order the rows by ship date.
df = (
    pd.read_csv('Superstore_sales.csv')
    .assign(**{'Ship Date': lambda frame: pd.to_datetime(frame['Ship Date'], dayfirst=True)})
    .sort_values('Ship Date')
)

# Line trace of the recorded sales.
trace_actual = go.Scatter(
    x=df['Ship Date'],
    y=df['Sales'],
    mode='lines',
    name='Actual Sales',
    line={'color': 'blue'},
)

# ------------------------------ ARIMA Model (Forecast) ------------------------------

from statsmodels.tsa.arima.model import ARIMA

# ARIMA(5, 1, 0) fitted on every Sales row, forecasting 30 steps ahead.
model_arima = ARIMA(df['Sales'], order=(5, 1, 0))
model_arima_fit = model_arima.fit()
forecast_arima = model_arima_fit.forecast(steps=30)

# Label the forecast with the 30 daily dates that follow the latest ship date.
last_ship_date = df['Ship Date'].max()
forecast_arima_dates = pd.date_range(
    last_ship_date + pd.Timedelta(days=1), periods=30, freq='D'
)

trace_arima = go.Scatter(
    x=forecast_arima_dates,
    y=forecast_arima,
    mode='lines',
    name='ARIMA Forecast',
    line={'color': 'orange'},
)

# ------------------------------ Prophet Model (Forecast) ------------------------------

# Prophet expects the date column to be called 'ds' and the target 'y'.
prophet_columns = {'Ship Date': 'ds', 'Sales': 'y'}
df_prophet = df[list(prophet_columns)].rename(columns=prophet_columns)

model_prophet = Prophet(yearly_seasonality=True)
model_prophet.fit(df_prophet)

# Predict over the fitted dates plus 30 additional periods.
future = model_prophet.make_future_dataframe(periods=30)
forecast_prophet = model_prophet.predict(future)

forecast_prophet_dates = forecast_prophet['ds']
forecast_prophet_sales = forecast_prophet['yhat']

trace_prophet = go.Scatter(
    x=forecast_prophet_dates,
    y=forecast_prophet_sales,
    mode='lines',
    name='Prophet Forecast',
    line={'color': 'green'},
)

# ------------------------------ LSTM Model (Forecast) ------------------------------

# NOTE(review): no trained network is called here. The 30 values are random
# draws from a normal distribution with the mean and standard deviation of
# Sales, yet the trace is labelled 'LSTM Forecast'. Confirm whether this
# placeholder is intended before publishing the dashboard.
sales_mean, sales_std = df['Sales'].mean(), df['Sales'].std()
forecast_lstm = np.random.normal(sales_mean, sales_std, 30)

# The 30 daily dates following the latest ship date (the start date itself is dropped).
dates_from_last_ship = pd.date_range(df['Ship Date'].max(), periods=31, freq='D')
forecast_lstm_dates = dates_from_last_ship[1:]

trace_lstm = go.Scatter(
    x=forecast_lstm_dates,
    y=forecast_lstm,
    mode='lines',
    name='LSTM Forecast',
    line={'color': 'red'},
)


# ---------------------------- Model Performance (MAE, MSE) -----------------------------

# Fixed (MAE, MSE) pairs shown in the dashboard table; nothing in this cell
# recomputes them.
mae_arima, mse_arima = 90.088885, 13743.620247
mae_prophet, mse_prophet = 88.486404, 13668.141795
mae_lstm, mse_lstm = 79.199334, 11805.265486

# ------------------------------ Layout for Dash App -------------------------------

app = dash.Dash(__name__)

# Figure combining the actual series with the three forecast traces.
forecast_figure = {
    'data': [trace_actual, trace_arima, trace_prophet, trace_lstm],
    'layout': go.Layout(
        title="Actual Sales vs. Forecasted Sales (ARIMA, Prophet, LSTM)",
        xaxis={'title': 'Date'},
        yaxis={'title': 'Sales'},
        legend={'x': 0, 'y': 1},
    ),
}

# One table row per model, with both metrics formatted to two decimals.
model_metrics = [
    ("ARIMA", mae_arima, mse_arima),
    ("Prophet", mae_prophet, mse_prophet),
    ("LSTM", mae_lstm, mse_lstm),
]
metrics_header = html.Tr([html.Th("Model"), html.Th("MAE"), html.Th("MSE")])
metrics_rows = [
    html.Tr([html.Td(label), html.Td(f"{mae:.2f}"), html.Td(f"{mse:.2f}")])
    for label, mae, mse in model_metrics
]

# Page structure: title, forecast graph section, metrics table section.
app.layout = html.Div([
    html.H1("Sales Forecasting Dashboard"),
    html.Div([
        html.H3("Sales Predictions Comparison"),
        dcc.Graph(id='sales-forecast-graph', figure=forecast_figure),
    ]),
    html.Div([
        html.H3("Model Performance Comparison"),
        html.Table([metrics_header, *metrics_rows]),
    ]),
])

# Shut down any ngrok process still running, then expose local port 8050.
ngrok.kill()
public_url = ngrok.connect(8050)
print(f" * Running Dash app on {public_url}")

# Dash 3 rejects app.run_server in favour of app.run. Prefer app.run and fall
# back to run_server only on older releases that lack it.
start_dash_server = getattr(app, "run", None) or app.run_server
start_dash_server(port=8050)
