In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

In [3]:
df = pd.read_csv("../data/sales_data.csv")
df['data'] = pd.to_datetime(df['data'])
df.set_index('data', inplace=True)


In [4]:
df = df.asfreq('MS')   # Monthly frequency (adjust if needed)
df['venda'] = df['venda'].ffill()


In [5]:
sales = df['venda']
train = sales.iloc[:-12]
test = sales.iloc[-12:]


In [6]:
from statsmodels.tsa.arima.model import ARIMA              #for ARIMA

arima_model = ARIMA(train, order=(5,1,0))
arima_fit = arima_model.fit()
forecast_arima = arima_fit.forecast(steps=12)


  warn('Non-stationary starting autoregressive parameters'


In [7]:
from sklearn.preprocessing import MinMaxScaler                 #for LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

scaler = MinMaxScaler()
scaled = scaler.fit_transform(sales.values.reshape(-1,1))

X, y = [], []
for i in range(12, len(scaled)):
    X.append(scaled[i-12:i])
    y.append(scaled[i])

X, y = np.array(X), np.array(y)
X = X.reshape((X.shape[0], X.shape[1], 1))

model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X.shape[1], 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.fit(X, y, epochs=20, batch_size=16, verbose=0)

pred_scaled = model.predict(X[-12:])
forecast_lstm = scaler.inverse_transform(pred_scaled).flatten()


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 314ms/step


In [8]:
rmse_arima = np.sqrt(mean_squared_error(test, forecast_arima))
rmse_lstm = np.sqrt(mean_squared_error(test.values, forecast_lstm))

rmse_arima, rmse_lstm


(np.float64(86.66030924255934), np.float64(87.1368104217084))

ARIMA provides a strong baseline forecast for stable sales patterns.

LSTM performs better in capturing non-linear and volatile demand behavior.

Forecasts indicate upcoming demand trends, which can support inventory and pricing decisions.

Overall, LSTM shows improved performance over ARIMA based on RMSE comparison.

In [9]:
#example 
forecast_arima
forecast_lstm


array([125.15324, 136.35458, 140.23056, 136.10692, 141.24449, 137.58046,
       132.24088, 135.51889, 132.02563, 131.79385, 131.22458, 145.21165],
      dtype=float32)

In [10]:
print("Next 12 months predicted sales:")
print(forecast_arima)


Next 12 months predicted sales:
2015-08-01    111.629261
2015-09-01    105.816826
2015-10-01    100.976931
2015-11-01    139.320937
2015-12-01     82.528630
2016-01-01    123.477007
2016-02-01    110.806615
2016-03-01    107.810555
2016-04-01    110.823573
2016-05-01    115.782422
2016-06-01    104.231087
2016-07-01    114.845414
Freq: MS, Name: predicted_mean, dtype: float64


In [11]:
future_df = pd.DataFrame({
    "Date": test.index,
    "Predicted_Sales": forecast_arima
})

future_df


Unnamed: 0,Date,Predicted_Sales
2015-08-01,2015-08-01,111.629261
2015-09-01,2015-09-01,105.816826
2015-10-01,2015-10-01,100.976931
2015-11-01,2015-11-01,139.320937
2015-12-01,2015-12-01,82.52863
2016-01-01,2016-01-01,123.477007
2016-02-01,2016-02-01,110.806615
2016-03-01,2016-03-01,107.810555
2016-04-01,2016-04-01,110.823573
2016-05-01,2016-05-01,115.782422


In [12]:
model.predict(X[-6:])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 327ms/step


array([[0.35837635],
       [0.36725986],
       [0.35779303],
       [0.35716492],
       [0.35562217],
       [0.3935275 ]], dtype=float32)