In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller

# End-to-end walkthrough: load the data, test for stationarity, difference
# once if needed, fit an ARIMA model, then forecast and plot.

# Load the data and index it by parsed date (built as one chain so that
# re-running the cell always starts from the raw CSV).
train_data = (
    pd.read_csv(r'train.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

# Extract sales column
sales = train_data['sales']

# Perform ADF test; result[0] is the test statistic, result[1] the p-value.
result = adfuller(sales)
print('ADF Statistic:', result[0])
print('p-value:', result[1])

# A p-value below 0.05 is treated as evidence of stationarity; otherwise the
# series is first-differenced once before modelling.
if result[1] < 0.05:
    print("The series is likely stationary.")
    sales_diff = sales
else:
    print("The series is likely non-stationary.")
    sales_diff = sales.diff().dropna()

# Fit ARIMA model
# Example order (1,0,1) - adjust based on your data. d stays 0 because any
# differencing has already been applied to sales_diff above.
arima_model = ARIMA(sales_diff, order=(1, 0, 1))
arima_results = arima_model.fit()

print(arima_results.summary())

# Forecast
forecast_steps = 30  # Number of steps to forecast

# The results object of statsmodels.tsa.arima.model.ARIMA returns only the
# point forecasts from .forecast(); standard errors and confidence bounds come
# from the prediction object returned by .get_forecast().
forecast_result = arima_results.get_forecast(steps=forecast_steps)
forecast = forecast_result.predicted_mean
stderr = forecast_result.se_mean
# Converted to an ndarray so the bounds can be indexed positionally:
# column 0 is the lower bound, column 1 the upper bound.
conf_int = np.asarray(forecast_result.conf_int(alpha=0.05))

# Plot forecast. History and forecast are both drawn against observation
# position; plotting the date-indexed history next to integer forecast
# positions would put incompatible units on one x-axis.
history_x = np.arange(len(sales_diff))
forecast_x = np.arange(len(sales_diff), len(sales_diff) + forecast_steps)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(history_x, sales_diff.to_numpy(), label='Original')
ax.plot(forecast_x, np.asarray(forecast), label='Forecast', linestyle='--', marker='o')
ax.fill_between(forecast_x, conf_int[:, 0], conf_int[:, 1], color='pink', alpha=0.3)
ax.set_xlabel('Observation')
ax.legend()
plt.show()



KeyboardInterrupt



In [None]:
# Augmented Dickey-Fuller test on the sales series.
# The first two entries of the returned tuple are the test statistic and the
# p-value; the full tuple stays available as `result`.
result = adfuller(sales)
adf_statistic, p_value = result[0], result[1]

print('ADF Statistic:', adf_statistic)
print('p-value:', p_value)

# A p-value under 0.05 is read as "stationary"; anything else as "non-stationary".
print(
    "The series is likely stationary."
    if p_value < 0.05
    else "The series is likely non-stationary."
)


In [None]:
# Determine number of differences needed for stationarity, and build the
# differenced series at the same time.
#
# NOTE(review): the previous version called `ndiffs`, which is not imported in
# any visible cell (presumably pmdarima's helper - confirm). This version uses
# the ADF test that the notebook already relies on, so it needs no extra
# dependency. Each ADF pass may be slow on a very large series.
#
# First differencing is applied repeatedly: Series.diff(n) is the lag-n
# difference x[t] - x[t-n], not n rounds of differencing, so it must not be
# used to apply an n-th order difference.
MAX_DIFFS = 2      # cap on the differencing order, to avoid over-differencing
ADF_ALPHA = 0.05   # same significance threshold as the stationarity check

sales_diff = sales
n_diffs = 0
while n_diffs < MAX_DIFFS and adfuller(sales_diff)[1] >= ADF_ALPHA:
    sales_diff = sales_diff.diff().dropna()
    n_diffs += 1

print(f"Number of differences needed: {n_diffs}")


In [None]:
# Fit an autoregressive model to the (possibly differenced) sales series.
# AutoReg comes from statsmodels.tsa.ar_model and must be imported in the
# notebook's import cell; without that import this cell raises NameError on a
# fresh kernel.
AR_LAGS = 1  # number of autoregressive lags; adjust as needed

ar_model = AutoReg(sales_diff, lags=AR_LAGS)
ar_results = ar_model.fit()
print(ar_results.summary())


In [None]:
# Fit an ARIMA model on the (possibly differenced) sales series.
# The order is (p, d, q) = (1, 0, 1); adjust as needed.
arima_order = (1, 0, 1)
arima_model = ARIMA(endog=sales_diff, order=arima_order)
arima_results = arima_model.fit()

print(arima_results.summary())


In [None]:
# Forecast using ARIMA model
forecast_steps = 30  # Number of steps to forecast

# The results object of statsmodels.tsa.arima.model.ARIMA returns only the
# point forecasts from .forecast(), so it cannot be unpacked into three
# values. .get_forecast() returns a prediction object that carries the mean,
# its standard error and the confidence bounds.
forecast_result = arima_results.get_forecast(steps=forecast_steps)
forecast = forecast_result.predicted_mean
stderr = forecast_result.se_mean
# Converted to an ndarray so the bounds can be indexed positionally:
# column 0 is the lower bound, column 1 the upper bound.
conf_int = np.asarray(forecast_result.conf_int(alpha=0.05))

# Plot forecast. Both curves are drawn against observation position so that
# the history and the forecast share one consistent x-axis.
history_x = np.arange(len(sales_diff))
forecast_x = np.arange(len(sales_diff), len(sales_diff) + forecast_steps)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(history_x, np.asarray(sales_diff), label='Original')
ax.plot(forecast_x, np.asarray(forecast), label='Forecast', linestyle='--', marker='o')
ax.fill_between(forecast_x, conf_int[:, 0], conf_int[:, 1], color='pink', alpha=0.3)
ax.set_xlabel('Observation')
ax.legend()
plt.show()
