# Stock Price Forecasting with ARIMA and LSTM

## Objective
Build and compare time series forecasting models for stock price prediction.

- **Dataset**: Synthetic daily stock prices (5 years, 2019-01-01 to 2024-01-01)
- **Models**: ARIMA, SARIMA, LSTM
- **Metrics**: RMSE, MAE, MAPE

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_squared_error, mean_absolute_error
import warnings
warnings.filterwarnings('ignore')

# Build a reproducible synthetic daily price series.
# Seeding the global NumPy RNG fixes both the noise and the volume draws;
# the order of the two draws below must not change.
np.random.seed(42)
dates = pd.date_range(start='2019-01-01', end='2024-01-01', freq='D')
n = dates.size

# Price components: a linear ramp from 100 to 200, a sine wave spanning
# five full periods (amplitude 10), and Gaussian noise (sigma 5).
trend = np.linspace(100, 200, n)
phase = np.linspace(0, 10 * np.pi, n)
seasonal = 10 * np.sin(phase)
noise = np.random.normal(0, 5, n)

# Sum the components and floor the result at 50 so prices stay positive.
prices = np.maximum(trend + seasonal + noise, 50)

# Volume is drawn after the noise so the RNG stream is consumed in the
# same order on every run.
volume = np.random.randint(1000000, 10000000, n)

df = pd.DataFrame(
    {
        'Date': dates,
        'Close': prices,
        'Volume': volume,
    }
).set_index('Date')

# Quick summary of the generated data.
first_day = df.index.min()
last_day = df.index.max()
mean_price = df['Close'].mean()
price_std = df['Close'].std()

print(f'Dataset shape: {df.shape}')
print(f'Date range: {first_day} to {last_day}')
print(f'Mean price: ${mean_price:.2f}')
print(f'Price volatility (std): ${price_std:.2f}')

In [2]:
# Stationarity test: Augmented Dickey-Fuller on the closing price.
# The result tuple is indexed positionally below: [0] is the test
# statistic, [1] the p-value, and [4] the critical values.
result = adfuller(df['Close'])
print('ADF Statistic:', result[0])
print('p-value:', result[1])
print('Critical Values:', result[4])

# A p-value above 0.05 means the unit-root null hypothesis is not rejected,
# so the series is treated as non-stationary and a differenced column is added.
if result[1] > 0.05:
    print('\nSeries is NON-STATIONARY. Differencing required.')
    # First-order difference. The first row is NaN by construction; calling
    # .dropna() before assigning would have no effect, because column
    # assignment re-aligns to df's index and restores the NaN row.
    # Drop NaNs at the point of use instead (e.g. df['Close_diff'].dropna()).
    df['Close_diff'] = df['Close'].diff()
else:
    print('\nSeries is STATIONARY.')

# Daily percentage returns and 7-/30-day simple moving averages.
# The first return and the first (window - 1) rows of each moving average
# are NaN; mean()/std() below skip NaN values by default.
df['Returns'] = df['Close'].pct_change() * 100
df['MA_7'] = df['Close'].rolling(window=7).mean()
df['MA_30'] = df['Close'].rolling(window=30).mean()

print(f'\nAverage daily return: {df.Returns.mean():.3f}%')
print(f'Return volatility: {df.Returns.std():.3f}%')

In [3]:
# Train-test split (80-20), chronological: the first 80% of rows are used
# for fitting and the remaining 20% are held out for evaluation.
train_size = int(len(df) * 0.8)

# Take explicit positional slices and copy them. Without .copy(), adding the
# 'Forecast' column below would write to a slice of df, which is the
# chained-assignment pattern that raises SettingWithCopyWarning (hidden here
# by the blanket warnings filter) and has version-dependent semantics.
train = df.iloc[:train_size].copy()
test = df.iloc[train_size:].copy()

print(f'Training set: {len(train)} days')
print(f'Test set: {len(test)} days')

# Fit ARIMA model with order (p=5, d=1, q=2) on the training closes.
model = ARIMA(train['Close'], order=(5,1,2))
model_fit = model.fit()

print('\nARIMA Model Summary:')
print(model_fit.summary())

# Forecast one value per held-out day. The values are assigned positionally
# (.values) so they line up with test's rows regardless of the index the
# forecast carries.
forecast = model_fit.forecast(steps=len(test))
test['Forecast'] = forecast.values

# Calculate errors on the held-out period.
rmse = np.sqrt(mean_squared_error(test['Close'], test['Forecast']))
mae = mean_absolute_error(test['Close'], test['Forecast'])
# MAPE divides by the actual close, so it assumes no zero prices in test.
mape = np.mean(np.abs((test['Close'] - test['Forecast']) / test['Close'])) * 100

print('\nModel Performance:')
print(f'RMSE: ${rmse:.2f}')
print(f'MAE: ${mae:.2f}')
print(f'MAPE: {mape:.2f}%')