# Task 2: Develop Time Series Forecasting Models

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Read the cleaned TSLA prices; the 'Date' column becomes a parsed index.
tsla_csv_path = '../data/TSLA_cleaned.csv'
tsla_df = pd.read_csv(
    tsla_csv_path,
    index_col='Date',
    parse_dates=True,
)

ModuleNotFoundError: No module named 'pmdarima'

## 1. Data Preparation

In [None]:
# Forecast on the closing price only (kept as a one-column DataFrame).
data = tsla_df[['Close']]

# Chronological split by label: everything up to and including 2023 is
# used for training, everything from 2024 onwards is held out for testing.
train_data = data.loc[:'2023']
test_data = data.loc['2024':]

## 2. ARIMA Model

In [None]:
# Let auto_arima search for an ARIMA order on the training closes,
# printing each candidate it tries (trace=True).
train_close = train_data['Close']
stepwise_fit = auto_arima(
    train_close,
    trace=True,
    suppress_warnings=True,
)
print(stepwise_fit.summary())

In [None]:
# Refit a statsmodels ARIMA using the (p, d, q) order chosen by auto_arima.
arima_order = stepwise_fit.order
p, d, q = arima_order
model = ARIMA(endog=train_data['Close'], order=(p, d, q))
model_fit = model.fit()
print(model_fit.summary())

In [None]:
# Make out-of-sample predictions, one per test observation.
# `start`/`end` are positions counted from the first training row, so the
# range [len(train), len(train) + len(test) - 1] covers exactly the test span.
start = len(train_data)
end = start + len(test_data) - 1

# The state-space ARIMA results object already predicts on the original
# (undifferenced) scale. `typ='levels'` belonged to the removed legacy ARIMA
# API and is not a parameter here (it triggers an unknown-keyword warning
# that is slated to become an error), so it is intentionally not passed.
predictions_arima = model_fit.predict(start=start, end=end)

# Re-label the forecasts with the test dates so they align with test_data
# for plotting and metric computation.
predictions_arima.index = test_data.index

In [None]:
# Overlay the training series, the held-out series, and the ARIMA forecast.
fig_arima, ax_arima = plt.subplots(figsize=(14, 7))
for series, series_label in (
    (train_data['Close'], 'Training Data'),
    (test_data['Close'], 'Test Data'),
    (predictions_arima, 'ARIMA Predictions'),
):
    ax_arima.plot(series, label=series_label)
ax_arima.set_title('ARIMA Model - Stock Price Prediction')
ax_arima.set_xlabel('Date')
ax_arima.set_ylabel('Price')
ax_arima.legend()
plt.show()

In [None]:
# Score the ARIMA forecast against the held-out closing prices.
actual_close = test_data['Close']
arima_abs_error = np.abs(predictions_arima - actual_close)

mae_arima = mean_absolute_error(actual_close, predictions_arima)
rmse_arima = np.sqrt(mean_squared_error(actual_close, predictions_arima))
# MAPE is kept as a fraction (it is not multiplied by 100).
mape_arima = np.mean(arima_abs_error / np.abs(actual_close))

for arima_metric_line in (
    f'ARIMA MAE: {mae_arima:.2f}',
    f'ARIMA RMSE: {rmse_arima:.2f}',
    f'ARIMA MAPE: {mape_arima:.2f}',
):
    print(arima_metric_line)

## 3. LSTM Model

### Data Preprocessing for LSTM

In [None]:
# Fit the scaler on the training period ONLY, then apply it to the whole
# series. Fitting on `data` (train + test) would leak the test period's
# min/max into the features the LSTM is trained on.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(train_data)

# `scaled_data` still covers the full series (same row order as `data`), so
# positional slicing by len(train_data) keeps working. Test-period values may
# fall outside [0, 1] when prices move beyond the training range.
scaled_data = scaler.transform(data)

In [None]:
# Create sequences
def create_dataset(dataset, time_step=1):
    """Build sliding-window samples from the first column of ``dataset``.

    Each sample is ``time_step`` consecutive values; its target is the value
    immediately following the window.

    Parameters
    ----------
    dataset : array-like, 2-D
        Only column 0 is used.
    time_step : int, default 1
        Number of consecutive rows in each input window.

    Returns
    -------
    (X, y) : tuple of numpy.ndarray
        ``X`` has shape ``(n_samples, time_step)`` and ``y`` has shape
        ``(n_samples,)`` where ``n_samples = max(len(dataset) - time_step, 0)``.
    """
    series = np.asarray(dataset)[:, 0]

    # Every index i with i + time_step <= len(series) - 1 has a full window
    # and a target, i.e. i in range(len(series) - time_step). The previous
    # bound subtracted an extra 1 and silently dropped the last sample.
    n_samples = len(series) - time_step
    if n_samples <= 0:
        # Too short for a single window: return correctly-shaped empties so
        # downstream reshapes on X.shape[1] keep working.
        return np.empty((0, time_step)), np.empty((0,))

    dataX = np.array([series[i:i + time_step] for i in range(n_samples)])
    dataY = np.array(series[time_step:time_step + n_samples])
    return dataX, dataY

# Window length: each sample sees the previous 60 scaled closes.
time_step = 60
n_train_rows = len(train_data)

# Training windows come from the training rows only.
X_train, y_train = create_dataset(scaled_data[:n_train_rows], time_step)
# Test windows start `time_step` rows before the split so that the first
# test target has a full look-back window.
X_test, y_test = create_dataset(scaled_data[n_train_rows - time_step:], time_step)

In [None]:
# The LSTM expects 3-D input laid out as [samples, time steps, features];
# there is a single feature (the scaled close), hence the trailing 1.
n_train_samples, n_train_steps = X_train.shape[0], X_train.shape[1]
X_train = X_train.reshape((n_train_samples, n_train_steps, 1))

n_test_samples, n_test_steps = X_test.shape[0], X_test.shape[1]
X_test = X_test.reshape((n_test_samples, n_test_steps, 1))

### Build and Train the LSTM Model

In [None]:
# Two stacked 50-unit LSTM layers followed by a small dense head that
# emits a single value per input window.
model_lstm = Sequential([
    LSTM(50, return_sequences=True, input_shape=(time_step, 1)),
    LSTM(50, return_sequences=False),
    Dense(25),
    Dense(1),
])

model_lstm.compile(loss='mean_squared_error', optimizer='adam')

# NOTE(review): one epoch at batch_size=1 is a very light training run;
# confirm this is intentional before relying on the resulting metrics.
model_lstm.fit(X_train, y_train, epochs=1, batch_size=1)

### Make Predictions with LSTM

In [None]:
# Predict on the test windows; the network was trained on scaled values, so
# its outputs are in the scaler's units.
predictions_lstm_scaled = model_lstm.predict(X_test)
# Map the scaled outputs back to price units with the same scaler.
predictions_lstm = scaler.inverse_transform(predictions_lstm_scaled)

In [None]:
# Label the LSTM predictions with the leading test dates (as many dates as
# there are predictions) and collect them in a single-column DataFrame.
lstm_prediction_dates = test_data.index[:len(predictions_lstm)]
predictions_lstm_df = pd.DataFrame(
    data=predictions_lstm,
    index=lstm_prediction_dates,
    columns=['Predictions'],
)

In [None]:
# Overlay the training series, the held-out series, and the LSTM predictions.
fig_lstm, ax_lstm = plt.subplots(figsize=(14, 7))
for series, series_label in (
    (train_data['Close'], 'Training Data'),
    (test_data['Close'], 'Test Data'),
    (predictions_lstm_df['Predictions'], 'LSTM Predictions'),
):
    ax_lstm.plot(series, label=series_label)
ax_lstm.set_title('LSTM Model - Stock Price Prediction')
ax_lstm.set_xlabel('Date')
ax_lstm.set_ylabel('Price')
ax_lstm.legend()
plt.show()

In [None]:
# Evaluate the LSTM against the test closes it has predictions for.
# Both sides are flattened to 1-D arrays of equal length first: the
# predictions come back with shape (n, 1) while the actuals are (n,), and
# subtracting those directly broadcasts to an (n, n) matrix, which made the
# previous MAPE an average over every prediction/actual pairing.
n_lstm_predictions = len(predictions_lstm)
predicted_lstm = np.asarray(predictions_lstm).reshape(-1)
actual_lstm = test_data['Close'].iloc[:n_lstm_predictions].to_numpy()

mae_lstm = mean_absolute_error(actual_lstm, predicted_lstm)
rmse_lstm = np.sqrt(mean_squared_error(actual_lstm, predicted_lstm))
# MAPE is kept as a fraction (not multiplied by 100), matching the ARIMA cell.
mape_lstm = np.mean(np.abs(predicted_lstm - actual_lstm) / np.abs(actual_lstm))

print(f'LSTM MAE: {mae_lstm:.2f}')
print(f'LSTM RMSE: {rmse_lstm:.2f}')
print(f'LSTM MAPE: {mape_lstm:.2f}')

## 4. Model Comparison

In [None]:
# Print both models' error metrics in the same layout for comparison.
comparison_rows = (
    ("ARIMA Model Metrics:", mae_arima, rmse_arima, mape_arima),
    ("LSTM Model Metrics:", mae_lstm, rmse_lstm, mape_lstm),
)
for heading, mae_value, rmse_value, mape_value in comparison_rows:
    print(heading)
    print(f'MAE: {mae_value:.2f}')
    print(f'RMSE: {rmse_value:.2f}')
    print(f'MAPE: {mape_value:.2f}')