In [None]:
# Cell 1: ARIMA Code
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def future_business_days(last_date, n_periods):
    """Return the ``n_periods`` business days that strictly follow ``last_date``.

    Parameters
    ----------
    last_date : datetime-like
        Timestamp of the last observed sample.
    n_periods : int
        Number of future business days to generate.

    Returns
    -------
    pandas.DatetimeIndex
        Business-day index whose first entry is the first business day after
        ``last_date``.
    """
    # Step one business day forward *before* building the range. Anchoring the
    # range at ``last_date`` and slicing off ``[1:]`` skips a day whenever
    # ``last_date`` is not itself a business day (the range rolls forward to
    # Monday and the slice then drops that Monday).
    first_day = pd.Timestamp(last_date) + pd.offsets.BDay(1)
    return pd.bdate_range(start=first_day, periods=n_periods)


def align_forecast(forecast, conf_int, forecast_index):
    """Attach ``forecast_index`` to the point forecast and its confidence bounds.

    Parameters
    ----------
    forecast : array-like or pandas.Series
        Point forecasts, one per entry of ``forecast_index``.
    conf_int : array-like of shape (len(forecast_index), 2)
        Lower and upper bound for each forecast step.
    forecast_index : pandas.Index
        Index to assign to both outputs.

    Returns
    -------
    (pandas.Series, pandas.DataFrame)
        The forecast series and a frame with ``lower_bound`` / ``upper_bound``
        columns, both indexed by ``forecast_index``.
    """
    # ``pd.Series(series, index=new_index)`` re-aligns on labels instead of
    # relabelling, which yields all-NaN when the labels do not match. Stripping
    # the input down to a plain array makes the assignment purely positional.
    forecast_values = np.asarray(forecast, dtype=float)
    bounds = np.asarray(conf_int, dtype=float)

    forecast_series = pd.Series(forecast_values, index=forecast_index)
    conf_int_df = pd.DataFrame(bounds, index=forecast_index,
                               columns=['lower_bound', 'upper_bound'])
    return forecast_series, conf_int_df


# Check if data is loaded before proceeding
try:
    data = pd.read_csv('../data/processed/daily_returns.csv', index_col=0, parse_dates=True)
except FileNotFoundError:
    print("Preprocessed data not found. Please run the EDA notebook first.")
    data = None

if data is not None:
    # Use pmdarima to find the best ARIMA parameters
    from pmdarima import auto_arima

    tsla_returns = data['Adj Close_TSLA'].dropna()
    print("\nTraining ARIMA model on TSLA daily returns...")

    # d=0: no differencing, on the assumption that daily returns are already
    # stationary. The search is non-seasonal and stepwise over p, q in [0, 5].
    arima_model = auto_arima(tsla_returns, start_p=1, start_q=1,
                            max_p=5, max_q=5, m=1,
                            start_P=0, seasonal=False,
                            d=0, trace=True,
                            error_action='ignore',
                            suppress_warnings=True,
                            stepwise=True)

    print("\nARIMA Model Summary:")
    print(arima_model.summary())

    # Forecast future returns (e.g., 60 trading days)
    n_periods = 60
    forecast, conf_int = arima_model.predict(n_periods=n_periods, return_conf_int=True)

    # Put the forecast on a future business-day index. predict() may hand back
    # a pandas Series carrying its own index (pmdarima >= 2.0 does so when the
    # model was fit on a Series), so the values are re-labelled positionally.
    forecast_index = future_business_days(tsla_returns.index[-1], n_periods)
    forecast_series, conf_int_df = align_forecast(forecast, conf_int, forecast_index)

    # Plot the last 180 observations together with the forecast and its band
    fig, ax = plt.subplots(figsize=(15, 7))
    ax.plot(tsla_returns.iloc[-180:], label='Historical Returns')
    ax.plot(forecast_series, label='ARIMA Forecast', color='red')
    ax.fill_between(conf_int_df.index, conf_int_df['lower_bound'], conf_int_df['upper_bound'],
                    color='pink', alpha=0.3, label='Confidence Interval')
    ax.set_title('TSLA Daily Returns: ARIMA Forecast')
    ax.set_xlabel('Date')
    ax.set_ylabel('Daily Return')
    ax.legend()
    ax.grid(True)
    plt.show()

: 

In [None]:
# Cell 2: LSTM Code
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def create_dataset(dataset, look_back=1):
    """Convert a single-column time series into supervised learning samples.

    Each sample pairs a window of ``look_back`` consecutive values with the
    value that immediately follows the window.

    Parameters
    ----------
    dataset : numpy.ndarray of shape (n, 1)
        The series; only column 0 is read.
    look_back : int, default 1
        Number of past steps in each input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` of shape ``(n - look_back, look_back)`` and ``y`` of shape
        ``(n - look_back,)``. When the series is too short for a single
        sample, ``X`` has shape ``(0, look_back)`` and ``y`` shape ``(0,)``.
    """
    dataX, dataY = [], []
    # The last valid target sits at index len(dataset) - 1, so the window start
    # i runs up to len(dataset) - look_back - 1 inclusive. An extra ``- 1`` in
    # the range bound would silently discard the final sample.
    for i in range(len(dataset) - look_back):
        dataX.append(dataset[i:i + look_back, 0])
        dataY.append(dataset[i + look_back, 0])
    if not dataX:
        # Keep a 2-D shape so callers can still read X.shape[1].
        return np.empty((0, look_back)), np.empty(0)
    return np.array(dataX), np.array(dataY)


# Check if data is loaded before proceeding
try:
    data = pd.read_csv('../data/processed/daily_returns.csv', index_col=0, parse_dates=True)
except FileNotFoundError:
    print("Preprocessed data not found. Please run the EDA notebook first.")
    data = None

if data is not None:
    import tensorflow as tf
    from sklearn.preprocessing import MinMaxScaler
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import LSTM, Dense

    # Seed the stochastic parts (weight init, batch shuffling) so that repeated
    # runs of this cell start from the same random state.
    seed = 42
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # Drop missing returns first: a NaN would pass through the scaler into the
    # training windows and turn the loss into NaN.
    tsla_returns = data['Adj Close_TSLA'].dropna().values.reshape(-1, 1)

    # Chronological 80/20 split; each part must be longer than one window
    look_back = 60
    train_size = int(len(tsla_returns) * 0.8)
    if train_size <= look_back or len(tsla_returns) - train_size <= look_back:
        raise ValueError(
            f"Need more than {look_back} observations in both the training and "
            f"test splits; got {train_size} and {len(tsla_returns) - train_size}."
        )

    # Normalize the data for the LSTM. The scaler is fitted on the training
    # slice only, so the test period's min/max do not leak into training.
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_data = scaler.fit_transform(tsla_returns[:train_size])
    test_data = scaler.transform(tsla_returns[train_size:])
    scaled_data = np.vstack([train_data, test_data])

    # Build the sliding-window samples for both splits
    X_train, y_train = create_dataset(train_data, look_back)
    X_test, y_test = create_dataset(test_data, look_back)

    # Reshape input to be [samples, time steps, features]
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    # Build the LSTM model: two stacked 50-unit LSTM layers and a linear output
    print("\nBuilding and training the LSTM model...")
    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=(look_back, 1)))
    model.add(LSTM(50))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=1)

    # Make predictions
    train_predict = model.predict(X_train)
    test_predict = model.predict(X_test)

    # Inverse transform predictions and targets to the original scale.
    # y_train / y_test end up with shape (1, n_samples).
    train_predict = scaler.inverse_transform(train_predict)
    test_predict = scaler.inverse_transform(test_predict)
    y_train = scaler.inverse_transform([y_train])
    y_test = scaler.inverse_transform([y_test])

    # Report the fit quality in original return units
    train_rmse = float(np.sqrt(np.mean((y_train[0] - train_predict[:, 0]) ** 2)))
    test_rmse = float(np.sqrt(np.mean((y_test[0] - test_predict[:, 0]) ** 2)))
    print(f"Train RMSE: {train_rmse:.6f}")
    print(f"Test RMSE: {test_rmse:.6f}")

    # Plot the results. Prediction k of a split targets position
    # split_start + look_back + k of the full series, which fixes the x offsets.
    train_steps = np.arange(look_back, look_back + len(train_predict))
    test_start = train_size + look_back
    test_steps = np.arange(test_start, test_start + len(test_predict))

    fig, ax = plt.subplots(figsize=(15, 7))
    ax.plot(tsla_returns, label='Original Returns')
    ax.plot(train_steps, train_predict, label='Training Prediction', color='green')
    ax.plot(test_steps, test_predict, label='Testing Prediction', color='red')
    ax.set_title('TSLA Daily Returns: LSTM Model Predictions')
    ax.set_xlabel('Time Step')
    ax.set_ylabel('Daily Return')
    ax.legend()
    ax.grid(True)
    plt.show()