# Importing necessary libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf 

In [None]:
# Read the Apple (AAPL) price history from the local CSV file into a DataFrame
df = pd.read_csv('Data/AAPL.csv')

# Preview the top rows to confirm the file was parsed as expected
print("Initial dataset:", df.head(), sep="\n")

# Cleaning Data

In [None]:
# Rows whose 'Date' value already appeared earlier in the file
is_repeated_date = df.duplicated(subset='Date')
duplicate_dates = df.loc[is_repeated_date]
print("Duplicate Dates:", duplicate_dates, sep="\n")

# Per-column count of missing (NaN / None) entries
missing_per_column = df.isnull().sum()
print("\nChecking for missing values:", missing_per_column, sep="\n")

# Exploratory Data Analysis (EDA)

In [None]:
# Parse the 'Date' strings into datetimes, then promote that column to the
# index so every later plot and slice is keyed by date.
df = df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')

In [None]:
# Distribution of the adjusted closing price, drawn with the
# object-oriented Matplotlib API
hist_fig, hist_ax = plt.subplots(figsize=(20, 8))
hist_ax.hist(df['Adj Close'], bins=20, color='LimeGreen')
hist_ax.set_title('Histogram of Apple Stock Prices')
hist_ax.set_xlabel('Apple Stocks')
hist_ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Scatter of the adjusted closing price against the date index
plt.figure(figsize=(20, 8))
scatter_ax = sns.scatterplot(df['Adj Close'])  # seaborn returns the Axes it drew on
scatter_ax.set_title('Apple Stocks from 2021 to 2022')
scatter_ax.set_xlabel('Date')
scatter_ax.set_ylabel('Stocks')
plt.xticks(rotation=90)  # vertical tick labels so the dates do not overlap
plt.show()

In [None]:
# Line chart of the adjusted closing price over time
trend_fig, trend_ax = plt.subplots(figsize=(20, 8))
trend_ax.plot(df['Adj Close'])
trend_ax.set_title('Apple Stocks Trend')
plt.show()

In [None]:
# Pairplot to visualize relationships between different features.
# sns.pairplot builds its own multi-axes figure (a PairGrid), so plt.title
# would only label the last sub-axes; a figure-level suptitle titles the
# whole grid instead.
pair_grid = sns.pairplot(df)
pair_grid.figure.suptitle('Pairplot of Features', y=1.02)  # y > 1 keeps the title clear of the top row
plt.show()

In [None]:
# Pairwise correlation between the columns, rendered as an annotated heatmap
correlation_matrix = df.corr()
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    linewidths=0.5,
    ax=heatmap_ax,
)
heatmap_ax.set_title('Correlation Matrix')
plt.show()

# Modeling

In [None]:
# Extract the 'Close' column as a 2-D (n_samples, 1) array for modelling
closing_prices = df.filter(['Close']).values

# Split / window settings, defined once so training and testing stay in sync
TRAIN_FRACTION = 0.95  # leading share of the series used for training
LOOKBACK = 60          # number of previous closes fed to the LSTM per sample
split_index = int(len(closing_prices) * TRAIN_FRACTION)

# At least one full look-back window plus one label is needed to train
if split_index <= LOOKBACK:
    raise ValueError(
        f"Need more than {LOOKBACK} training rows to build a window; "
        f"got {split_index}."
    )

# Scale the data. The scaler is fit on the training portion ONLY so that the
# min/max of the held-out period do not leak into training; the same fitted
# transform is then applied to the whole series (test values may therefore
# fall slightly outside [0, 1]).
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(closing_prices[:split_index])
scaled_prices = scaler.transform(closing_prices)

# Create training dataset: each sample is the previous LOOKBACK scaled closes,
# and its label is the scaled close that immediately follows them
train_data = scaled_prices[:split_index, :]
train_features = np.array(
    [train_data[i - LOOKBACK:i, 0] for i in range(LOOKBACK, len(train_data))]
)
train_labels = np.array(train_data[LOOKBACK:, 0])

# Reshape the data for LSTM: (samples, timesteps, features)
train_features = np.reshape(
    train_features, (train_features.shape[0], train_features.shape[1], 1)
)

# Build the LSTM model: three stacked LSTM layers followed by a single-unit
# dense layer that outputs the next scaled closing price
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(train_features.shape[1], 1)))
model.add(LSTM(units=50, return_sequences=True))
model.add(LSTM(units=50))
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Fit the model to the training data
model.fit(train_features, train_labels, epochs=25, batch_size=32)

# Create the testing dataset. It starts LOOKBACK rows before the split so the
# first test sample has a complete history window.
test_data = scaled_prices[split_index - LOOKBACK:, :]
y_test = closing_prices[split_index:, :]  # unscaled ground-truth closes

# Prepare testing data
x_test = np.array(
    [test_data[i - LOOKBACK:i, 0] for i in range(LOOKBACK, len(test_data))]
)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

# Get the model's predicted price values and map them back to price units
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)

# Visualizing the results

In [None]:
# Split the frame at the same 95% boundary used for modelling
split_index = int(len(closing_prices) * 0.95)
training_set = df.iloc[:split_index]

# Take an explicit copy: adding a column to a bare slice of `df` triggers
# pandas' SettingWithCopyWarning and leaves it ambiguous whether `df` is modified.
validation_set = df.iloc[split_index:].copy()

# `predictions` has one column; flatten it to 1-D before storing it as a column
validation_set['Predictions'] = predictions.ravel()

In [None]:
# Overlay the training closes, the held-out closes and the model's predictions
pred_fig, pred_ax = plt.subplots(figsize=(16, 8))
pred_ax.set_title('Stock Price Prediction using LSTM')
pred_ax.set_xlabel('Date')
pred_ax.set_ylabel('Close Price USD ($)')
pred_ax.plot(training_set['Close'], label='Training Data')
pred_ax.plot(
    validation_set[['Close', 'Predictions']],
    label=['Actual Prices', 'Predicted Prices'],
)
pred_ax.legend(loc='lower right')
plt.show()

# Evaluation

In [None]:
# Compare the held-out closing prices with the model's predictions
actual_close = validation_set['Close']
predicted_close = validation_set['Predictions']

mse_lstm = mean_squared_error(actual_close, predicted_close)
rmse_lstm = np.sqrt(mse_lstm)
mae_lstm = mean_absolute_error(actual_close, predicted_close)
relative_errors = np.abs((actual_close - predicted_close) / actual_close)
mape_lstm = np.mean(relative_errors) * 100  # expressed as a percentage
r2_lstm = r2_score(actual_close, predicted_close)

# Report every metric with two decimals; only MAPE carries a unit suffix
print('\nLSTM Model Evaluation:')
for metric_label, metric_value, unit_suffix in (
    ('MSE', mse_lstm, ''),
    ('RMSE', rmse_lstm, ''),
    ('MAE', mae_lstm, ''),
    ('MAPE', mape_lstm, '%'),
    ('R2 Score', r2_lstm, ''),
):
    print(f'{metric_label}: {metric_value:.2f}{unit_suffix}')

# Additional insight: the first date for which a prediction exists
start_date_predictions = validation_set.index[0]
print(f'The LSTM model predictions start from {start_date_predictions}')