<a href="https://www.kaggle.com/code/collinslemeke/tesla-stock-prediction?scriptVersionId=195742637" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Import all Necessary Libraries

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error, r2_score

In [None]:
# Load the CSV from the Kaggle input dataset into a DataFrame.
csv_path = '/kaggle/input/tesla-stock-price-data/TSLA-2.csv'
data = pd.read_csv(csv_path)

# Print the column names, then leave the frame as the last expression so the
# notebook renders it.
print(data.columns)
data

In [None]:
# Convert the 'Date' column to datetimes and order the rows by date.
data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values('Date')

# Derive calendar features from the date.  Each entry maps the new column
# name to the pandas ``.dt`` accessor attribute it is read from.
calendar_parts = {
    'Year': 'year',
    'Month': 'month',
    'Day': 'day',
    'DayOfWeek': 'dayofweek',
    'Quarter': 'quarter',
}
for column_name, dt_attribute in calendar_parts.items():
    data[column_name] = getattr(data['Date'].dt, dt_attribute)

# From here on the date serves as the index rather than a regular column.
data = data.set_index('Date')

In [None]:
# Select the price/volume columns together with the calendar features derived
# from the date; these are the columns that get scaled next.
tsla = data.loc[:, [
    'Open', 'High', 'Low', 'Close', 'Volume',
    'Year', 'Month', 'Day', 'DayOfWeek', 'Quarter',
]]
tsla

In [None]:
# Rescale every feature column into the range [0, 1] with a MinMaxScaler.
# NOTE(review): the scaler is fitted on all rows, i.e. before any train/test
# split, so the scaling ranges also reflect the rows later used for testing.

scaler = MinMaxScaler(feature_range=(0, 1))
print(scaler)

cols_to_scale = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    'Year', 'Month', 'Day', 'DayOfWeek', 'Quarter',
]

# Fit and transform in one step, then write the result into a copy so the
# unscaled frame `tsla` stays untouched.
scaled_values = scaler.fit_transform(tsla[cols_to_scale])
tsla_scaled = tsla.copy()
tsla_scaled[cols_to_scale] = scaled_values
tsla_scaled

### LSTM (Long Short-Term Memory) 

LSTM is a type of recurrent neural network (RNN) that is particularly good at handling sequential data, such as time series, where past information plays a key role in predicting future outcomes. For stock price prediction, an LSTM is useful because stock prices are often influenced by trends and patterns over time.


### Reshaping the Data for LSTM:

LSTM expects a 3D input in the format (samples, timesteps, features). This means you’ll need to restructure your data into sequences, where each sample will have a certain number of past data points (like 10 or 30 days) as input.

In [None]:
# Number of consecutive rows that make up one input window.
sequence_length = 10

# `cols_to_scale` already contains the calendar features, so it is used as-is.
# Appending 'Year', 'Month', 'Day', 'DayOfWeek' and 'Quarter' again would feed
# those five columns to the model twice.
feature_values = tsla_scaled[cols_to_scale].to_numpy()
close_values = tsla_scaled['Close'].to_numpy()

if len(feature_values) <= sequence_length:
    raise ValueError(
        f"Need more than {sequence_length} rows to build sequences, "
        f"got {len(feature_values)}"
    )

# Sample k holds the `sequence_length` rows that precede row i, and its target
# is the scaled 'Close' of row i.  The stacked result is already 3-D
# (samples, timesteps, features), so no reshape is required.
X = np.array([
    feature_values[i - sequence_length:i]
    for i in range(sequence_length, len(feature_values))
])
y = close_values[sequence_length:].copy()

print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

### Building the LSTM Model:
Now that the data is preprocessed and reshaped, you can define and train an LSTM model.



In [None]:
# Hold out the last 20% of the windows as the test set.  shuffle=False keeps
# the split in row order: X holds overlapping sliding windows, so a random
# split would place near-copies of the test windows in the training set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Two stacked LSTM layers, each followed by dropout, and a single-unit dense
# output for the scaled 'Close' target.
model = Sequential()

model.add(LSTM(units=64, return_sequences=True, input_shape=(sequence_length, X_train.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(32))
model.add(Dropout(0.2))
model.add(Dense(1))

# Pass the configured optimizer object to compile(); the string 'adam' would
# build a separate default optimizer and ignore the learning rate set here.
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mse')

# Taken from the same `keras` namespace as the model and optimizer.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# validation_split carves the validation set out of the end of the training
# data, so early stopping never sees the test set and the later test metrics
# are measured on data that played no part in training.
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=64,
    validation_split=0.1,
    callbacks=[early_stopping],
)

### Visualise the Training Epochs

In [None]:
from matplotlib.ticker import MaxNLocator

def plot_enhanced_loss(history, save_path=None):
    """Plot training and validation loss per epoch with annotations.

    Draws both loss curves, a 5-epoch moving average of each (only when at
    least 5 epochs were recorded), arrows marking the minimum of each curve,
    and a text box with the epoch count and final losses.

    Args:
        history: Object whose ``history`` attribute is a mapping with
            ``'loss'`` and ``'val_loss'`` sequences of equal length, as
            returned by ``model.fit``.
        save_path: Optional file path; when given, the figure is also saved
            there at 300 dpi.

    Raises:
        ValueError: If no epochs were recorded.
    """
    train_loss = list(history.history['loss'])
    val_loss = list(history.history['val_loss'])
    if not train_loss or not val_loss:
        raise ValueError("history contains no recorded epochs to plot")

    # Set the style.  Matplotlib 3.6 renamed the bundled seaborn styles to
    # 'seaborn-v0_8-*' and 3.8 removed the old names, so use whichever of the
    # two this installation provides.
    for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break
    sns.set_palette("deep")

    # Create the plot
    fig, ax = plt.subplots(figsize=(12, 8))

    # Plot the losses
    epochs = range(1, len(train_loss) + 1)
    ax.plot(epochs, train_loss, 'b-', linewidth=2, label='Training Loss')
    ax.plot(epochs, val_loss, 'r-', linewidth=2, label='Validation Loss')

    # Add moving averages.  With fewer epochs than the window there is nothing
    # to average over, and plotting would fail on mismatched lengths.
    window_size = 5
    if len(train_loss) >= window_size:
        kernel = np.ones(window_size) / window_size
        ma_epochs = epochs[window_size - 1:]
        train_ma = np.convolve(train_loss, kernel, mode='valid')
        val_ma = np.convolve(val_loss, kernel, mode='valid')
        ax.plot(ma_epochs, train_ma, 'b--', linewidth=1, alpha=0.7, label='Training MA')
        ax.plot(ma_epochs, val_ma, 'r--', linewidth=1, alpha=0.7, label='Validation MA')

    # Customize the plot
    ax.set_title('Model Loss Over Epochs', fontsize=20, fontweight='bold')
    ax.set_xlabel('Epochs', fontsize=14)
    ax.set_ylabel('Loss', fontsize=14)
    ax.legend(fontsize=12)

    # Set x-axis to show integer values
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    # Add annotations for minimum losses (epochs are numbered from 1)
    min_train_loss = min(train_loss)
    min_val_loss = min(val_loss)
    min_train_epoch = train_loss.index(min_train_loss) + 1
    min_val_epoch = val_loss.index(min_val_loss) + 1

    ax.annotate(f'Min: {min_train_loss:.4f}', xy=(min_train_epoch, min_train_loss),
                xytext=(10, 10), textcoords='offset points', ha='left', va='bottom',
                bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))

    ax.annotate(f'Min: {min_val_loss:.4f}', xy=(min_val_epoch, min_val_loss),
                xytext=(10, -10), textcoords='offset points', ha='left', va='top',
                bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))

    # Add a text box with additional information
    info_text = f"Total Epochs: {len(epochs)}\n" \
                f"Final Train Loss: {train_loss[-1]:.4f}\n" \
                f"Final Val Loss: {val_loss[-1]:.4f}"
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.text(0.05, 0.95, info_text, transform=ax.transAxes, fontsize=10,
            verticalalignment='top', bbox=props)

    # Improve the layout
    plt.tight_layout()

    # Save the plot if a save path is provided
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')

    # Show the plot
    plt.show()

# Draw the loss curves for the training run and also save them as a PNG.
plot_enhanced_loss(history=history, save_path='model_loss_plot.png')

### Make Predictions & Visualise Predictions

In [None]:
# Predict on the held-out windows and score the predictions.
# NOTE: the targets come from the min-max scaled 'Close' column, so these
# errors are expressed in scaled units rather than in prices.

y_pred = model.predict(X_test)

print()
mse, mae, r2 = (
    score(y_test, y_pred)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

print(f"Mean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared Score: {r2}")

In [None]:
# Work with 1-D arrays so the metrics, the DataFrame and the residuals line up.
y_test = np.ravel(y_test)
y_pred = np.ravel(y_pred)

# Recompute the metrics on the flattened arrays
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Side-by-side table of targets and predictions used by the plots below
df = pd.DataFrame({'True Values': y_test, 'Predicted Values': y_pred})

# 2x2 grid: metrics text, scatter, error histogram, line comparison
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
metrics_ax, scatter_ax = axes[0, 0], axes[0, 1]
errors_ax, series_ax = axes[1, 0], axes[1, 1]

# Panel 1: the three metrics as centred text on a blank axis
metric_lines = [
    (0.8, f'Mean Squared Error (MSE): {mse:.6f}'),
    (0.6, f'Mean Absolute Error (MAE): {mae:.6f}'),
    (0.4, f'R-squared Score: {r2:.6f}'),
]
for text_y, metric_text in metric_lines:
    metrics_ax.text(0.5, text_y, metric_text, ha='center', va='center', fontsize=12)
metrics_ax.axis('off')

# Panel 2: true vs. predicted scatter, with a dashed red y = x reference line
sns.scatterplot(data=df, x='True Values', y='Predicted Values', ax=scatter_ax, alpha=0.6)
true_range = [df['True Values'].min(), df['True Values'].max()]
scatter_ax.plot(true_range, true_range, color='red', linestyle='--')
scatter_ax.set_title('True vs. Predicted Values')
scatter_ax.set_xlabel('True Values')
scatter_ax.set_ylabel('Predicted Values')

# Panel 3: histogram (with KDE) of the residuals, true minus predicted
errors = y_test - y_pred
sns.histplot(errors, kde=True, ax=errors_ax)
errors_ax.set_title('Distribution of Errors')
errors_ax.set_xlabel('Error')
errors_ax.set_ylabel('Frequency')

# Panel 4: both series against the sample index; predictions are dashed
series_styles = [
    ('True Values', {}),
    ('Predicted Values', {'linestyle': '--'}),
]
for series_name, line_kwargs in series_styles:
    sns.lineplot(x=df.index, y=series_name, data=df, ax=series_ax, label=series_name, **line_kwargs)
series_ax.set_title('Predicted vs. True Values')
series_ax.set_xlabel('Index')
series_ax.set_ylabel('Values')
series_ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Report the test-set mean squared error once more as the closing figure.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error on Unseen Data: {mse}')