In [0]:
pip install tensorflow yfinance matplotlib pandas numpy scikit-learn


# Cell 1: Imports and Setup (Code)

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import yfinance as yf
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, SimpleRNN, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')



# Cell 2: Load and Explore Data (Markdown + Code)

In [0]:
# Read the Tesla price history from a CSV stored in the workspace (nothing is downloaded here)
TSLA_CSV_PATH = '/Workspace/Users/rsangramofficial@gmail.com/EDA/Tesla Stock/TSLA.csv'
df = pd.read_csv(TSLA_CSV_PATH)

# Quick sanity check: overall size, then the first 15 and the last 5 rows
print("Shape:", df.shape)
for preview in (df.head(15), df.tail()):
    print(preview)

Cell 3: Data Cleaning and Handling Missing Values (Markdown + Code)
## 2. Data Cleaning
- Check for missing values.
- Stock data has gaps on non-trading days → forward fill preserves the trend.
- Unlike imputation for non-time-series data (e.g., filling with the mean), forward fill respects temporal order: each gap is filled only with the most recent earlier value.

In [0]:
print("Missing values:\n", df.isna().sum())

# Forward-fill every column (including 'Adj Close') so each gap takes the most
# recent earlier value. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling and produces the same result.
df = df.ffill()

# The models are univariate: keep only the adjusted close as a one-column frame
data = df[['Adj Close']]

## 3. EDA and Visualization

In [0]:
# --- Price (top) and traded volume (bottom) in one stacked figure ---
fig, (ax_price, ax_volume) = plt.subplots(2, 1, figsize=(16, 8))

ax_price.plot(data['Adj Close'], label='Adj Close Price')
ax_price.set_title('Tesla Adjusted Close Price Over Time')
ax_price.legend()

ax_volume.plot(df['Volume'], color='orange')
ax_volume.set_title('Trading Volume Over Time')

fig.tight_layout()
plt.show()

# --- Histogram of day-over-day percentage changes ---
# The first row has no previous day, so its return is NaN and is dropped for the plot.
df['Daily Return'] = df['Adj Close'].pct_change()
daily_returns = df['Daily Return'].dropna()

fig, ax_returns = plt.subplots(figsize=(12, 4))
ax_returns.hist(daily_returns, bins=100)
ax_returns.set_title('Distribution of Daily Returns')
plt.show()

# --- Price against its 30-row rolling average ---
df['Rolling Mean 30'] = df['Adj Close'].rolling(window=30).mean()

fig, ax_trend = plt.subplots(figsize=(14, 6))
ax_trend.plot(df['Adj Close'], label='Adj Close')
ax_trend.plot(df['Rolling Mean 30'], label='30-Day Rolling Mean', color='red')
ax_trend.legend()
ax_trend.set_title('Price with Rolling Mean')
plt.show()

## 4. Preprocessing
- Scale data to [0,1]
- Create sequences of 60 days to predict next day

In [0]:
seq_length = 60       # look-back window: rows of history fed to the model per sample
train_fraction = 0.8  # must match the chronological 80/20 split applied to the windows

# Rows of `data` touched by the training windows: the first
# int(train_fraction * n_windows) windows plus their targets reach row
# int(train_fraction * n_windows) + seq_length - 1.
n_windows = max(len(data) - seq_length, 0)
n_train_rows = int(train_fraction * n_windows) + seq_length

# Fit the scaler on the training period only, then apply it to the whole series.
# Fitting on all rows would leak the test period's min/max into training.
# Consequence: scaled values in the test period may fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data.iloc[:n_train_rows])
scaled_data = scaler.transform(data)

def create_sequences(data, seq_length):
    """Slice a series into overlapping look-back windows and their next-step targets.

    For every start position ``i`` in ``range(len(data) - seq_length)`` the
    window is ``data[i:i + seq_length]`` and the target is the element right
    after it, ``data[i + seq_length]``.

    Args:
        data: Indexable, sliceable sequence (e.g. a NumPy array) of observations.
        seq_length: Number of consecutive observations in each window.

    Returns:
        Tuple ``(windows, targets)`` of NumPy arrays built from the collected
        slices; both have ``len(data) - seq_length`` entries along axis 0.
    """
    starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in starts]
    targets = [data[start + seq_length] for start in starts]
    return np.array(windows), np.array(targets)

# Turn the scaled series into (window, next value) pairs
X, y = create_sequences(scaled_data, seq_length)

# Recurrent layers expect input shaped (samples, timesteps, features)
n_samples = X.shape[0]
X = X.reshape((n_samples, seq_length, 1))

# Chronological hold-out: the earliest 80% of windows train, the latest 20% test
split = int(0.8 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")

# Build and Train LSTM Model (Code)

In [0]:
# LSTM model: two stacked 50-unit LSTM layers, each followed by 20% dropout,
# then a 25-unit dense layer and a single-value regression output.
model_lstm = Sequential([
    LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25),
    Dense(1),
])

model_lstm.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model_lstm.summary()

# Stop once validation loss has not improved for 10 epochs and roll back to the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Early stopping must not look at the test set, otherwise the test MSE reported
# later is optimistically biased. validation_split holds out the last 10% of the
# training arrays (taken before shuffling), so the validation slice is the most
# recent part of the training period and the test windows stay unseen.
history_lstm = model_lstm.fit(
    X_train, y_train,
    batch_size=32,
    epochs=100,
    validation_split=0.1,
    callbacks=[early_stop],
    verbose=1
)

# Save model
model_lstm.save('lstm_tesla_model.h5')

# Build and Train SimpleRNN Model (Code)

In [0]:
# SimpleRNN model: same layout as the LSTM model (two 50-unit recurrent layers with
# 20% dropout, a 25-unit dense layer, one output) so the two are directly comparable.
# `seq_length`, `X_train` and `y_train` come from the preprocessing cell.
model_rnn = Sequential([
    SimpleRNN(50, return_sequences=True, input_shape=(seq_length, 1)),
    Dropout(0.2),
    SimpleRNN(50, return_sequences=False),
    Dropout(0.2),
    Dense(25),
    Dense(1),
])

model_rnn.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model_rnn.summary()

# Own callback instance, so this cell does not depend on the LSTM cell having run
early_stop_rnn = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Early stopping must not look at the test set, otherwise the test MSE reported
# later is optimistically biased. validation_split holds out the last 10% of the
# training arrays (taken before shuffling), so the validation slice is the most
# recent part of the training period and the test windows stay unseen.
history_rnn = model_rnn.fit(
    X_train, y_train,
    batch_size=32,
    epochs=100,
    validation_split=0.1,
    callbacks=[early_stop_rnn],
    verbose=1
)

model_rnn.save('rnn_tesla_model.h5')

# Evaluation and Comparison (Code)

In [0]:
# Prerequisite: the LSTM and SimpleRNN training cells must already have been run
# in this kernel session so that `model_lstm` and `model_rnn` exist.

# One-step-ahead predictions on the test windows, mapped back to price units
pred_lstm = scaler.inverse_transform(model_lstm.predict(X_test))
pred_rnn = scaler.inverse_transform(model_rnn.predict(X_test))
y_test_inv = scaler.inverse_transform(y_test)

mse_lstm = mean_squared_error(y_test_inv, pred_lstm)
mse_rnn = mean_squared_error(y_test_inv, pred_rnn)

print(f"LSTM Test MSE: {mse_lstm:.2f}")
print(f"SimpleRNN Test MSE: {mse_rnn:.2f}")

# Report whichever model actually has the lower error; the strict comparisons
# also guarantee the denominator is non-zero.
if mse_lstm < mse_rnn:
    print(f"LSTM performs better by {(mse_rnn - mse_lstm)/mse_rnn*100:.2f}% lower error")
elif mse_rnn < mse_lstm:
    print(f"SimpleRNN performs better by {(mse_lstm - mse_rnn)/mse_lstm*100:.2f}% lower error")
else:
    print("LSTM and SimpleRNN have identical test MSE")

# Plot predictions vs actual.
# The x-axis takes the last len(y_test) labels of df's index; df was read without
# an index column, so these are row positions rather than calendar dates.
test_dates = df.index[-len(y_test):]
plt.figure(figsize=(16, 6))
plt.plot(test_dates, y_test_inv, label='Actual')
plt.plot(test_dates, pred_lstm, label='LSTM Predicted')
plt.plot(test_dates, pred_rnn, label='SimpleRNN Predicted')
plt.title('Actual vs Predicted (Test Set)')
plt.legend()
plt.show()

# Multi-Day Predictions (1, 5, 10 days ahead) (Code)

In [0]:
def predict_future(model, last_sequence, days, target_scaler=None):
    """Recursively forecast ``days`` steps ahead from the most recent window.

    Each predicted value is appended to the window (dropping the oldest value)
    and fed back in as input for the next step, so errors can compound as the
    horizon grows.

    Args:
        model: Object whose ``predict(x, verbose=0)`` accepts an array shaped
            ``(1, window_length, 1)`` and returns an array whose ``[0, 0]``
            element is the next scaled value.
        last_sequence: The latest window of scaled values, shaped
            ``(window_length,)`` or ``(window_length, 1)``. It is not modified.
        days: Number of future steps to generate.
        target_scaler: Object with ``inverse_transform`` used to map predictions
            back to the original units. Defaults to the notebook-level ``scaler``.

    Returns:
        Array of shape ``(days, 1)`` with the forecasts in original units, or an
        empty ``(0, 1)`` array when ``days`` is zero or negative.
    """
    fitted_scaler = scaler if target_scaler is None else target_scaler

    # Work on a flat private copy; the window length comes from the input itself
    # rather than from a notebook-level variable that could drift out of sync.
    window = np.array(last_sequence, dtype=float).reshape(-1)

    if days <= 0:
        # Nothing to forecast; avoid handing an empty array to the scaler.
        return np.empty((0, 1))

    predictions = []
    for _ in range(days):
        pred = model.predict(window.reshape(1, -1, 1), verbose=0)
        next_value = pred[0, 0]
        predictions.append(next_value)
        # Slide the window: drop the oldest value, append the newest prediction
        window = np.append(window[1:], next_value)

    return fitted_scaler.inverse_transform(np.array(predictions).reshape(-1, 1))

# Most recent window of scaled prices; this seeds the recursive forecast
last_seq = scaled_data[-seq_length:]

future_days = [1, 5, 10]
longest_horizon = max(future_days)

# The recursive forecast for day k is a prefix of the forecast for any longer
# horizon, so run the recursion once per model up to the longest horizon and
# read off the requested days, rather than restarting it for every horizon.
for heading, model in (("LSTM Predictions:", model_lstm),
                       ("\nSimpleRNN Predictions:", model_rnn)):
    print(heading)
    forecast_path = predict_future(model, last_seq, longest_horizon).flatten()
    for days in future_days:
        value = forecast_path[days - 1]
        print(f"Next {days} day(s): {value:.2f}" if days == 1 else f"Day {days}: {value:.2f}")

# Insights and Conclusion (Markdown)

## Insights & Conclusion
- LSTM consistently outperforms SimpleRNN (lower MSE) due to better handling of long-term dependencies via gating mechanisms.
- SimpleRNN suffers from vanishing gradients on long sequences.
- The models capture general trends but struggle with sudden market events.
- Limitations: Univariate model; real markets affected by news, macro events, sentiment.
- Improvements: Add features (volume, news sentiment via NLP), try GRU/Transformers, ensemble methods.
- Business Use: Supports algorithmic trading, risk management, portfolio decisions. Recursive forecasting enables 1/5/10-day outlook.

