# 📈 Stock Price Prediction with LSTM

**Project**: Time Series Forecasting - Deep Learning  
**Level**: Advanced  
**Dataset**: Stock Market Data (Synthetic)  

## 📋 Project Overview

This project predicts stock prices using Long Short-Term Memory (LSTM) neural networks. We'll learn:

- Time series forecasting fundamentals
- LSTM and RNN architectures
- Sequential data processing
- Technical indicators as features
- Financial ML challenges and ethics

⚠️ **Disclaimer**: This is for educational purposes only. Do NOT use for actual trading!

Let's build a stock prediction model! 📊

## 1. Import Libraries

In [None]:
# Data manipulation and analysis
import pandas as pd
import numpy as np

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Preprocessing and metrics
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Technical indicators
import talib

# Utilities
import warnings
from datetime import datetime, timedelta
import random

# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Plot appearance: matplotlib style sheet plus seaborn colour palette.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Start-up banner, emitted as three lines in a single write.
print("\n".join([
    "✅ All libraries imported successfully!",
    f"🧠 TensorFlow version: {tf.__version__}",
    "📈 Ready for stock price prediction!",
]))

## 2. Data Generation and Exploration

In [None]:
# Generate synthetic stock data (realistic patterns)
# Seed NumPy and TensorFlow so every run of the notebook draws the same numbers.
np.random.seed(42)
tf.random.set_seed(42)

# Parameters
n_days = 1000  # About 4 years of trading days (252 per year)
initial_price = 100.0
volatility = 0.02  # 2% daily volatility
trend = 0.0005  # Slight upward trend

print(f"📊 Generating synthetic stock data...")
print(f"Trading days: {n_days:,}")
print(f"Initial price: ${initial_price:.2f}")
print(f"Daily volatility: {volatility:.1%}")

# Generate dates: business days (Mon-Fri) only, so that one row really is one
# trading day. The 252-row yearly cycle, the 5-row weekly cycle and the
# sqrt(252) annualisation used in this notebook all assume that.
start_date = datetime(2020, 1, 1)
dates = pd.bdate_range(start=start_date, periods=n_days)

# Generate closing prices: a multiplicative random walk with drift whose daily
# return is scaled by a yearly and a weekly sinusoidal factor.
# NOTE: the order of the np.random calls below is kept fixed so the seeded
# series stays reproducible.
prices = [initial_price]

for i in range(1, n_days):
    # Add some cyclical patterns and trends
    seasonal_factor = 1 + 0.1 * np.sin(2 * np.pi * i / 252)  # Yearly cycle
    weekly_factor = 1 + 0.05 * np.sin(2 * np.pi * i / 5)     # Weekly cycle

    # Random walk with drift
    random_change = np.random.normal(trend, volatility)

    # Apply factors
    price_change = random_change * seasonal_factor * weekly_factor
    new_price = prices[-1] * (1 + price_change)

    # Floor the price at $1 so it can never reach zero or go negative
    new_price = max(new_price, 1.0)
    prices.append(new_price)

# Generate OHLC data
ohlc_data = []
for i, price in enumerate(prices):
    # The generated price is the day's close; previous close drives the open
    # and the volume. On the first day there is no previous close.
    prev_close = prices[i-1] if i > 0 else price

    # Intraday range: high/low sit a random fraction of the range around close
    daily_volatility = np.random.uniform(0.005, 0.03)

    high = price * (1 + daily_volatility * np.random.uniform(0.3, 1.0))
    low = price * (1 - daily_volatility * np.random.uniform(0.3, 1.0))

    # Open is close of previous day with some gap
    if i == 0:
        open_price = price
    else:
        gap = np.random.normal(0, 0.01)
        open_price = prev_close * (1 + gap)

    # Ensure OHLC relationships (high >= open, close; low <= open, close)
    high = max(high, open_price, price)
    low = min(low, open_price, price)

    # Volume grows with the size of the day's relative price move,
    # multiplied by random noise in [0.5, 2.0)
    price_change = abs(price - prev_close) / price
    base_volume = 1000000
    volume = int(base_volume * (1 + 2 * price_change) * np.random.uniform(0.5, 2.0))

    ohlc_data.append({
        'Date': dates[i],
        'Open': open_price,
        'High': high,
        'Low': low,
        'Close': price,
        'Volume': volume
    })

# Create DataFrame indexed by trading date
df = pd.DataFrame(ohlc_data)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

print(f"\n📈 Stock dataset created successfully!")
print(f"Dataset shape: {df.shape}")
print(f"Date range: {df.index.min().date()} to {df.index.max().date()}")
print(f"Price range: ${df['Close'].min():.2f} - ${df['Close'].max():.2f}")
print(f"Average daily volume: {df['Volume'].mean():,.0f}")

In [None]:
# Display basic information
# Pull the close series and its day-over-day returns once; every statistic
# below is derived from these two.
close_prices = df['Close']
close_returns = close_prices.pct_change()
first_close = close_prices.iloc[0]
last_close = close_prices.iloc[-1]

print("📊 Dataset Information:")
print(f"Total trading days: {len(df):,}")
print(f"Features: {list(df.columns)}")
print(f"Missing values: {df.isnull().sum().sum()}")

print(f"\n💰 Price Statistics:")
print(f"• Starting price: ${first_close:.2f}")
print(f"• Ending price: ${last_close:.2f}")
print(f"• Total return: {(last_close / first_close - 1):.1%}")
print(f"• Average daily return: {close_returns.mean():.3%}")
print(f"• Daily volatility: {close_returns.std():.3%}")
# Annualised by sqrt(252), i.e. assuming 252 rows per year
print(f"• Annualized volatility: {close_returns.std() * np.sqrt(252):.1%}")

print("\n📈 Statistical Summary:")
print(df.describe().round(2))

## 3. Exploratory Data Analysis

In [None]:
# Stock price visualization
# Series used by the lower panels and by the metrics printed at the end.
daily_returns = df['Close'].pct_change().dropna()
# Kept un-dropped so it stays aligned row-for-row with df['Volume'].
price_change = df['Close'].pct_change().abs()

fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('📈 Stock Market Analysis', fontsize=16, fontweight='bold')
(ax_price, ax_volume), (ax_returns, ax_scatter) = axes

# Top-left: closing price over time
ax_price.plot(df.index, df['Close'], linewidth=1, color='blue')
ax_price.set(title='💰 Stock Price Over Time', xlabel='Date', ylabel='Price ($)')

# Top-right: traded volume per day
ax_volume.bar(df.index, df['Volume'], width=1, alpha=0.7, color='orange')
ax_volume.set(title='📊 Trading Volume', xlabel='Date', ylabel='Volume')

# Bottom-left: histogram of daily returns with the mean marked
mean_return = daily_returns.mean()
ax_returns.hist(daily_returns, bins=50, alpha=0.7, color='green', density=True)
ax_returns.axvline(mean_return, color='red', linestyle='--',
                   label=f'Mean: {mean_return:.3%}')
ax_returns.set(title='📊 Daily Returns Distribution',
               xlabel='Daily Return', ylabel='Density')
ax_returns.legend()

# Bottom-right: size of the daily move against volume
ax_scatter.scatter(price_change, df['Volume'], alpha=0.5, color='purple')
ax_scatter.set(title='💹 Price Change vs Volume',
               xlabel='Absolute Price Change', ylabel='Volume')

# Same light grid on all four panels
for panel in axes.flat:
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Calculate key metrics
# Sharpe ratio here is mean/std of daily returns scaled by sqrt(252);
# no risk-free rate is subtracted.
sharpe_ratio = daily_returns.mean() / daily_returns.std() * np.sqrt(252)
# Largest fall of the close below its running maximum.
max_drawdown = ((df['Close'] / df['Close'].cummax()) - 1).min()
positive_share = (daily_returns > 0).mean()

print(f"📊 Key Market Metrics:")
print(f"• Sharpe Ratio: {sharpe_ratio:.2f}")
print(f"• Maximum Drawdown: {max_drawdown:.1%}")
print(f"• Positive Days: {positive_share:.1%}")
print(f"• Best Day: {daily_returns.max():.1%}")
print(f"• Worst Day: {daily_returns.min():.1%}")

In [None]:
# Technical indicators
print("🔧 Calculating technical indicators...")

# Simple moving averages of the close over 5-, 20- and 50-row windows
for ma_window in (5, 20, 50):
    df[f'MA_{ma_window}'] = df['Close'].rolling(window=ma_window).mean()

# Bollinger Bands: the 20-row moving average plus/minus two rolling
# standard deviations of the close over the same 20 rows
bb_offset = df['Close'].rolling(window=20).std() * 2
df['BB_upper'] = df['MA_20'] + bb_offset
df['BB_lower'] = df['MA_20'] - bb_offset
# RSI (Relative Strength Index)
def calculate_rsi(prices, window=14):
    """Return the Relative Strength Index of *prices* on a 0-100 scale.

    Average gain and average loss are plain rolling means over ``window``
    consecutive price changes (no exponential/Wilder smoothing).

    Args:
        prices: pandas Series of prices in chronological order.
        window: Number of price changes averaged for each RSI value.

    Returns:
        pandas Series with the same index as ``prices``. The first ``window``
        entries are NaN, because ``window`` changes require ``window + 1``
        prices. A window containing no losses gives 100 (the gain/loss ratio
        is infinite); a window with no price movement at all gives NaN (0/0).
    """
    delta = prices.diff()
    # clip() keeps NaN as NaN, so the undefined first change from diff() is
    # not counted as a real zero move; the rolling mean therefore only
    # produces a value once it has `window` genuine changes.
    gain = delta.clip(lower=0).rolling(window=window).mean()
    loss = (-delta).clip(lower=0).rolling(window=window).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

# RSI with the helper's default window
df['RSI'] = calculate_rsi(df['Close'])

# MACD: 12-span EMA of the close minus the 26-span EMA; the signal line is
# the 9-span EMA of that difference
exp1 = df['Close'].ewm(span=12).mean()
exp2 = df['Close'].ewm(span=26).mean()
df['MACD'] = exp1 - exp2
df['MACD_signal'] = df['MACD'].ewm(span=9).mean()

# Volatility: standard deviation of daily returns over a 20-row window
returns_for_vol = df['Close'].pct_change()
df['Volatility'] = returns_for_vol.rolling(window=20).std()

# Momentum: relative change of the close versus 10 rows earlier
close_10_rows_ago = df['Close'].shift(10)
df['Momentum'] = df['Close'] / close_10_rows_ago - 1

print(f"✅ Technical indicators calculated!")
# Every column that is not one of the raw OHLCV fields is an engineered feature
raw_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
engineered_columns = [col for col in df.columns if col not in raw_columns]
print(f"New features: {engineered_columns}")

# Remove NaN values (the warm-up rows where rolling/shifted indicators are undefined)
df_clean = df.dropna()
rows_dropped = len(df) - len(df_clean)
print(f"\n📊 Clean dataset shape: {df_clean.shape}")
print(f"Rows removed due to NaN: {rows_dropped}")