# Stock Market Predictor using LSTM

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Date range for the training data.
# NOTE(review): yfinance appears to treat `end` as exclusive, so the last
# trading day returned would be 2019-12-30 - confirm this is intended.
start = '2010-01-01'
end = '2019-12-31'

# Download Apple stock data
print("Downloading Apple stock data...")
df = yf.download('AAPL', start=start, end=end)

# Stop right here when the download produced nothing (bad ticker, network
# failure, rate limiting). Otherwise the later plotting/scaling cells fail
# with errors that do not point back at the download.
if df is None or df.empty:
    raise RuntimeError(
        f"No data returned for 'AAPL' between {start} and {end}; "
        "check the network connection, ticker and date range."
    )

print(f"Data shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")

In [None]:
# If the columns came back as a MultiIndex, drop level 1 (the ticker level)
# so that columns can be addressed by plain names such as 'Close'.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.droplevel(1)

# Display basic info about the data
print("Data Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() - that would emit a stray "None" line.
df.info()
print("\nFirst 5 rows:")
print(df.head())
print("\nLast 5 rows:")
print(df.tail())

In [None]:
# Line chart of the closing price across the whole downloaded range
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df.index, df['Close'])
ax.set_title('Apple Stock Closing Price (2010-2019)')
ax.set_xlabel('Year')
ax.set_ylabel('Closing Price ($)')
ax.grid(True)
plt.show()

In [None]:
# Rolling means of the closing price over 100 and 200 rows, for display only
close = df['Close']
ma100 = close.rolling(100).mean()
ma200 = close.rolling(200).mean()

# Overlay the raw closing price and both moving averages on one axes
fig, ax = plt.subplots(figsize=(12, 6))
for series, label, alpha in (
    (close, 'Close Price', 0.7),
    (ma100, '100-day MA', 0.8),
    (ma200, '200-day MA', 0.8),
):
    ax.plot(df.index, series, label=label, alpha=alpha)
ax.set_title('Apple Stock with Moving Averages')
ax.set_xlabel('Year')
ax.set_ylabel('Price ($)')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Model input is the closing price only, kept as a 2-D (rows, 1) array
# because the double-bracket selection returns a one-column frame.
data = df[['Close']].values
print(f"Data shape for training: {data.shape}")

# Chronological split: the first 70% of rows train, the remainder tests
training_data_len = int(len(data) * 0.70)
train_data, test_data = data[:training_data_len], data[training_data_len:]

print(f"Training data length: {len(train_data)}")
print(f"Testing data length: {len(test_data)}")

In [None]:
# Fit the min-max scaler on the training rows only, then map them into [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1)).fit(train_data)
scaled_train_data = scaler.transform(train_data)

print(f"Scaled training data shape: {scaled_train_data.shape}")
print(f"Min value: {scaled_train_data.min()}, Max value: {scaled_train_data.max()}")

In [None]:
# Supervised windows: each sample is the previous `time_step` scaled closes,
# and its target is the scaled close that immediately follows the window.
time_step = 100
n_train_rows = len(scaled_train_data)

x_train = np.array([
    scaled_train_data[stop - time_step:stop, 0]
    for stop in range(time_step, n_train_rows)
])
y_train = np.array([
    scaled_train_data[stop, 0]
    for stop in range(time_step, n_train_rows)
])

# Add a trailing feature axis: (samples, time_step) -> (samples, time_step, 1)
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)

print(f"X_train shape: {x_train.shape}")
print(f"Y_train shape: {y_train.shape}")

In [None]:
# Stacked LSTM regressor: four 50-unit LSTM layers, each followed by 20%
# dropout, ending in a single-unit dense output.
# The first three LSTM layers set return_sequences=True so the LSTM stacked
# on top of each one is fed a sequence; the last LSTM leaves it at its default.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1),
])

print("Model architecture:")
model.summary()

In [None]:
# Configure training: mean-squared-error loss minimised with the Adam optimizer
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
)
print("Model compiled successfully!")

In [None]:
# Fit on the windowed training set; `history` keeps the per-epoch losses.
# validation_split=0.1 asks Keras to hold out a tenth of the samples for
# validation instead of training on them.
print("Training the model...")
history = model.fit(
    x_train,
    y_train,
    epochs=25,
    batch_size=32,
    validation_split=0.1,
    verbose=1,
)
print("Training completed!")

In [None]:
# Plot training vs. validation loss per epoch.
# A single axes is used on purpose: a 1x2 subplot grid with only its first
# panel populated would leave half of the figure blank.
plt.figure(figsize=(12, 4))

plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Persist the trained network to 'keras_model.h5' in the working directory
model.save(filepath='keras_model.h5')
print("Model saved as 'keras_model.h5'")

In [None]:
# Build the test input series.
# The first test prediction needs `time_step` rows of history, so the tail of
# the training data is stacked in front of the test rows.
last_100_days = train_data[-time_step:]
# Scale with the scaler already fitted on the training rows (no re-fitting)
test_inputs = scaler.transform(
    np.concatenate((last_100_days, test_data), axis=0)
)

print(f"Test inputs shape: {test_inputs.shape}")

In [None]:
# Targets are the unscaled test closes, flattened to 1-D
y_test = test_data.flatten()

# One window of `time_step` scaled closes per test row
x_test = np.array([
    test_inputs[stop - time_step:stop, 0]
    for stop in range(time_step, len(test_inputs))
])
# Add a trailing feature axis: (samples, time_step) -> (samples, time_step, 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

print(f"X_test shape: {x_test.shape}")
print(f"Y_test shape: {y_test.shape}")

In [None]:
# Predict in scaled space, then map back to prices with the fitted scaler
scaled_predictions = model.predict(x_test)
predictions = scaler.inverse_transform(scaled_predictions)

print(f"Predictions shape: {predictions.shape}")
print(f"Sample predictions: {predictions[:5].flatten()}")

In [None]:
# Error metrics between the actual test closes and the model's predictions
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Flatten once so both metrics compare 1-D arrays
flat_predictions = predictions.flatten()

mse = mean_squared_error(y_test, flat_predictions)
mae = mean_absolute_error(y_test, flat_predictions)
# RMSE is the square root of the MSE
rmse = np.sqrt(mse)

print(f"Mean Squared Error: {mse:.4f}")
print(f"Root Mean Squared Error: {rmse:.4f}")
print(f"Mean Absolute Error: {mae:.4f}")

In [None]:
# Plot actual vs. predicted closing prices over the test period
plt.figure(figsize=(14, 8))

# Dates of the test rows: everything from the train/test split index onward
test_dates = df.index[training_data_len:]

plt.plot(test_dates, y_test, color='blue', label='Actual Stock Price', linewidth=2)
plt.plot(test_dates, predictions.flatten(), color='red', label='Predicted Stock Price', linewidth=2)

plt.title('Apple Stock Price Prediction', fontsize=16)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Stock Price ($)', fontsize=12)
plt.legend(fontsize=12)
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

print("\nPrediction vs Actual comparison completed!")
# The metrics are only computed and printed by this notebook, not written
# anywhere, so the message must not claim they were saved.
print("Model performance metrics computed.")
print("Trained model saved as 'keras_model.h5' - ready for deployment!")

In [None]:
# Round-trip check of the saved model file.
# NOTE(review): presumably mirrors how app.py loads and calls the model -
# app.py is not visible here, confirm against it.
print("Testing compatibility with app.py...")
from tensorflow.keras.models import load_model

# Read the model back from the file written earlier in the notebook
loaded_model = load_model('keras_model.h5')
print("✓ Model loads successfully")

# Run the reloaded model on a single test window (batch of one)
sample_prediction = loaded_model.predict(x_test[:1])
print(f"✓ Sample prediction shape: {sample_prediction.shape}")
print(f"✓ Sample prediction value: {sample_prediction[0][0]:.4f}")

# Closing status lines
for status_line in (
    "\n🎉 Notebook completed successfully!",
    "📊 All graphs generated",
    "🤖 Model trained and saved",
    "✅ Ready for Streamlit app deployment",
):
    print(status_line)