# Stock Price Prediction using LSTM
This notebook trains an LSTM model to predict if the stock price will go up the next day based on historical stock price data.


## Import Libraries

In [1]:
import os

import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping
from keras.layers import LSTM, Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sqlalchemy import create_engine

ModuleNotFoundError: No module named 'sqlalchemy'

## Database Connection and Data Fetching

In [None]:
# Database connection setup using SQLAlchemy.
# The connection URL is read from the STOCK_DB_URL environment variable when it
# is set, so credentials do not have to live in the notebook.
# NOTE(review): the fallback URL below still embeds a password. It is kept only
# so existing setups keep working; remove it once STOCK_DB_URL is configured.
db_url = os.environ.get("STOCK_DB_URL", "mysql://root:admin123@localhost/Stock_Price_Data")
engine = create_engine(db_url)

# Fetching data from the table into a DataFrame
query = "SELECT * FROM `500002`"
try:
    df = pd.read_sql(query, engine)
finally:
    # Dispose of the engine even when the query raises, so no connection is left open.
    engine.dispose()

## Data Preprocessing
Convert the `Date` column to datetime and set it as the index, sort the rows chronologically, derive the `Price_Up` label, and scale the values with `MinMaxScaler`.


In [None]:
# Parse the dates, index the frame by them, and order rows oldest -> latest.
# Chained (non-inplace) calls rebuild `df` instead of mutating it step by step.
df = (
    df.assign(Date=pd.to_datetime(df['Date']))
      .set_index('Date')
      .sort_index()
)

# Creating a new column 'Price_Up' indicating if the price will go up the next day
next_close = df['Close Price'].shift(-1)
df['Price_Up'] = (next_close > df['Close Price']).astype('float32')

# Select only 'Close Price' and 'Price_Up' columns, and drop the final row.
# The last row has no next-day close (shift(-1) yields NaN there), and
# `NaN > x` evaluates to False, so it would otherwise carry a made-up
# "did not go up" label that the NaN check below cannot detect.
df = df[['Close Price', 'Price_Up']].iloc[:-1]

# Check for NaN values
assert not df.isnull().values.any(), "NaN values found in the dataset!"

# Normalize the data
# NOTE(review): the scaler is fit on every row, including rows that later fall
# into the test split, so test-period min/max influence the training features.
# Consider fitting on the training portion only.
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)
assert not df_scaled.isnull().values.any(), "NaN values found after scaling!"

## Creating Sequences for LSTM
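Slide a fixed-length window over the scaled data to build supervised samples: each input is `seq_length` consecutive rows, and its label is the `Price_Up` value of the row that follows.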

In [None]:
def create_sequences(data, seq_length, target_col='Price_Up'):
    """Build sliding-window samples and their labels from a DataFrame.

    Window ``i`` holds rows ``i .. i + seq_length - 1`` of ``data`` (all
    columns, including ``target_col``). Its label is the ``target_col`` value
    of row ``i + seq_length``, i.e. the row immediately after the window.

    Parameters
    ----------
    data : pandas.DataFrame
        Numeric frame, ordered the way the windows should run.
    seq_length : int
        Number of consecutive rows per window; must be at least 1.
    target_col : str, optional
        Column the labels are read from. Defaults to ``'Price_Up'``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` with shape ``(n_windows, seq_length, n_columns)`` and ``y`` with
        shape ``(n_windows,)``, both ``float32``, where
        ``n_windows = max(len(data) - seq_length, 0)``. When there are too few
        rows, ``X`` still has three dimensions (with a leading 0) so that
        callers reading ``X.shape[1:]`` keep working.

    Raises
    ------
    ValueError
        If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be at least 1, got {seq_length}")

    # Convert once up front instead of slicing the DataFrame on every iteration.
    features = np.asarray(data.values, dtype='float32')
    n_windows = max(len(data) - seq_length, 0)

    xs = np.empty((n_windows, seq_length, features.shape[1]), dtype='float32')
    for start in range(n_windows):
        xs[start] = features[start:start + seq_length]

    # The label of window i is row i + seq_length, so the labels are simply the
    # target column with its first seq_length entries removed.
    ys = np.array(data[target_col].values[seq_length:], dtype='float32')
    return xs, ys

# Each sample is a window of 10 consecutive rows; its label comes from the row that follows.
seq_length = 10
X, y = create_sequences(data=df_scaled, seq_length=seq_length)

## Splitting the Data into Train and Test Sets

In [None]:
# Chronological split without shuffling: the first 70% of the sequences are
# used for training and the remainder is held out for testing.
train_size = int(len(X) * 0.7)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

## Building the LSTM Model

In [None]:
# Three stacked LSTM layers, each followed by dropout, ending in a single
# sigmoid unit. The first two LSTMs return full sequences so the next LSTM
# receives one vector per timestep; the last one returns only its final state.
n_timesteps, n_features = X_train.shape[1], X_train.shape[2]

model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(n_timesteps, n_features), kernel_initializer='he_uniform'),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True, kernel_initializer='he_uniform'),
    Dropout(0.2),
    LSTM(units=50, kernel_initializer='he_uniform'),
    Dropout(0.2),
    Dense(units=1, activation='sigmoid'),
])

# Adam with element-wise gradient clipping at 1.0; binary cross-entropy
# matches the single sigmoid output.
optimizer = Adam(learning_rate=0.001, clipvalue=1.0)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

## Training the Model
Using Early Stopping to prevent overfitting

In [None]:
# Stop once validation loss has gone 10 epochs without improving, then
# restore the weights from the best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Part of the training data (validation_split=0.2) is held out for validation.
fit_options = dict(epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stop])
model.fit(X_train, y_train, **fit_options)

## Evaluating the Model

In [None]:
# evaluate() is unpacked as (loss, accuracy), matching the loss and the single
# 'accuracy' metric passed to compile().
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f"Test Accuracy: {accuracy * 100:.2f}%")

## Predicting and Calculating RMSE

In [None]:
# Model outputs (sigmoid activations) for the held-out sequences.
y_pred = model.predict(X_test)

# Root mean squared error between those outputs and the 0/1 labels.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"RMSE: {rmse:.4f}")