<a href="https://colab.research.google.com/github/SankarSivan/Stock-Price-Prediction-Apple/blob/main/Stock_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')


In [None]:
import sklearn

In [None]:
# Load the AAPL price history CSV from the project's GitHub repository.
# The preview is rendered by the `df.head()` cell that follows, so the frame
# is not printed a second time here.
url = 'https://raw.githubusercontent.com/SankarSivan/Stock-Price-Prediction-Apple/main/AAPL.csv'
df = pd.read_csv(url)

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Preview the last rows of the loaded frame.
df.tail()

In [None]:
# Check for missing values
# Prints the number of null entries found in each column.
print(df.isnull().sum())

In [None]:
# Forward fill missing values.
# `fillna(method='ffill')` is deprecated in pandas 2.x, so the dedicated
# `ffill()` method is used instead. Rebinding `df` (rather than `inplace=True`)
# keeps the cell safe to re-run.
df = df.ffill()

In [None]:
# Ensure no remaining nulls
# Prints the per-column null counts again, this time after the forward fill above.
print(df.isnull().sum())

In [None]:
# Count fully duplicated rows instead of displaying one boolean per row.
df.duplicated().sum()

In [None]:
# Summarize the frame before the 'Date' conversion in the next cell.
df.info()

In [None]:
from datetime import date
# Parse the whole 'Date' column in one vectorized call instead of invoking
# pd.to_datetime once per element through .apply().
df['Date'] = pd.to_datetime(df['Date'])

In [None]:
# List the available columns before the unused ones are dropped in the next cell.
df.columns

In [None]:
# Drop every column that is not used for modelling.
# errors='ignore' plus rebinding makes the cell idempotent: re-running it after
# the columns are already gone is a no-op instead of a KeyError.
df = df.drop(columns=['Open', 'High', 'Low', 'Adj Close', 'Volume'], errors='ignore')

In [None]:
# Re-check the frame after the column drop above.
df.info()

In [None]:
# Plot the closing price against the trading date so the x-axis shows dates
# rather than row positions, and label the figure so it stands alone.
ax = df.plot(x='Date', y='Close', figsize=(12, 6), legend=False)
ax.set(title='AAPL closing price', xlabel='Date', ylabel='Close');

In [None]:
pip install tensorflow

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Normalize the data
# The scaler is fitted on the single 'Close' column and kept so predictions
# can be mapped back to prices later.
# NOTE(review): the fit uses the full series, i.e. it happens before the
# train/test split further down, so the test period's min/max influence the
# scaling — consider fitting on the training rows only.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df[['Close']])

In [None]:
# Create sequences
def create_sequences(data, seq_len=60, pred_gap=1):
    """Slice a series into fixed-length input windows and look-ahead targets.

    Args:
        data: Indexable, sliceable sequence of observations (e.g. a 2-D array
            with one row per time step).
        seq_len: Number of consecutive steps in each input window.
        pred_gap: How many steps past the end of the window the target lies
            (1 means the step immediately after the window).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X[k]`` is ``data[k:k + seq_len]``
        and ``y[k]`` is ``data[k + seq_len + pred_gap - 1]``. Both are empty
        when ``data`` is too short to yield a single window/target pair.
    """
    # Distance from a window's first step to its target step.
    target_offset = seq_len + pred_gap - 1
    n_samples = len(data) - target_offset
    windows = [data[start:start + seq_len] for start in range(n_samples)]
    targets = [data[start + target_offset] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

In [None]:
# Example: For 1, 5, 10 day forecasts
# Each pair holds the input windows and the target `pred_gap` steps past the
# end of each window.
# NOTE(review): in the cells visible here only X_1 is referenced again (for the
# split size); the 5- and 10-step sets do not appear to be used afterwards.
X_1, y_1 = create_sequences(scaled_data, pred_gap=1)
X_5, y_5 = create_sequences(scaled_data, pred_gap=5)
X_10, y_10 = create_sequences(scaled_data, pred_gap=10)

In [None]:
# Train-test split
# Order-preserving split: the first 75% of the windows are used for training
# and the remainder for testing.
X, y = create_sequences(scaled_data, pred_gap=1)
# Size the split from the array that is actually being split, so this cell does
# not silently depend on X_1 having the same length as X.
split = int(0.75 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

## Simple RNN Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense

In [None]:
def build_rnn_model(input_shape):
    """Build and compile the SimpleRNN regressor.

    Args:
        input_shape: Shape of one input sample, forwarded to the recurrent
            layer's ``input_shape`` argument.

    Returns:
        A compiled ``Sequential`` model: a 50-unit ReLU ``SimpleRNN`` followed
        by a single-output ``Dense`` layer, trained with Adam on MSE.
    """
    layer_stack = [
        SimpleRNN(units=50, activation='relu', input_shape=input_shape),
        Dense(1),
    ]
    network = Sequential(layer_stack)
    network.compile(optimizer='adam', loss='mse')
    return network

In [None]:
# Build the RNN for samples shaped like one X_train window (axes 1 and 2 of
# X_train), then train it; validation_split=0.2 reserves part of the training
# data for validation during fitting.
rnn_model = build_rnn_model((X_train.shape[1], X_train.shape[2]))
rnn_model.fit(X_train, y_train, epochs=20, batch_size=64, validation_split=0.2)

## LSTM Model

In [None]:
from tensorflow.keras.layers import LSTM, Dense, Dropout
from scikeras.wrappers import KerasRegressor
from tensorflow.keras.optimizers import Adam

In [None]:
# model building
def build_lstm_model(input_shape, units = 50, Dropout_rate = 0.2, learning_rate = 0.001):
    """Build and compile the single-layer LSTM regressor.

    Args:
        input_shape: Shape of one input sample, forwarded to the LSTM layer's
            ``input_shape`` argument.
        units: Number of LSTM units.
        Dropout_rate: Rate passed to the ``Dropout`` layer that follows the LSTM.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model (LSTM -> Dropout -> Dense(1)) using MSE loss.
    """
    layer_stack = [
        # return_sequences=False: only the final step's output feeds the head.
        LSTM(units=units, return_sequences=False, input_shape=input_shape),
        Dropout(Dropout_rate),
        Dense(1),
    ]
    network = Sequential(layer_stack)
    network.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    return network

# Model Execution
# Build the LSTM with the default hyperparameters for samples shaped like one
# X_train window, then train it with the same settings as the RNN above.
lstm_model = build_lstm_model((X_train.shape[1], X_train.shape[2]))
lstm_model.fit(X_train, y_train, epochs = 20, batch_size = 64, validation_split =0.2)

## Evaluate Models

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
def evaluate(model, X_test, y_test, scaler):
    """Score a model on the test set in the original (unscaled) units.

    Predictions and targets are mapped back through ``scaler`` before the
    errors are computed. The values are placed in column 0 of an array that
    is padded with zeros to the number of features the scaler was fitted on
    (``scaler.n_features_in_``, falling back to 1 if the attribute is
    missing). This replaces the previous fixed single padding column, whose
    width did not depend on the scaler at all.

    Args:
        model: Object exposing ``predict(X_test)`` that returns one scaled
            prediction per sample.
        X_test: Input windows passed to ``model.predict``.
        y_test: Scaled target values, one per sample.
        scaler: Fitted scaler exposing ``inverse_transform``; the target is
            assumed to be its first feature.

    Returns:
        Tuple ``(rmse, mae)`` of floats computed on the inverse-transformed values.
    """
    n_features = getattr(scaler, 'n_features_in_', 1)

    def _unscale(values):
        # Put the target in column 0 and zero-fill any other scaler features.
        column = np.asarray(values, dtype=float).reshape(-1)
        padded = np.zeros((len(column), n_features))
        padded[:, 0] = column
        return scaler.inverse_transform(padded)[:, 0]

    pred = _unscale(model.predict(X_test))
    true = _unscale(y_test)
    errors = true - pred
    # Same definitions as sklearn's mean_squared_error / mean_absolute_error
    # for 1-D inputs, computed directly with NumPy.
    rmse = float(np.sqrt(np.mean(errors ** 2)))
    mae = float(np.mean(np.abs(errors)))
    return rmse, mae

# Score both trained models on the same held-out windows and report the
# errors with two decimals.
rmse_rnn, mae_rnn = evaluate(rnn_model, X_test, y_test, scaler)
rmse_lstm, mae_lstm = evaluate(lstm_model, X_test, y_test, scaler)

print(f"SimpleRNN RMSE: {rmse_rnn:.2f}, MAE: {mae_rnn:.2f}")
print(f"LSTM RMSE: {rmse_lstm:.2f}, MAE: {mae_lstm:.2f}")

In [None]:
# predict and inverse scale
y_pred = lstm_model.predict(X_test)
# The scaler was fitted on the single 'Close' column, so the (n, 1) arrays can
# be inverse-transformed directly; no zero-padding derived from df's column
# count is needed.
y_pred_rescaled = scaler.inverse_transform(np.reshape(y_pred, (-1, 1)))[:, 0]
y_test_rescaled = scaler.inverse_transform(np.reshape(y_test, (-1, 1)))[:, 0]


In [None]:
# Model Evaluation
# NOTE(review): nothing is appended to this list before the tuning cell further
# down rebinds `results`; it looks like a leftover placeholder.
results = []

In [None]:
# plot the Actual vs Predicted
# Explicit figure/axes API, with axis labels so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(y_test_rescaled, label='Actual')
ax.plot(y_pred_rescaled, label='Predicted')
ax.set(title=' Stock Price Prediction', xlabel='Test sample', ylabel='Close price')
ax.legend();

## Hyperparameter Tuning with GridSearchCV

In [None]:
pip install scikeras tensorflow scikit-learn

In [None]:
from sklearn.model_selection import GridSearchCV
# `tensorflow.keras.wrappers.scikit_learn` is no longer shipped with recent
# TensorFlow releases; SciKeras provides the replacement KerasRegressor wrapper.
from scikeras.wrappers import KerasRegressor

### Manual Hyperparameter Tuning

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import numpy as np

# Assuming X_train, X_test, y_train, y_test are already defined and scaled

from itertools import product

# Define the hyperparameter grid
param_grid = {
    'units': [50, 75],
    'Dropout_rate': [0.1, 0.2],
    'learning_rate': [0.01]
}

best_val_loss = float('inf')  # validation loss that drives the selection
best_loss = float('inf')      # test loss of the configuration selected so far
best_params = None
results = []

input_shape = (X_train.shape[1], X_train.shape[2])

# Manual Grid Search
# product() walks the grid in the same order as nested units -> dropout ->
# learning-rate loops would.
for units, dropout_rate, learning_rate in product(
    param_grid['units'], param_grid['Dropout_rate'], param_grid['learning_rate']
):
    print(f"Training with units={units}, dropout_rate={dropout_rate}, learning_rate={learning_rate}")

    # Reuse the notebook's LSTM builder so the tuned architecture cannot drift
    # from the one trained earlier.
    model = build_lstm_model(
        input_shape,
        units=units,
        Dropout_rate=dropout_rate,
        learning_rate=learning_rate,
    )

    # Train the model
    history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_split=0.2, verbose=0)

    # Validation loss of the final epoch, i.e. of the weights evaluated below.
    val_loss = history.history['val_loss'][-1]

    # Evaluate the model on the test set (reported only, never used to choose).
    loss = model.evaluate(X_test, y_test, verbose=0)

    print(f"Validation Loss: {val_loss}")
    print(f"Test Loss: {loss}")

    results.append({
        'units': units,
        'dropout_rate': dropout_rate,
        'learning_rate': learning_rate,
        'val_loss': val_loss,
        'test_loss': loss
    })

    # Select on validation loss: choosing on the test loss would make the
    # reported test score optimistically biased.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_loss = loss
        best_params = {'units': units, 'dropout_rate': dropout_rate, 'learning_rate': learning_rate}

print("\nBest Hyperparameters:")
print(best_params)
print(f"Best Validation Loss: {best_val_loss}")
print(f"Test Loss of Best Model: {best_loss}")

# All combinations, best validation loss first.
pd.DataFrame(results).sort_values('val_loss')