In [19]:
# Installs the yfinance library, which allows easy access to historical market data from Yahoo Finance.
!pip install yfinance



In [21]:
# Installs the streamlit library for building the user interface of the model.
!pip install streamlit



In [24]:
# fundamental package for numerical computations in Python.
import numpy as np

# A library used for data manipulation and analysis, especially for handling data in DataFrame format.
import pandas as pd

# library to download historical stock price data.
import yfinance as yf

# the load_model function from Keras, which is used to load a pre-trained deep learning model.
from keras.models import load_model

# create a web-based user interface.
import streamlit as st

# create plots and visualizations.
import matplotlib.pyplot as plt

In [26]:
# Date range of the price history to request ('YYYY-MM-DD' strings).
start = '2012-01-01'
end = '2025-08-11'

# Ticker symbol whose price history is downloaded.
stock = 'GooG'

try:
    # Request the ticker's history between `start` and `end` through yfinance.
    data = yf.download(stock, start=start, end=end)

    if not data.empty:
        # Sanity check: print the first 5 rows of what was downloaded.
        print(data.head())
    else:
        # The request succeeded but returned no rows for this ticker/date range.
        print(f"No data found for ticker {stock}.")

except Exception as e:
    # Report any error raised above instead of stopping the notebook.
    # Note: if the download itself failed, `data` is left undefined for later cells.
    print(f"Failed to download data: {e}")

[*********************100%***********************]  1 of 1 completed

                 Open       High        Low      Close  Adj Close     Volume
Date                                                                        
2012-01-03  16.262545  16.641375  16.248346  16.573130  16.554291  147611217
2012-01-04  16.563665  16.693678  16.453827  16.644611  16.625692  114989399
2012-01-05  16.491436  16.537264  16.344486  16.413727  16.395069  131808205
2012-01-06  16.417213  16.438385  16.184088  16.189817  16.171415  108119746
2012-01-09  16.102144  16.114599  15.472754  15.503389  15.485767  233776981





In [27]:
# Print the 10 earliest rows to confirm the download looks sensible.
print(data.iloc[:10])

                 Open       High        Low      Close  Adj Close     Volume
Date                                                                        
2012-01-03  16.262545  16.641375  16.248346  16.573130  16.554291  147611217
2012-01-04  16.563665  16.693678  16.453827  16.644611  16.625692  114989399
2012-01-05  16.491436  16.537264  16.344486  16.413727  16.395069  131808205
2012-01-06  16.417213  16.438385  16.184088  16.189817  16.171415  108119746
2012-01-09  16.102144  16.114599  15.472754  15.503389  15.485767  233776981
2012-01-10  15.684959  15.785831  15.365158  15.520326  15.502685  176483032
2012-01-11  15.529292  15.675993  15.470015  15.590563  15.572842   96359832
2012-01-12  15.721572  15.763166  15.604012  15.682219  15.664393   75289148
2012-01-13  15.598035  15.615220  15.468520  15.566403  15.548710   92637933
2012-01-17  15.740501  15.740501  15.583589  15.655818  15.638022   76658261


In [28]:
# Print the 10 most recent rows to inspect the latest data points.
print(data.iloc[-10:])

                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2024-08-23  166.550003  167.949997  165.660004  167.429993  167.429993   
2024-08-26  168.154999  169.380005  166.320007  167.929993  167.929993   
2024-08-27  167.610001  168.244995  166.160004  166.380005  166.380005   
2024-08-28  166.779999  167.389999  163.279999  164.500000  164.500000   
2024-08-29  166.059998  167.630005  161.981995  163.399994  163.399994   
2024-08-30  164.220001  165.279999  163.410004  165.110001  165.110001   
2024-09-03  163.315002  163.380005  157.854996  158.610001  158.610001   
2024-09-04  158.074997  160.399994  157.440002  157.809998  157.809998   
2024-09-05  157.779999  161.014999  157.520004  158.600006  158.600006   
2024-09-06  158.690002  159.220001  151.934998  152.130005  152.130005   

              Volume  
Date                  
2024-08-23  14281600  
2024-08-26  11990300  
2024-08-27  1371820

In [29]:
# 100-row moving average of the closing price, used for trend analysis:
#   data['Close']         -> the column of closing prices
#   .rolling(window=100)  -> a sliding window over 100 consecutive rows (trading days)
#   .mean()               -> the average of the closing prices inside each window
# Rows whose window is not yet full (the first 99) come out as NaN.
ma_100_days = data['Close'].rolling(window=100).mean()

In [None]:
# Switch matplotlib to the TkAgg backend so figures open in a separate window.
# NOTE(review): TkAgg needs a Tk-capable display; confirm this is wanted when the
# notebook runs on a remote or headless Jupyter server.
plt.switch_backend('TkAgg')

# One 8x6-inch figure holding a single set of axes.
fig, ax = plt.subplots(figsize=(8, 6))

# Red line: 100-day moving average. Green line: raw closing price.
ax.plot(ma_100_days, 'r', label='100 Days MA')
ax.plot(data.Close, 'g', label='Close Price')

# Legend, title and axis labels so the figure can be read on its own.
ax.legend()
ax.set_title('100 Days Moving Average and Close Price')
ax.set_xlabel('Year')
ax.set_ylabel('Price')

# Render the figure.
plt.show()

In [None]:
# 200-row moving average of the closing price. It is a slower trend line than the
# 100-day average and is plotted together with it in the next figure.
# Rows whose window is not yet full (the first 199) come out as NaN.
ma_200_days = data['Close'].rolling(window=200).mean()

In [None]:
# New 8x6-inch figure; `fig2` keeps a handle to it, `ax2` is its (single) set of axes.
fig2 = plt.figure(figsize=(8, 6))
ax2 = fig2.gca()

# Draw, in order: 100-day MA (red), 200-day MA (blue), closing price (green).
for series, fmt, label in (
    (ma_100_days, 'r', '100 Days MA'),
    (ma_200_days, 'b', '200 Days MA'),
    (data.Close, 'g', 'Close Price'),
):
    ax2.plot(series, fmt, label=label)

# Legend, title and axis labels for clarity.
ax2.legend()
ax2.set_title('100 and 200 Days Moving Average and Close Price')
ax2.set_xlabel('Year')
ax2.set_ylabel('Price')

# Render the figure.
plt.show()

In [None]:
# Keep only the rows that have no missing values, so later steps work on clean data.
data = data.dropna()

In [None]:
# Chronological 80/20 split of the closing prices: the earliest 80% of the rows
# form the training set, the remaining rows form the test set.
split_idx = int(len(data) * 0.80)
data_train = pd.DataFrame(data.Close.iloc[:split_idx])
data_test = pd.DataFrame(data.Close.iloc[split_idx:])

In [None]:
# Display the cleaned data set. For a long frame the notebook shows the first and
# last rows together with the shape (number of observations x number of features).
data

In [None]:
# Number of rows (data points) in the training set.
len(data_train)

In [None]:
# Number of rows (data points) in the test set.
len(data_test)

In [None]:
# MinMaxScaler comes from scikit-learn's preprocessing module.
from sklearn.preprocessing import MinMaxScaler

# Scaler that linearly rescales each feature into the range [0, 1].
# It is only created here; it learns the data's min/max when it is fitted later.
scaler = MinMaxScaler(feature_range=(0,1))

In [None]:
# Fit the MinMaxScaler on the TRAINING closing prices and scale them to [0, 1].
# The scaler must learn its min/max from the training split: fitting it on
# data_test (as this cell previously did) trains the model on test data and
# leaves the real training set unused.
data_train_scale = scaler.fit_transform(data_train)

In [None]:
# Build the supervised training set for the LSTM from the scaled series:
#   x[k] = the 100 consecutive scaled values that precede position k + 100
#   y[k] = the scaled value at position k + 100 (column 0), i.e. the value to predict
seq_len = 100
n_rows = data_train_scale.shape[0]

# Assemble both collections directly as NumPy arrays.
x = np.array([data_train_scale[stop - seq_len:stop] for stop in range(seq_len, n_rows)])
y = np.array([data_train_scale[stop, 0] for stop in range(seq_len, n_rows)])

### Summary:
- `Data Cleaning`: The data is first cleaned by dropping any missing values.
- `Data Splitting`: The data is split into training (80%) and testing (20%) sets based on the closing prices.
- `Data Normalization`: The training data is then scaled to a range of [0, 1] using MinMaxScaler. The scaler must be fitted on the training split only; fitting it on the test split instead would be a mistake.
- `Sequence Preparation`: Finally, sequences of 100 consecutive data points are prepared for the LSTM model, with the next data point in the sequence serving as the target value. These sequences are then converted into NumPy arrays, ready for model training.

#### The below block of code builds a deep learning model using Keras, specifically an LSTM (Long Short-Term Memory) network, which is commonly used for time series forecasting like stock price prediction.


#### Explanation of the Keras modules imported to build the LSTM model:

- `Sequential`: This is a linear stack of layers in Keras, where we can add layers sequentially to build the model.
- `LSTM`: This layer is an LSTM network layer, which is designed to handle sequential data and capture long-term dependencies.
- `Dropout`: This layer helps to prevent overfitting by randomly setting a fraction of input units to 0 at each update during training.
- `Dense`: This is a regular densely connected neural network layer, used as the output layer in this case.
- `Input`: Used to define the shape of the input data in the model.


In [None]:
# Keras building blocks used to assemble the LSTM network.
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense, Input

# Stacked-LSTM regressor, declared as one ordered list of layers
# (equivalent to calling model.add(...) once per layer, in this order).
model = Sequential([
    # Input: sequences of x.shape[1] time steps (100 from the data preparation)
    # with 1 feature per step (the scaled closing price).
    Input(shape=(x.shape[1], 1)),

    # LSTM 1: 50 units, ReLU activation; return_sequences=True hands the full
    # output sequence to the next LSTM layer. Followed by 20% dropout.
    LSTM(units=50, activation='relu', return_sequences=True),
    Dropout(0.2),

    # LSTM 2: 60 units, followed by 30% dropout.
    LSTM(units=60, activation='relu', return_sequences=True),
    Dropout(0.3),

    # LSTM 3: 80 units, followed by 40% dropout.
    LSTM(units=80, activation='relu', return_sequences=True),
    Dropout(0.4),

    # LSTM 4 (last): 120 units, returns only the final step's output,
    # followed by 50% dropout.
    LSTM(units=120, activation='relu'),
    Dropout(0.5),

    # Output: a single neuron producing the predicted (scaled) next value.
    Dense(units=1),
])

### Summary:
##### Model Architecture:
- The model consists of four LSTM layers, each with increasing numbers of units (50, 60, 80, 120), followed by Dropout layers to prevent overfitting.
- The return_sequences=True in the first three LSTM layers ensures that each layer passes the entire sequence of outputs to the next LSTM layer.
- The final LSTM layer outputs only the last value in the sequence, which is passed to a Dense layer to make the prediction.
- The Dropout rates increase with each layer to further mitigate overfitting, culminating in a final Dropout rate of 50%.

##### This architecture is designed to capture the temporal dependencies in the stock price data, and the final model output will be the predicted stock price for the next time step based on the input sequence.

In [None]:
# Configure the model for training:
#   loss='mean_squared_error' -> average squared difference between predicted and
#                                actual values, the quantity minimised during training
#   optimizer='adam'          -> Adam, an adaptive-learning-rate optimiser
model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Train on the windows `x` with targets `y`:
#   batch_size=32 -> weights are updated after every 32 samples
#   epochs=50     -> 50 complete passes over the training data
#   verbose=1     -> print progress and the loss for each epoch
model.fit(x, y, batch_size=32, epochs=50, verbose=1)

#### Summary:

#### Model Compilation:

- `The model is compiled` using the Adam optimizer and Mean Squared Error as the loss function, which is well-suited for regression tasks like stock price prediction.
#### Model Training:

- `The model is trained` on the input data for 50 epochs. During each epoch, the data is processed in batches of 32 samples. The training progress is displayed on the console, providing feedback on the loss value at each epoch.
#### Learning Process:

- `The model will adjust` its weights over 50 epochs to minimize the mean squared error between its predictions and the actual stock prices. The Adam optimizer helps in efficiently updating the weights based on the calculated gradients during each batch of training.

#### This process is essential for teaching the LSTM model to recognize patterns in the historical stock price data and make predictions about future prices.








In [None]:
# Print a summary of the model architecture (details about each layer).
model.summary()

In [None]:
# Last 100 rows of the training set: the history needed to build the first
# 100-step input window for the test period.
pas_100_days = data_train.iloc[-100:]

In [None]:
# Prepend the last 100 training rows to the test data so that the first test
# prediction has a full 100-step history. ignore_index=True replaces the date
# index with a fresh 0..n-1 integer index.
# NOTE: this overwrites data_test, so re-running the cell prepends another
# 100 rows each time; re-run the split cell first if this one must be repeated.
data_test= pd.concat([pas_100_days, data_test], ignore_index=True)

In [None]:
# Display the test frame after the 100 history rows were prepended.
data_test

In [None]:
# Display the 100 training rows that were used as history for the test set.
pas_100_days

In [None]:
# Scale the test data with the scaler that was already fitted earlier in the
# notebook. Only transform() is used here: calling fit_transform() would re-fit
# the scaler on the test data, leaking test statistics and putting the test
# inputs on a different scale from the one the model was trained on.
# Test values outside the fitted min/max map outside [0, 1]; that is expected.
data_test_scale = scaler.transform(data_test)

In [None]:
# Build the model inputs and targets for the test period from the scaled series:
#   x[k] = the 100 consecutive scaled values that precede position k + 100
#   y[k] = the scaled value at position k + 100 (column 0), i.e. the true next value
seq_len = 100
n_rows = data_test_scale.shape[0]

# Assemble both collections directly as NumPy arrays for efficient processing.
x = np.array([data_test_scale[stop - seq_len:stop] for stop in range(seq_len, n_rows)])
y = np.array([data_test_scale[stop, 0] for stop in range(seq_len, n_rows)])

In [None]:
# Use the trained LSTM model to predict the next value for each 100-step window in x.
# The predictions are still in the scaled space and are stored in y_predict.
y_predict = model.predict(x)

In [None]:
# Display the raw (still scaled) predictions.
y_predict

In [None]:
# Show the per-feature multiplicative factor the fitted MinMaxScaler applies.
# The attribute is a NumPy array with one entry per feature (one here: Close).
scaler.scale_

In [None]:
scale = 1/scaler.scale_

In [None]:
y_predict = y_predict * scale

In [None]:
y = y * scale

In [None]:
# Compare predictions with the actual prices over the test period.
fig_pred = plt.figure(figsize=(10, 8))
ax_pred = fig_pred.gca()

# The test targets correspond to the last len(y) rows of `data`, so the tail of
# its date index is used as the x-axis for both curves.
ax_pred.plot(data.index[-len(y_predict):], y_predict, color='red', linestyle='dashed', label='Predicted Price')
ax_pred.plot(data.index[-len(y):], y, color='blue', label='Original Price')

# Axis labels, title and legend.
ax_pred.set_xlabel('Date')
ax_pred.set_ylabel('Price')
ax_pred.set_title('Stock Price Predictions vs Original Values')
ax_pred.legend()

# Render the figure.
plt.show()

In [None]:
# Save the trained model to 'Stock Prediction Model.keras' in the current working
# directory so it can be reloaded later (e.g. with keras.models.load_model).
model.save('Stock Prediction Model.keras')