Import all the libraries and functions used throughout this notebook.

In [None]:
import requests
import pandas as pd
from datetime import datetime
import time
import requests
import pandas as pd
from datetime import datetime
import time
import io
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler
from keras.models import load_model


Define a function `fetch_stock_data` that uses web scraping to fetch historical stock data from Yahoo Finance for a specific company that the user chooses (for demonstration purposes we are currently using the stock data for Apple (AAPL)).
The data is then saved to an Excel file by the `save_to_excel` function; both functions are called from the main block.

In [None]:
def fetch_stock_data(stock_name):
    """Download daily price history for a ticker from Yahoo Finance.

    The requested window ends now and starts 1826 days (about five years)
    earlier.

    Parameters
    ----------
    stock_name : str
        Ticker symbol inserted into the download URL (e.g. "AAPL").

    Returns
    -------
    pandas.DataFrame or None
        The CSV response parsed into a DataFrame when the server answers
        with HTTP 200. ``None`` when the server answers with any other
        status code, or when every attempt raised a
        ``requests.RequestException``.
    """
    # Take a single timestamp so both ends of the window share the same "now".
    now = datetime.now()
    end_date = int(now.timestamp())
    start_date = int((now - pd.DateOffset(days=1826)).timestamp())

    url = f"https://query1.finance.yahoo.com/v7/finance/download/{stock_name}?period1={start_date}&period2={end_date}&interval=1d&events=history"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36"
    }

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            # Without a timeout a stalled connection would block the notebook
            # indefinitely and the retry logic would never get a chance to run.
            response = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as e:
            print(f"Request failed: {e}")
            # Only wait when another attempt is actually going to follow.
            if attempt < max_attempts:
                time.sleep(2)
            continue

        if response.status_code == 200:
            return pd.read_csv(io.StringIO(response.text))

        # A non-200 answer is reported and not retried.
        print("Failed to fetch data")
        print(f"Status Code: {response.status_code}")
        return None

    print("Maximum retries exceeded. Unable to fetch data.")
    return None

def save_to_excel(stock_data, stock_name):
    """Write the fetched history to ``Historical Data/<stock_name>.xlsx``.

    Parameters
    ----------
    stock_data : pandas.DataFrame or None
        Data to save. ``None`` is accepted and makes the call a no-op.
    stock_name : str
        Ticker symbol used as the workbook's file name.

    Returns
    -------
    None
    """
    if stock_data is None:
        return

    # Imported here so the function does not depend on a file-level import.
    import os

    # to_excel does not create missing parent folders, so make sure the
    # output directory exists before writing.
    os.makedirs("Historical Data", exist_ok=True)
    stock_data.to_excel(f"Historical Data/{stock_name}.xlsx", index=False)
    print(f"Stock data for {stock_name} saved successfully to the Excel file.")

if __name__ == "__main__":
    # Strip stray whitespace so it cannot leak into the request URL or the
    # name of the Excel file that later cells read back.
    stock_name = input("Enter the stock ticker symbol: ").strip()
    # An empty ticker cannot be fetched; skip the request and report the
    # failure through the same message as any other failed retrieval.
    data = fetch_stock_data(stock_name) if stock_name else None
    if data is not None:
        save_to_excel(data, stock_name)
    else:
        print("Data retrieval failed or invalid ticker symbol provided.")


Now we load the Excel file into a DataFrame and do some data analysis on the existing data to identify which parameters we want to train our LSTM model on.
First, we analyze the Adjusted Close and Volume trends for the company by making various plots.

In [None]:

# Load the saved history; summary statistics and column info are computed on
# the frame exactly as it was read from the workbook.
df = pd.read_excel(f"Historical Data/{stock_name}.xlsx")
df.describe()
df.info()

# Index the rows by trading date so the plots below share a time axis.
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')

# Adjusted close over time.
fig, price_ax = plt.subplots(figsize=(10, 6))
price_ax.plot(df['Adj Close'])
price_ax.set_ylabel('Adj Close')
price_ax.set_xlabel('Date')
price_ax.set_title(f'Closing Price of {stock_name}')
plt.show()

# Traded volume over time.
volume_ax = df['Volume'].plot()
volume_ax.set_ylabel('Volume')
volume_ax.set_xlabel('Date')
volume_ax.set_title(f"Sales Volume for {stock_name}")
plt.show()

Now we compute the daily return of the stock by using the `.pct_change()` function, which tells us how much the adjusted close price changed from one day to the next.

In [None]:
# Fractional day-over-day change of the adjusted close.
df_daily_return = df['Adj Close'].pct_change()

# Time series of the daily returns.
df_daily_return.plot(legend=True, linestyle='--', marker='o')
plt.show()

# Distribution of the daily returns.
hist_ax = df_daily_return.hist(bins=50)
hist_ax.set_xlabel('Daily Return')
hist_ax.set_ylabel('Counts')
hist_ax.set_title(f'{stock_name}')
plt.show()


Now we plot the moving averages of the stock over 10, 20 and 50 days.

In [None]:


moving_avg = pd.read_excel(f"Historical Data/{stock_name}.xlsx")

# Rolling-window lengths (in rows) and the column each average is stored in.
ma_day = [10, 20, 50]
ma_columns = [f"MA for {window} days" for window in ma_day]

for ma, column_name in zip(ma_day, ma_columns):
    moving_avg[column_name] = moving_avg['Adj Close'].rolling(ma).mean()

# One 12x8 inch axes holding the price together with all three averages.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(12, 8))
moving_avg[['Adj Close', *ma_columns]].plot(ax=axes)
axes.set_title(f'Stock Analysis for {stock_name}')

# Spacing between subplots; adjust these values as needed.
plt.subplots_adjust(wspace=0.3, hspace=0.3)

plt.show()

Finally we plot the expected return against the risk involved in investing in the particular stock.

In [None]:
closing_df = pd.read_excel(f"Historical Data/{stock_name}.xlsx")

# Force the price column to numeric; anything unparseable becomes NaN and the
# corresponding rows are dropped before returns are computed.
closing_df['Adj Close'] = pd.to_numeric(closing_df['Adj Close'], errors='coerce')
closing_df = closing_df.dropna(subset=['Adj Close'])

# Daily fractional returns; the first row has no predecessor and is dropped.
tech_return = closing_df['Adj Close'].pct_change()
rets = tech_return.dropna()

# Marker size passed to scatter.
area = np.pi * 20

# A single point: mean daily return on x, standard deviation of returns on y.
plt.figure(figsize=(10, 8))
plt.scatter(rets.mean(), rets.std(), s=area)
plt.xlabel('Expected return')
plt.ylabel('Risk')
plt.title(f'Risk vs. Expected Return for {stock_name}')

# Label the point with the ticker; rets.name is just the column name
# ('Adj Close'), which says nothing about which stock is shown.
plt.annotate(stock_name, xy=(rets.mean(), rets.std()), xytext=(50, 50), textcoords='offset points', ha='right', va='bottom',
             arrowprops=dict(arrowstyle='-', color='blue', connectionstyle='arc3,rad=-0.3'))
plt.show()

Now we select the 'Adj Close' column to train our LSTM on. We will try to predict the future prices of the stock based on this data, so we select it as our training data.

In [None]:
# Re-read the saved history and keep only the adjusted close, the single
# series the model is trained on.
price_history = pd.read_excel(f'Historical Data/{stock_name}.xlsx')
training_data = price_history['Adj Close']
training_data

We now have to reshape the data to fit our LSTM model: we first convert the data to a NumPy array of shape (-1, 1) and then use `MinMaxScaler` to scale the values to the range (0, 1) before they go into our LSTM.

In [None]:
# The scaler works on 2-D input, so turn the series into one column first.
price_column = training_data.values.reshape(-1, 1)

# Fit the (0, 1) min-max scaling on the full series, then apply it.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit(price_column).transform(price_column)
scaled_data

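Define a feature set array that stores, for each day, the 60 preceding scaled prices, together with a labels array that stores the scaled price of that day.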

In [None]:
# Every index from 60 onward yields one sample: the 60 scaled values before
# that index are the features and the value at the index is the label.
window_ends = range(60, len(scaled_data))
features_set = np.array([scaled_data[end - 60:end, 0] for end in window_ends])
labels = np.array([scaled_data[end, 0] for end in window_ends])


In [None]:
# Add a trailing axis of length 1 so the array is (samples, timesteps, 1).
n_samples, n_timesteps = features_set.shape[0], features_set.shape[1]
features_set = features_set.reshape(n_samples, n_timesteps, 1)


In [None]:
# Build the held-out test windows from the last 15% of the rows.
df = pd.read_excel(f'Historical Data/{stock_name}.xlsx')
data = df.filter(['Close'])
dataset = data.values
train_dataset_len = int(np.ceil(len(dataset) * .85))

# Start 60 rows before the split so the first test window is complete.
test_data = scaled_data[train_dataset_len - 60:, :]

# NOTE(review): y_test is taken from 'Close', whereas scaled_data was produced
# by a scaler fitted on 'Adj Close' - confirm which column is intended.
y_test = dataset[train_dataset_len:, :]

# One window of 60 scaled values per test row, shaped (samples, 60, 1).
x_test = np.array([test_data[i - 60:i, 0] for i in range(60, len(test_data))])
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
from sklearn.metrics import mean_squared_error
from keras.callbacks import ModelCheckpoint

def create_lstm_model(input_shape=None):
    """Build and compile the stacked LSTM regressor.

    The network is four LSTM layers (128, 64, 64 and 32 units), each followed
    by a Dropout layer, and a single-unit Dense output. It is compiled with
    the Adam optimizer and mean-squared-error loss.

    Parameters
    ----------
    input_shape : tuple, optional
        ``(timesteps, features)`` of one input sample. Defaults to
        ``(features_set.shape[1], 1)``, i.e. the window length of the
        notebook-level ``features_set`` array with one feature per step.

    Returns
    -------
    keras.models.Sequential
        The compiled, untrained model.
    """
    if input_shape is None:
        input_shape = (features_set.shape[1], 1)

    model = Sequential()
    model.add(LSTM(units=128, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.65))
    model.add(LSTM(units=64, return_sequences=True))
    model.add(Dropout(0.40))
    model.add(LSTM(units=64, return_sequences=True))
    model.add(Dropout(0.40))
    # Last recurrent layer returns only its final state for the Dense head.
    model.add(LSTM(units=32))
    model.add(Dropout(0.40))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Shape of one training sample: window length x one feature.
input_shape = (features_set.shape[1], 1)

# Create the LSTM model
lstm_model = create_lstm_model(input_shape)

# Chronological split: the last 20% of the windows are held out for validation.
validation_split = 0.2
split_index = int(len(features_set) * (1 - validation_split))
train_features, val_features = features_set[:split_index], features_set[split_index:]
train_labels, val_labels = labels[:split_index], labels[split_index:]

# Keep the weights of the epoch with the lowest validation loss.
mc = ModelCheckpoint(f'LSTM({stock_name}).hdf5', monitor='val_loss', verbose=1, save_best_only=True, mode='min')

# Train on the training portion only; fitting on the full features_set would
# include the validation windows and make the validation loss meaningless.
history = lstm_model.fit(train_features, train_labels, epochs=700, batch_size=392, callbacks=[mc], validation_data=(val_features, val_labels))



In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, confusion_matrix

# Score the in-memory model on the windowed data, in scaled units.
# NOTE(review): features_set is the same array the model was fitted on, so
# these figures describe fit quality rather than held-out performance.
predictions = lstm_model.predict(features_set)

mse, mae, r2 = (
    metric(labels, predictions)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

print(f'Mean Squared Error: {mse}')
print(f'Mean Absolute Error: {mae}')
print(f'R-squared Score: {r2}')

In [None]:


# Reload the checkpointed model and predict the held-out windows.
model = load_model(f'LSTM({stock_name}).hdf5')
predictions = model.predict(x_test)
# Undo the min-max scaling so the predictions are back in price units.
predictions = scaler.inverse_transform(predictions)

# The scaler was fitted on 'Adj Close', so the predictions are adjusted-close
# prices; compare them with that column rather than with 'Close'.
actual = df.filter(['Adj Close'])
train = actual[:train_dataset_len]
# Copy the slice so adding a column does not write into a view of `actual`.
valid = actual[train_dataset_len:].copy()
valid['Predictions'] = predictions

# Visualize the data
plt.figure(figsize=(20, 6))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Adj Close Price', fontsize=18)
plt.plot(train['Adj Close'])
plt.plot(valid[['Adj Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()


model.summary()

In [None]:
# Assuming test_data contains the last 60 days of the available data
x_test = []
x_test.append(test_data[-60:])
x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
predictions = []

for _ in range(15):
        # Predict the next day
        prediction = lstm_model.predict(x_test)
        
        # Append the prediction to the results
        predictions.append(prediction[0, 0])

        # Update x_test for the next prediction
        x_test = np.roll(x_test, -1, axis=1)
        x_test[0, -1, 0] = prediction

    # Inverse transform the predictions to get actual stock prices
predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))
plt.plot(predictions)

In [None]:
# Print the current contents of `predictions` as plain text.
print(predictions)