# Import Necessary Libraries

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import warnings
warnings.filterwarnings("ignore")


# Access Data
###  Enter the stock ticker, start date, and end date. Note that the first 70% of the data will be used for training, whereas the last 30% will be used for validation.
##### Example Inputs: "googl", "2010-01-01" , "2023-09-12"

In [None]:
# Collect the download parameters interactively. Surrounding whitespace is
# stripped so that a stray space typed at the prompt does not end up inside
# the ticker symbol or the date strings.
Ticker_Symbol = input("Ticker of the Stock: ").strip()
Start_Date = input("Start(YYYY-MM-DD): ").strip()
End_Date = input("End(YYYY-MM-DD): ").strip()

# Data Exploration

In [None]:
# Download the price history for the requested ticker and date range.
Data = yf.download(Ticker_Symbol, Start_Date, End_Date)
# Fail early with a clear message instead of letting later cells break on an
# empty frame (e.g. a mistyped ticker or a range with no trading days).
if Data.empty:
    raise ValueError(
        f"No price data returned for {Ticker_Symbol!r} "
        f"between {Start_Date} and {End_Date}."
    )
# NOTE(review): some yfinance releases return two-level (field, ticker)
# columns even for a single ticker; keep only the field level so that
# Data["Close"] is a Series. Confirm against the installed yfinance version.
if isinstance(Data.columns, pd.MultiIndex):
    Data.columns = Data.columns.get_level_values(0)
# Move the "Date" index into a regular column so it can be passed to plots.
Data = Data.reset_index()
Data

In [None]:
# Print a summary of the frame (per-column dtype, non-null count, and memory
# usage) to check whether any column needs a dtype change before use.
Data.info()

In [None]:
# Count the missing values in every column.
Data.isna().sum()

# Data Visualization

### Stock Price

In [None]:
# Line chart of the closing price over the downloaded period.
fig, ax = plt.subplots(figsize=(12, 6))
ax.set_title("Closing Stock Price on Each Day")
ax.plot(Data["Date"], Data["Close"], "b-")
ax.set(xlabel="Day", ylabel="Stock Price")
ax.grid()
plt.show()

### Moving Average

In [None]:
# 60-row and 250-row simple moving averages of the closing price.
MA60 = Data["Close"].rolling(window=60).mean()
MA250 = Data["Close"].rolling(window=250).mean()

# Overlay both averages on the closing price; each series is plotted
# against its own index because no explicit x values are given.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(Data["Close"], "b-", label="Closing Price")
ax.plot(MA60, "r-", label="MA60")
ax.plot(MA250, "g-", label="MA250")
ax.legend()
ax.set_title("Stock Price")
ax.grid()
plt.show()

### Volume

In [None]:
# Bar chart of the traded volume for each date.
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(Data["Date"], Data["Volume"])
ax.set(xlabel="Date", ylabel="Volume", title="Volume Chart")
ax.grid()
plt.show()

### Daily Return

In [None]:
# Fractional change of the closing price from one row to the next.
Data = Data.assign(Daily_Return=Data["Close"].pct_change())
Data["Daily_Return"]

In [None]:
# Line chart of the day-over-day return.
fig, ax = plt.subplots(figsize=(12, 6))
ax.set_title("Stock Price Daily Return")
# Daily_Return holds fractions (0.01 == 1 %); scale by 100 so the plotted
# values agree with the "%" unit stated in the axis label.
ax.plot(Data["Date"], Data["Daily_Return"] * 100)
ax.set_ylabel("Daily Return in %")
ax.grid()
plt.show()

In [None]:
# Distribution of the daily return (volatility).
# Show the row with the highest daily return. print() is required because a
# notebook cell only displays its last expression automatically.
print(Data.iloc[Data["Daily_Return"].argmax()])
Data["Daily_Return"].hist(bins=100, color="blue")
plt.show()

### Cumulative Return

In [None]:
# Compound the daily returns into a running total return (as a fraction).
growth_factor = 1 + Data["Daily_Return"]
Data["Cumulative_Return"] = growth_factor.cumprod() - 1
Data["Cumulative_Return"]

In [None]:
# Line chart of the compounded return over time.
fig, ax = plt.subplots(figsize=(12, 6))
# Cumulative_Return holds fractions (1.0 == 100 %); scale by 100 so the
# plotted values agree with the "(%)" unit stated in the axis label.
ax.plot(Data["Date"], Data["Cumulative_Return"] * 100, "-g")
ax.set_title("Cumulative Daily Return")
ax.set_ylabel("Cumulative Daily Return (%)")
ax.grid()
plt.show()

# Data Preprocessing

### Splitting Data into Training and Validation Sets

In [None]:
# Use the "Date" column as the row index from here on.
Data = Data.set_index("Date")
Data

In [None]:
# The first 70 % of the rows (rounded up) form the training set.
Close = Data["Close"]
# Column vector of closing prices, shape (n_rows, 1).
Close_Value = Close.values.reshape(-1, 1)
Training_Data_Leng = math.ceil(0.7 * len(Close_Value))
Training_Data_Leng



### Scaling Data

In [None]:
# Scale prices with a min-max scaler. The scaler is fitted on the training
# rows only, so the validation period's minimum/maximum do not leak into
# training; validation prices may therefore map outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(Close_Value[:Training_Data_Leng])
PriceData = scaler.transform(Close_Value)
PriceData

### Creating Sequences

In [None]:
# Build supervised samples from the training rows: each input is a window of
# `Backcandles` consecutive scaled prices and the target is the next price.
Backcandles = 60
TrainData = PriceData[:Training_Data_Leng]
# Without at least one full window plus a target there is nothing to train on.
if len(TrainData) <= Backcandles:
    raise ValueError(
        f"Need more than {Backcandles} training rows to build a window, "
        f"got {len(TrainData)}."
    )
X_train = np.array(
    [TrainData[i - Backcandles:i, 0] for i in range(Backcandles, len(TrainData))]
)
Y_train = np.array(TrainData[Backcandles:, 0])
# Add the trailing feature axis so the array is (samples, timesteps, 1),
# matching the LSTM's declared input shape and the way x_test is reshaped.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
print(X_train.shape, Y_train.shape)

# Model Building, Compiling, Training

In [None]:
# Two stacked LSTM layers, each followed by dropout, then a small dense head
# that outputs a single value per input window.
Model = Sequential()
Model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
Model.add(Dropout(0.2))
Model.add(LSTM(50))
Model.add(Dropout(0.2))
Model.add(Dense(32))
Model.add(Dense(1))

# Train with Adam on mean squared error for 10 epochs in batches of 32.
Model.compile(optimizer="adam", loss="mean_squared_error")
Model.fit(X_train, Y_train, batch_size=32, epochs=10)

In [None]:
# Print the layer-by-layer summary of the model built above.
Model.summary()

In [None]:
# Validation windows: start `Backcandles` rows before the training boundary
# so the first validation row also has a full look-back window.
Test_Data = PriceData[Training_Data_Leng - Backcandles:, :]
# Targets are taken from the unscaled closing prices.
y_test = Close_Value[Training_Data_Leng:, :]
x_test = np.array(
    [Test_Data[j - Backcandles:j, 0] for j in range(Backcandles, len(Test_Data))]
)
# Add the trailing feature axis: (samples, timesteps, 1).
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)
x_test.shape

# Results of Prediction

In [None]:
# Predict on the validation windows and map the outputs back to price units.
Pred = Model.predict(x_test)
Pred = scaler.inverse_transform(Pred)
# RMSE = sqrt(mean(error ** 2)): each error must be squared *before*
# averaging. Squaring the mean error instead lets positive and negative
# errors cancel and reports |mean error|, which understates the true RMSE.
RMSE = np.sqrt(np.mean((Pred - y_test) ** 2))
RMSE


### Prediction Results

In [None]:
# Split the closing prices at the training boundary and attach the model's
# predictions to the validation part for side-by-side comparison.
TrainSet = Close[:Training_Data_Leng]
ValidSet = pd.DataFrame(Close[Training_Data_Leng:])
ValidSet["Prediction"] = Pred
ValidSet

In [None]:
# Display the validation frame with the index turned back into a column.
# The result is not assigned, so ValidSet itself is left unchanged.
ValidSet.reset_index()

### Visualization Results

In [None]:
# Actual vs. predicted closing price over the validation period.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(ValidSet["Close"], "blueviolet")
ax.plot(ValidSet["Prediction"], "red")
ax.legend(["Actual Price", "Prediction"])
ax.set_ylabel("Price in USD")
ax.set_title("Predicted Stock Market Price vs Actual Price")
plt.show()

In [None]:
# NOTE(review): this is not plain Python; it presumably relies on
# IPython/Jupyter treating a bare `pip ...` line as the %pip magic. Confirm
# the notebook runs in such an environment and that streamlit is needed,
# since no cell shown here uses it.
pip install streamlit