# **Stock Price Prediction**

In [None]:
!pip install -q yfinance

In [None]:
!pip install --upgrade wandb

In [None]:
import wandb

# Log in to Weights & Biases with the Kaggle secret labelled "wandb_api".
# `anony` records the outcome: None after a successful login, "must" when the
# secret (or the kaggle_secrets module itself) is not available.
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("wandb_api")
    wandb.login(key=api_key)
    anony = None
except Exception:
    # Catch Exception instead of using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still propagate to the notebook.
    anony = "must"
    print('If you want to use your W&B account, go to Add-ons -> Secrets and provide your W&B access token. Use the Label name as wandb_api. \nGet your W&B access token from here: https://wandb.ai/authorize')

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
%matplotlib inline

from pandas_datareader.data import DataReader
import yfinance as yf
from pandas_datareader import data as pdr

# Route pandas_datareader requests through yfinance when the helper exists.
# The call is guarded because yfinance is installed unpinned above.
# NOTE(review): pdr_override appears to have been dropped from newer yfinance
# releases - confirm against the yfinance changelog. The download cell below
# calls yf.download directly and does not rely on this override.
if hasattr(yf, "pdr_override"):
    yf.pdr_override()

from datetime import datetime

# Tickers to analyse and the one-year window that ends now.
tech_list = ['AAPL', 'GOOG', 'MSFT', 'AMZN']
end = datetime.now()
# Step back one calendar year with DateOffset instead of rebuilding the date
# from (year - 1, month, day): that constructor raises ValueError when today
# is 29 February. normalize() keeps the original midnight start time.
start = (pd.Timestamp(end).normalize() - pd.DateOffset(years=1)).to_pydatetime()

# Download every ticker once and keep the frames in a dict rather than
# injecting variables through globals().
stock_frames = {stock: yf.download(stock, start, end) for stock in tech_list}

# Later cells refer to the frames by ticker name, so bind them explicitly.
AAPL, GOOG, MSFT, AMZN = (stock_frames[stock] for stock in tech_list)

company_list = [AAPL, GOOG, MSFT, AMZN]
company_name = ["APPLE", "GOOGLE", "MICROSOFT", "AMAZON"]

# Tag each frame with a readable company label (adds a column in place).
for company, com_name in zip(company_list, company_name):
    company["company_name"] = com_name

# Stack the four frames row-wise into one long frame.
# NOTE(review): the modelling cells further down read df['Close'] as a single
# series, i.e. all four companies back to back - confirm that is intended.
df = pd.concat(company_list, axis=0)
df.tail(10)

In [None]:
# Fetch seaborn's current colour palette; leaving it as the cell's final
# expression lets the notebook display it.
color_pal = sns.color_palette()
color_pal

In [None]:
# Opening price of each company on a single 2x2 grid.
# The figure and its axes are created in one call: plt.figure(figsize=...)
# followed by a bare plt.subplots() opens a second, default-sized figure, so
# the requested size never reaches the panels and a stray figure is left over.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

for ax, company, ticker in zip(axes.flat, company_list, tech_list):
    company['Open'].plot(ax=ax)
    ax.set_ylabel('Open')
    ax.set_xlabel(None)  # drop the x-axis label pandas adds from the index name
    ax.set_title(f"Opening Price of {ticker}")

fig.tight_layout()

In [None]:
# Day-over-day percentage change of the adjusted close, stored per company.
for company in company_list:
    # NOTE(review): some yfinance configurations may return an already
    # adjusted 'Close' and no 'Adj Close' column - confirm; fall back to
    # 'Close' in that case instead of failing with a KeyError.
    price_col = 'Adj Close' if 'Adj Close' in company.columns else 'Close'
    company['Daily Return'] = company[price_col].pct_change()

# One 2x2 grid created in a single call: plt.figure(figsize=...) followed by a
# bare plt.subplots() opens a second, default-sized figure, so the requested
# size never reaches the panels and a stray figure is left over.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

for ax, company, ticker in zip(axes.flat, company_list, tech_list):
    company['Daily Return'].plot(ax=ax)
    ax.set_ylabel('Daily Return')
    ax.set_xlabel(None)  # drop the x-axis label pandas adds from the index name
    ax.set_title(f"Daily Return of {ticker}")

fig.tight_layout()

In [None]:
# Closing price of each company on a single 2x2 grid.
# The figure and its axes are created in one call: plt.figure(figsize=...)
# followed by a bare plt.subplots() opens a second, default-sized figure, so
# the requested size never reaches the panels and a stray figure is left over.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

for ax, company, ticker in zip(axes.flat, company_list, tech_list):
    company['Close'].plot(ax=ax)
    ax.set_ylabel('Close')
    ax.set_xlabel(None)  # drop the x-axis label pandas adds from the index name
    ax.set_title(f"Closing Price of {ticker}")

fig.tight_layout()

In [None]:
## Example: a 10-day rolling mean of the opening price
## df['Open: 10 days rolling'] = df['Open'].rolling(window=10, min_periods=1).mean()

# **What is Simple Moving Average (SMA):**

A Simple Moving Average is the average (mean) of a set of data points within a specified rolling window or time period. It is called "simple" because it gives equal weight to all data points within the window. The formula for calculating the SMA for a time series dataset is straightforward:

$$SMA_t = \frac{X_{t-1} + X_{t-2} + \ldots + X_{t-n}}{n}$$

Where:
- $SMA_t$ is the Simple Moving Average at time $t$.
- $X_{t-1}, X_{t-2}, \ldots, X_{t-n}$ are the data points within the rolling window (typically, the $n$ previous data points).
- $n$ is the number of data points included in the calculation (the window size).

In [None]:
# Rolling-window sizes (in rows) for the simple moving averages.
window = [10, 30, 50, 60]

# Add one SMA column per window size to every company frame.
for ma_avg in window:
    for company in company_list:
        column = f"Simple Moving Averages for {ma_avg}"
        # min_periods=1 lets the first rows average over however many
        # observations are available instead of producing NaN.
        company[column] = company['Open'].rolling(ma_avg, min_periods=1).mean()

# Columns drawn in each panel: the raw opening price plus every SMA.
sma_columns = ['Open'] + [f"Simple Moving Averages for {ma_avg}" for ma_avg in window]

# Draw all four companies on one 2x2 grid. Previously plt.figure() stayed
# empty, every DataFrame.plot() call opened its own default-sized figure and
# tight_layout() only reached the last one.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
panel_titles = ['Apple', 'Google', 'Microsoft', 'Amazon']  # same order as company_list

for ax, company, title in zip(axes.flat, company_list, panel_titles):
    company[sma_columns].plot(ax=ax)
    ax.set_title(title)

fig.tight_layout()

# **What is Exponential Moving Average?**

**Exponential Moving Average (EMA)** is a commonly used statistical calculation for analyzing and forecasting time series data, particularly in finance and economics. It is a type of moving average that gives more weight to recent data points, making it more responsive to recent changes in the data compared to a simple moving average (SMA).

The formula for calculating the Exponential Moving Average (EMA) is as follows:

$$EMA_t = \alpha \cdot X_t + (1 - \alpha) \cdot EMA_{t-1}$$

Where:
- $EMA_t$ is the EMA at time $t$.
- $X_t$ is the value of the time series data at time $t$.
- $EMA_{t-1}$ is the EMA at the previous time period $(t-1)$.
- $\alpha$ is the smoothing factor, often referred to as the "weight" or "smoothing coefficient," and it is calculated using the formula: $\alpha = \frac{2}{N+1}$, where $N$ is the number of time periods for which you want to calculate the EMA. Alternatively, you can express $\alpha$ as a percentage by multiplying it by 100.

Here's how the EMA calculation works:
1. Start with an initial EMA value, often taken as the SMA for the first $N$ time periods.
2. For each subsequent time period, calculate the EMA using the formula above, where $\alpha$ gives more weight to the most recent data point, $X_t$, and $(1 - \alpha)$ gives weight to the previous EMA, $EMA_{t-1}$.

In [None]:
# Smoothing factors (alpha) for the exponential moving averages.
sf = [0.1, 0.2, 0.3]

# Add one EMA column per smoothing factor to every company frame.
for s_factor in sf:
    for company in company_list:
        column = f"Exponential Moving Averages for {s_factor}"
        # adjust=False selects the recursive form
        # EMA_t = alpha * X_t + (1 - alpha) * EMA_{t-1}.
        company[column] = company['Open'].ewm(alpha=s_factor, adjust=False).mean()

# Columns drawn in each panel: the raw opening price plus every EMA.
ema_columns = ['Open'] + [f"Exponential Moving Averages for {s_factor}" for s_factor in sf]

# Draw all four companies on one 2x2 grid. Previously plt.figure() stayed
# empty, every DataFrame.plot() call opened its own default-sized figure and
# tight_layout() only reached the last one.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
panel_titles = ['APPLE', 'GOOGLE', 'MICROSOFT', 'AMAZON']  # same order as company_list

for ax, company, title in zip(axes.flat, company_list, panel_titles):
    company[ema_columns].plot(ax=ax)
    ax.set_title(title)

fig.tight_layout()

In [None]:
# Ticker symbols, bound to `ticker_symbols` rather than `list` so the builtin
# list type is not shadowed for the rest of the kernel session.
ticker_symbols = ['AAPL', 'GOOG', 'MSFT', 'AMZN']
# Alias for the pandas_datareader `data` module imported as `pdr`; no cell
# visible in this notebook reads it.
closing = pdr

In [None]:
# Keep only the 'Close' column; filter() returns a one-column DataFrame.
data = df.filter(items=['Close'])
# 2-D NumPy view of the prices: one row per observation, one column.
new_df = data.to_numpy()
# Number of rows assigned to training: 95% of all rows, rounded up.
training_data_len = int(np.ceil(0.95 * len(new_df)))

training_data_len

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scale the closing prices.
# The scaler is fitted on the training rows only, so the range of the held-out
# tail does not leak into the scaling; the same fitted scaler then transforms
# every row. (The previous fit() followed by fit_transform() fitted twice, on
# all rows.) Values in the held-out tail may therefore fall slightly outside
# [0, 1].
scaler = MinMaxScaler()
scaler.fit(new_df[:training_data_len])
scaled = scaler.transform(new_df)

In [None]:
# Total number of rows in the scaled price array.
length = scaled.shape[0]

In [None]:
# Rows reserved for training (the first `training_data_len` scaled prices).
train_data = scaled[0:training_data_len, :]

# Each sample is the 60 previous scaled prices; the target is the next price.
lookback = 60

X_train = []
y_train = []

# Slide the window over train_data only. The loop used to run over the whole
# `scaled` array, which put the held-out tail into the training set.
for i in range(lookback, len(train_data)):
    # the `lookback` values that precede position i
    X_train.append(train_data[i - lookback:i, 0])
    # the value at position i is what the model should predict
    y_train.append(train_data[i, 0])

# Convert the lists to arrays and add the trailing feature axis the LSTM
# expects: (samples, timesteps, 1). The explicit shape also works when there
# are no samples at all.
X_train = np.array(X_train).reshape(-1, lookback, 1)
y_train = np.array(y_train)

In [None]:
from keras.layers import LSTM
from keras.layers import Dense
from keras.models import Sequential

In [None]:
# Start the W&B run that tracks this training session.
# `anony` comes from the login cell: None after a successful login, "must"
# otherwise. Passing it on lets the run start without stored credentials;
# before, the flag was computed but never used.
run = wandb.init(
    project='predicting-stock-prices-from-lstm',
    config={
        'learning_rate': 0.005,
        'batch_size': 5,
        'epochs': 8,
    },
    anonymous=anony,
)

# Hyper-parameters are read back from the run config so they are logged.
config = wandb.config

# Two stacked LSTM layers followed by two dense layers; the final Dense(1)
# outputs one scaled price per input window.
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

optimizer = tf.keras.optimizers.Adam(learning_rate=config.learning_rate)
model.compile(optimizer=optimizer, loss='MSE')

In [None]:
from tensorflow.keras.utils import plot_model
# Draw the layer graph of the model; the image is written to plot_model's
# default output file, spelled out here explicitly.
plot_model(model, to_file='model.png')

In [None]:
# Print the summary of the `model` built in the cell above.
model.summary()

In [None]:
from wandb.keras import WandbMetricsLogger, WandbModelCheckpoint
# Callbacks handed to model.fit: one logs training metrics to W&B, the other
# saves a checkpoint per epoch using the epoch-numbered path template.
# NOTE(review): some Keras versions require a file extension on checkpoint
# paths - confirm this template works with the installed Keras.
metrics_logger = WandbMetricsLogger()
checkpoint_saver = WandbModelCheckpoint(filepath="my_model_{epoch:02d}")
wandb_callbacks = [metrics_logger, checkpoint_saver]

In [None]:
# Train the model with the batch size and epoch count stored in the W&B config.
# `wandb_callbacks` is already a list, so it is passed as-is; wrapping it in
# another list handed Keras a nested list instead of callback objects.
model.fit(
    X_train,
    y_train,
    batch_size=config.batch_size,
    epochs=config.epochs,
    callbacks=wandb_callbacks,
)

In [None]:
# Training loss recorded by Keras for each epoch of the last fit() call.
loss_per_epoch = model.history.history['loss']
# x-axis: zero-based epoch index, one point per recorded loss value.
epoch_index = range(len(loss_per_epoch))
plt.plot(epoch_index, loss_per_epoch)
# Author's observation on the resulting curve: losses significantly decreased.

In [None]:
# Scaled rows for evaluation, starting 60 rows before the split so the first
# test window has a full 60-step history.
test_data = scaled[training_data_len - 60:, :]

# Ground truth stays in original (unscaled) units.
y_test = new_df[training_data_len:, :]

# One 60-step window per test row, stacked into a 2-D array.
X_test = np.array([test_data[i - 60:i, 0] for i in range(60, len(test_data))])

# Add the trailing feature axis: (samples, timesteps, 1).
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Predict in scaled space, then map back to price units with the same scaler.
predictions = scaler.inverse_transform(model.predict(X_test))

# Root mean squared error between predicted and actual prices.
squared_errors = (predictions - y_test) ** 2
rmse = np.sqrt(np.mean(squared_errors))
rmse

In [None]:
# Split the closing-price frame at the same boundary used for training.
train = data[:training_data_len]
# Take an explicit copy: adding a column to a slice of `data` triggers
# pandas' SettingWithCopyWarning and leaves it unclear which frame is changed.
valid = data[training_data_len:].copy()
# Flatten the predictions so each row of `valid` receives one scalar value.
valid['Predictions'] = np.ravel(predictions)

valid

In [None]:
# Write the model to 'Stock_Prices.h5' in the working directory.
# NOTE(review): the .h5 suffix presumably selects Keras' HDF5 format - confirm
# it is still supported by the installed Keras version.
model.save('Stock_Prices.h5')