In [1]:
# Export the TensorFlow native-logging level before the cell that imports tensorflow.
# NOTE(review): level 3 presumably hides INFO/WARNING/ERROR output from the C++ backend - confirm against the TF docs.
%env TF_CPP_MIN_LOG_LEVEL=3

env: TF_CPP_MIN_LOG_LEVEL=3


In [2]:
!pip install -q tensorflow yfinance ta

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for ta (setup.py) ... [?25l[?25hdone


In [6]:
import ta
import sklearn
import warnings
import tensorflow
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from sklearn import metrics
from keras import layers

In [5]:
# Suppress every Python warning for the rest of the session (no category or module filter is given).
# NOTE(review): this also hides deprecation notices from the imported libraries; consider narrowing it.
warnings.filterwarnings('ignore')

# **Extracting Data**

In [12]:
def extract_data(ticker, start_date = '2000-01-01', end_date = '2024-07-18'):
  """Download daily price history for one ticker and normalise its layout.

  Parameters
  ----------
  ticker : str
      Symbol passed to ``yf.download`` (a single symbol is expected).
  start_date, end_date : str
      Date bounds forwarded to ``yf.download`` as ``start`` / ``end``.

  Returns
  -------
  pandas.DataFrame
      Frame indexed by ``date`` with lower-case column names, ordered
      ``open, high, low, close, adj close, volume`` (``adj close`` only when
      the download provides it; any extra columns are kept at the end).

  Raises
  ------
  ValueError
      If the download has duplicate field names (e.g. several tickers were
      requested) or lacks one of the required price/volume columns.
  """
  data = yf.download(ticker, start = start_date, end = end_date)

  # NOTE(review): depending on the installed yfinance version the columns may
  # come back as a two-level (field, ticker) index - keep only the field level.
  if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

  # Rename by label, not by position: a positional rename raises when the
  # column count differs and silently mislabels prices when the order differs.
  data.columns = [str(label).strip().lower() for label in data.columns]

  if len(set(data.columns)) != len(data.columns):
    raise ValueError(f'Expected one column per price field for {ticker!r}, got {list(data.columns)}')

  required = ['open', 'high', 'low', 'close', 'volume']
  missing = [name for name in required if name not in data.columns]
  if missing:
    raise ValueError(f'Downloaded data for {ticker!r} is missing columns: {missing}')

  # Restore the column order the rest of the notebook displays and relies on.
  preferred_order = ['open', 'high', 'low', 'close', 'adj close', 'volume']
  ordered = [name for name in preferred_order if name in data.columns]
  extras = [name for name in data.columns if name not in preferred_order]
  data = data[ordered + extras]

  data.index.name = 'date'

  return data

In [13]:
# Download MSFT daily prices over the function's default date range.
df = extract_data('MSFT')

[*********************100%%**********************]  1 of 1 completed


In [14]:
# Sanity check: (rows, columns) of the downloaded frame.
df.shape

(6173, 6)

In [15]:
# Inspect the earliest rows to confirm where the series starts.
df.head()

Unnamed: 0_level_0,open,high,low,close,adj close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-03,58.6875,59.3125,56.0,58.28125,36.000641,53228400
2000-01-04,56.78125,58.5625,56.125,56.3125,34.784519,54119000
2000-01-05,55.5625,58.1875,54.6875,56.90625,35.151283,64059600
2000-01-06,56.09375,56.9375,54.1875,55.0,33.973793,54976600
2000-01-07,54.3125,56.125,53.65625,55.71875,34.417763,62013600


In [16]:
# Inspect the most recent rows to confirm where the series ends.
df.tail()

Unnamed: 0_level_0,open,high,low,close,adj close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-07-11,462.980011,464.779999,451.549988,454.700012,454.700012,23111200
2024-07-12,454.329987,456.359985,450.649994,453.549988,453.549988,16311300
2024-07-15,453.299988,457.26001,451.429993,453.959991,453.959991,14429400
2024-07-16,454.220001,454.299988,446.660004,449.519989,449.519989,17175700
2024-07-17,442.589996,444.850006,439.179993,443.519989,443.519989,21778000


In [17]:
def feature_engeneering(df):
  """Add the prediction target and lagged technical features to a price frame.

  Every feature is shifted by one row, so the values on a given row are built
  only from earlier rows, while the target ``cpc`` is that row's own change
  in ``close``.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame with ``open``, ``high``, ``low``, ``close`` and ``volume``
      columns, ordered by date. It is not modified.

  Returns
  -------
  pandas.DataFrame
      Copy of ``df`` with the added columns ``cpc``, ``op``, ``hi``, ``lo``,
      ``clo``, ``vol``, ``SMA 15``, ``SMA 60``, ``MSD 15``, ``MSD 60``,
      ``VWAP`` and ``RSI``; rows containing any NaN (such as the warm-up
      rows of the rolling windows) are dropped.
  """
  df_copy = df.copy()

  # Target variable - Closing Percentage Change
  df_copy['cpc'] = df_copy['close'].pct_change(1)

  # Previous row's prices and volume
  df_copy['op'] = df_copy['open'].shift(1)
  df_copy['hi'] = df_copy['high'].shift(1)
  df_copy['lo'] = df_copy['low'].shift(1)
  df_copy['clo'] = df_copy['close'].shift(1)

  df_copy['vol'] = df_copy['volume'].shift(1)

  # Simple Moving Average (SMA) of the close, lagged one row
  df_copy['SMA 15'] = df_copy['close'].rolling(15).mean().shift(1)
  df_copy['SMA 60'] = df_copy['close'].rolling(60).mean().shift(1)

  # Moving Standard Deviation (MSD) of the return, lagged one row.
  # 'cpc' is the return column built above; no 'roi' column exists in the frame.
  df_copy['MSD 15'] = df_copy['cpc'].rolling(15).std().shift(1)
  df_copy['MSD 60'] = df_copy['cpc'].rolling(60).std().shift(1)

  # Volume Weighted Average Price (VWAP) over a 5-row window
  vwap = ta.volume.VolumeWeightedAveragePrice(high = df_copy['high'],
                                              low = df_copy['low'],
                                              close = df_copy['close'],
                                              volume = df_copy['volume'],
                                              window = 5)

  # Use the documented accessor rather than the indicator's internal attribute.
  df_copy['VWAP'] = vwap.volume_weighted_average_price().shift(1)

  # Relative Strength Index (RSI) over a 5-row window
  RSI = ta.momentum.RSIIndicator(df_copy['close'], window = 5, fillna = False)
  df_copy['RSI'] = RSI.rsi().shift(1)

  return df_copy.dropna()