In [128]:
%%bash
pip install yfinance
pip install ta



In [129]:
import yfinance as yf
apple_data = yf.download(tickers = "AAPL",  # list of tickers
            period = "10y",         # time period
            interval = "1d",       # trading interval
            ignore_tz = True,      # ignore timezone when aligning data from different exchanges?
            prepost = False) 
# Create a new column called 'Price Change' that indicates whether the stock price went up or down by the end of the day
apple_data['Price Change'] = ['Up' if apple_data['Close'][i] > apple_data['Open'][i] else 'Down' for i in range(len(apple_data))]
print(apple_data.shape)
apple_data.head()

[*********************100%***********************]  1 of 1 completed
(2517, 7)


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Price Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-04-12,15.505357,15.505357,15.324643,15.35,13.257505,238613200,Down
2013-04-15,15.25,15.281786,14.983929,14.994643,12.95059,317520000,Down
2013-04-16,15.056071,15.236071,15.020357,15.222857,13.147695,305771200,Up
2013-04-17,15.009643,15.021429,14.218214,14.385714,12.424669,945056000,Down
2013-04-18,14.463929,14.4925,13.919286,14.001786,12.093077,666299200,Down


In [130]:
# Exponential smoothing
alpha = 0.3
import numpy as np

numeric_cols = apple_data.select_dtypes(include=[np.number]).columns
for col in numeric_cols:
    apple_data[col] = apple_data[col].ewm(alpha=0.3).mean()

In [131]:
# Generate technical indicators using the TA-Lib library
from ta.utils import dropna
import ta
import pandas as pd

apple_data = dropna(apple_data)

# List of technical indicators to calculate
indicators = ['SMA', 'EMA', 'MACD', 'RSI', 'ADX', 'BollingerBands', 'Stochastic_Oscillator', 'Williams_R', 'Price_ROC', 'OBV']

# Create a new DataFrame to store the technical indicators
technical_indicators = pd.DataFrame(index=apple_data.index)

# Calculate technical indicators
technical_indicators['SMA7'] = ta.trend.SMAIndicator(close=apple_data['Close'], window=7).sma_indicator()
technical_indicators['SMA21'] = ta.trend.SMAIndicator(close=apple_data['Close'], window=21).sma_indicator()
technical_indicators['EMA7'] = ta.trend.EMAIndicator(close=apple_data['Close'], window=7).ema_indicator()
technical_indicators['EMA21'] = ta.trend.EMAIndicator(close=apple_data['Close'], window=21).ema_indicator()

macd = ta.trend.MACD(close=apple_data['Close'], window_slow=26, window_fast=12, window_sign=9).macd()
macdsignal = ta.trend.MACD(close=apple_data['Close'], window_slow=26, window_fast=12, window_sign=9).macd_signal()
technical_indicators['MACD'] = macd
technical_indicators['MACD_signal'] = macdsignal

technical_indicators['RSI'] = ta.momentum.RSIIndicator(close=apple_data['Close'], window=14).rsi()
technical_indicators['ADX'] = ta.trend.ADXIndicator(high=apple_data['High'], low=apple_data['Low'], close=apple_data['Close'], window=14).adx()

technical_indicators['BB_upper'], technical_indicators['BB_middle'], technical_indicators['BB_lower'] = ta.volatility.BollingerBands(close=apple_data['Close'], window=20).bollinger_mavg(), ta.volatility.BollingerBands(close=apple_data['Close'], window=20).bollinger_hband(), ta.volatility.BollingerBands(close=apple_data['Close'], window=20).bollinger_lband()

# Adding Stochastic Oscillator, Williams %R, Price Rate of Change, and On Balance Volume
# Stochastic Oscillator has a window of 2 weeks
technical_indicators['Stochastic_Oscillator'] = ta.momentum.StochasticOscillator(high=apple_data['High'], low=apple_data['Low'], close=apple_data['Close'], window=14).stoch()
# Williams %R has a window of 2 weeks
technical_indicators['Williams_R'] = ta.momentum.WilliamsRIndicator(high=apple_data['High'], low=apple_data['Low'], close=apple_data['Close'], lbp=14).williams_r()
technical_indicators['Price_ROC'] = ta.momentum.ROCIndicator(close=apple_data['Close'], window=12).roc()
technical_indicators['OBV'] = ta.volume.OnBalanceVolumeIndicator(close=apple_data['Close'], volume=apple_data['Volume']).on_balance_volume()



  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)


In [132]:
technical_indicators['Price Change'] = apple_data['Price Change']
technical_indicators = technical_indicators.dropna()
technical_indicators.reset_index(inplace=True)
technical_indicators.rename(columns={'index': 'Date'}, inplace=True)
technical_indicators.sample(10)

Unnamed: 0,Date,SMA7,SMA21,EMA7,EMA21,MACD,MACD_signal,RSI,ADX,BB_upper,BB_middle,BB_lower,Stochastic_Oscillator,Williams_R,Price_ROC,OBV,Price Change
1895,2020-12-07,119.559079,118.106912,120.12146,118.314897,1.257614,0.802578,72.277535,21.689489,118.226851,121.776994,114.676709,89.379063,-10.620937,2.851017,62677520000.0,Up
776,2016-06-28,23.877065,24.359852,23.85139,24.15736,-0.207433,-0.118445,25.161117,28.695555,24.340042,25.099998,23.580086,13.242001,-86.757999,-4.903576,25292450000.0,Up
2330,2022-08-30,168.023092,167.009834,167.197368,165.340421,3.391433,4.740652,50.1076,57.091471,167.416726,174.274638,160.558815,10.032529,-89.967471,-2.664165,65647880000.0,Down
424,2015-02-04,28.671399,27.804465,28.775598,28.15437,0.352007,0.118321,75.628422,28.463707,27.835875,29.296792,26.374959,90.563001,-9.436999,8.863188,30422230000.0,Up
1687,2020-02-11,79.555839,78.926178,79.720219,78.284528,1.739382,2.011468,76.674773,67.207874,79.020296,80.591544,77.449049,78.717595,-21.282405,0.942871,53165850000.0,Down
694,2016-03-02,24.180656,23.989678,24.263579,24.219569,-0.154124,-0.319803,59.902047,45.29897,23.989448,24.459725,23.519171,90.066653,-9.933347,4.445545,26227360000.0,Up
542,2015-07-24,31.898002,31.565361,31.776816,31.695357,-0.001042,-0.078296,47.220608,27.297462,31.549396,32.314152,30.784639,50.957175,-49.042825,1.097952,30591260000.0,Down
1114,2017-10-30,39.47666,39.115545,39.607612,39.325981,0.150884,0.037494,73.450926,23.742475,39.148693,40.057268,38.240119,87.396606,-12.603394,3.496687,39134720000.0,Up
1263,2018-06-05,47.124671,46.60832,47.177034,46.310123,0.893689,0.932122,81.843047,47.687238,46.71084,47.769533,45.652147,85.759872,-14.240128,1.887664,41925560000.0,Up
762,2016-06-08,24.659503,23.954627,24.578859,24.413948,-0.023561,-0.196388,55.669889,33.561152,23.980434,25.293452,22.667416,85.02293,-14.97707,4.924487,26810210000.0,Down


In [135]:
# Perform normalization
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
technical_indicators_scaled = pd.DataFrame(scaler.fit_transform(technical_indicators.drop(columns=['Date', 'Price Change'])), columns=technical_indicators.drop(columns=['Date', 'Price Change']).columns)

technical_indicators_scaled

Unnamed: 0,SMA7,SMA21,EMA7,EMA21,MACD,MACD_signal,RSI,ADX,BB_upper,BB_middle,BB_lower,Stochastic_Oscillator,Williams_R,Price_ROC,OBV
0,0.007610,0.006012,0.007307,0.004593,0.473240,0.466560,0.607205,0.018430,0.006299,0.005127,0.008594,0.486303,0.486303,0.436093,0.076088
1,0.007820,0.006207,0.007514,0.004732,0.473450,0.466120,0.624587,0.014538,0.006446,0.005028,0.009001,0.543784,0.543784,0.461460,0.081636
2,0.008054,0.006360,0.007735,0.004882,0.473753,0.465835,0.640595,0.010513,0.006551,0.005039,0.009203,0.616812,0.616812,0.507830,0.087191
3,0.008285,0.006464,0.007923,0.005027,0.473959,0.465652,0.646196,0.008692,0.006587,0.005085,0.009227,0.793163,0.793163,0.528118,0.092394
4,0.008447,0.006488,0.008011,0.005139,0.473792,0.465469,0.619518,0.006528,0.006567,0.005047,0.009226,0.743517,0.743517,0.537343,0.087447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2479,0.891807,0.875519,0.896039,0.890840,0.723289,0.689521,0.881980,0.506582,0.876706,0.886223,0.846907,0.967111,0.967111,0.661771,0.985218
2480,0.896602,0.879454,0.900502,0.894884,0.732108,0.701303,0.882277,0.526885,0.880607,0.892133,0.848573,0.948695,0.948695,0.640989,0.986022
2481,0.901795,0.883257,0.904290,0.898725,0.737678,0.711952,0.885746,0.545997,0.884650,0.897171,0.851458,0.959032,0.959032,0.621156,0.986788
2482,0.905761,0.886935,0.906231,0.901881,0.735399,0.719971,0.831421,0.554610,0.888742,0.899927,0.856877,0.897011,0.897011,0.601854,0.986037
