In [176]:
%%bash
pwd
pip install yfinance
pip install ta

/home/jovyan


In [177]:
import yfinance as yf
apple_data = yf.download(tickers = "AAPL",  # list of tickers
            period = "10y",         # time period
            interval = "1d",       # trading interval
            ignore_tz = True,      # ignore timezone when aligning data from different exchanges?
            prepost = False) 
# Create a new column called 'Price Change' that indicates whether the stock price went up or down by the end of the day
apple_data['Price Change'] = ['Up' if apple_data['Close'][i] > apple_data['Open'][i] else 'Down' for i in range(len(apple_data))]
print(apple_data.shape)
apple_data.head()

[*********************100%***********************]  1 of 1 completed
(2517, 7)


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Price Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-04-15,15.25,15.281786,14.983929,14.994643,12.95059,317520000,Down
2013-04-16,15.056071,15.236071,15.020357,15.222857,13.147693,305771200,Up
2013-04-17,15.009643,15.021429,14.218214,14.385714,12.424668,945056000,Down
2013-04-18,14.463929,14.4925,13.919286,14.001786,12.093078,666299200,Down
2013-04-19,13.856071,14.271429,13.753571,13.9475,12.046194,609274400,Up


In [178]:
# Exponential smoothing
alpha = 0.3
import numpy as np

numeric_cols = apple_data.select_dtypes(include=[np.number]).columns
for col in numeric_cols:
    apple_data[col] = apple_data[col].ewm(alpha=0.3).mean()

In [179]:
# Generate technical indicators using the TA-Lib library
from ta.utils import dropna
import ta
import pandas as pd

apple_data = dropna(apple_data)

# List of technical indicators to calculate
indicators = ['SMA', 'EMA', 'MACD', 'RSI', 'ADX', 'BollingerBands', 'Stochastic_Oscillator', 'Williams_R', 'Price_ROC', 'OBV']

# Create a new DataFrame to store the technical indicators
technical_indicators = pd.DataFrame(index=apple_data.index)

# Calculate technical indicators
technical_indicators['SMA7'] = ta.trend.SMAIndicator(close=apple_data['Close'], window=7).sma_indicator()
technical_indicators['SMA21'] = ta.trend.SMAIndicator(close=apple_data['Close'], window=21).sma_indicator()
technical_indicators['EMA7'] = ta.trend.EMAIndicator(close=apple_data['Close'], window=7).ema_indicator()
technical_indicators['EMA21'] = ta.trend.EMAIndicator(close=apple_data['Close'], window=21).ema_indicator()

macd = ta.trend.MACD(close=apple_data['Close']).macd()
macdsignal = ta.trend.MACD(close=apple_data['Close']).macd_signal()
technical_indicators['MACD'] = macd
technical_indicators['MACD_signal'] = macdsignal

technical_indicators['RSI'] = ta.momentum.RSIIndicator(close=apple_data['Close'], window=14).rsi()
technical_indicators['ADX'] = ta.trend.ADXIndicator(high=apple_data['High'], low=apple_data['Low'], close=apple_data['Close'], window=14).adx()

technical_indicators['BB_upper'], technical_indicators['BB_middle'], technical_indicators['BB_lower'] = ta.volatility.BollingerBands(close=apple_data['Close'], window=20).bollinger_mavg(), ta.volatility.BollingerBands(close=apple_data['Close'], window=20).bollinger_hband(), ta.volatility.BollingerBands(close=apple_data['Close'], window=20).bollinger_lband()

# Adding Stochastic Oscillator, Williams %R, Price Rate of Change, and On Balance Volume
# Stochastic Oscillator has a window of 2 weeks
technical_indicators['Stochastic_Oscillator'] = ta.momentum.StochasticOscillator(high=apple_data['High'], low=apple_data['Low'], close=apple_data['Close'], window=14).stoch()
# Williams %R has a window of 2 weeks
technical_indicators['Williams_R'] = ta.momentum.WilliamsRIndicator(high=apple_data['High'], low=apple_data['Low'], close=apple_data['Close'], lbp=14).williams_r()
technical_indicators['Price_ROC'] = ta.momentum.ROCIndicator(close=apple_data['Close'], window=12).roc()
technical_indicators['OBV'] = ta.volume.OnBalanceVolumeIndicator(close=apple_data['Close'], volume=apple_data['Volume']).on_balance_volume()

  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)


In [180]:
technical_indicators = technical_indicators.dropna()
technical_indicators.reset_index(inplace=True)
technical_indicators.rename(columns={'index': 'Date'}, inplace=True)
technical_indicators.sample(10)

Unnamed: 0,Date,SMA7,SMA21,EMA7,EMA21,MACD,MACD_signal,RSI,ADX,BB_upper,BB_middle,BB_lower,Stochastic_Oscillator,Williams_R,Price_ROC,OBV
1704,2020-03-09,72.598119,76.920602,73.065981,75.45424,-1.529509,-0.800513,25.931617,52.534752,76.768614,84.037934,69.499293,13.141655,-86.858345,-11.642712,52077010000.0
2098,2021-09-29,145.632675,149.419781,145.866637,147.775987,-0.965473,-0.160406,28.98364,41.857549,149.369444,155.869331,142.869558,7.938223,-92.061777,-4.916097,64075160000.0
1565,2019-08-19,50.77718,51.226368,50.952356,50.90093,0.130441,0.206591,60.027635,31.439708,51.228305,52.653595,49.803015,47.885554,-52.114446,-1.635446,45168670000.0
2130,2021-11-12,150.14508,148.572779,149.790324,148.673361,1.127497,1.13965,60.241977,34.256575,148.844238,152.308431,145.380045,46.772321,-53.227679,0.603985,64869090000.0
2307,2022-07-29,153.367916,147.009216,153.351213,148.671887,3.334848,2.233482,83.606283,42.441506,147.45636,157.900714,137.012006,93.512993,-6.487007,8.426557,65414850000.0
1995,2021-05-04,133.23229,131.431486,132.755842,131.114817,1.899105,2.14842,56.295619,49.183357,131.803973,136.677274,126.930672,17.229739,-82.770261,-1.067987,63177970000.0
400,2014-12-31,28.054501,28.207254,28.052595,28.069469,0.030504,0.068556,50.137355,27.572468,28.164205,29.124002,27.204408,57.884318,-42.115682,-0.015946,28945990000.0
1406,2018-12-31,39.361564,42.027513,39.593492,41.953011,-2.346562,-2.343591,10.508732,82.698523,41.893748,46.148014,37.639483,22.007805,-77.992195,-8.508892,40474390000.0
1648,2019-12-16,67.1607,66.405737,67.314428,66.119092,1.245482,1.295968,87.315022,70.120693,66.440873,67.788186,65.093559,93.065703,-6.934297,3.146057,49601600000.0
143,2013-12-23,19.858336,19.62258,19.8381,19.604972,0.301777,0.361183,69.188936,58.024148,19.675412,20.557126,18.793699,46.577486,-53.422514,-0.24492,15940730000.0


In [181]:
# Perform normalization
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
technical_indicators_scaled = pd.DataFrame(scaler.fit_transform(technical_indicators.drop(columns=['Date'])), columns=technical_indicators.drop(columns=['Date']).columns)

technical_indicators_scaled

Unnamed: 0,SMA7,SMA21,EMA7,EMA21,MACD,MACD_signal,RSI,ADX,BB_upper,BB_middle,BB_lower,Stochastic_Oscillator,Williams_R,Price_ROC,OBV
0,0.007820,0.006208,0.007514,0.004636,0.475545,0.468823,0.643115,0.049035,0.006447,0.005029,0.009001,0.543694,0.543694,0.461446,0.081636
1,0.008054,0.006360,0.007735,0.004796,0.475703,0.468426,0.658407,0.047720,0.006552,0.005041,0.009203,0.616741,0.616741,0.507823,0.087191
2,0.008285,0.006465,0.007923,0.004949,0.475774,0.468123,0.663756,0.048322,0.006587,0.005086,0.009227,0.793162,0.793162,0.528115,0.092394
3,0.008447,0.006488,0.008010,0.005069,0.475479,0.467816,0.636320,0.048374,0.006567,0.005049,0.009225,0.743516,0.743516,0.537341,0.087447
4,0.008532,0.006441,0.007930,0.005123,0.474538,0.467364,0.566003,0.038167,0.006468,0.004822,0.009265,0.607014,0.607014,0.514156,0.082111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2479,0.896602,0.879454,0.900502,0.894885,0.732108,0.701303,0.882277,0.526662,0.880607,0.892133,0.848573,0.948695,0.948695,0.640989,0.986022
2480,0.901795,0.883257,0.904290,0.898726,0.737678,0.711952,0.885746,0.545783,0.884650,0.897171,0.851458,0.959032,0.959032,0.621156,0.986788
2481,0.905761,0.886935,0.906231,0.901882,0.735399,0.719971,0.831421,0.554401,0.888742,0.899927,0.856877,0.897011,0.897011,0.601854,0.986037
2482,0.908128,0.890604,0.906491,0.904305,0.725843,0.724286,0.763705,0.558073,0.892569,0.900749,0.863817,0.798347,0.798347,0.576489,0.985298


In [182]:
# Create two copies of the technical_indicators DataFrame
technical_indicators_binary = technical_indicators.copy()
technical_indicators_signed = technical_indicators.copy()

# Add a new column called "Label" to each DataFrame, initialized with 0
technical_indicators_binary['Label'] = 0
technical_indicators_signed['Label'] = 0

# Define a function to label the data based on the fixed number of days (d)
def label_data_fixed_days(row_index, d, close_prices, binary=True):
    if row_index + d >= len(close_prices):
        return None
    current_price = close_prices.iloc[row_index]
    future_price = close_prices.iloc[row_index + d]
    if binary:
        return 1 if future_price > current_price else 0
    else:
        return 1 if future_price > current_price else -1

# Iterate over the rows of the DataFrame and apply the label_data_fixed_days function
d = 10  # Choose the appropriate number of days
for idx in range(len(technical_indicators)):
    binary_label = label_data_fixed_days(idx, d, apple_data['Close'], binary=True)
    signed_label = label_data_fixed_days(idx, d, apple_data['Close'], binary=False)
    if binary_label is not None and signed_label is not None:
        technical_indicators_binary.loc[technical_indicators_binary.index[idx], 'Label'] = binary_label
        technical_indicators_signed.loc[technical_indicators_signed.index[idx], 'Label'] = signed_label

# Remove the rows without a label (the last d rows)
technical_indicators_binary = technical_indicators_binary[:-d]
technical_indicators_signed = technical_indicators_signed[:-d]

print("Technical Indicators with Binary Labels:")
print(technical_indicators_binary.head())
print("\nTechnical Indicators with Signed Labels:")
print(technical_indicators_signed.head())

Technical Indicators with Binary Labels:
        Date       SMA7      SMA21       EMA7      EMA21      MACD  \
0 2013-05-31  15.819689  15.884247  15.854073  15.718508  0.162860   
1 2013-06-03  15.858033  15.908804  15.890066  15.743921  0.164971   
2 2013-06-04  15.895848  15.925611  15.920716  15.768352  0.165912   
3 2013-06-05  15.922257  15.929380  15.935013  15.787402  0.161984   
4 2013-06-06  15.936182  15.921695  15.921839  15.796031  0.149436   

   MACD_signal        RSI        ADX   BB_upper  BB_middle   BB_lower  \
0     0.189083  65.961848  13.922930  15.904342  16.291899  15.516784   
1     0.184261  67.389231  13.815190  15.921259  16.293875  15.548642   
2     0.180591  67.888503  13.864480  15.926953  16.301498  15.552409   
3     0.176870  65.327625  13.868770  15.923664  16.295193  15.552135   
4     0.171383  58.764015  13.032459  15.907752  16.257080  15.558425   

   Stochastic_Oscillator  Williams_R  Price_ROC           OBV  Label  
0              54.908326  -4

In [183]:
# Save the data to CSV files
technical_indicators_binary.to_csv('/home/jovyan/technical_indicators_binary.csv', index=False)
technical_indicators_signed.to_csv('/home/jovyan/technical_indicators_signed.csv', index=False)