# Data extraction

In [47]:
import yfinance as yf
from datetime import datetime, timedelta

tickers = yf.Tickers("AAPL")
end_date = datetime(2024, 6, 1)
start_date = end_date - timedelta(days=100)
data = tickers.history(start=start_date, end=end_date)
closing_price = data['Close']
print(closing_price)



[*********************100%***********************]  1 of 1 completed

Ticker            AAPL
Date                  
2024-02-22  183.263214
2024-02-23  181.424316
2024-02-26  180.072495
2024-02-27  181.533661
2024-02-28  180.330917
...                ...
2024-05-24  189.095657
2024-05-28  189.105606
2024-05-29  189.404221
2024-05-30  190.399567
2024-05-31  191.355103

[70 rows x 1 columns]





# Feature Construction

In [58]:
import pandas as pd
import numpy as np

# Daily Return
daily_return = closing_price.pct_change()
daily_return.columns = ['ret_1d']

# 5-Day Return
ret_5d = closing_price.pct_change(5)
ret_5d.columns = ['ret_5d']

# 10-Day Return
ret_10d = closing_price.pct_change(10)
ret_10d.columns=['ret_10d']
# 5-Day Volatility (Std Dev of Returns)
vol_5d = closing_price.pct_change().rolling(window=5).std()
vol_5d.columns=['vol_5d']

# 10-Day Volatility
vol_10d = closing_price.pct_change().rolling(window=10).std()
vol_10d.columns=['vol_10d']

# Momentum (10d)
momentum_10d = closing_price - closing_price.shift(10)
momentum_10d.columns=['momentum_10d']

# SMA_10/SMA_50 Ratio
sma_10 = closing_price.rolling(window=10).mean()
sma_50 = closing_price.rolling(window=50).mean()
sma_ratio = sma_10/sma_50
sma_ratio.columns = ['sma_ratio_10_50']

# Z-score (20d)
rolling_mean = closing_price.rolling(window=20).mean()
rolling_std = closing_price.rolling(window=20).std()
z_score_20d = (closing_price - rolling_mean)/rolling_std
z_score_20d.columns = ['z_score_20d']
# RSI (14d)
delta = closing_price.diff()
gain = delta.where(delta > 0, 0.0)
loss = -delta.where(delta < 0, 0.0)

avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()

rs = avg_gain/avg_loss
rsi_14 = 100 - (100 / (1 + rs))
rsi_14.columns = ['rsi_14']
# Features DataFrame

y =  closing_price.pct_change(-1)*100
y.columns = ['y']
features = pd.concat([
    daily_return,
    ret_5d,
    ret_10d,
    vol_5d,
    vol_10d,
    sma_ratio,
    z_score_20d,
    rsi_14,
    y

],axis=1).dropna()
features.head()



Unnamed: 0_level_0,ret_1d,ret_5d,ret_10d,vol_5d,vol_10d,sma_ratio_10_50,z_score_20d,rsi_14,y
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-05-02,0.022032,0.018483,0.03586,0.01878,0.013969,0.981456,1.071761,43.96431,-5.644028
2024-05-03,0.059816,0.083166,0.111394,0.030386,0.021461,0.99223,3.019514,64.992992,0.919054
2024-05-06,-0.009107,0.04732,0.095695,0.031811,0.022334,1.00153,2.147229,68.126998,-0.378278
2024-05-07,0.003797,0.070862,0.09287,0.0283,0.022388,1.010377,1.928064,71.608638,-0.186069
2024-05-08,0.001864,0.079386,0.081174,0.027087,0.022459,1.018323,1.720721,74.006098,-0.991494
