In [58]:
# install yahoo! finance api
!pip install -q yfinance



In [2]:
# import required packages
import datetime as dt

import pandas as pd
import yfinance as yf

In [60]:
# download 521 days of Apple stock data
# a single symbol feeds both the Ticker handle and the download so the two cannot disagree
symbol = 'AAPL'
aapl = yf.Ticker(symbol)

# read "today" once so both ends of the range are derived from the same calendar day
today = dt.date.today()
date_from = str(today - dt.timedelta(days=521))
date_to = str(today)

print(f'Downloading Apple stock data from {date_from} to {date_to}.')
data = yf.download(symbol, start=date_from, end=date_to)

Downloading Apple stock data from 2021-12-27 to 2023-06-01.
[*********************100%***********************]  1 of 1 completed


In [61]:
# view data
# move the 'Date' index into a regular column so later cells can refer to it by name;
# the guard keeps the cell idempotent (a second reset_index would add a stray 'index' column)
if 'Date' not in data.columns:
    data = data.reset_index()
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-12-27,177.089996,180.419998,177.070007,180.330002,178.768845,74919600
1,2021-12-28,180.160004,181.330002,178.529999,179.289993,177.737823,79144300
2,2021-12-29,179.330002,180.630005,178.139999,179.380005,177.827057,62348900
3,2021-12-30,179.470001,180.570007,178.089996,178.199997,176.657288,59773000
4,2021-12-31,178.089996,179.229996,177.259995,177.570007,176.032745,64062300


In [55]:
# count the rows that contain at least one null value
has_null = data.isna().any(axis=1)
int(has_null.sum())

3109

In [82]:
def create_df(data, start=None, end=None, window=21, n_windows=500):
    """Build a table of sliding windows over the daily 'Close' price.

    The prices are first aligned to a gap-free daily calendar running from
    ``start`` to ``end``; calendar days that have no row in ``data`` become
    NaN. Every output row then holds ``window`` consecutive calendar days:
    the first ``window - 1`` values are the features ``x1 .. x{window-1}``
    and the last value is the target ``y``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide a 'Date' column (used as the join key) and a 'Close'
        column (the values that are windowed).
    start, end : optional
        Bounds handed to ``pd.date_range``. When omitted they fall back to
        the notebook-level ``date_from`` / ``date_to``, looked up when the
        function is called so that re-running the download cell is picked up.
    window : int, default 21
        Number of consecutive days per row (features plus the target).
    n_windows : int or None, default 500
        Upper bound on the number of rows produced. The count is always
        capped at the number of complete windows that fit in the calendar,
        so no row is ever padded with values that do not exist.
        ``None`` means "every complete window".

    Returns
    -------
    pandas.DataFrame
        One row per window with columns ``x1 .. x{window-1}, y``.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 2 (at least one feature and the target
        are needed).
    """
    if window < 2:
        raise ValueError('window must be at least 2 (one feature plus the target)')

    # late-bound defaults: resolved per call instead of frozen at definition time
    if start is None:
        start = date_from
    if end is None:
        end = date_to

    # since we have missing data for some days, align the prices to a
    # calendar of consecutive dates; days without a price show up as NaN
    calendar = pd.date_range(start, end)
    closes = data.set_index('Date')['Close'].reindex(calendar)

    # only complete windows are emitted
    available = max(len(closes) - window + 1, 0)
    count = available if n_windows is None else min(n_windows, available)

    # rolling window of `window` days, advanced one day at a time
    rows = [closes.iloc[i:i + window].tolist() for i in range(count)]

    cols = [f'x{i}' for i in range(1, window)] + ['y']
    return pd.DataFrame(rows, columns=cols)

# build the windowed dataset over the same date range that was downloaded
# (date_from / date_to are the names defined by the download cell)
res = create_df(data, date_from, date_to)
res

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,x12,x13,x14,x15,x16,x17,x18,x19,x20,y
0,180.330002,179.289993,179.380005,178.199997,177.570007,,,182.009995,179.699997,174.919998,...,172.169998,,,172.190002,175.080002,175.529999,172.190002,173.070007,,
1,179.289993,179.380005,178.199997,177.570007,,,182.009995,179.699997,174.919998,172.000000,...,,,172.190002,175.080002,175.529999,172.190002,173.070007,,,
2,179.380005,178.199997,177.570007,,,182.009995,179.699997,174.919998,172.000000,172.169998,...,,172.190002,175.080002,175.529999,172.190002,173.070007,,,,169.800003
3,178.199997,177.570007,,,182.009995,179.699997,174.919998,172.000000,172.169998,,...,172.190002,175.080002,175.529999,172.190002,173.070007,,,,169.800003,166.229996
4,177.570007,,,182.009995,179.699997,174.919998,172.000000,172.169998,,,...,175.080002,175.529999,172.190002,173.070007,,,,169.800003,166.229996,164.509995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,,,173.500000,171.770004,173.559998,173.750000,172.570007,,,172.070007,...,172.690002,175.050003,175.160004,,,174.199997,171.559998,171.839996,172.990005,175.429993
496,,173.500000,171.770004,173.559998,173.750000,172.570007,,,172.070007,172.070007,...,175.050003,175.160004,,,174.199997,171.559998,171.839996,172.990005,175.429993,
497,173.500000,171.770004,173.559998,173.750000,172.570007,,,172.070007,172.070007,172.690002,...,175.160004,,,174.199997,171.559998,171.839996,172.990005,175.429993,,
498,171.770004,173.559998,173.750000,172.570007,,,172.070007,172.070007,172.690002,175.050003,...,,,174.199997,171.559998,171.839996,172.990005,175.429993,,,


In [76]:
# count the rows whose ground truth label 'y' is missing
label_missing = res['y'].isna()
int(label_missing.sum())

157

In [83]:
# Fill gaps along each window (axis=1, i.e. across the x1..y columns of a row):
# first carry the previous day's value forward, then back-fill whatever is
# still missing at the start of a window from the next available day.
# ffill()/bfill() are used because fillna(method=...) is deprecated in current pandas;
# building a new frame instead of mutating in place keeps the cell safe to re-run.
res = res.ffill(axis=1).bfill(axis=1)
res

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,x12,x13,x14,x15,x16,x17,x18,x19,x20,y
0,180.330002,179.289993,179.380005,178.199997,177.570007,177.570007,177.570007,182.009995,179.699997,174.919998,...,172.169998,172.169998,172.169998,172.190002,175.080002,175.529999,172.190002,173.070007,173.070007,173.070007
1,179.289993,179.380005,178.199997,177.570007,177.570007,177.570007,182.009995,179.699997,174.919998,172.000000,...,172.169998,172.169998,172.190002,175.080002,175.529999,172.190002,173.070007,173.070007,173.070007,173.070007
2,179.380005,178.199997,177.570007,177.570007,177.570007,182.009995,179.699997,174.919998,172.000000,172.169998,...,172.169998,172.190002,175.080002,175.529999,172.190002,173.070007,173.070007,173.070007,173.070007,169.800003
3,178.199997,177.570007,177.570007,177.570007,182.009995,179.699997,174.919998,172.000000,172.169998,172.169998,...,172.190002,175.080002,175.529999,172.190002,173.070007,173.070007,173.070007,173.070007,169.800003,166.229996
4,177.570007,177.570007,177.570007,182.009995,179.699997,174.919998,172.000000,172.169998,172.169998,172.169998,...,175.080002,175.529999,172.190002,173.070007,173.070007,173.070007,173.070007,169.800003,166.229996,164.509995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,173.500000,173.500000,173.500000,171.770004,173.559998,173.750000,172.570007,172.570007,172.570007,172.070007,...,172.690002,175.050003,175.160004,175.160004,175.160004,174.199997,171.559998,171.839996,172.990005,175.429993
496,173.500000,173.500000,171.770004,173.559998,173.750000,172.570007,172.570007,172.570007,172.070007,172.070007,...,175.050003,175.160004,175.160004,175.160004,174.199997,171.559998,171.839996,172.990005,175.429993,175.429993
497,173.500000,171.770004,173.559998,173.750000,172.570007,172.570007,172.570007,172.070007,172.070007,172.690002,...,175.160004,175.160004,175.160004,174.199997,171.559998,171.839996,172.990005,175.429993,175.429993,175.429993
498,171.770004,173.559998,173.750000,172.570007,172.570007,172.570007,172.070007,172.070007,172.690002,175.050003,...,175.160004,175.160004,174.199997,171.559998,171.839996,172.990005,175.429993,175.429993,175.429993,175.429993


In [84]:
# sanity check: number of rows that still contain a missing value (expected: 0)
rows_with_na = res.isna().any(axis=1)
int(rows_with_na.sum())

0

In [85]:
# write the windowed dataset to disk in CSV format
res.to_csv(path_or_buf='apple_stock.csv')