In [42]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime

In [22]:
def calc_sma_and_bollinger_bands(df, sma_length):
    #Create a sma column for the df, setting all the values to NaN
    df["SMA"+str(sma_length)] = np.nan
    #Create an upper and lower bolinger band columns for the df, setting all the values to NaN
    df["Upper_Bollinger_Band"] = np.nan
    df["Lower_Bollinger_Band"] = np.nan

    #Create variables for the sum and sma
    sma_sum = 0
    sma = 0
    std_dev_sum = 0

    #Loop through the df
    for i in range(0, len(df.index)):
        #If i is less then 20 just sum the value and let the sma stay NaN
        if i < sma_length:
            sma_sum += df.iloc[i]["Close_VET"]

        #If i is greater than 20 add the next value and subtract the value from 20 places before
        elif i > sma_length:
            #Add the next day
            sma_sum += df.iloc[i]["Close_VET"]

            #Subtract the day 20 days before
            sma_sum -= df.iloc[i-sma_length]["Close_VET"]

            #Get the sma and set the sma column
            sma = sma_sum / float(sma_length)
            df["SMA"+str(sma_length)].iat[i] = sma
            
            #Reset standard deviation variables
            std_dev_sum = 0
            
            #Calculate std deviation
            for j in range(20):
                #Find average distance from moving average squared
                std_dev_sum += (df["Close_VET"][i-j] - sma)**2
            
            #Divide sum by num elements in the sum and take the square root
            std_dev = (std_dev_sum / float(sma_length))**0.5
            
            
            #Set the upper and lower bollinger bands 2 std deviations from the average
            df["Upper_Bollinger_Band"].iat[i] = sma + 2 * std_dev
            df["Lower_Bollinger_Band"].iat[i] = sma - 2 * std_dev

In [23]:
#Get the data
vet = yf.Ticker("VET-USD")
btc = yf.Ticker("BTC-USD")

#Get max daily data
hist_vet = vet.history(period="max", interval = "1d")
hist_btc = btc.history(period="max", interval = "1d")

#Drop unecessary cols
hist_vet.drop(["Dividends", "Stock Splits"], axis=1, inplace=True)
hist_btc.drop(["Dividends", "Stock Splits"], axis=1, inplace=True)

#Combine the two dfs into 1
hist = hist_vet.join(hist_btc, lsuffix="_VET", rsuffix="_BTC")

hist.head()

Unnamed: 0_level_0,Open_VET,High_VET,Low_VET,Close_VET,Volume_VET,Open_BTC,High_BTC,Low_BTC,Close_BTC,Volume_BTC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-08-03,0.015041,0.01573,0.014938,0.015307,46250600,7562.140137,7562.140137,7328.649902,7434.390137,4627150000
2018-08-04,0.015218,0.015944,0.012377,0.013333,27059500,7438.669922,7497.490234,6984.069824,7032.850098,4268390000
2018-08-05,0.013428,0.014296,0.012696,0.014081,19746000,7031.080078,7102.77002,6940.700195,7068.47998,3679110000
2018-08-06,0.013963,0.014308,0.01293,0.013424,18774100,7062.939941,7166.549805,6890.540039,6951.799805,3925900000
2018-08-07,0.013315,0.014457,0.012163,0.012899,21022400,6958.319824,7146.560059,6748.240234,6753.120117,4682800000


In [39]:
#Define sma size
sma_length = 20

#Get Bollinger Bands and SMA
calc_sma_and_bollinger_bands(hist, sma_length)

#Clip the front of the df with the NaN vals
hist = hist.iloc[sma_length+1:]
hist

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["SMA"+str(sma_length)] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Upper_Bollinger_Band"] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Lower_Bollinger_Band"] = np.nan


Unnamed: 0_level_0,Open_VET,High_VET,Low_VET,Close_VET,Volume_VET,Open_BTC,High_BTC,Low_BTC,Close_BTC,Volume_BTC,SMA20,Upper_Bollinger_Band,Lower_Bollinger_Band
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2019-01-17,0.004006,0.004323,0.003951,0.004262,11020121,3651.871094,3680.135986,3621.960938,3678.563965,5464420383,0.004132,0.004534,0.003730
2019-01-18,0.004260,0.004286,0.004009,0.004076,6375463,3677.990479,3682.520020,3637.080811,3657.839355,5002961727,0.004128,0.004531,0.003726
2019-01-19,0.004086,0.004256,0.004062,0.004138,5533946,3652.377930,3758.533447,3652.377930,3728.568359,5955691380,0.004124,0.004525,0.003723
2019-01-20,0.004123,0.004243,0.003915,0.004012,5761503,3725.446045,3743.387939,3583.019531,3601.013672,5582489560,0.004126,0.004524,0.003728
2019-01-21,0.004019,0.004234,0.004017,0.004130,6977052,3600.372803,3608.840820,3558.537109,3576.032471,5004347059,0.004125,0.004523,0.003728
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-08,0.155279,0.175926,0.151907,0.175682,1212996945,63344.066406,67673.742188,63344.066406,67566.828125,41125608330,0.140236,0.166977,0.113496
2021-11-09,0.175981,0.187511,0.170706,0.171228,1471849814,67549.734375,68530.335938,66382.062500,66971.828125,42357991721,0.142694,0.171277,0.114112
2021-11-10,0.170916,0.182355,0.150294,0.163078,1249491239,66953.335938,68789.625000,63208.113281,64995.230469,48730828378,0.144389,0.173578,0.115199
2021-11-11,0.162841,0.168964,0.158756,0.165503,740858843,64978.890625,65579.015625,64180.488281,64949.960938,35880633236,0.146163,0.175950,0.116376


In [57]:
hist = hist.iloc[:100]

In [92]:
#Define the number of historical days to use as features
num_hist_days = 2

#Create empty list of col names
column_names = []
#Create columns beforehand to use iat later
for j in range(num_hist_days):
        for key in hist.keys():    
            column_names.append("t-" + str(num_hist_days-j) + "_" + str(key))
            
            
# Create empty df to push data into
full_data = pd.DataFrame(columns=column_names, index=range(hist.shape[0]))        

full_data

Unnamed: 0,t-2_Open_VET,t-2_High_VET,t-2_Low_VET,t-2_Close_VET,t-2_Volume_VET,t-2_Open_BTC,t-2_High_BTC,t-2_Low_BTC,t-2_Close_BTC,t-2_Volume_BTC,...,t-1_Close_VET,t-1_Volume_VET,t-1_Open_BTC,t-1_High_BTC,t-1_Low_BTC,t-1_Close_BTC,t-1_Volume_BTC,t-1_SMA20,t-1_Upper_Bollinger_Band,t-1_Lower_Bollinger_Band
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,,,,,,,,,,,...,,,,,,,,,,
96,,,,,,,,,,,...,,,,,,,,,,
97,,,,,,,,,,,...,,,,,,,,,,
98,,,,,,,,,,,...,,,,,,,,,,


In [95]:
#First loop for rows past num_hist_days in original df (row indexer)
for i in range(num_hist_days, hist.shape[0]):
    #Second loop for creating historical features 
    for j in range(num_hist_days):
        #Third loop for copying all columns
        for k in range(len(hist.keys())):    
            full_data.iat[i-num_hist_days, k] = hist.iloc[i-j][hist.keys()[k]]

In [96]:

#TODO: NOT DONE YET, data alignment is still needed look below

full_data

Unnamed: 0,t-2_Open_VET,t-2_High_VET,t-2_Low_VET,t-2_Close_VET,t-2_Volume_VET,t-2_Open_BTC,t-2_High_BTC,t-2_Low_BTC,t-2_Close_BTC,t-2_Volume_BTC,...,t-1_Close_VET,t-1_Volume_VET,t-1_Open_BTC,t-1_High_BTC,t-1_Low_BTC,t-1_Close_BTC,t-1_Volume_BTC,t-1_SMA20,t-1_Upper_Bollinger_Band,t-1_Lower_Bollinger_Band
0,0.00426,0.004286,0.004009,0.004076,6375463.0,3677.990479,3682.52002,3637.080811,3657.839355,5002961727.0,...,,,,,,,,,,
1,0.004086,0.004256,0.004062,0.004138,5533946.0,3652.37793,3758.533447,3652.37793,3728.568359,5955691380.0,...,,,,,,,,,,
2,0.004123,0.004243,0.003915,0.004012,5761503.0,3725.446045,3743.387939,3583.019531,3601.013672,5582489560.0,...,,,,,,,,,,
3,0.004019,0.004234,0.004017,0.00413,6977052.0,3600.372803,3608.84082,3558.537109,3576.032471,5004347059.0,...,,,,,,,,,,
4,0.004129,0.004575,0.004117,0.004568,14820175.0,3575.081299,3620.746582,3539.721436,3604.577148,5313623556.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.006977,0.007092,0.006756,0.006758,14291177.0,5399.365723,5633.802246,5389.408691,5572.362305,15867308108.0,...,,,,,,,,,,
96,0.006758,0.006925,0.006197,0.006437,20123195.0,5571.508301,5642.044434,5418.263184,5464.866699,17048033399.0,...,,,,,,,,,,
97,0.00644,0.006625,0.005946,0.006003,15495653.0,5466.524414,5542.238281,5181.338867,5210.515625,15330283408.0,...,,,,,,,,,,
98,,,,,,,,,,,...,,,,,,,,,,
