In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import Utils

In [2]:
# Load the BTC-USD price history from the CSV file in the working directory.
btc = pd.read_csv(filepath_or_buffer="BTC-USD.csv")

In [3]:
# Show the freshly loaded frame as this cell's output.
btc

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2017-01-11,908.114990,919.447998,762.765015,777.757019,777.757019,310928992
1,2017-01-12,775.177979,826.245972,755.755981,804.833984,804.833984,222326000
2,2017-01-13,803.737000,829.000977,780.002991,823.984009,823.984009,168968000
3,2017-01-14,825.142029,835.085022,812.455994,818.411987,818.411987,93063296
4,2017-01-15,818.142029,823.307007,812.870972,821.797974,821.797974,71013600
...,...,...,...,...,...,...,...
1822,2022-01-07,43153.570313,43153.570313,41077.445313,41557.902344,41557.902344,84196607520
1823,2022-01-08,41561.464844,42228.941406,40672.277344,41733.941406,41733.941406,28066355845
1824,2022-01-09,41734.726563,42663.949219,41338.160156,41911.601563,41911.601563,21294384372
1825,2022-01-10,41910.230469,42199.484375,39796.570313,41821.261719,41821.261719,32104232331


In [4]:
# Prepare dataframe for transformations: keep only the columns the features need,
# then order the rows newest-first (Date descending) with a fresh 0..n-1 index.

btc = (
    btc.loc[:, ['Date', 'Adj Close', 'Volume']]
    .sort_values('Date', ascending=False)
    .reset_index(drop=True)
)

In [17]:
# Feature Engineering
#
# Assumes btc is still sorted newest-first (Date descending), as the preparation cell
# leaves it, so that shift(-1) reads the row below, i.e. the previous calendar day.

MOVE_THRESHOLD = 0.02  # a day-over-day move of at least +/-2% counts as a gain / loss day

btc['Close_-1'] = btc['Adj Close'].shift(-1)  # Previous day's Adj Close

# Day-over-day change expressed as a fraction of the previous close (0.02 == 2%)
btc['Delta'] = (btc['Adj Close'] - btc['Close_-1']) / btc['Close_-1']

# Bucket each day's move: 1.0 = gain of at least the threshold, -1.0 = loss of at least
# the threshold, 0.0 = anything in between. A NaN Delta (no previous close) fails both
# comparisons and therefore lands in the 0.0 bucket.
# Computed column-wise in one pass instead of growing a helper frame one row at a time.
btc['Delta_Class_Numeric'] = np.select(
    [btc['Delta'] >= MOVE_THRESHOLD, btc['Delta'] <= -MOVE_THRESHOLD],
    [1.0, -1.0],
    default=0.0,
)

btc['Close_-1_Delta'] = btc['Delta'].shift(-1)  # Previous day's Delta

In [19]:
# Create a new column that indicates if there was a run of 3 consecutive days of gains.
#
# For row x the flag looks at 'Close_-1_Delta' on rows x, x+1 and x+2. Assuming btc is
# sorted newest-first here, those are the three most recent deltas known before day x.
# Computed with shifted boolean masks instead of a row-by-row loop.

is_gain = btc['Close_-1_Delta'] > 0  # NaN compares as False

gain_run = (
    is_gain
    & is_gain.shift(-1, fill_value=False)   # row x + 1
    & is_gain.shift(-2, fill_value=False)   # row x + 2
).astype(float)                             # 1.0 = run of gains, 0.0 = no run

# The last three rows are left undefined (NaN), exactly as before: the loop this replaces
# stopped at len(btc) - 3 and never assigned a flag to them.
gain_run.iloc[-3:] = np.nan

btc['Gain_Run'] = gain_run

In [20]:
# Create a new column that indicates if there was a run of 3 consecutive days of losses.
# NOTE: despite its name, 'Gain_Loss' flags loss runs; the name is kept unchanged so that
# any code that already reads this column keeps working.
#
# For row x the flag looks at 'Close_-1_Delta' on rows x, x+1 and x+2. Assuming btc is
# sorted newest-first here, those are the three most recent deltas known before day x.
# Computed with shifted boolean masks instead of a row-by-row loop.

is_loss = btc['Close_-1_Delta'] < 0  # NaN compares as False

loss_run = (
    is_loss
    & is_loss.shift(-1, fill_value=False)   # row x + 1
    & is_loss.shift(-2, fill_value=False)   # row x + 2
).astype(float)                             # 1.0 = run of losses, 0.0 = no run

# The last three rows are left undefined (NaN), exactly as before: the loop this replaces
# stopped at len(btc) - 3 and never assigned a flag to them.
loss_run.iloc[-3:] = np.nan

btc['Gain_Loss'] = loss_run

In [21]:
# Flip the frame back to oldest-first (Date ascending) and renumber the rows from 0.

btc = btc.sort_values('Date').reset_index(drop=True)

In [22]:
# Feature engineering part 2 - columns that depend on the rows being in date-ascending
# order, because each rolling window covers the current row and the rows above it.

prior_close = btc['Close_-1']
prior_delta = btc['Close_-1_Delta']

# Rolling statistics: (new column, source series, window length in rows, statistic)
for column, series, window, stat in [
    ('7-Day_Delta_AVG', prior_delta, 7, 'mean'),
    ('30-Day_Delta_AVG', prior_delta, 30, 'mean'),
    ('7-Day_High', prior_close, 7, 'max'),
    ('30-Day_High', prior_close, 30, 'max'),
    ('365-Day_High', prior_close, 365, 'max'),
    ('7-Day_Low', prior_close, 7, 'min'),
    ('30-Day_Low', prior_close, 30, 'min'),
    ('365-Day_Low', prior_close, 365, 'min'),
]:
    btc[column] = getattr(series.rolling(window), stat)()

# Relative distance of the previous close from the 7-day extremes
for column, reference in [
    ('Diff_from_7-Day_High', '7-Day_High'),
    ('Diff_from_7-Day_Low', '7-Day_Low'),
]:
    btc[column] = (prior_close - btc[reference]) / btc[reference]

# Moving averages of the previous close
for column, window in [
    ('7-Day_AVG_Price', 7),
    ('30-Day_AVG_Price', 30),
    ('50-Day_AVG_Price', 50),
    ('100-Day_AVG_Price', 100),
]:
    btc[column] = prior_close.rolling(window).mean()

# Relative gap between a shorter and a longer moving average: (short - long) / long
for column, shorter, longer in [
    ('7-Day_AG_to_30-Day_AVG', '7-Day_AVG_Price', '30-Day_AVG_Price'),
    ('30-Day_AG_to_50-Day_AVG', '30-Day_AVG_Price', '50-Day_AVG_Price'),
    ('50-Day_AG_to_100-Day_AVG', '50-Day_AVG_Price', '100-Day_AVG_Price'),
]:
    btc[column] = (btc[shorter] - btc[longer]) / btc[longer]

# 5-row rolling mean of each moving-average gap
for column, gap_column in [
    ('7_to_30_5-Day_AVG', '7-Day_AG_to_30-Day_AVG'),
    ('30_to_50_5-Day_AVG', '30-Day_AG_to_50-Day_AVG'),
    ('50_to_100_5-Day_AVG', '50-Day_AG_to_100-Day_AVG'),
]:
    btc[column] = btc[gap_column].rolling(5).mean()

In [24]:
# Return to newest-first order (Date descending) with a fresh 0..n-1 index.
btc = (
    btc
    .sort_values('Date', ascending=False)
    .reset_index(drop=True)
)

In [25]:
# Show the frame, including the engineered columns, as this cell's output.
btc

Unnamed: 0,Date,Adj Close,Volume,Close_-1,Delta,Delta_Class_Numeric,Close_-1_Delta,Gain_Run,Gain_Loss,7-Day_Delta_AVG,...,7-Day_AVG_Price,30-Day_AVG_Price,50-Day_AVG_Price,100-Day_AVG_Price,7-Day_AG_to_30-Day_AVG,30-Day_AG_to_50-Day_AVG,50-Day_AG_to_100-Day_AVG,7_to_30_5-Day_AVG,30_to_50_5-Day_AVG,50_to_100_5-Day_AVG
0,2022-01-11,41474.167969,31108497408,41821.261719,-0.008299,0.0,-0.002155,0.0,0.0,-0.014710,...,42807.459264,46878.698828,49523.212188,54863.891680,-0.086846,-0.053399,-0.097344,-0.064477,-0.057238,-0.086561
1,2022-01-10,41821.261719,32104232331,41911.601563,-0.002155,0.0,0.004257,0.0,0.0,-0.017079,...,43469.867188,47130.073698,49861.396485,54922.793945,-0.077662,-0.054778,-0.092155,-0.054211,-0.058829,-0.080869
2,2022-01-09,41911.601563,21294384372,41733.941406,0.004257,0.0,0.004236,0.0,0.0,-0.018710,...,44246.098214,47307.797136,50217.108360,54984.847344,-0.064719,-0.057935,-0.086710,-0.045013,-0.060447,-0.074962
3,2022-01-08,41733.941406,28066355845,41557.902344,0.004236,0.0,-0.037141,0.0,1.0,-0.015057,...,45096.508371,47505.736459,50544.821094,55005.416875,-0.050714,-0.060127,-0.081094,-0.037869,-0.062264,-0.068767
4,2022-01-07,41557.902344,84196607520,43160.929688,-0.037141,-1.0,-0.009366,0.0,1.0,-0.012390,...,45774.871652,47803.966276,50852.505781,55005.481485,-0.042446,-0.059949,-0.075501,-0.031427,-0.064649,-0.062182
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1822,2017-01-15,821.797974,71013600,818.411987,0.004137,0.0,-0.006762,0.0,0.0,,...,,,,,,,,,,
1823,2017-01-14,818.411987,93063296,823.984009,-0.006762,0.0,0.023794,0.0,0.0,,...,,,,,,,,,,
1824,2017-01-13,823.984009,168968000,804.833984,0.023794,1.0,0.034814,,,,...,,,,,,,,,,
1825,2017-01-12,804.833984,222326000,777.757019,0.034814,1.0,,,,,...,,,,,,,,,,
