In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression

In [16]:


crypto_df = pd.read_csv(
    Path("../Resources/BitcoinData.csv"),
    index_col='Date', 
    infer_datetime_format=True, 
    parse_dates=True
)


crypto_df.head()



Unnamed: 0_level_0,Open,High,Low,Close,Volume,Market Cap
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-10-30,20809.80881,20910.0964,20520.35815,20621.7479,64733980000.0,397080000000.0
2022-10-29,20595.37894,21003.55731,20563.937,20811.98837,78150630000.0,398209800000.0
2022-10-28,20292.40585,20733.95162,20074.037,20608.2579,86808560000.0,390706400000.0
2022-10-27,20766.80989,20860.75336,20249.7547,20283.97975,97985670000.0,395221800000.0
2022-10-26,20081.82064,20943.08464,20065.93368,20754.02397,111641900000.0,393580900000.0


In [6]:
#crypto_df = crypto_df.sort_values(by=['Date'], ascending = True)

In [7]:
crypto_df.fillna(0, inplace=True)

lags = 5

for i in range(0, lags):
    crypto_df['Lag_' + str(i+1)] = crypto_df['Close'].shift(i+1)
    
crypto_df['Returns'] = crypto_df['Close'].pct_change()
    
crypto_df.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Market Cap,Lag_1,Lag_2,Lag_3,Lag_4,Lag_5,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2022-10-30,20809.80881,20910.0964,20520.35815,20621.7479,64733980000.0,397080000000.0,,,,,,
2022-10-29,20595.37894,21003.55731,20563.937,20811.98837,78150630000.0,398209800000.0,20621.7479,,,,,0.009225
2022-10-28,20292.40585,20733.95162,20074.037,20608.2579,86808560000.0,390706400000.0,20811.98837,20621.7479,,,,-0.009789
2022-10-27,20766.80989,20860.75336,20249.7547,20283.97975,97985670000.0,395221800000.0,20608.2579,20811.98837,20621.7479,,,-0.015735
2022-10-26,20081.82064,20943.08464,20065.93368,20754.02397,111641900000.0,393580900000.0,20283.97975,20608.2579,20811.98837,20621.7479,,0.023173
2022-10-25,19330.2129,20388.4725,19252.5634,20110.87556,62423040000.0,375485700000.0,20754.02397,20283.97975,20608.2579,20811.98837,20621.7479,-0.030989
2022-10-24,19564.5146,19578.3784,19191.1109,19335.3937,57568930000.0,370900500000.0,20110.87556,20754.02397,20283.97975,20608.2579,20811.98837,-0.03856
2022-10-23,19203.7979,19669.50723,19112.1147,19548.7249,29334480000.0,369183600000.0,19335.3937,20110.87556,20754.02397,20283.97975,20608.2579,0.011033
2022-10-22,19162.5376,19248.5639,19119.8239,19202.85297,51155990000.0,367465100000.0,19548.7249,19335.3937,20110.87556,20754.02397,20283.97975,-0.017693
2022-10-21,19042.7405,19231.1974,18743.2771,19166.3286,56719070000.0,365349700000.0,19202.85297,19548.7249,19335.3937,20110.87556,20754.02397,-0.001902


In [8]:
crypto_df['Direction'] = np.sign(crypto_df['Returns'])

crypto_df = crypto_df.dropna()
crypto_df = crypto_df.drop(crypto_df.index[0:5])

##remove rows with any values that are not infinite
crypto_df = crypto_df[np.isfinite(crypto_df).all(1)]

crypto_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Market Cap,Lag_1,Lag_2,Lag_3,Lag_4,Lag_5,Returns,Direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-10-20,19123.62910,19328.00080,18942.90900,19042.93090,5.044882e+10,3.664468e+11,19166.3286,19202.85297,19548.72490,19335.39370,20110.87556,-0.006438,-1.0
2022-10-19,19329.17630,19337.07970,19115.23310,19124.98920,6.107692e+10,3.682275e+11,19042.9309,19166.32860,19202.85297,19548.72490,19335.39370,0.004309,1.0
2022-10-18,19546.01540,19685.47556,19116.74420,19351.06970,6.274522e+10,3.732188e+11,19124.9892,19042.93090,19166.32860,19202.85297,19548.72490,0.011821,1.0
2022-10-17,19256.40623,19650.75750,19164.44404,19558.11170,4.874478e+10,3.717332e+11,19351.0697,19124.98920,19042.93090,19166.32860,19202.85297,0.010699,1.0
2022-10-16,19066.05740,19397.58735,19066.05740,19259.62065,3.143498e+10,3.672814e+11,19558.1117,19351.06970,19124.98920,19042.93090,19166.32860,-0.015262,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012-11-04,10.80010,10.80010,10.80010,10.80010,0.000000e+00,0.000000e+00,10.7480,10.89900,10.92000,10.92500,10.81500,0.004847,1.0
2012-11-03,10.64250,10.64250,10.64250,10.64250,0.000000e+00,0.000000e+00,10.8001,10.74800,10.89900,10.92000,10.92500,-0.014592,-1.0
2012-11-02,10.46880,10.46880,10.46880,10.46880,0.000000e+00,0.000000e+00,10.6425,10.80010,10.74800,10.89900,10.92000,-0.016321,-1.0
2012-11-01,10.57000,10.57000,10.57000,10.57000,0.000000e+00,0.000000e+00,10.4688,10.64250,10.80010,10.74800,10.89900,0.009667,1.0


In [9]:
X = crypto_df[['Lag_1', 'Lag_2']]
y = crypto_df['Direction']

crypto_df.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Market Cap,Lag_1,Lag_2,Lag_3,Lag_4,Lag_5,Returns,Direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-10-20,19123.6291,19328.0008,18942.909,19042.9309,50448820000.0,366446800000.0,19166.3286,19202.85297,19548.7249,19335.3937,20110.87556,-0.006438,-1.0
2022-10-19,19329.1763,19337.0797,19115.2331,19124.9892,61076920000.0,368227500000.0,19042.9309,19166.3286,19202.85297,19548.7249,19335.3937,0.004309,1.0
2022-10-18,19546.0154,19685.47556,19116.7442,19351.0697,62745220000.0,373218800000.0,19124.9892,19042.9309,19166.3286,19202.85297,19548.7249,0.011821,1.0
2022-10-17,19256.40623,19650.7575,19164.44404,19558.1117,48744780000.0,371733200000.0,19351.0697,19124.9892,19042.9309,19166.3286,19202.85297,0.010699,1.0
2022-10-16,19066.0574,19397.58735,19066.0574,19259.62065,31434980000.0,367281400000.0,19558.1117,19351.0697,19124.9892,19042.9309,19166.3286,-0.015262,-1.0
2022-10-15,19182.2476,19219.5647,19026.2672,19054.8847,55450160000.0,366826900000.0,19259.62065,19558.1117,19351.0697,19124.9892,19042.9309,-0.01063,-1.0
2022-10-14,19380.65578,19891.3268,19096.9897,19191.0274,94607310000.0,374023200000.0,19054.8847,19259.62065,19558.1117,19351.0697,19124.9892,0.007145,1.0
2022-10-13,19155.03423,19454.6764,18290.6714,19380.6893,66306380000.0,364703100000.0,19191.0274,19054.8847,19259.62065,19558.1117,19351.0697,0.009883,1.0
2022-10-12,19060.6544,19206.1014,19024.1102,19157.97735,57229000000.0,366398500000.0,19380.6893,19191.0274,19054.8847,19259.62065,19558.1117,-0.011491,-1.0
2022-10-11,19133.07734,19256.37998,18889.95635,19045.5416,58945070000.0,365469500000.0,19157.97735,19380.6893,19191.0274,19054.8847,19259.62065,-0.005869,-1.0


In [10]:
start_test = pd.Timestamp("2012-11-03")

X_train = X[X.index < start_test]
X_test = X[X.index >= start_test]
y_train = y[y.index < start_test]
y_test = y[y.index >= start_test]



In [11]:
lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

In [12]:
lr_accuracy = accuracy_score(y_test, y_pred)
lr_accuracy

0.5424567188788129

In [13]:
pred = (1.0 + (y_pred == y_test))/2.0
hit_rate = np.mean(pred)
print('Logistic Regression - Hit Rate of Bitcoin: {:.4f}'.format(hit_rate))

Logistic Regression - Hit Rate of Bitcoin: 0.7712
