### BTC 


In [368]:
import pandas as pd
import numpy as np
import datetime
from cryptocmd import CmcScraper
# Plots
# ==============================================================================
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
plt.style.use('ggplot')
# Bitcoin colors
# ==============================================================================
palette_btc = {'orange': '#f7931a',
               'white' : '#ffffff',
               'gray'  : '#4d4d4d',
               'blue'  : '#0d579b',
               'green' : '#329239'
              }

# Modelling and Forecasting
# ==============================================================================
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import backtesting_forecaster
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error

#Edition IC3 added
#===============================================================================
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score
from sklearn.metrics import jaccard_score
from sklearn.metrics import classification_report, confusion_matrix

%matplotlib inline

In [369]:
# Data download
# ==============================================================================

# Scraper is initialized, symbol, start and end of download are included

coin = 'BTC'
start_time = '01-03-2023'
end_time = '16-09-2023'

scraper = CmcScraper(coin , start_time, end_time)

# Transform collected data into a dataframe
data = scraper.get_dataframe()
cdf = data
data.sort_values(by='Date', ascending=True, inplace=True)

pd.set_option('display.max_columns', None)
display(data)
pd.reset_option('display.max_columns')

Unnamed: 0,Date,Open,High,Low,Close,Volume,Market Cap
198,2023-03-01,23150.929958,23880.632463,23088.626700,23646.549899,2.466284e+10,4.565082e+11
197,2023-03-02,23647.019473,23739.138077,23245.021433,23475.466738,2.038640e+10,4.532266e+11
196,2023-03-03,23476.633731,23479.348094,22213.237594,22362.679332,2.606240e+10,4.317655e+11
195,2023-03-04,22362.923027,22405.176954,22198.981009,22353.350074,1.116601e+10,4.316073e+11
194,2023-03-05,22354.143598,22613.685231,22307.143313,22435.514197,1.331700e+10,4.332162e+11
...,...,...,...,...,...,...,...
4,2023-09-11,25831.715501,25883.947003,24930.297494,25162.655272,1.460001e+10,4.902380e+11
3,2023-09-12,25160.657925,26451.938498,25133.078909,25833.343060,1.865728e+10,5.033280e+11
2,2023-09-13,25837.554963,26376.113469,25781.123728,26228.324088,1.307208e+10,5.110474e+11
1,2023-09-14,26228.277860,26774.623357,26171.451852,26539.674442,1.381136e+10,5.171408e+11


In [370]:
# Data preparation
# ==============================================================================
data['date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d %H:%M:%S')
data = data.loc[:, ['date', 'Open', 'Close', 'High', 'Low']]
data = data.rename({'Open': 'open', 'Close': 'close', 'High': 'high', 'Low': 'low'}, 
                    axis=1)
data = data.set_index('date')
data = data.asfreq('D')
data = data.sort_index()

In [371]:
# Dict with Bitcoin halvings info
# ==============================================================================
btc_halving = {'halving'              : [0, 1 , 2, 3, 4],
               'date'                 : ['2009-01-03', '2012-11-28', 
                                         '2016-07-09', '2020-05-11', np.nan],
               'reward'               : [50, 25, 12.5, 6.25, 3.125],
               'halving_block_number' : [0, 210000, 420000 ,630000, 840000]
              }

In [372]:
# Next halving calculation
# The remaining blocks according to the coinmarketcap.com website for 
# the next halving as of 2022-01-14 are taken as a starting point
# ==============================================================================
remaining_blocks = 121400
blocks_per_day = 144

days = remaining_blocks / blocks_per_day

next_halving = pd.to_datetime('2022-01-14', format='%Y-%m-%d') + datetime.timedelta(days=days)
next_halving = next_halving.replace(microsecond=0, second=0, minute=0, hour=0)
next_halving = next_halving.strftime('%Y-%m-%d')

btc_halving['date'][-1] = next_halving


In [373]:
# Include rewards and countdown to next halving in dataset
# ==============================================================================
data['reward'] = np.nan
data['countdown_halving'] = np.nan

for i in range(len(btc_halving['halving'])-1):
     
    # Start and end date of each halving
    if btc_halving['date'][i] < data.index.min().strftime('%Y-%m-%d'):
        start_date = data.index.min().strftime('%Y-%m-%d')
    else:
        start_date = btc_halving['date'][i]
        
    end_date = btc_halving['date'][i+1]
    mask = (data.index >= start_date) & (data.index < end_date)
        
    # Fill column 'reward' with mining rewards
    data.loc[mask, 'reward'] = btc_halving['reward'][i]
    
    # Fill column 'countdown_halving' with remaining days
    time_to_next_halving = pd.to_datetime(end_date) - pd.to_datetime(start_date)
    
    data.loc[mask, 'countdown_halving'] = np.arange(time_to_next_halving.days)[::-1][:mask.sum()]

In [374]:
# Check that the data have been created correctly
# ==============================================================================
print('Second halving:', btc_halving['date'][2])
#display(data.loc['2016-07-08':'2016-07-09'])
display(data.loc[start_time:'2016-07-09'])
print('')
print('Third halving:', btc_halving['date'][3])
display(data.loc['2020-05-10':'2020-05-11'])
print('')
print('Next halving:', btc_halving['date'][4])
data.tail(2)

Second halving: 2016-07-09


Unnamed: 0_level_0,open,close,high,low,reward,countdown_halving
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1



Third halving: 2020-05-11


Unnamed: 0_level_0,open,close,high,low,reward,countdown_halving
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1



Next halving: 2024-05-06


Unnamed: 0_level_0,open,close,high,low,reward,countdown_halving
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-09-14,26228.27786,26539.674442,26774.623357,26171.451852,6.25,234.0
2023-09-15,26533.818838,26608.693365,26840.497925,26240.700625,6.25,233.0


In [375]:
# Interactive candlestick chart with Plotly
# ==============================================================================
candlestick = go.Candlestick(
                  x     = data.index,
                  open  = data.open,
                  close = data.close,
                  low   = data.low,
                  high  = data.high,
              ) 

fig = go.Figure(data=[candlestick])

fig.update_layout(
    width       = 900,
    height      = 450,
    title       = dict(text='<b>%s/USD Chart</b>' %coin, font=dict(size=30)),
    yaxis_title = dict(text='Price (USD)', font=dict(size=15)),
    margin      = dict(l=10, r=20, t=80, b=20),
    shapes      = [dict(x0=btc_halving['date'][2], x1=btc_halving['date'][2], 
                        y0=0, y1=1, xref='x', yref='paper', line_width=2),
                   dict(x0=btc_halving['date'][3], x1=btc_halving['date'][3], 
                        y0=0, y1=1, xref='x', yref='paper', line_width=2),
                   dict(x0=btc_halving['date'][4], x1=btc_halving['date'][4], 
                        y0=0, y1=1, xref='x', yref='paper', line_width=2)
                  ],
    annotations = [dict(x=btc_halving['date'][2], y=1, xref='x', yref='paper',
                      showarrow=False, xanchor='left', text='Second halving'),
                   dict(x=btc_halving['date'][3], y=1, xref='x', yref='paper',
                      showarrow=False, xanchor='left', text='Third halving'),
                   dict(x=btc_halving['date'][4], y=1, xref='x', yref='paper',
                      showarrow=False, xanchor='left', text='Fourth halving')
                  ],
    xaxis_rangeslider_visible = False,
)

fig.show()

In [376]:
#print(cdf.head())

#X = np.asarray(cdf[['Date','Open','High','Low','Volume','Market Cap']])
#y = np.asarray(cdf[['Close']])

X = cdf[['Date','Open','High','Low','Volume','Market Cap']]

Date_time = np.asarray(X[['Date']])

#print(Date_time)
normal_time = []

for i in Date_time:
    item = str(i)
    tival = [int(item[2:6:]) ,  int(item[7:9:]) , int(item[10:12:])]
    normal_time.append(tival)

print(normal_time)
print(normal_time[0])
print(normal_time[-1])

import datetime

d1 = datetime.date(normal_time[0][0], normal_time[0][1], normal_time[0][2])
d2 = datetime.date(normal_time[-1][0], normal_time[-1][1], normal_time[-1][2])
x_days = int((d2 - d1).days + 1)

date_data = np.arange(x_days)
print(date_data)

cdf = cdf.drop('Date', axis=1)
cdf['Date'] = np.array(date_data)

columns_titles = ['Date','Open','High','Low','Close','Volume','Market Cap']
cdf=cdf.reindex(columns=columns_titles)

print(cdf.head())

[[2023, 3, 1], [2023, 3, 2], [2023, 3, 3], [2023, 3, 4], [2023, 3, 5], [2023, 3, 6], [2023, 3, 7], [2023, 3, 8], [2023, 3, 9], [2023, 3, 10], [2023, 3, 11], [2023, 3, 12], [2023, 3, 13], [2023, 3, 14], [2023, 3, 15], [2023, 3, 16], [2023, 3, 17], [2023, 3, 18], [2023, 3, 19], [2023, 3, 20], [2023, 3, 21], [2023, 3, 22], [2023, 3, 23], [2023, 3, 24], [2023, 3, 25], [2023, 3, 26], [2023, 3, 27], [2023, 3, 28], [2023, 3, 29], [2023, 3, 30], [2023, 3, 31], [2023, 4, 1], [2023, 4, 2], [2023, 4, 3], [2023, 4, 4], [2023, 4, 5], [2023, 4, 6], [2023, 4, 7], [2023, 4, 8], [2023, 4, 9], [2023, 4, 10], [2023, 4, 11], [2023, 4, 12], [2023, 4, 13], [2023, 4, 14], [2023, 4, 15], [2023, 4, 16], [2023, 4, 17], [2023, 4, 18], [2023, 4, 19], [2023, 4, 20], [2023, 4, 21], [2023, 4, 22], [2023, 4, 23], [2023, 4, 24], [2023, 4, 25], [2023, 4, 26], [2023, 4, 27], [2023, 4, 28], [2023, 4, 29], [2023, 4, 30], [2023, 5, 1], [2023, 5, 2], [2023, 5, 3], [2023, 5, 4], [2023, 5, 5], [2023, 5, 6], [2023, 5, 7], [202

In [377]:
#print(cdf[['Open']])
#print(cdf[['Close']])
#print(cdf.head())

In [378]:
close_data = np.asarray(cdf[['Close']]).astype('float')
open_data = np.asarray(cdf[['Open']]).astype('float')

Cross = []

for n , i in enumerate(open_data):
    if int(open_data[n]) >= int(close_data[n]):
        res = 0
    else :
        res = 1
    Cross.append(res)

cdf['Cross'] = np.array(Cross)

print(cdf.head())

     Date          Open          High           Low         Close  \
198     0  23150.929958  23880.632463  23088.626700  23646.549899   
197     1  23647.019473  23739.138077  23245.021433  23475.466738   
196     2  23476.633731  23479.348094  22213.237594  22362.679332   
195     3  22362.923027  22405.176954  22198.981009  22353.350074   
194     4  22354.143598  22613.685231  22307.143313  22435.514197   

           Volume    Market Cap  Cross  
198  2.466284e+10  4.565082e+11      1  
197  2.038640e+10  4.532266e+11      0  
196  2.606240e+10  4.317655e+11      0  
195  1.116601e+10  4.316073e+11      0  
194  1.331700e+10  4.332162e+11      1  



Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)



In [379]:
x_data = np.asarray(cdf[['Date','Open','Close','High','Low','Volume','Market Cap']])
y_data = np.asarray(cdf[['Cross']])

In [380]:
min_max_scaler = MinMaxScaler()

sc_data_x = min_max_scaler.fit_transform(x_data)
sc_data_y = min_max_scaler.fit_transform(y_data)

In [381]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( sc_data_x, sc_data_y, test_size=0.2)
print ('Train set:', X_train.shape,  y_train.shape)
print ('Test set:', X_test.shape,  y_test.shape)

Train set: (159, 7) (159, 1)
Test set: (40, 7) (40, 1)


In [382]:
from sklearn import svm
#'linear','poly','rbf','sigmoid'
kernel = 'poly'
clf = svm.SVC(kernel=kernel)
clf.fit(X_train, y_train) 
yhat = clf.predict(X_test)


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



In [383]:
svm_yaht = yhat

cnf_matrix = confusion_matrix(y_test, yhat, labels=[0,1])
np.set_printoptions(precision=2)

In [384]:
f1 = f1_score(y_test,yhat) 
js = jaccard_score(y_test,yhat)

svm = [f1 , js] 
print('SVM F1 score : ' +  str(svm[0]) , '\n' + 'SVM Jaccard score : ' + str(svm[1]))

SVM F1 score : 0.962962962962963 
SVM Jaccard score : 0.9285714285714286


In [385]:
print(cdf.head())
print(X_test)
#clf.predict()

     Date          Open          High           Low         Close  \
198     0  23150.929958  23880.632463  23088.626700  23646.549899   
197     1  23647.019473  23739.138077  23245.021433  23475.466738   
196     2  23476.633731  23479.348094  22213.237594  22362.679332   
195     3  22362.923027  22405.176954  22198.981009  22353.350074   
194     4  22354.143598  22613.685231  22307.143313  22435.514197   

           Volume    Market Cap  Cross  
198  2.466284e+10  4.565082e+11      1  
197  2.038640e+10  4.532266e+11      0  
196  2.606240e+10  4.317655e+11      0  
195  1.116601e+10  4.316073e+11      0  
194  1.331700e+10  4.332162e+11      1  
[[0.64 0.94 0.91 0.92 0.96 0.14 0.91]
 [0.04 0.14 0.02 0.13 0.05 0.51 0.02]
 [0.16 0.73 0.71 0.71 0.75 0.14 0.7 ]
 [0.23 0.9  0.9  0.89 0.95 0.15 0.89]
 [0.37 0.59 0.58 0.58 0.64 0.09 0.58]
 [0.33 0.83 0.77 0.83 0.8  0.21 0.77]
 [0.21 0.84 0.89 0.89 0.9  0.3  0.88]
 [0.41 0.58 0.59 0.58 0.63 0.11 0.59]
 [0.42 0.59 0.62 0.62 0.65 0.17 0.6

In [386]:
"""scraper = CmcScraper(coin ,'16-09-2023')

# Transform collected data into a dataframe
data = scraper.get_dataframe()
cdf = data
data.sort_values(by='Date', ascending=True, inplace=True)

pd.set_option('display.max_columns', None)
display(data)
pd.reset_option('display.max_columns')"""

"scraper = CmcScraper(coin ,'16-09-2023')\n\n# Transform collected data into a dataframe\ndata = scraper.get_dataframe()\ncdf = data\ndata.sort_values(by='Date', ascending=True, inplace=True)\n\npd.set_option('display.max_columns', None)\ndisplay(data)\npd.reset_option('display.max_columns')"

In [389]:
print(cdf.tail())
print(cdf.head())
last = cdf.tail(1)[['Date','Open','Close','High','Low','Volume','Market Cap']]
print(last)

   Date          Open          High           Low         Close        Volume  \
4   194  25831.715501  25883.947003  24930.297494  25162.655272  1.460001e+10   
3   195  25160.657925  26451.938498  25133.078909  25833.343060  1.865728e+10   
2   196  25837.554963  26376.113469  25781.123728  26228.324088  1.307208e+10   
1   197  26228.277860  26774.623357  26171.451852  26539.674442  1.381136e+10   
0   198  26533.818838  26840.497925  26240.700625  26608.693365  1.147974e+10   

     Market Cap  Cross  
4  4.902380e+11      0  
3  5.033280e+11      1  
2  5.110474e+11      1  
1  5.171408e+11      1  
0  5.185173e+11      1  
     Date          Open          High           Low         Close  \
198     0  23150.929958  23880.632463  23088.626700  23646.549899   
197     1  23647.019473  23739.138077  23245.021433  23475.466738   
196     2  23476.633731  23479.348094  22213.237594  22362.679332   
195     3  22362.923027  22405.176954  22198.981009  22353.350074   
194     4  22354.1