# Bitcoin Movement prediction

## Building Daily Dataset

In [84]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from pytrends.request import TrendReq
from pytrends import dailydata

import warnings
warnings.filterwarnings("ignore")

### Getting Crypto Data

In [85]:
# Download the full available daily history (period='max', interval='1d')
# for each ticker through yfinance. BTC is the prediction target; the other
# four frames are used further down as additional features.
btc = yf.download(tickers='BTC-USD', period = 'max', interval = '1d')
eth = yf.download(tickers='ETH-USD', period = 'max', interval = '1d')
bnb = yf.download(tickers='BNB-USD', period = 'max', interval = '1d')
ada = yf.download(tickers='ADA-USD', period = 'max', interval = '1d')
lite = yf.download(tickers='LTC-USD', period = 'max', interval = '1d') 

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [86]:
# Preview the first three rows of the raw BTC download.
btc.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200
2014-09-19,424.102997,427.834991,384.532013,394.79599,394.79599,37919700


### Feature Engineering

In [87]:
def prep(df, start='2017-01-01'):
    """Build the feature/target table from a daily price frame.

    The input frame is copied first, so the caller's object is never
    modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Daily prices indexed by a ``DatetimeIndex`` (the index is used for
        ``Month``/``DayOfW`` and for the date slice). Must contain ``Open``
        and ``Close`` columns.
    start : str or datetime-like, default '2017-01-01'
        First date (inclusive) kept in the result.

    Returns
    -------
    pandas.DataFrame
        Columns ``Close``, ``Return``, ``Return-7``, ``Return+1``,
        ``Return-30``, ``target_multi``, ``target_bin``, ``Month``,
        ``DayOfW``; rows containing any NaN are dropped.

        * ``Return``      - same-day open-to-close return, in percent.
        * ``Return-7/-30``- close-to-close return over the past 7/30 rows.
        * ``Return+1``    - next row's close-to-close return (the quantity
          the targets are derived from).
        * ``target_multi``- 1 (buy), -1 (sell) or 0 (keep), depending on
          whether ``Return+1`` is beyond +/- half of the 30-row mean of
          ``|Return|``.
        * ``target_bin``  - 1 if ``Return+1`` > 0, else 0.
    """
    # Work on a copy: the original version added helper columns to the
    # caller's frame as a side effect.
    df = df.copy()

    close = df['Close']
    next_close = close.shift(periods=-1)
    close_7_ago = close.shift(periods=7)
    close_30_ago = close.shift(periods=30)

    df['Return+1'] = ((next_close / close - 1) * 100).round(4)
    df['Return-7'] = ((close / close_7_ago - 1) * 100).round(4)
    df['Return-30'] = ((close / close_30_ago - 1) * 100).round(4)
    df['Return'] = ((close / df['Open'] - 1) * 100).round(4)

    # Half of the 30-row average absolute daily move is the buy/sell band.
    half_band = df['Return'].abs().rolling(30).mean() / 2
    next_return = df['Return+1']

    # Rows where the band or the next return is NaN stay NaN and are
    # removed by dropna() below. Assignment order (buy, sell, keep) is the
    # same as before so any overlap resolves identically.
    df['target_multi'] = np.nan
    df.loc[next_return >= half_band, 'target_multi'] = 1      # Buy
    df.loc[next_return <= -half_band, 'target_multi'] = -1    # Sell
    # Explicit strict comparisons instead of between(..., inclusive=False):
    # the boolean form of `inclusive` is not accepted by newer pandas.
    inside_band = (next_return > -half_band) & (next_return < half_band)
    df.loc[inside_band, 'target_multi'] = 0                   # Keep

    df['target_bin'] = np.nan
    df.loc[next_return <= 0, 'target_bin'] = 0
    df.loc[next_return > 0, 'target_bin'] = 1

    df['Month'] = df.index.month
    df['DayOfW'] = df.index.dayofweek

    df = df[['Close', 'Return', 'Return-7', 'Return+1', 'Return-30',
             'target_multi', 'target_bin', 'Month', 'DayOfW']]
    df = df.dropna()

    return df.loc[start:]

In [88]:
# Build the BTC feature/target table and preview its first rows.
data = prep(btc)
data.head(5)

Unnamed: 0_level_0,Close,Return,Return-7,Return+1,Return-30,target_multi,target_bin,Month,DayOfW
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-01-01,998.325012,3.5974,11.3975,2.3464,28.3287,1.0,1.0,1,6
2017-01-02,1021.75,2.3165,12.5759,2.162,32.4961,1.0,1.0,1,0
2017-01-03,1043.839966,2.177,11.8562,10.6233,34.8853,1.0,1.0,1,1
2017-01-04,1154.72998,10.564,18.3221,-12.241,52.1985,-1.0,0.0,1,2
2017-01-05,1013.380005,-12.3927,4.0969,-10.9711,32.6025,-1.0,0.0,1,3


### Computing the daily return for every coin

In [89]:
def return_calc(df, name):
    """Add a ``Return_<name>`` column to ``df`` and return the same frame.

    The new column is the open-to-close return of each row, in percent,
    rounded to four decimals. ``df`` is modified in place.
    """
    column = 'Return_' + name
    open_to_close = df['Close'] / df['Open']
    df[column] = ((open_to_close - 1) * 100).round(4)
    return df

In [90]:
list_df = [eth, bnb, ada, lite]
name_df = ['eth', 'bnb', 'ada', 'lite']
l = []

# Pair every frame with its short name directly instead of tracking a
# separate index counter.
for df, name in zip(list_df, name_df):
    # Adds the 'Return_<name>' column (open-to-close return in percent).
    return_calc(df, name)
    df['Close_' + name] = df['Close']
    # Keep only the two suffixed columns so they can be joined onto the BTC
    # table without name clashes. errors='ignore' tolerates a download that
    # lacks one of the raw columns.
    # NOTE(review): 'Adj Close' presumably depends on the yfinance version
    # and settings - confirm.
    l.append(df.drop(columns=['Open', 'Close', 'High', 'Low', 'Adj Close', 'Volume'],
                     errors='ignore'))

### Joining the Altcoin Returns

In [91]:
# Attach each altcoin frame to the BTC table. DataFrame.join aligns on the
# index and defaults to a left join, so dates missing from an altcoin
# frame show up as NaN.
for df in l:
  data = data.join(df)

In [92]:
# Preview the joined table.
data.head(5)

Unnamed: 0_level_0,Close,Return,Return-7,Return+1,Return-30,target_multi,target_bin,Month,DayOfW,Return_eth,Close_eth,Return_bnb,Close_bnb,Return_ada,Close_ada,Return_lite,Close_lite
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2017-01-01,998.325012,3.5974,11.3975,2.3464,28.3287,1.0,1.0,1,6,,,,,,,4.2149,4.51159
2017-01-02,1021.75,2.3165,12.5759,2.162,32.4961,1.0,1.0,1,0,,,,,,,2.8424,4.64894
2017-01-03,1043.839966,2.177,11.8562,10.6233,34.8853,1.0,1.0,1,1,,,,,,,-0.6375,4.6262
2017-01-04,1154.72998,10.564,18.3221,-12.241,52.1985,-1.0,0.0,1,2,,,,,,,4.4046,4.83785
2017-01-05,1013.380005,-12.3927,4.0969,-10.9711,32.6025,-1.0,0.0,1,3,,,,,,,-12.6496,4.29272


### Adding Google Trends daily

In [93]:
from datetime import date

# Today's day-of-month and month number as plain integers.
day = date.today().day
mo = date.today().month
# Commented-out pytrends download of the daily "bitcoin" trend series:
#daily_trend = dailydata.get_daily_data("bitcoin", start_year=2017, start_mon=1, stop_year=2022, stop_mon=4)

In [94]:
#daily_trend.to_csv("DailyTrends.csv")
# Load the Google Trends series from the local CSV file.
daily_trend = pd.read_csv("DailyTrends.csv")
# read_csv returns the 'date' column as plain strings. Parse it so the index
# is a real DatetimeIndex, like the index of the price table this frame is
# joined onto, instead of relying on pandas to coerce string labels during
# the join.
daily_trend.index = pd.to_datetime(daily_trend["date"])
daily_trend["Trend_Bitcoins"] = daily_trend["bitcoin"]
# Keep only the renamed trend column.
daily_trend = daily_trend.drop(columns=["date", "bitcoin_unscaled", "bitcoin_monthly", "isPartial", "scale", "bitcoin"])
daily_trend

Unnamed: 0_level_0,Trend_Bitcoins
date,Unnamed: 1_level_1
2017-01-01,2.10
2017-01-02,3.65
2017-01-03,3.10
2017-01-04,3.55
2017-01-05,5.00
...,...
2022-04-24,11.05
2022-04-25,13.94
2022-04-26,14.62
2022-04-27,13.94


In [95]:
# Add the Google Trends column (index-aligned left join). Dates without a
# trend value are NaN, as the last rows shown below illustrate.
data = data.join(daily_trend)
data.tail(5)

Unnamed: 0_level_0,Close,Return,Return-7,Return+1,Return-30,target_multi,target_bin,Month,DayOfW,Return_eth,Close_eth,Return_bnb,Close_bnb,Return_ada,Close_ada,Return_lite,Close_lite,Trend_Bitcoins
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2022-06-17,20471.482422,0.4207,-29.6121,-7.1018,-28.7211,-1.0,0.0,6,4,1.7352,1086.519287,2.9484,215.8918,2.5693,0.487392,6.663,47.684914,
2022-06-18,19017.642578,-7.1106,-32.9439,8.0748,-37.2652,1.0,1.0,6,5,-8.5367,993.63678,-8.6985,197.042999,-6.3987,0.456182,-0.9111,47.247257,
2022-06-19,20553.271484,8.1131,-23.2017,0.2251,-29.6139,0.0,1.0,6,6,13.5148,1127.656494,9.1212,214.920532,6.2272,0.484364,14.9989,54.323166,
2022-06-20,20599.537109,0.2246,-8.3952,0.5391,-30.0103,0.0,1.0,6,0,-0.0012,1127.642456,0.9561,216.934265,1.6222,0.492261,-2.6074,52.905594,
2022-06-21,20710.597656,0.5647,-6.7376,-2.6638,-31.7017,-1.0,0.0,6,1,-0.2383,1124.824585,1.1976,219.498627,-2.6542,0.479204,1.1511,53.507679,


#### Computing the rolling correlation between BTC and the other coins

In [96]:
def pearson(df, name, window_size=5):
    """Add rolling correlations between ``Close`` and each ``Close_<x>``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``Close`` column and one ``Close_<x>`` column for
        every ``x`` in ``name``. Modified in place.
    name : iterable
        Suffixes identifying the ``Close_<x>`` columns to correlate with.
    window_size : int, default 5
        Number of rows in each rolling window. The first
        ``window_size - 1`` rows of every new column are NaN.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df``, with one extra ``R_<x>`` column per
        suffix.
    """
    base_close = df['Close']
    for x in name:
        suffix = str(x)
        rolling_close = base_close.rolling(window=window_size, center=False)
        df['R_' + suffix] = rolling_close.corr(df['Close_' + suffix])
    return df

In [97]:
# pearson() adds the R_<name> columns to `data` itself and returns it, so
# `df_corr` and `data` refer to the same frame from here on.
df_corr = pearson(data, name_df)
df_corr.tail(5)

Unnamed: 0_level_0,Close,Return,Return-7,Return+1,Return-30,target_multi,target_bin,Month,DayOfW,Return_eth,...,Close_bnb,Return_ada,Close_ada,Return_lite,Close_lite,Trend_Bitcoins,R_eth,R_bnb,R_ada,R_lite
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-06-17,20471.482422,0.4207,-29.6121,-7.1018,-28.7211,-1.0,0.0,6,4,1.7352,...,215.8918,2.5693,0.487392,6.663,47.684914,,0.989679,0.877011,0.30791,0.137151
2022-06-18,19017.642578,-7.1106,-32.9439,8.0748,-37.2652,1.0,1.0,6,5,-8.5367,...,197.042999,-6.3987,0.456182,-0.9111,47.247257,,0.997017,0.96892,0.831552,0.4075
2022-06-19,20553.271484,8.1131,-23.2017,0.2251,-29.6139,0.0,1.0,6,6,13.5148,...,214.920532,6.2272,0.484364,14.9989,54.323166,,0.979437,0.986642,0.986288,0.352662
2022-06-20,20599.537109,0.2246,-8.3952,0.5391,-30.0103,0.0,1.0,6,0,-0.0012,...,216.934265,1.6222,0.492261,-2.6074,52.905594,,0.931141,0.967812,0.937647,0.401251
2022-06-21,20710.597656,0.5647,-6.7376,-2.6638,-31.7017,-1.0,0.0,6,1,-0.2383,...,219.498627,-2.6542,0.479204,1.1511,53.507679,,0.972212,0.994289,0.912578,0.700935


Drop rows with missing values (NaN)

In [98]:
# Remove every row that has at least one missing value and report the
# shape before and after.
data_droped = df_corr.dropna()
print("Data before drop", data.shape)
print("Data after drop", data_droped.shape)
# NOTE(review): the per-column NaN counts computed on the next line are
# neither printed nor stored.
data.isna().sum()

# Continue with the NaN-free frame.
data = data_droped

Data before drop (1998, 22)
Data after drop (1628, 22)


In [99]:
# Remove every column whose name contains 'Close' (the BTC close and the
# Close_<coin> helper columns).
close_like_columns = data.filter(regex='Close').columns
data = data.drop(columns=close_like_columns)

### Adding Blockchain Data

In [100]:
import os
import functools as ft

In [101]:
# 'Timestamp' is a YYYY-MM-DD string key shared with the blockchain CSVs.
# 'Date' keeps the datetime index as a regular column because the
# column-based merge below does not carry the index over.
data['Timestamp'] = data.index.strftime('%Y-%m-%d')
data['Date'] = data.index

# Named `blockchain_dir` rather than `dir` so the builtin is not shadowed.
blockchain_dir = 'BlockchainData'

# Sort the listing so the merged column order is deterministic, and skip
# hidden entries and sub-directories that pd.read_csv cannot parse.
csv_paths = [
    os.path.join(blockchain_dir, entry)
    for entry in sorted(os.listdir(blockchain_dir))
    if not entry.startswith('.')
]
csv_paths = [path for path in csv_paths if os.path.isfile(path)]
if not csv_paths:
    # Fail with a clear message instead of functools.reduce's
    # "empty iterable" TypeError.
    raise FileNotFoundError("No data files found in '" + blockchain_dir + "'")

dfs = []
for filename in csv_paths:
    temp = pd.read_csv(filename, index_col=None, header=0)
    # Normalise timestamps to day resolution and keep the first row per day.
    temp['Timestamp'] = pd.to_datetime(temp['Timestamp']).dt.strftime('%Y-%m-%d')
    temp = temp.drop_duplicates(subset='Timestamp', keep='first')
    dfs.append(temp)

# Inner-merge all blockchain series on the day key, then left-merge onto the
# price features so days without blockchain data are kept with NaN values.
block_features = ft.reduce(lambda left, right: pd.merge(left, right, on='Timestamp'), dfs)
df_final = data.merge(block_features, how='left', on='Timestamp')
df_final = df_final.drop(columns='Timestamp')

In [102]:
# Display the final merged dataset.
df_final

Unnamed: 0,Return,Return-7,Return+1,Return-30,target_multi,target_bin,Month,DayOfW,Return_eth,Return_bnb,...,Date,avg-block-size,cost-per-transaction,hash-rate,miners-revenue,mvrv,my-wallet-n-users,n-transactions-per-block,nvt,transaction-fees-usd
0,10.4617,-6.5967,1.1626,12.4782,0.0,1.0,11,0,3.1564,10.5248,...,2017-11-13,,,,,,,,,
1,1.1319,-7.1193,10.2444,16.8638,1.0,1.0,11,1,6.5879,-5.7243,...,2017-11-14,,,,,,,,,
2,10.2608,-1.9324,7.6023,27.7692,1.0,1.0,11,2,-1.3632,-3.6101,...,2017-11-15,,,,,,,,,
3,7.4892,10.1925,-2.0669,40.4277,-1.0,0.0,11,3,-0.7554,1.3964,...,2017-11-16,,,,,,,,,
4,-1.8409,16.4827,1.0528,37.8898,0.0,1.0,11,4,0.6745,-4.4106,...,2017-11-17,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1623,-0.0230,-0.6236,2.5058,-11.0024,1.0,1.0,4,6,-0.4975,-0.6026,...,2022-04-24,0.850438,192.063434,2.399333e+08,4.263655e+07,1.617103,82484075.0,1298.198830,3.929012,266427.553606
1624,2.4972,-0.9012,-5.7858,-9.0841,-1.0,0.0,4,0,2.9560,1.3079,...,2022-04-25,1.168047,145.232338,2.273052e+08,4.028992e+07,1.607772,82489623.0,1712.450617,2.524912,501917.133698
1625,-5.7628,-8.1568,2.9479,-18.5881,1.0,1.0,4,1,-6.6684,-4.6469,...,2022-04-26,1.236938,128.941199,1.964366e+08,3.523924e+07,1.652366,82504407.0,1952.121429,2.425340,446260.344927
1626,2.9402,-5.1560,1.3575,-16.7350,1.0,1.0,4,2,2.8584,1.5260,...,2022-04-27,1.062319,147.717000,2.431303e+08,4.268253e+07,1.561197,82522143.0,1688.852941,2.423229,436397.895185


#### Export dataset to CSV

In [103]:
# Write the final dataset to disk. to_csv writes the index by default, so
# the file starts with an unnamed index column.
df_final.to_csv("DailyDataset.csv")