In [2]:
import pandas as pd
import numpy as np


### 1. **Price-Based Features**
   - **Historical prices**: Daily opening, closing, high, and low prices.
   - **Volatility**: Measures of how much the price fluctuates over a given time period.
   - **Moving averages**: Short-term and long-term moving averages (e.g., 7-day, 30-day, 90-day).

### 2. **Volume-Based Features**
   - **Trade volume**: Total number of coins traded in a specific time period.
   - **Volume averages**: Moving averages of trade volumes.

### 3. **Market Sentiment**
   - **Social media sentiment**: Analysis of sentiments expressed on platforms like Twitter, Reddit, or specific cryptocurrency forums.
   - **News sentiment**: Sentiment analysis of news articles and press releases related to the cryptocurrency.

### 4. **Technical Indicators**
   - **Relative Strength Index (RSI)**: Measures the speed and change of price movements.
   - **Moving Average Convergence Divergence (MACD)**: Shows the relationship between two moving averages of prices.
   - **Bollinger Bands**: Provides a relative understanding of high and low prices.
   - **On-Balance Volume (OBV)**: Uses volume flow to predict changes in stock price.

### 5. **Market Data**
   - **Market cap**: Total market value of the crypto's circulating supply.
   - **Dominance percentage**: Percentage of total market cap that is comprised of the altcoin.

# Enhancements with Different Approaches 
 - **Application of Lags** 
 - **Technical indicators Interprerations turned features**    
 - **Tweets Filtering (Coin Specific + Bot precautions)** 

# Ethereum

In [27]:
ETH=pd.read_excel(r"C:\Users\sdas1\Desktop\TCD\Dissertation\Altcoin\Altcoin-Price-Prediction\Ethereum\2022Ethereum_Original_LowCloseRemoved.xlsx")

In [11]:
ETH

Unnamed: 0,Date,Gold_Price,Oil_Price,Nasdaq_Open,Nasdaq_Volume,S&P500_Open,S&P500_Volume,BTC_Open,BTC_Volume,BTC_Market_Cap,Tweets_Count,Tweets_Vader_Sentiment,News_Vader_Sentiment,Ethereum_Change
0,2016-01-01,1060.5,37.04,4897.649902,2218420000,2038.20,4304880000,434.334,3.173093e+07,6.521422e+09,13583,0.238774,0.000000,1
1,2016-01-02,1060.5,37.04,4897.649902,2218420000,2038.20,4304880000,433.438,3.117175e+07,6.461269e+09,13691,0.254835,0.000000,0
2,2016-01-03,1060.5,37.04,4897.649902,2218420000,2038.20,4304880000,430.011,3.715623e+07,6.494453e+09,13167,0.248901,0.000000,1
3,2016-01-04,1075.5,36.76,4897.649902,2218420000,2038.20,4304880000,433.091,3.669657e+07,6.504166e+09,15821,0.181530,0.000000,0
4,2016-01-05,1078.7,35.97,4917.839844,1927380000,2013.78,3706620000,431.960,3.357784e+07,6.471942e+09,16689,0.182860,0.153550,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2552,2022-12-27,1797.7,75.89,10462.190430,3827290000,3843.34,3030300000,16919.290,1.574858e+10,3.217172e+11,39816,0.097602,-0.054126,0
2553,2022-12-28,1797.7,75.89,10339.200195,3842970000,3829.56,3083520000,16716.400,1.700571e+10,3.185671e+11,42534,0.084050,-0.066367,0
2554,2022-12-29,1797.7,75.89,10321.459961,4154100000,3805.45,3003680000,16552.320,1.447224e+10,3.203093e+11,41316,0.099584,-0.052279,1
2555,2022-12-30,1797.7,75.89,10368.370117,3959030000,3829.06,2979870000,16641.330,1.592916e+10,3.195595e+11,40485,0.095240,-0.024360,0


In [12]:
ETH.drop(["Nasdaq_Volume","S&P500_Volume"],inplace=True,axis=1)

ETH.head()

Unnamed: 0,Date,Gold_Price,Oil_Price,Nasdaq_Open,S&P500_Open,BTC_Open,BTC_Volume,BTC_Market_Cap,Tweets_Count,Tweets_Vader_Sentiment,News_Vader_Sentiment,Ethereum_Change
0,2016-01-01,1060.5,37.04,4897.649902,2038.2,434.334,31730930.0,6521422000.0,13583,0.238774,0.0,1
1,2016-01-02,1060.5,37.04,4897.649902,2038.2,433.438,31171750.0,6461269000.0,13691,0.254835,0.0,0
2,2016-01-03,1060.5,37.04,4897.649902,2038.2,430.011,37156230.0,6494453000.0,13167,0.248901,0.0,1
3,2016-01-04,1075.5,36.76,4897.649902,2038.2,433.091,36696570.0,6504166000.0,15821,0.18153,0.0,0
4,2016-01-05,1078.7,35.97,4917.839844,2013.78,431.96,33577840.0,6471942000.0,16689,0.18286,0.15355,0


In [28]:
ETH['Date'] = pd.to_datetime(ETH['Date'])
ETH.set_index('Date', inplace=True)

In [14]:
ETH

Unnamed: 0_level_0,Gold_Price,Oil_Price,Nasdaq_Open,S&P500_Open,BTC_Open,BTC_Volume,BTC_Market_Cap,Tweets_Count,Tweets_Vader_Sentiment,News_Vader_Sentiment,Ethereum_Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-01-01,1060.5,37.04,4897.649902,2038.20,434.334,3.173093e+07,6.521422e+09,13583,0.238774,0.000000,1
2016-01-02,1060.5,37.04,4897.649902,2038.20,433.438,3.117175e+07,6.461269e+09,13691,0.254835,0.000000,0
2016-01-03,1060.5,37.04,4897.649902,2038.20,430.011,3.715623e+07,6.494453e+09,13167,0.248901,0.000000,1
2016-01-04,1075.5,36.76,4897.649902,2038.20,433.091,3.669657e+07,6.504166e+09,15821,0.181530,0.000000,0
2016-01-05,1078.7,35.97,4917.839844,2013.78,431.960,3.357784e+07,6.471942e+09,16689,0.182860,0.153550,0
...,...,...,...,...,...,...,...,...,...,...,...
2022-12-27,1797.7,75.89,10462.190430,3843.34,16919.290,1.574858e+10,3.217172e+11,39816,0.097602,-0.054126,0
2022-12-28,1797.7,75.89,10339.200195,3829.56,16716.400,1.700571e+10,3.185671e+11,42534,0.084050,-0.066367,0
2022-12-29,1797.7,75.89,10321.459961,3805.45,16552.320,1.447224e+10,3.203093e+11,41316,0.099584,-0.052279,1
2022-12-30,1797.7,75.89,10368.370117,3829.06,16641.330,1.592916e+10,3.195595e+11,40485,0.095240,-0.024360,0


In [37]:
Base_ETH = pd.read_excel(r"C:\Users\sdas1\Desktop\TCD\Dissertation\Altcoin\Altcoin-Price-Prediction\New_Experiments\Base_Datasets\Alt_Coin_Yearly_Rank.xlsx", sheet_name = 'Ethereum_Daily')

In [38]:
Base_ETH

Unnamed: 0,Date,Open*,High,Low,Close**,Volume,Market Cap
0,"Dec 31, 2022","$1,199.36","$1,205.09","$1,194.20","$1,196.77","$3,018,513,333","$146,453,523,490"
1,"Dec 30, 2022","$1,201.57","$1,202.03","$1,187.46","$1,199.23","$4,055,668,253","$146,754,748,633"
2,"Dec 29, 2022","$1,190.01","$1,204.14","$1,188.36","$1,201.60","$4,132,233,940","$147,043,867,255"
3,"Dec 28, 2022","$1,212.74","$1,213.13","$1,185.70","$1,189.99","$4,991,669,631","$145,623,194,889"
4,"Dec 27, 2022","$1,226.99","$1,230.42","$1,205.90","$1,212.79","$4,091,530,737","$148,414,001,604"
...,...,...,...,...,...,...,...
2551,"Jan 05, 2016",$0.9531,$0.9706,$0.9465,$0.9502,"$219,833","$72,240,974"
2552,"Jan 04, 2016",$0.972,$0.9764,$0.9298,$0.9545,"$346,245","$72,543,707"
2553,"Jan 03, 2016",$0.9384,$0.9914,$0.9343,$0.9719,"$407,632","$73,843,292"
2554,"Jan 02, 2016",$0.9474,$0.9696,$0.9366,$0.9371,"$255,504","$71,176,658"


In [17]:
print(Base_ETH.isnull().sum())

Date          0
Open*         0
High          0
Low           0
Close**       0
Volume        0
Market Cap    0
dtype: int64


In [61]:
Base_ETH['Date'] = pd.to_datetime(Base_ETH['Date'])
Base_ETH.set_index('Date', inplace=True)

In [40]:
# Convert columns to float, removing any non-numeric characters like commas
Base_ETH['Open*'] = pd.to_numeric(Base_ETH['Open*'].replace('[^\d.]', '', regex=True), errors='coerce')
Base_ETH['High'] = pd.to_numeric(Base_ETH['High'].replace('[^\d.]', '', regex=True), errors='coerce')
Base_ETH['Low'] = pd.to_numeric(Base_ETH['Low'].replace('[^\d.]', '', regex=True), errors='coerce')
Base_ETH['Close**'] = pd.to_numeric(Base_ETH['Close**'].replace('[^\d.]', '', regex=True), errors='coerce')
Base_ETH['Volume'] = pd.to_numeric(Base_ETH['Volume'].replace('[^\d.]', '', regex=True), errors='coerce')
Base_ETH['Market Cap'] = pd.to_numeric(Base_ETH['Market Cap'].replace('[^\d.]', '', regex=True), errors='coerce')


In [16]:
from ta import add_all_ta_features
from ta.utils import dropna

# Clean NaN values
Base_ETH = dropna(Base_ETH)

# Add all technical indicators
Base_ETH = add_all_ta_features(
    Base_ETH, open="Open*", high="High", low="Low", close="Close**", volume="Volume", fillna=True
)

In [41]:
from ta.momentum import RSIIndicator
from ta.trend import MACD
from ta.volatility import BollingerBands

# RSI
Base_ETH['rsi'] = RSIIndicator(close=Base_ETH['Close**'], window=14, fillna=True).rsi()

# MACD
macd = MACD(close=Base_ETH['Close**'], window_slow=26, window_fast=12, window_sign=9, fillna=True)
Base_ETH['macd'] = macd.macd()
Base_ETH['macd_signal'] = macd.macd_signal()
Base_ETH['macd_diff'] = macd.macd_diff()

# Bollinger Bands
indicator_bb = BollingerBands(close=Base_ETH['Close**'], window=20, window_dev=2, fillna=True)
Base_ETH['bb_bbm'] = indicator_bb.bollinger_mavg()
Base_ETH['bb_bbh'] = indicator_bb.bollinger_hband()
Base_ETH['bb_bbl'] = indicator_bb.bollinger_lband()


In [42]:
Base_ETH

Unnamed: 0_level_0,Open*,High,Low,Close**,Volume,Market Cap,rsi,macd,macd_signal,macd_diff,bb_bbm,bb_bbh,bb_bbl
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-12-31,1199.3600,1205.0900,1194.2000,1196.7700,3018513333,146453523490,100.000000,0.000000,0.000000,0.000000,1196.770000,1196.770000,1196.770000
2022-12-30,1201.5700,1202.0300,1187.4600,1199.2300,4055668253,146754748633,100.000000,0.196239,0.039248,0.156991,1198.000000,1200.460000,1195.540000
2022-12-29,1190.0100,1204.1400,1188.3600,1201.6000,4132233940,147043867255,100.000000,0.536812,0.138761,0.398051,1199.200000,1203.143907,1195.256093
2022-12-28,1212.7400,1213.1300,1185.7000,1189.9900,4991669631,145623194889,27.127046,-0.128630,0.085283,-0.213912,1196.897500,1205.574128,1188.220872
2022-12-27,1226.9900,1230.4200,1205.9000,1212.7900,4091530737,148414001604,71.323196,1.170282,0.302283,0.868000,1200.076000,1214.971398,1185.180602
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-05,0.9531,0.9706,0.9465,0.9502,219833,72240974,23.222015,-0.573078,-0.675665,0.102587,1.279295,1.930723,0.627867
2016-01-04,0.9720,0.9764,0.9298,0.9545,346245,72543707,23.443580,-0.543918,-0.649316,0.105398,1.220020,1.752223,0.687817
2016-01-03,0.9384,0.9914,0.9343,0.9719,407632,73843292,24.394372,-0.513486,-0.622150,0.108664,1.170115,1.586163,0.754067
2016-01-02,0.9474,0.9696,0.9366,0.9371,255504,71176658,23.758831,-0.486567,-0.595033,0.108466,1.141970,1.540746,0.743194


In [43]:
Base_ETH[["Gold_Price","Oil_Price","Nasdaq_Open","S&P500_Open","BTC_Open","BTC_Volume","BTC_Market_Cap","Tweets_Count","Tweets_Vader_Sentiment","News_Vader_Sentiment"]]=ETH[["Gold_Price","Oil_Price","Nasdaq_Open","S&P500_Open","BTC_Open","BTC_Volume","BTC_Market_Cap","Tweets_Count","Tweets_Vader_Sentiment","News_Vader_Sentiment"]]
Base_ETH

Unnamed: 0_level_0,Open*,High,Low,Close**,Volume,Market Cap,rsi,macd,macd_signal,macd_diff,...,Gold_Price,Oil_Price,Nasdaq_Open,S&P500_Open,BTC_Open,BTC_Volume,BTC_Market_Cap,Tweets_Count,Tweets_Vader_Sentiment,News_Vader_Sentiment
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-12-31,1199.3600,1205.0900,1194.2000,1196.7700,3018513333,146453523490,100.000000,0.000000,0.000000,0.000000,...,1797.7,75.89,10368.370117,3829.06,16603.670,1.123919e+10,3.185163e+11,35625,0.119512,-0.068864
2022-12-30,1201.5700,1202.0300,1187.4600,1199.2300,4055668253,146754748633,100.000000,0.196239,0.039248,0.156991,...,1797.7,75.89,10368.370117,3829.06,16641.330,1.592916e+10,3.195595e+11,40485,0.095240,-0.024360
2022-12-29,1190.0100,1204.1400,1188.3600,1201.6000,4132233940,147043867255,100.000000,0.536812,0.138761,0.398051,...,1797.7,75.89,10321.459961,3805.45,16552.320,1.447224e+10,3.203093e+11,41316,0.099584,-0.052279
2022-12-28,1212.7400,1213.1300,1185.7000,1189.9900,4991669631,145623194889,27.127046,-0.128630,0.085283,-0.213912,...,1797.7,75.89,10339.200195,3829.56,16716.400,1.700571e+10,3.185671e+11,42534,0.084050,-0.066367
2022-12-27,1226.9900,1230.4200,1205.9000,1212.7900,4091530737,148414001604,71.323196,1.170282,0.302283,0.868000,...,1797.7,75.89,10462.190430,3843.34,16919.290,1.574858e+10,3.217172e+11,39816,0.097602,-0.054126
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-05,0.9531,0.9706,0.9465,0.9502,219833,72240974,23.222015,-0.573078,-0.675665,0.102587,...,1078.7,35.97,4917.839844,2013.78,431.960,3.357784e+07,6.471942e+09,16689,0.182860,0.153550
2016-01-04,0.9720,0.9764,0.9298,0.9545,346245,72543707,23.443580,-0.543918,-0.649316,0.105398,...,1075.5,36.76,4897.649902,2038.20,433.091,3.669657e+07,6.504166e+09,15821,0.181530,0.000000
2016-01-03,0.9384,0.9914,0.9343,0.9719,407632,73843292,24.394372,-0.513486,-0.622150,0.108664,...,1060.5,37.04,4897.649902,2038.20,430.011,3.715623e+07,6.494453e+09,13167,0.248901,0.000000
2016-01-02,0.9474,0.9696,0.9366,0.9371,255504,71176658,23.758831,-0.486567,-0.595033,0.108466,...,1060.5,37.04,4897.649902,2038.20,433.438,3.117175e+07,6.461269e+09,13691,0.254835,0.000000


In [45]:
# Base_ETH["Price_Direction"]=Base_ETH["Close**"]-Base_ETH["Open*"]
# Binary_direction=[]
# for price in Base_ETH["Price_Direction"]:
#     if(price<0) :
#         Binary_direction.append(0)
#     else :
#         Binary_direction.append(1) 
# Base_ETH["Price_Direction"]=Binary_direction

Base_ETH['Future_Close'] = Base_ETH['Close**'].shift(-1)  # Shift closing price to the previous day for comparison
Base_ETH['Price_Direction'] = (Base_ETH['Future_Close'] > Base_ETH['Close**']).astype(int)  # 1 if price increases, else 0
Base_ETH.dropna(inplace=True)  # Drop the last row where Future_Close is NaN

In [46]:
Base_ETH

Unnamed: 0_level_0,Open*,High,Low,Close**,Volume,Market Cap,rsi,macd,macd_signal,macd_diff,...,Nasdaq_Open,S&P500_Open,BTC_Open,BTC_Volume,BTC_Market_Cap,Tweets_Count,Tweets_Vader_Sentiment,News_Vader_Sentiment,Future_Close,Price_Direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-12-31,1199.3600,1205.0900,1194.2000,1196.7700,3018513333,146453523490,100.000000,0.000000,0.000000,0.000000,...,10368.370117,3829.06,16603.670,1.123919e+10,3.185163e+11,35625,0.119512,-0.068864,1199.2300,1
2022-12-30,1201.5700,1202.0300,1187.4600,1199.2300,4055668253,146754748633,100.000000,0.196239,0.039248,0.156991,...,10368.370117,3829.06,16641.330,1.592916e+10,3.195595e+11,40485,0.095240,-0.024360,1201.6000,1
2022-12-29,1190.0100,1204.1400,1188.3600,1201.6000,4132233940,147043867255,100.000000,0.536812,0.138761,0.398051,...,10321.459961,3805.45,16552.320,1.447224e+10,3.203093e+11,41316,0.099584,-0.052279,1189.9900,0
2022-12-28,1212.7400,1213.1300,1185.7000,1189.9900,4991669631,145623194889,27.127046,-0.128630,0.085283,-0.213912,...,10339.200195,3829.56,16716.400,1.700571e+10,3.185671e+11,42534,0.084050,-0.066367,1212.7900,1
2022-12-27,1226.9900,1230.4200,1205.9000,1212.7900,4091530737,148414001604,71.323196,1.170282,0.302283,0.868000,...,10462.190430,3843.34,16919.290,1.574858e+10,3.217172e+11,39816,0.097602,-0.054126,1226.9700,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-06,0.9500,0.9607,0.9357,0.9509,308791,72317149,23.232179,-0.601523,-0.701312,0.099789,...,4813.759766,2011.71,429.105,6.315093e+07,6.756367e+09,15981,0.212686,0.000000,0.9502,0
2016-01-05,0.9531,0.9706,0.9465,0.9502,219833,72240974,23.222015,-0.573078,-0.675665,0.102587,...,4917.839844,2013.78,431.960,3.357784e+07,6.471942e+09,16689,0.182860,0.153550,0.9545,1
2016-01-04,0.9720,0.9764,0.9298,0.9545,346245,72543707,23.443580,-0.543918,-0.649316,0.105398,...,4897.649902,2038.20,433.091,3.669657e+07,6.504166e+09,15821,0.181530,0.000000,0.9719,1
2016-01-03,0.9384,0.9914,0.9343,0.9719,407632,73843292,24.394372,-0.513486,-0.622150,0.108664,...,4897.649902,2038.20,430.011,3.715623e+07,6.494453e+09,13167,0.248901,0.000000,0.9371,0


In [47]:
Base_ETH.drop(["Open*","High","Low","Close**","Future_Close"],inplace=True,axis=1)

In [48]:
Base_ETH

Unnamed: 0_level_0,Volume,Market Cap,rsi,macd,macd_signal,macd_diff,bb_bbm,bb_bbh,bb_bbl,Gold_Price,Oil_Price,Nasdaq_Open,S&P500_Open,BTC_Open,BTC_Volume,BTC_Market_Cap,Tweets_Count,Tweets_Vader_Sentiment,News_Vader_Sentiment,Price_Direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2022-12-31,3018513333,146453523490,100.000000,0.000000,0.000000,0.000000,1196.770000,1196.770000,1196.770000,1797.7,75.89,10368.370117,3829.06,16603.670,1.123919e+10,3.185163e+11,35625,0.119512,-0.068864,1
2022-12-30,4055668253,146754748633,100.000000,0.196239,0.039248,0.156991,1198.000000,1200.460000,1195.540000,1797.7,75.89,10368.370117,3829.06,16641.330,1.592916e+10,3.195595e+11,40485,0.095240,-0.024360,1
2022-12-29,4132233940,147043867255,100.000000,0.536812,0.138761,0.398051,1199.200000,1203.143907,1195.256093,1797.7,75.89,10321.459961,3805.45,16552.320,1.447224e+10,3.203093e+11,41316,0.099584,-0.052279,0
2022-12-28,4991669631,145623194889,27.127046,-0.128630,0.085283,-0.213912,1196.897500,1205.574128,1188.220872,1797.7,75.89,10339.200195,3829.56,16716.400,1.700571e+10,3.185671e+11,42534,0.084050,-0.066367,1
2022-12-27,4091530737,148414001604,71.323196,1.170282,0.302283,0.868000,1200.076000,1214.971398,1185.180602,1797.7,75.89,10462.190430,3843.34,16919.290,1.574858e+10,3.217172e+11,39816,0.097602,-0.054126,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-06,308791,72317149,23.232179,-0.601523,-0.701312,0.099789,1.356785,2.179404,0.534166,1092.2,33.97,4813.759766,2011.71,429.105,6.315093e+07,6.756367e+09,15981,0.212686,0.000000,0
2016-01-05,219833,72240974,23.222015,-0.573078,-0.675665,0.102587,1.279295,1.930723,0.627867,1078.7,35.97,4917.839844,2013.78,431.960,3.357784e+07,6.471942e+09,16689,0.182860,0.153550,1
2016-01-04,346245,72543707,23.443580,-0.543918,-0.649316,0.105398,1.220020,1.752223,0.687817,1075.5,36.76,4897.649902,2038.20,433.091,3.669657e+07,6.504166e+09,15821,0.181530,0.000000,1
2016-01-03,407632,73843292,24.394372,-0.513486,-0.622150,0.108664,1.170115,1.586163,0.754067,1060.5,37.04,4897.649902,2038.20,430.011,3.715623e+07,6.494453e+09,13167,0.248901,0.000000,0


In [60]:
Base_ETH.to_csv("Ethereum/RawTechnical_plus_Old.csv")

In [58]:
Base_ETH=pd.read_csv("Ethereum/RawTechnical_plus_Old.csv")

# All Technical indicators no parameters

In [5]:
import numpy as np
from pycaret.classification import *

# Initialize setup
clf = setup(data=Base_ETH, target='Price_Direction', session_id=123)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,Price_Direction
2,Target type,Binary
3,Original data shape,"(2556, 107)"
4,Transformed data shape,"(2556, 107)"
5,Transformed train set shape,"(1789, 107)"
6,Transformed test set shape,"(767, 107)"
7,Numeric features,105
8,Categorical features,1
9,Preprocess,True


In [6]:
best_model = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ridge,Ridge Classifier,0.7535,0.8306,0.8179,0.7294,0.7708,0.5061,0.5105,0.035
et,Extra Trees Classifier,0.6031,0.6879,0.425,0.6967,0.5114,0.21,0.2356,0.06
lr,Logistic Regression,0.5472,0.5717,0.7074,0.5404,0.6121,0.0906,0.0962,0.436
knn,K Neighbors Classifier,0.5266,0.5446,0.5441,0.5317,0.5373,0.0525,0.0526,0.277
svm,SVM - Linear Kernel,0.5109,0.5009,0.3439,0.5039,0.3271,0.0273,0.0301,0.026
rf,Random Forest Classifier,0.5075,0.7781,1.0,0.507,0.6729,0.0023,0.0152,0.119
dt,Decision Tree Classifier,0.5064,0.5,1.0,0.5064,0.6724,0.0,0.0,0.023
ada,Ada Boost Classifier,0.5064,0.5,1.0,0.5064,0.6724,0.0,0.0,0.029
gbc,Gradient Boosting Classifier,0.5064,0.5,1.0,0.5064,0.6724,0.0,0.0,0.259
lda,Linear Discriminant Analysis,0.5064,0.5,1.0,0.5064,0.6724,0.0,0.0,0.028


In [7]:
from pycaret.classification import tune_model

# Tune the best model
tuned_model = tune_model(best_model)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7765,0.8337,0.8352,0.7525,0.7917,0.5521,0.5556
1,0.7263,0.8322,0.8242,0.6944,0.7538,0.4506,0.459
2,0.7821,0.833,0.8901,0.7364,0.806,0.5626,0.5758
3,0.7374,0.8268,0.8352,0.7037,0.7638,0.473,0.4819
4,0.7598,0.8345,0.8571,0.7222,0.7839,0.5179,0.5276
5,0.6983,0.7699,0.7912,0.6729,0.7273,0.3947,0.4012
6,0.7263,0.7979,0.8667,0.6783,0.761,0.4516,0.4704
7,0.7542,0.8697,0.8222,0.7255,0.7708,0.508,0.5126
8,0.7709,0.8818,0.8556,0.7333,0.7897,0.5415,0.5492
9,0.7528,0.8484,0.8556,0.713,0.7778,0.5044,0.5151


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


In [8]:
from pycaret.classification import finalize_model, save_model

# Finalize the model
final_model = finalize_model(tuned_model)

# Save the model to disk
save_model(final_model, 'ETH_raw_prediction_model')


Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('numerical_imputer',
                  TransformerWrapper(exclude=None,
                                     include=['Volume', 'Market Cap',
                                              'volume_adi', 'volume_obv',
                                              'volume_cmf', 'volume_fi',
                                              'volume_em', 'volume_sma_em',
                                              'volume_vpt', 'volume_vwap',
                                              'volume_mfi', 'volume_nvi',
                                              'volatility_bbm', 'volatility_bbh',
                                              'volatility_bbl', 'volatility_bbw',
                                              'volatility_bbp',
                                              'vola...
                                                               return_df=True,
                                                               smo

# All Technical indicators with parameters

In [7]:
import numpy as np
from pycaret.classification import *

# Initialize setup
exp_clf = setup(data=Base_ETH, target='Price_Direction', session_id=123, normalize=True, transformation=True)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,Price_Direction
2,Target type,Binary
3,Original data shape,"(2556, 107)"
4,Transformed data shape,"(2556, 107)"
5,Transformed train set shape,"(1789, 107)"
6,Transformed test set shape,"(767, 107)"
7,Numeric features,105
8,Categorical features,1
9,Preprocess,True


In [8]:
best_model = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.7105,0.7919,0.7848,0.688,0.733,0.4197,0.4245,0.568
et,Extra Trees Classifier,0.6026,0.6975,0.3664,0.7123,0.4742,0.2099,0.2417,0.099
svm,SVM - Linear Kernel,0.5718,0.6135,0.6785,0.5732,0.6064,0.1407,0.1516,0.072
knn,K Neighbors Classifier,0.5618,0.5888,0.6103,0.5641,0.5854,0.1222,0.1226,0.369
rf,Random Forest Classifier,0.5075,0.7781,1.0,0.507,0.6729,0.0023,0.0152,0.149
ridge,Ridge Classifier,0.507,0.8327,1.0,0.5067,0.6726,0.0012,0.0076,0.069
nb,Naive Bayes,0.5064,0.5,1.0,0.5064,0.6724,0.0,0.0,0.069
dt,Decision Tree Classifier,0.5064,0.5,1.0,0.5064,0.6724,0.0,0.0,0.069
ada,Ada Boost Classifier,0.5064,0.5,1.0,0.5064,0.6724,0.0,0.0,0.075
gbc,Gradient Boosting Classifier,0.5064,0.5,1.0,0.5064,0.6724,0.0,0.0,0.286


In [9]:
from pycaret.classification import tune_model

# Tune the best model
tuned_model = tune_model(best_model)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7263,0.7851,0.7363,0.7283,0.7322,0.4523,0.4523
1,0.7318,0.8022,0.7692,0.7216,0.7447,0.4629,0.464
2,0.7039,0.7687,0.8022,0.6759,0.7337,0.4058,0.4133
3,0.7318,0.8081,0.7802,0.7172,0.7474,0.4627,0.4646
4,0.6983,0.7724,0.7582,0.6832,0.7188,0.3953,0.3978
5,0.6983,0.726,0.7582,0.6832,0.7188,0.3953,0.3978
6,0.6592,0.7199,0.7111,0.6465,0.6772,0.318,0.3197
7,0.743,0.8265,0.8111,0.7157,0.7604,0.4856,0.4901
8,0.7542,0.8386,0.8,0.7347,0.766,0.5081,0.5102
9,0.7135,0.787,0.7889,0.6893,0.7358,0.426,0.4306


Fitting 10 folds for each of 10 candidates, totalling 100 fits


# Specific Technical indicators no parameters different price parameter


In [50]:
Base_ETH

Unnamed: 0_level_0,Volume,Market Cap,rsi,macd,macd_signal,macd_diff,bb_bbm,bb_bbh,bb_bbl,Gold_Price,Oil_Price,Nasdaq_Open,S&P500_Open,BTC_Open,BTC_Volume,BTC_Market_Cap,Tweets_Count,Tweets_Vader_Sentiment,News_Vader_Sentiment,Price_Direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2022-12-31,3018513333,146453523490,100.000000,0.000000,0.000000,0.000000,1196.770000,1196.770000,1196.770000,1797.7,75.89,10368.370117,3829.06,16603.670,1.123919e+10,3.185163e+11,35625,0.119512,-0.068864,1
2022-12-30,4055668253,146754748633,100.000000,0.196239,0.039248,0.156991,1198.000000,1200.460000,1195.540000,1797.7,75.89,10368.370117,3829.06,16641.330,1.592916e+10,3.195595e+11,40485,0.095240,-0.024360,1
2022-12-29,4132233940,147043867255,100.000000,0.536812,0.138761,0.398051,1199.200000,1203.143907,1195.256093,1797.7,75.89,10321.459961,3805.45,16552.320,1.447224e+10,3.203093e+11,41316,0.099584,-0.052279,0
2022-12-28,4991669631,145623194889,27.127046,-0.128630,0.085283,-0.213912,1196.897500,1205.574128,1188.220872,1797.7,75.89,10339.200195,3829.56,16716.400,1.700571e+10,3.185671e+11,42534,0.084050,-0.066367,1
2022-12-27,4091530737,148414001604,71.323196,1.170282,0.302283,0.868000,1200.076000,1214.971398,1185.180602,1797.7,75.89,10462.190430,3843.34,16919.290,1.574858e+10,3.217172e+11,39816,0.097602,-0.054126,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-06,308791,72317149,23.232179,-0.601523,-0.701312,0.099789,1.356785,2.179404,0.534166,1092.2,33.97,4813.759766,2011.71,429.105,6.315093e+07,6.756367e+09,15981,0.212686,0.000000,0
2016-01-05,219833,72240974,23.222015,-0.573078,-0.675665,0.102587,1.279295,1.930723,0.627867,1078.7,35.97,4917.839844,2013.78,431.960,3.357784e+07,6.471942e+09,16689,0.182860,0.153550,1
2016-01-04,346245,72543707,23.443580,-0.543918,-0.649316,0.105398,1.220020,1.752223,0.687817,1075.5,36.76,4897.649902,2038.20,433.091,3.669657e+07,6.504166e+09,15821,0.181530,0.000000,1
2016-01-03,407632,73843292,24.394372,-0.513486,-0.622150,0.108664,1.170115,1.586163,0.754067,1060.5,37.04,4897.649902,2038.20,430.011,3.715623e+07,6.494453e+09,13167,0.248901,0.000000,0


In [51]:
import numpy as np
from pycaret.classification import *

# Initialize setup
clf = setup(data=Base_ETH, target='Price_Direction', session_id=123)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,Price_Direction
2,Target type,Binary
3,Original data shape,"(2555, 20)"
4,Transformed data shape,"(2555, 20)"
5,Transformed train set shape,"(1788, 20)"
6,Transformed test set shape,"(767, 20)"
7,Numeric features,19
8,Preprocess,True
9,Imputation type,simple


In [52]:
best_model = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.5297,0.5249,0.5011,0.521,0.5106,0.0583,0.0584,0.052
ridge,Ridge Classifier,0.528,0.5382,0.5171,0.5193,0.5173,0.0556,0.0558,0.009
lightgbm,Light Gradient Boosting Machine,0.5263,0.5253,0.492,0.5173,0.5038,0.0511,0.0513,0.103
lda,Linear Discriminant Analysis,0.5252,0.5372,0.5183,0.516,0.5163,0.0501,0.0503,0.01
rf,Random Forest Classifier,0.5168,0.5137,0.476,0.5086,0.491,0.0318,0.0322,0.113
qda,Quadratic Discriminant Analysis,0.5129,0.5236,0.6772,0.5047,0.5703,0.0324,0.0336,0.009
nb,Naive Bayes,0.5123,0.5332,0.7113,0.5015,0.5877,0.0322,0.0358,0.012
gbc,Gradient Boosting Classifier,0.5118,0.5109,0.4864,0.5037,0.4942,0.0225,0.0227,0.161
dummy,Dummy Classifier,0.5101,0.5,0.0,0.0,0.0,0.0,0.0,0.009
lr,Logistic Regression,0.5045,0.5172,0.0171,0.3762,0.0324,-0.0105,-0.027,0.024


In [36]:
from pycaret.classification import tune_model

# Tune the best model
tuned_model = tune_model(best_model)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5196,0.5239,0.2637,0.5581,0.3582,0.0474,0.056
1,0.4916,0.4956,0.5824,0.5,0.5381,-0.0199,-0.0202
2,0.486,0.49,0.2527,0.4894,0.3333,-0.0198,-0.0227
3,0.5642,0.5684,0.3187,0.6444,0.4265,0.1357,0.1577
4,0.5251,0.5232,0.6374,0.5273,0.5771,0.0466,0.0477
5,0.5698,0.5739,0.3297,0.6522,0.438,0.1466,0.1692
6,0.5084,0.5056,0.5778,0.5098,0.5417,0.016,0.0161
7,0.5419,0.5534,0.3,0.587,0.3971,0.0863,0.099
8,0.5028,0.5044,0.2111,0.5135,0.2992,0.0088,0.0109
9,0.5393,0.5426,0.2556,0.6053,0.3594,0.0845,0.1038


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


# Only Technical indicators 

In [59]:
Base_ETH.drop(["Gold_Price","Oil_Price","Nasdaq_Open","S&P500_Open","BTC_Open","BTC_Volume","BTC_Market_Cap","Tweets_Count","Tweets_Vader_Sentiment","News_Vader_Sentiment"],inplace=True,axis=1)

In [62]:
Base_ETH

Unnamed: 0_level_0,Volume,Market Cap,volume_adi,volume_obv,volume_cmf,volume_fi,volume_em,volume_sma_em,volume_vpt,volume_vwap,...,others_dlr,others_cr,rsi,macd,macd_signal,macd_diff,bb_bbm,bb_bbh,bb_bbl,Price_Direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-12-31,3018513333,146453523490,-1.593797e+09,3018513333,-0.528007,0.000000e+00,0.000000,0.000000,0.000000e+00,1198.686667,...,0.000000,0.000000,100.000000,0.000000,0.000000,0.000000,1196.770000,1196.770000,1196.770000,0
2022-12-30,4055668253,146754748633,9.030692e+08,7074181586,0.127657,9.976944e+09,-1.760326,-1.760326,8.336559e+06,1197.283979,...,0.205342,0.205553,100.000000,0.196239,0.039248,0.156991,1198.000000,1200.460000,1195.540000,0
2022-12-29,4132233940,147043867255,3.705028e+09,11206415526,0.330617,9.950723e+09,0.574723,-0.592802,1.650296e+07,1197.560295,...,0.197432,0.403586,100.000000,0.536812,0.138761,0.398051,1199.200000,1203.143907,1195.256093,1
2022-12-28,4991669631,145623194889,2.747333e+08,6214745895,0.016961,2.501501e+08,1.739217,0.184538,-3.172714e+07,1197.163699,...,-0.970910,-0.566525,27.127046,-0.128630,0.085283,-0.213912,1196.897500,1205.574128,1188.220872,0
2022-12-27,4091530737,148414001604,-1.517397e+09,10306276632,-0.074787,1.354111e+10,11.233629,2.946811,4.666588e+07,1201.036772,...,1.897859,1.338603,71.323196,1.170282,0.302283,0.868000,1200.076000,1214.971398,1185.180602,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-05,219833,72240974,2.460530e+12,-1372918065031,0.197787,-9.471747e+04,0.113466,-0.276769,8.780225e+07,1.216191,...,-0.073642,-99.920603,23.222015,-0.573078,-0.675665,0.102587,1.279295,1.930723,0.627867,0
2016-01-04,346245,72543707,2.460530e+12,-1372917718786,0.195874,-8.097371e+04,-0.073350,-0.298535,8.780381e+07,1.152925,...,0.451515,-99.920244,23.443580,-0.543918,-0.649316,0.105398,1.220020,1.752223,0.687817,0
2016-01-03,407632,73843292,2.460530e+12,-1372917311154,0.051470,-6.839278e+04,0.136575,-0.171678,8.781125e+07,1.137279,...,1.806528,-99.918790,24.394372,-0.513486,-0.622150,0.108664,1.170115,1.586163,0.754067,1
2016-01-02,255504,71176658,2.460530e+12,-1372917566658,0.070587,-5.989260e+04,-0.125928,-0.168491,8.780210e+07,1.115236,...,-3.646292,-99.921698,23.758831,-0.486567,-0.595033,0.108466,1.141970,1.540746,0.743194,0


In [63]:
import numpy as np
from pycaret.classification import *

# Initialize setup
clf = setup(data=Base_ETH, target='Price_Direction', session_id=123)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,Price_Direction
2,Target type,Binary
3,Original data shape,"(2556, 96)"
4,Transformed data shape,"(2556, 96)"
5,Transformed train set shape,"(1789, 96)"
6,Transformed test set shape,"(767, 96)"
7,Numeric features,95
8,Preprocess,True
9,Imputation type,simple


In [64]:
best_model = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.7563,0.8405,0.7661,0.7564,0.7609,0.5124,0.5129,0.267
lda,Linear Discriminant Analysis,0.7541,0.8363,0.7705,0.7509,0.76,0.5079,0.5089,0.014
ridge,Ridge Classifier,0.7524,0.8316,0.7694,0.749,0.7587,0.5045,0.5052,0.013
gbc,Gradient Boosting Classifier,0.7468,0.8321,0.766,0.7427,0.7536,0.4933,0.4944,0.46
rf,Random Forest Classifier,0.7334,0.8084,0.7275,0.7411,0.7334,0.4669,0.468,0.108
ada,Ada Boost Classifier,0.7295,0.814,0.7418,0.7288,0.7348,0.4587,0.4594,0.113
dt,Decision Tree Classifier,0.6747,0.6748,0.6656,0.6842,0.6738,0.3495,0.3505,0.027
et,Extra Trees Classifier,0.6439,0.7056,0.6612,0.6445,0.6524,0.2875,0.2879,0.056
qda,Quadratic Discriminant Analysis,0.6048,0.6686,0.7639,0.5849,0.6596,0.2064,0.2216,0.017
knn,K Neighbors Classifier,0.5589,0.5638,0.5784,0.5631,0.5695,0.1174,0.118,0.024


In [65]:
from pycaret.classification import tune_model

# Tune the best model
tuned_model = tune_model(best_model)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.743,0.8319,0.7473,0.7473,0.7473,0.4859,0.4859
1,0.7318,0.8272,0.7033,0.7529,0.7273,0.4641,0.4652
2,0.7486,0.8329,0.7473,0.7556,0.7514,0.4972,0.4972
3,0.7933,0.8755,0.7912,0.8,0.7956,0.5866,0.5866
4,0.7821,0.8533,0.7692,0.7955,0.7821,0.5644,0.5647
5,0.7374,0.8091,0.7692,0.7292,0.7487,0.4742,0.475
6,0.7654,0.794,0.7444,0.7791,0.7614,0.5308,0.5314
7,0.8156,0.8865,0.8333,0.8065,0.8197,0.6312,0.6316
8,0.7598,0.8503,0.7778,0.7527,0.765,0.5194,0.5197
9,0.7135,0.8259,0.7556,0.701,0.7273,0.4264,0.4277


Fitting 10 folds for each of 10 candidates, totalling 100 fits


# Percentage difference

In [78]:
Base_ETH

Unnamed: 0_level_0,Volume,Market Cap,volume_adi,volume_obv,volume_cmf,volume_fi,volume_em,volume_sma_em,volume_vpt,volume_vwap,...,others_dlr,others_cr,rsi,macd,macd_signal,macd_diff,bb_bbm,bb_bbh,bb_bbl,Price_Direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-12-31,3018513333,146453523490,-1.593797e+09,3018513333,-0.528007,0.000000e+00,0.000000,0.000000,0.000000e+00,1198.686667,...,0.000000,0.000000,100.000000,0.000000,0.000000,0.000000,1196.770000,1196.770000,1196.770000,0
2022-12-30,4055668253,146754748633,9.030692e+08,7074181586,0.127657,9.976944e+09,-1.760326,-1.760326,8.336559e+06,1197.283979,...,0.205342,0.205553,100.000000,0.196239,0.039248,0.156991,1198.000000,1200.460000,1195.540000,0
2022-12-29,4132233940,147043867255,3.705028e+09,11206415526,0.330617,9.950723e+09,0.574723,-0.592802,1.650296e+07,1197.560295,...,0.197432,0.403586,100.000000,0.536812,0.138761,0.398051,1199.200000,1203.143907,1195.256093,1
2022-12-28,4991669631,145623194889,2.747333e+08,6214745895,0.016961,2.501501e+08,1.739217,0.184538,-3.172714e+07,1197.163699,...,-0.970910,-0.566525,27.127046,-0.128630,0.085283,-0.213912,1196.897500,1205.574128,1188.220872,0
2022-12-27,4091530737,148414001604,-1.517397e+09,10306276632,-0.074787,1.354111e+10,11.233629,2.946811,4.666588e+07,1201.036772,...,1.897859,1.338603,71.323196,1.170282,0.302283,0.868000,1200.076000,1214.971398,1185.180602,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-05,219833,72240974,2.460530e+12,-1372918065031,0.197787,-9.471747e+04,0.113466,-0.276769,8.780225e+07,1.216191,...,-0.073642,-99.920603,23.222015,-0.573078,-0.675665,0.102587,1.279295,1.930723,0.627867,0
2016-01-04,346245,72543707,2.460530e+12,-1372917718786,0.195874,-8.097371e+04,-0.073350,-0.298535,8.780381e+07,1.152925,...,0.451515,-99.920244,23.443580,-0.543918,-0.649316,0.105398,1.220020,1.752223,0.687817,0
2016-01-03,407632,73843292,2.460530e+12,-1372917311154,0.051470,-6.839278e+04,0.136575,-0.171678,8.781125e+07,1.137279,...,1.806528,-99.918790,24.394372,-0.513486,-0.622150,0.108664,1.170115,1.586163,0.754067,1
2016-01-02,255504,71176658,2.460530e+12,-1372917566658,0.070587,-5.989260e+04,-0.125928,-0.168491,8.780210e+07,1.115236,...,-3.646292,-99.921698,23.758831,-0.486567,-0.595033,0.108466,1.141970,1.540746,0.743194,0


In [83]:
columns_to_change = Base_ETH.columns.drop(['Price_Direction'])

# Create a new DataFrame for percentage changes
df_pct_change = Base_ETH[columns_to_change].pct_change() * 100

# If you want to add the 'Date' column to this new DataFrame
# df_pct_change['Date'] = Base_ETH['Date']

# Rearrange to move 'Date' to the first column if needed
# df_pct_change = df_pct_change[['Date'] + [col for col in df_pct_change.columns if col != 'Date']]

df_pct_change["Price_Direction"]=Base_ETH["Price_Direction"]

In [84]:
df_pct_change

Unnamed: 0_level_0,Volume,Market Cap,volume_adi,volume_obv,volume_cmf,volume_fi,volume_em,volume_sma_em,volume_vpt,volume_vwap,...,others_dlr,others_cr,rsi,macd,macd_signal,macd_diff,bb_bbm,bb_bbh,bb_bbl,Price_Direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-12-31,,,,,,,,,,,...,,,,,,,,,,0
2022-12-30,34.359793,0.205680,-1.566615e+02,134.359793,-124.177138,inf,-inf,-inf,inf,-0.117019,...,inf,inf,0.000000,inf,inf,inf,0.102777,0.308330,-0.102777,0
2022-12-29,1.887869,0.197008,3.102706e+02,58.412890,158.988143,-0.262819,-132.648662,-66.324331,97.958906,0.023079,...,-3.852359,96.341463,0.000000,173.549441,253.549441,153.549441,0.100167,0.223573,-0.023747,1
2022-12-28,20.798331,-0.966155,-9.258485e+01,-44.542964,-94.869934,-97.486111,202.618232,-131.129761,-292.251166,-0.033117,...,-591.769766,-240.372671,-72.872954,-123.961799,-38.539802,-153.739916,-0.192003,0.201989,-0.588595,0
2022-12-27,-18.032822,1.916458,-6.523164e+02,65.835849,-540.938273,5313.195146,545.901666,1496.860582,-247.085061,0.323521,...,-295.472187,-336.283186,162.922822,-1009.807312,254.448316,-505.773845,0.265562,0.779485,-0.255867,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-05,-28.808482,-0.105335,-6.191040e-06,0.000016,-21.790451,-14.265816,-300.212455,-15.789598,-0.000184,-1.678797,...,-107.831186,0.000059,-0.043749,-4.728822,-3.656974,2.804065,-5.711295,-11.410536,17.541697,0
2016-01-04,57.503650,0.419060,8.455260e-07,-0.000025,-0.967157,-14.510270,-164.644920,7.864105,0.001785,-5.201999,...,-713.125690,-0.000360,0.954118,-5.088269,-3.899770,2.739486,-4.633411,-9.245245,9.548202,0
2016-01-03,17.729353,1.791451,5.251484e-06,-0.000030,-73.723187,-15.537055,-286.197372,-42.493008,0.008463,-1.357028,...,300.103157,-0.001455,4.055656,-5.595049,-4.183790,3.099201,-4.090507,-9.477060,9.631800,1
2016-01-02,-37.319936,-3.611207,-1.006943e-05,0.000019,37.143323,-12.428472,-192.203710,-1.856536,-0.010419,-1.938286,...,-301.839815,0.002910,-2.605275,-5.242354,-4.358525,-0.182043,-2.405319,-2.863366,-1.441826,0


In [81]:
# df_pct_change.dropna(inplace=True,axis=1)

In [85]:
df_pct_change

Unnamed: 0_level_0,Volume,Market Cap,volume_adi,volume_obv,volume_cmf,volume_fi,volume_em,volume_sma_em,volume_vpt,volume_vwap,...,others_dlr,others_cr,rsi,macd,macd_signal,macd_diff,bb_bbm,bb_bbh,bb_bbl,Price_Direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-12-31,,,,,,,,,,,...,,,,,,,,,,0
2022-12-30,34.359793,0.205680,-1.566615e+02,134.359793,-124.177138,inf,-inf,-inf,inf,-0.117019,...,inf,inf,0.000000,inf,inf,inf,0.102777,0.308330,-0.102777,0
2022-12-29,1.887869,0.197008,3.102706e+02,58.412890,158.988143,-0.262819,-132.648662,-66.324331,97.958906,0.023079,...,-3.852359,96.341463,0.000000,173.549441,253.549441,153.549441,0.100167,0.223573,-0.023747,1
2022-12-28,20.798331,-0.966155,-9.258485e+01,-44.542964,-94.869934,-97.486111,202.618232,-131.129761,-292.251166,-0.033117,...,-591.769766,-240.372671,-72.872954,-123.961799,-38.539802,-153.739916,-0.192003,0.201989,-0.588595,0
2022-12-27,-18.032822,1.916458,-6.523164e+02,65.835849,-540.938273,5313.195146,545.901666,1496.860582,-247.085061,0.323521,...,-295.472187,-336.283186,162.922822,-1009.807312,254.448316,-505.773845,0.265562,0.779485,-0.255867,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-05,-28.808482,-0.105335,-6.191040e-06,0.000016,-21.790451,-14.265816,-300.212455,-15.789598,-0.000184,-1.678797,...,-107.831186,0.000059,-0.043749,-4.728822,-3.656974,2.804065,-5.711295,-11.410536,17.541697,0
2016-01-04,57.503650,0.419060,8.455260e-07,-0.000025,-0.967157,-14.510270,-164.644920,7.864105,0.001785,-5.201999,...,-713.125690,-0.000360,0.954118,-5.088269,-3.899770,2.739486,-4.633411,-9.245245,9.548202,0
2016-01-03,17.729353,1.791451,5.251484e-06,-0.000030,-73.723187,-15.537055,-286.197372,-42.493008,0.008463,-1.357028,...,300.103157,-0.001455,4.055656,-5.595049,-4.183790,3.099201,-4.090507,-9.477060,9.631800,1
2016-01-02,-37.319936,-3.611207,-1.006943e-05,0.000019,37.143323,-12.428472,-192.203710,-1.856536,-0.010419,-1.938286,...,-301.839815,0.002910,-2.605275,-5.242354,-4.358525,-0.182043,-2.405319,-2.863366,-1.441826,0


In [86]:
import numpy as np
from pycaret.classification import *

# Initialize setup
clf = setup(data=df_pct_change, target='Price_Direction', session_id=123)

ValueError: Input X contains infinity or a value too large for dtype('float64').