In [175]:
pip install pycoingecko



CoinGeckoAPI updates every 5 minutes

In [176]:
pip install plotly



In [177]:
from pycoingecko import CoinGeckoAPI                    #API where we are deriving the crypto data
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import requests
from statsmodels.tsa.arima.model import ARIMA           #Time-series machine learning model
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression       #Linear reggression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor        #Random Forest and GB Regressor
from sklearn.metrics import accuracy_score

In [178]:
cg = CoinGeckoAPI()

Taking the latest information of bitcoin

In [179]:
# Get information about the top 10 cryptocurrencies
top_10_coins = cg.get_coins_markets(vs_currency='usd', order='market_cap_desc', per_page=10, page=1)

# Print information about the top 10 cryptocurrencies
for index, coin in enumerate(top_10_coins, start=1):
    coin_name = coin['name']
    coin_symbol = coin['symbol']
    coin_market_cap = coin['market_cap']
    coin_price = coin['current_price']

    print(f"{index}. {coin_name} ({coin_symbol}) - Market Cap: ${coin_market_cap:,}, Price: ${coin_price:.2f}")

1. Bitcoin (btc) - Market Cap: $806,866,398,297, Price: $41341.00
2. Ethereum (eth) - Market Cap: $262,813,437,611, Price: $2191.21
3. Tether (usdt) - Market Cap: $90,543,075,585, Price: $1.00
4. BNB (bnb) - Market Cap: $39,251,159,650, Price: $255.72
5. XRP (xrp) - Market Cap: $33,313,305,356, Price: $0.62
6. Solana (sol) - Market Cap: $28,873,408,786, Price: $67.92
7. USDC (usdc) - Market Cap: $24,032,959,977, Price: $1.00
8. Lido Staked Ether (steth) - Market Cap: $20,148,563,689, Price: $2189.72
9. Cardano (ada) - Market Cap: $19,909,338,839, Price: $0.57
10. Avalanche (avax) - Market Cap: $13,677,483,269, Price: $37.62


In [180]:
top_10_coins = cg.get_coins_markets(vs_currency='usd', order='market_cap_desc', per_page=10, page=1)

# Extract the IDs of the top 10 cryptocurrencies
top_10_coin_ids = [coin['id'] for coin in top_10_coins]

# Print the IDs
print("Top 10 Cryptocurrency IDs:")
for coin_id in top_10_coin_ids:
    print(coin_id)

Top 10 Cryptocurrency IDs:
bitcoin
ethereum
tether
binancecoin
ripple
solana
usd-coin
staked-ether
cardano
avalanche-2


In [181]:
btc = cg.get_coin_market_chart_by_id(id='bitcoin', vs_currency='USD', days=90)

In [182]:
bitcoin_data = pd.DataFrame(btc['prices'], columns = ['timestamp', 'price'])

In [183]:
bitcoin_data

Unnamed: 0,timestamp,price
0,1694638801518,26226.862816
1,1694642483410,26231.949378
2,1694646050908,26234.189828
3,1694649714172,26222.013304
4,1694653240839,26488.966008
...,...,...
2156,1702400406130,41301.185434
2157,1702404026095,40720.052616
2158,1702407609812,41251.165163
2159,1702411209758,41083.081025


Converting timestamp to datetime format


In [184]:
bitcoin_data['Date'] = pd.to_datetime(bitcoin_data['timestamp'], unit = 'ms')

In [185]:
bitcoin_data

Unnamed: 0,timestamp,price,Date
0,1694638801518,26226.862816,2023-09-13 21:00:01.518
1,1694642483410,26231.949378,2023-09-13 22:01:23.410
2,1694646050908,26234.189828,2023-09-13 23:00:50.908
3,1694649714172,26222.013304,2023-09-14 00:01:54.172
4,1694653240839,26488.966008,2023-09-14 01:00:40.839
...,...,...,...
2156,1702400406130,41301.185434,2023-12-12 17:00:06.130
2157,1702404026095,40720.052616,2023-12-12 18:00:26.095
2158,1702407609812,41251.165163,2023-12-12 19:00:09.812
2159,1702411209758,41083.081025,2023-12-12 20:00:09.758


Aggregating all the values on the basis of date and creating a new dataframe. We will use this for our prediction and visualization.

In [186]:
candlestick_data = bitcoin_data.groupby(bitcoin_data.Date.dt.date).aggregate({'price': {'max', 'min', 'first', 'last'}})

In [187]:
candlestick_data

Unnamed: 0_level_0,price,price,price,price
Unnamed: 0_level_1,last,first,max,min
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2023-09-13,26234.189828,26226.862816,26234.189828,26226.862816
2023-09-14,26615.532123,26222.013304,26751.796646,26193.031946
2023-09-15,26792.608825,26531.395566,26792.608825,26251.615685
2023-09-16,26566.033844,26610.403126,26704.253882,26471.093720
2023-09-17,26468.617802,26557.768692,26605.367472,26462.583232
...,...,...,...,...
2023-12-08,44252.835095,43270.119560,44385.975286,43174.140967
2023-12-09,43908.530003,44158.481190,44290.263279,43824.945022
2023-12-10,43783.921845,43714.935354,43968.397400,43634.411927
2023-12-11,41259.309734,43757.959938,43757.959938,40753.097997


In [188]:
candlestick_data.index

Index([2023-09-13, 2023-09-14, 2023-09-15, 2023-09-16, 2023-09-17, 2023-09-18,
       2023-09-19, 2023-09-20, 2023-09-21, 2023-09-22, 2023-09-23, 2023-09-24,
       2023-09-25, 2023-09-26, 2023-09-27, 2023-09-28, 2023-09-29, 2023-09-30,
       2023-10-01, 2023-10-02, 2023-10-03, 2023-10-04, 2023-10-05, 2023-10-06,
       2023-10-07, 2023-10-08, 2023-10-09, 2023-10-10, 2023-10-11, 2023-10-12,
       2023-10-13, 2023-10-14, 2023-10-15, 2023-10-16, 2023-10-17, 2023-10-18,
       2023-10-19, 2023-10-20, 2023-10-21, 2023-10-22, 2023-10-23, 2023-10-24,
       2023-10-25, 2023-10-26, 2023-10-27, 2023-10-28, 2023-10-29, 2023-10-30,
       2023-10-31, 2023-11-01, 2023-11-02, 2023-11-03, 2023-11-04, 2023-11-05,
       2023-11-06, 2023-11-07, 2023-11-08, 2023-11-09, 2023-11-10, 2023-11-11,
       2023-11-12, 2023-11-13, 2023-11-14, 2023-11-15, 2023-11-16, 2023-11-17,
       2023-11-18, 2023-11-19, 2023-11-20, 2023-11-21, 2023-11-22, 2023-11-23,
       2023-11-24, 2023-11-25, 2023-11-26, 2023-11-2

In [189]:
fig = go.Figure(data=[go.Candlestick(x = candlestick_data.index,
                                     open = candlestick_data['price']['first'],
                                     low = candlestick_data['price']['min'],
                                     high = candlestick_data['price']['max'],
                                     close = candlestick_data['price']['last'])])

Visualization of Data.


In [190]:
fig.update_layout(title = 'Bitcoin prices over the last 90 days',
                  yaxis_title = 'Price (USD)',
                  xaxis_title = 'Date')

Machine Learning Model


Target : Closing Price

Current Features : Date, Max Price, Min Price, Open Price, Close Price.

To Find sentiment score and add that as a feature for the final machine learning model.


Taking the sentiment data from alternative.me

In [191]:
url = 'https://api.alternative.me/fng/?limit=91&date_format=cn'

r= requests.get(url)

In [192]:
r

<Response [200]>

In [193]:
data = r.json()

In [194]:
temp_df = pd.DataFrame(data['data'])

In [195]:
temp_df

Unnamed: 0,value,value_classification,timestamp,time_until_update
0,67,Greed,2023-12-12,-1702327307
1,74,Greed,2023-12-11,
2,74,Greed,2023-12-10,
3,73,Greed,2023-12-09,
4,72,Greed,2023-12-08,
...,...,...,...,...
86,46,Fear,2023-09-17,
87,43,Fear,2023-09-16,
88,45,Fear,2023-09-15,
89,45,Fear,2023-09-14,


Rescaling sentiment from 0 - 100 to -1 to 1

In [196]:
temp_df['value'] = temp_df['value'].astype(int)

In [197]:
temp_df['timestamp'] = pd.to_datetime(temp_df['timestamp'])

In [198]:
temp_df

Unnamed: 0,value,value_classification,timestamp,time_until_update
0,67,Greed,2023-12-12,-1702327307
1,74,Greed,2023-12-11,
2,74,Greed,2023-12-10,
3,73,Greed,2023-12-09,
4,72,Greed,2023-12-08,
...,...,...,...,...
86,46,Fear,2023-09-17,
87,43,Fear,2023-09-16,
88,45,Fear,2023-09-15,
89,45,Fear,2023-09-14,


In [199]:
temp_df.index = temp_df['timestamp']

In [200]:
temp_df

Unnamed: 0_level_0,value,value_classification,timestamp,time_until_update
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-12-12,67,Greed,2023-12-12,-1702327307
2023-12-11,74,Greed,2023-12-11,
2023-12-10,74,Greed,2023-12-10,
2023-12-09,73,Greed,2023-12-09,
2023-12-08,72,Greed,2023-12-08,
...,...,...,...,...
2023-09-17,46,Fear,2023-09-17,
2023-09-16,43,Fear,2023-09-16,
2023-09-15,45,Fear,2023-09-15,
2023-09-14,45,Fear,2023-09-14,


Merging both dataframes to create our final dataset.

In [201]:
final_dataset = pd.merge(candlestick_data, temp_df, left_index=True, right_index=True, how='left')


merging between different levels is deprecated and will be removed in a future version. (2 levels on the left, 1 on the right)



In [202]:
final_dataset = final_dataset.iloc[:,:5]

In [203]:
final_dataset.columns = ['closing_price', 'max_price', 'min_price', 'opening_price', 'sentiment']

Normalizing the data

In [204]:
scaler = MinMaxScaler()

In [205]:
final_dataset_n = scaler.fit_transform(final_dataset)

In [206]:
final_dataset

Unnamed: 0_level_0,closing_price,max_price,min_price,opening_price,sentiment
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-09-13,26234.189828,26226.862816,26234.189828,26226.862816,41
2023-09-14,26615.532123,26222.013304,26751.796646,26193.031946,45
2023-09-15,26792.608825,26531.395566,26792.608825,26251.615685,45
2023-09-16,26566.033844,26610.403126,26704.253882,26471.093720,43
2023-09-17,26468.617802,26557.768692,26605.367472,26462.583232,46
...,...,...,...,...,...
2023-12-08,44252.835095,43270.119560,44385.975286,43174.140967,72
2023-12-09,43908.530003,44158.481190,44290.263279,43824.945022,73
2023-12-10,43783.921845,43714.935354,43968.397400,43634.411927,74
2023-12-11,41259.309734,43757.959938,43757.959938,40753.097997,74


Normalized dataset

In [207]:
final_dataset_n

array([[4.55313085e-03, 1.23123344e-03, 0.00000000e+00, 9.44617062e-03,
        0.00000000e+00],
       [2.55497588e-02, 9.61121689e-04, 2.85154768e-02, 7.54191212e-03,
        1.17647059e-01],
       [3.52995654e-02, 1.81933276e-02, 3.07638606e-02, 1.08394503e-02,
        1.17647059e-01],
       [2.28243938e-02, 2.25939501e-02, 2.58962985e-02, 2.31933423e-02,
        5.88235294e-02],
       [1.74606866e-02, 1.96622778e-02, 2.04485473e-02, 2.27143074e-02,
        1.47058824e-01],
       [3.93988634e-02, 1.76136534e-02, 6.39275007e-02, 2.23897793e-02,
        1.47058824e-01],
       [5.77764609e-02, 2.98937163e-02, 5.81339541e-02, 3.51121140e-02,
        1.47058824e-01],
       [5.28081242e-02, 5.61681293e-02, 5.97215482e-02, 4.93939447e-02,
        1.76470588e-01],
       [2.41016853e-02, 5.07465123e-02, 4.85713442e-02, 2.02345319e-02,
        1.76470588e-01],
       [2.37359402e-02, 1.98496909e-02, 2.50194745e-02, 2.67442015e-02,
        5.88235294e-02],
       [2.33472015e-02, 2.1010

ARIMA


In [208]:
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(final_dataset)

features = scaled_data[:, 1:]
target = scaled_data[:, 0]
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [209]:
train_size = int(len(final_dataset_n) * 0.8)

In [210]:
train, test = final_dataset_n[:train_size], final_dataset_n[train_size:]

In [211]:
target_train = train[:, 0]
target_test = test[:, 0]

In [212]:
model = ARIMA(target_train, order=(5,1,0))

model_fit = model.fit()

In [213]:
forecast = model_fit.forecast(steps=len(test))

In [214]:
rmse = mean_squared_error(target_test, forecast, squared=False)
print(f"RMSE: {rmse}")


RMSE: 0.22001591392789352


Random Forest regressor


In [215]:
random_forest = RandomForestRegressor()

random_forest.fit(X_train, y_train)

In [216]:
rf_predictions = random_forest.predict(X_test)

In [217]:
rmse = mean_squared_error(y_test, rf_predictions, squared=False)
print(rmse)

0.03850837226903814


Gradient Boosting Regressor

In [218]:
gradient_boosting = GradientBoostingRegressor()

gradient_boosting.fit(X_train, y_train)

In [219]:
gb_predictions = gradient_boosting.predict(X_test)

In [220]:
rmse = mean_squared_error(y_test, gb_predictions, squared=False)

print(rmse)

0.036759008929800135


Linear Regression

In [221]:
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(final_dataset)

features = scaled_data[:, 1:]
target = scaled_data[:, 0]

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

predicted_normalized_price = model.predict(X_test)

predicted_prices = scaler.inverse_transform(
    np.concatenate((predicted_normalized_price.reshape(-1, 1), X_test), axis=1))[:, 0]


In [222]:
rmse = mean_squared_error(y_test, predicted_normalized_price, squared=False)
rmse

0.011079102552223474

In [223]:
features_for_prediction = np.array(features[-1]).reshape(1, -1)
predicted_value_normalized = model.predict(features_for_prediction)


In [224]:
predicted_value_normalized

array([0.84722758])

In [225]:
predicted_prices = scaler.inverse_transform(
    np.concatenate((predicted_value_normalized.reshape(-1, 1), features_for_prediction), axis=1))[:, 0]

print("Predicted closing prices of the next day:", predicted_prices)

Predicted closing prices of the next day: [41538.90485264]


In [226]:
latest_closing_price = final_dataset['closing_price'][-1]

In [227]:
if(latest_closing_price - predicted_prices[0] > 0):
  print('The value is going to decrease')

else:
  print('The value is going to increase')

The value is going to increase


Logistic Regression

In [228]:
features = scaled_data[:-1]  # Exclude the last day for prediction
target = (final_dataset['closing_price'].shift(-1) > final_dataset['closing_price']).iloc[:-1]

In [289]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=65)
model = LogisticRegression()
model.fit(X_train, y_train)


In [290]:
predictions = model.predict(X_test)
predictions = [not c for c in predictions]

accuracy = accuracy_score(y_test, predictions)

print("Accuracy =", accuracy*100)

Accuracy = 77.77777777777779


In [233]:
predictions

[True,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 False]

In [234]:
prediction_features = scaled_data[-1].reshape(1, -1)
prediction_result = model.predict(prediction_features)
prediction_result

array([False])

In [235]:
latest_closing_price = final_dataset['closing_price'][-1]
latest_closing_price

41286.271108285575

In [236]:
if prediction_result[-1]:
   print('The value is going to decrease')
else:
  print('The value is going to increase')

The value is going to increase


# We decided to go with Logistic Regression, which we thought will be a good fit.