In [59]:
pip install pycoingecko



CoinGeckoAPI updates every 5 minutes

In [60]:
pip install plotly



In [61]:
pip install tweepy



In [62]:
from pycoingecko import CoinGeckoAPI                    #API where we are deriving the crypto data
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import requests
from statsmodels.tsa.arima.model import ARIMA           #Time-series machine learning model
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression       #Linear reggression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor        #Random Forest and GB Regressor

In [63]:
cg = CoinGeckoAPI()

Taking the latest information of bitcoin

In [64]:
btc = cg.get_coin_market_chart_by_id(id='bitcoin', vs_currency='USD', days=90)

In [65]:
bitcoin_data = pd.DataFrame(btc['prices'], columns = ['timestamp', 'price'])

In [66]:
bitcoin_data

Unnamed: 0,timestamp,price
0,1694473314943,25140.138899
1,1694476881529,25133.303107
2,1694480488595,25192.835592
3,1694484085000,25134.535000
4,1694487615337,25195.648766
...,...,...
2156,1702234822111,43864.742797
2157,1702238400812,43798.292055
2158,1702242025798,43967.496154
2159,1702245636260,43812.095603


Converting timestamp to datetime format


In [67]:
bitcoin_data['Date'] = pd.to_datetime(bitcoin_data['timestamp'], unit = 'ms')

In [68]:
bitcoin_data

Unnamed: 0,timestamp,price,Date
0,1694473314943,25140.138899,2023-09-11 23:01:54.943
1,1694476881529,25133.303107,2023-09-12 00:01:21.529
2,1694480488595,25192.835592,2023-09-12 01:01:28.595
3,1694484085000,25134.535000,2023-09-12 02:01:25.000
4,1694487615337,25195.648766,2023-09-12 03:00:15.337
...,...,...,...
2156,1702234822111,43864.742797,2023-12-10 19:00:22.111
2157,1702238400812,43798.292055,2023-12-10 20:00:00.812
2158,1702242025798,43967.496154,2023-12-10 21:00:25.798
2159,1702245636260,43812.095603,2023-12-10 22:00:36.260


Aggregating all the values on the basis of date and creating a new dataframe. We will use this for our prediction and visualization.

In [69]:
candlestick_data = bitcoin_data.groupby(bitcoin_data.Date.dt.date).aggregate({'price': {'max', 'min', 'first', 'last'}})

In [70]:
candlestick_data

Unnamed: 0_level_0,price,price,price,price
Unnamed: 0_level_1,last,first,max,min
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2023-09-11,25140.138899,25140.138899,25140.138899,25140.138899
2023-09-12,25979.215385,25133.303107,26288.334884,25133.303107
2023-09-13,26234.189828,25850.322300,26304.015465,25843.107805
2023-09-14,26615.532123,26222.013304,26751.796646,26193.031946
2023-09-15,26792.608825,26531.395566,26792.608825,26251.615685
...,...,...,...,...
2023-12-06,43634.904484,44105.944773,44123.756422,43613.258584
2023-12-07,43166.048404,43788.288808,43985.767023,43158.794425
2023-12-08,44252.835095,43270.119560,44385.975286,43174.140967
2023-12-09,43908.530003,44158.481190,44290.263279,43824.945022


In [71]:
candlestick_data.index

Index([2023-09-11, 2023-09-12, 2023-09-13, 2023-09-14, 2023-09-15, 2023-09-16,
       2023-09-17, 2023-09-18, 2023-09-19, 2023-09-20, 2023-09-21, 2023-09-22,
       2023-09-23, 2023-09-24, 2023-09-25, 2023-09-26, 2023-09-27, 2023-09-28,
       2023-09-29, 2023-09-30, 2023-10-01, 2023-10-02, 2023-10-03, 2023-10-04,
       2023-10-05, 2023-10-06, 2023-10-07, 2023-10-08, 2023-10-09, 2023-10-10,
       2023-10-11, 2023-10-12, 2023-10-13, 2023-10-14, 2023-10-15, 2023-10-16,
       2023-10-17, 2023-10-18, 2023-10-19, 2023-10-20, 2023-10-21, 2023-10-22,
       2023-10-23, 2023-10-24, 2023-10-25, 2023-10-26, 2023-10-27, 2023-10-28,
       2023-10-29, 2023-10-30, 2023-10-31, 2023-11-01, 2023-11-02, 2023-11-03,
       2023-11-04, 2023-11-05, 2023-11-06, 2023-11-07, 2023-11-08, 2023-11-09,
       2023-11-10, 2023-11-11, 2023-11-12, 2023-11-13, 2023-11-14, 2023-11-15,
       2023-11-16, 2023-11-17, 2023-11-18, 2023-11-19, 2023-11-20, 2023-11-21,
       2023-11-22, 2023-11-23, 2023-11-24, 2023-11-2

In [72]:
fig = go.Figure(data=[go.Candlestick(x = candlestick_data.index,
                                     open = candlestick_data['price']['first'],
                                     low = candlestick_data['price']['min'],
                                     high = candlestick_data['price']['max'],
                                     close = candlestick_data['price']['last'])])

Visualization of Data.


In [73]:
fig.update_layout(title = 'Bitcoin prices over the last 90 days',
                  yaxis_title = 'Price (USD)',
                  xaxis_title = 'Date')

Machine Learning Model


Target : Closing Price

Current Features : Date, Max Price, Min Price, Open Price, Close Price.

To Find sentiment score and add that as a feature for the final machine learning model.


Taking the sentiment data from alternative.me

In [74]:
url = 'https://api.alternative.me/fng/?limit=91&date_format=cn'

r= requests.get(url)

In [75]:
r

<Response [200]>

In [76]:
data = r.json()

In [77]:
temp_df = pd.DataFrame(data['data'])

In [78]:
temp_df

Unnamed: 0,value,value_classification,timestamp,time_until_update
0,74,Greed,2023-12-10,-1702160477
1,73,Greed,2023-12-09,
2,72,Greed,2023-12-08,
3,72,Greed,2023-12-07,
4,72,Greed,2023-12-06,
...,...,...,...,...
86,45,Fear,2023-09-15,
87,45,Fear,2023-09-14,
88,41,Fear,2023-09-13,
89,30,Fear,2023-09-12,


Rescaling sentiment from 0 - 100 to -1 to 1

In [79]:
temp_df['value'] = temp_df['value'].astype(int)

In [80]:
temp_df['timestamp'] = pd.to_datetime(temp_df['timestamp'])

In [81]:
temp_df

Unnamed: 0,value,value_classification,timestamp,time_until_update
0,74,Greed,2023-12-10,-1702160477
1,73,Greed,2023-12-09,
2,72,Greed,2023-12-08,
3,72,Greed,2023-12-07,
4,72,Greed,2023-12-06,
...,...,...,...,...
86,45,Fear,2023-09-15,
87,45,Fear,2023-09-14,
88,41,Fear,2023-09-13,
89,30,Fear,2023-09-12,


In [82]:
temp_df.index = temp_df['timestamp']

In [83]:
temp_df

Unnamed: 0_level_0,value,value_classification,timestamp,time_until_update
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-12-10,74,Greed,2023-12-10,-1702160477
2023-12-09,73,Greed,2023-12-09,
2023-12-08,72,Greed,2023-12-08,
2023-12-07,72,Greed,2023-12-07,
2023-12-06,72,Greed,2023-12-06,
...,...,...,...,...
2023-09-15,45,Fear,2023-09-15,
2023-09-14,45,Fear,2023-09-14,
2023-09-13,41,Fear,2023-09-13,
2023-09-12,30,Fear,2023-09-12,


Merging both dataframes to create our final dataset.

In [84]:
final_dataset = pd.merge(candlestick_data, temp_df, left_index=True, right_index=True, how='left')


merging between different levels is deprecated and will be removed in a future version. (2 levels on the left, 1 on the right)



In [85]:
final_dataset = final_dataset.iloc[:,:5]

In [86]:
final_dataset.columns = ['closing_price', 'max_price', 'min_price', 'opening_price', 'sentiment']

Normalizing the data

In [87]:
scaler = MinMaxScaler()

In [88]:
final_dataset_n = scaler.fit_transform(final_dataset)

In [89]:
final_dataset

Unnamed: 0_level_0,closing_price,max_price,min_price,opening_price,sentiment
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-09-11,25140.138899,25140.138899,25140.138899,25140.138899,40
2023-09-12,25979.215385,25133.303107,26288.334884,25133.303107,30
2023-09-13,26234.189828,25850.322300,26304.015465,25843.107805,41
2023-09-14,26615.532123,26222.013304,26751.796646,26193.031946,45
2023-09-15,26792.608825,26531.395566,26792.608825,26251.615685,45
...,...,...,...,...,...
2023-12-06,43634.904484,44105.944773,44123.756422,43613.258584,72
2023-12-07,43166.048404,43788.288808,43985.767023,43158.794425,72
2023-12-08,44252.835095,43270.119560,44385.975286,43174.140967,72
2023-12-09,43908.530003,44158.481190,44290.263279,43824.945022,73


Normalized dataset

In [90]:
final_dataset_n

array([[0.00000000e+00, 3.59302410e-04, 0.00000000e+00, 3.65713850e-04,
        2.22222222e-01],
       [4.37624614e-02, 0.00000000e+00, 5.96594485e-02, 0.00000000e+00,
        0.00000000e+00],
       [5.70607833e-02, 3.76879097e-02, 6.04742004e-02, 3.79744434e-02,
        2.44444444e-01],
       [7.69498849e-02, 5.72247047e-02, 8.37405927e-02, 5.66953318e-02,
        3.33333333e-01],
       [8.61854104e-02, 7.34864322e-02, 8.58611646e-02, 5.98295529e-02,
        3.33333333e-01],
       [7.43682770e-02, 7.76392217e-02, 8.12703045e-02, 7.15715944e-02,
        2.88888889e-01],
       [6.92874936e-02, 7.48726545e-02, 7.61322368e-02, 7.11162846e-02,
        3.55555556e-01],
       [9.00684794e-02, 7.29394039e-02, 1.17139581e-01, 7.08078294e-02,
        3.55555556e-01],
       [1.07476699e-01, 8.45278818e-02, 1.11675375e-01, 8.29000657e-02,
        3.55555556e-01],
       [1.02770431e-01, 1.09322580e-01, 1.13172720e-01, 9.64745609e-02,
        3.77777778e-01],
       [7.55781941e-02, 1.0420

ARIMA is a time series forcasting model which is perfect for our needs here


In [91]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [92]:
train_size = int(len(final_dataset_n) * 0.8)

In [93]:
train, test = final_dataset_n[:train_size], final_dataset_n[train_size:]

In [94]:
target_train = train[:, 0]
target_test = test[:, 0]

In [95]:
model = ARIMA(target_train, order=(5,1,0))

model_fit = model.fit()

In [96]:
forecast = model_fit.forecast(steps=len(test))

All values are unrealistically low

In [97]:
print(forecast)

[0.60153091 0.60169284 0.59644613 0.60220903 0.59525497 0.59635385
 0.59687661 0.59509942 0.59619036 0.59525211 0.59521292 0.59541809
 0.59502639 0.5951981  0.59508482 0.59503173 0.59508499 0.5950113
 0.59503234]


In [98]:
rmse = mean_squared_error(target_test, forecast, squared=False)
print(f"RMSE: {rmse}")


RMSE: 0.23469232071616947


Random Forest regressor


In [99]:
random_forest = RandomForestRegressor()

random_forest.fit(X_train, y_train)

In [100]:
rf_predictions = random_forest.predict(X_test)

In [101]:
rmse = mean_squared_error(y_test, rf_predictions, squared=False)
print(rmse)

0.02987323008225876


Gradient Boosting Regressor

In [102]:
gradient_boosting = GradientBoostingRegressor()

gradient_boosting.fit(X_train, y_train)

In [103]:
gb_predictions = gradient_boosting.predict(X_test)

In [104]:
rmse = mean_squared_error(y_test, gb_predictions, squared=False)

print(rmse)

0.028809367437606512


Linear Regression

In [105]:
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(final_dataset)

features = scaled_data[:, 1:]
target = scaled_data[:, 0]

model = LinearRegression()
model.fit(X_train, y_train)

predicted_normalized_price = model.predict(X_test)

predicted_prices = scaler.inverse_transform(
    np.concatenate((predicted_normalized_price.reshape(-1, 1), X_test), axis=1))[:, 0]

print("Predicted closing prices of the next day:", predicted_prices)

Predicted closing prices of the next day: [29987.58379001 27354.15563279 34923.52197199 44143.80120567
 25100.26100572 27922.33081467 29675.56769146 36212.08377984
 26516.89027742 34840.11563627 43623.2858263  28320.20457
 37362.07006732 37010.61073035 26550.69105892 26497.26156667
 26968.25797106 27472.18698452 34422.0381509 ]


Linear regression had the lowest rmse out of the 4 models.

In [106]:
rmse = mean_squared_error(y_test, predicted_normalized_price, squared=False)
rmse

0.010403449384365859

In [107]:
features_for_prediction = np.array(features[-1]).reshape(1, -1)
predicted_value_normalized = model.predict(features_for_prediction)


In [108]:
predicted_value_normalized

array([0.97647863])

In [109]:
predicted_prices = scaler.inverse_transform(
    np.concatenate((predicted_value_normalized.reshape(-1, 1), features_for_prediction), axis=1))[:, 0]

print("Predicted closing prices of the next day:", predicted_prices)

Predicted closing prices of the next day: [43862.58347403]


In [110]:
latest_closing_price = final_dataset['closing_price'][-1]

In [111]:
if(latest_closing_price - predicted_prices[0] > 0):
  print('The value is going to decrease')

else:
  print('The value is going to increase')

The value is going to decrease
