In [40]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

In [12]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [13]:
df = pd.read_csv('/content/drive/MyDrive/crypto_data.csv')
df

Unnamed: 0,Date,Close (BNB),Close (BTC),Close (USDT),Close (ETH)
0,09-11-17,1.99077,7143.580078,1.00818,320.884003
1,10-11-17,1.79684,6618.140137,1.00601,299.252991
2,11-11-17,1.67047,6357.600098,1.00899,314.681000
3,12-11-17,1.51969,5950.069824,1.01247,307.907990
4,13-11-17,1.68662,6559.490234,1.00935,316.716003
...,...,...,...,...,...
1748,23-08-22,299.03000,21528.090000,1.00000,1662.770000
1749,24-08-22,296.45000,21395.020000,1.00000,1657.060000
1750,25-08-22,301.58000,21600.900000,1.00010,1696.460000
1751,26-08-22,279.60000,20260.020000,1.00000,1507.780000


In [14]:
df.isnull().sum()

Date            0
Close (BNB)     0
Close (BTC)     0
Close (USDT)    0
Close (ETH)     0
dtype: int64

In [15]:
df['Date'] = pd.to_datetime(df['Date'], format = '%d-%m-%y')

In [17]:
df.set_index('Date', inplace=True)

In [18]:
df

Unnamed: 0_level_0,Close (BNB),Close (BTC),Close (USDT),Close (ETH)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-11-09,1.99077,7143.580078,1.00818,320.884003
2017-11-10,1.79684,6618.140137,1.00601,299.252991
2017-11-11,1.67047,6357.600098,1.00899,314.681000
2017-11-12,1.51969,5950.069824,1.01247,307.907990
2017-11-13,1.68662,6559.490234,1.00935,316.716003
...,...,...,...,...
2022-08-23,299.03000,21528.090000,1.00000,1662.770000
2022-08-24,296.45000,21395.020000,1.00000,1657.060000
2022-08-25,301.58000,21600.900000,1.00010,1696.460000
2022-08-26,279.60000,20260.020000,1.00000,1507.780000


In [19]:
df.sort_index(inplace=True)

In [20]:
df.drop_duplicates(inplace=True)

In [21]:
df

Unnamed: 0_level_0,Close (BNB),Close (BTC),Close (USDT),Close (ETH)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-11-09,1.99077,7143.580078,1.00818,320.884003
2017-11-10,1.79684,6618.140137,1.00601,299.252991
2017-11-11,1.67047,6357.600098,1.00899,314.681000
2017-11-12,1.51969,5950.069824,1.01247,307.907990
2017-11-13,1.68662,6559.490234,1.00935,316.716003
...,...,...,...,...
2022-08-23,299.03000,21528.090000,1.00000,1662.770000
2022-08-24,296.45000,21395.020000,1.00000,1657.060000
2022-08-25,301.58000,21600.900000,1.00010,1696.460000
2022-08-26,279.60000,20260.020000,1.00000,1507.780000


In [22]:
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1

In [29]:
# Remove outliers
df_no_outliers = df[~((df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))).any(axis=1)]

In [30]:
print(df_no_outliers)

            Close (BNB)   Close (BTC)  Close (USDT)  Close (ETH)
Date                                                            
2017-11-10      1.79684   6618.140137       1.00601   299.252991
2017-11-14      1.59258   6635.750000       1.00683   337.631012
2017-11-15      1.53045   7315.540039       1.00318   333.356995
2017-11-16      1.57792   7871.689941       1.00212   330.924011
2017-11-17      1.51036   7708.990234       1.00139   332.394012
...                 ...           ...           ...          ...
2022-08-23    299.03000  21528.090000       1.00000  1662.770000
2022-08-24    296.45000  21395.020000       1.00000  1657.060000
2022-08-25    301.58000  21600.900000       1.00010  1696.460000
2022-08-26    279.60000  20260.020000       1.00000  1507.780000
2022-08-27    277.30000  20029.790000       1.00000  1470.760000

[1471 rows x 4 columns]


In [31]:
# Normalize or scale numerical columns
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df_no_outliers), columns=df_no_outliers.columns, index=df_no_outliers.index)

In [32]:
df_scaled

Unnamed: 0_level_0,Close (BNB),Close (BTC),Close (USDT),Close (ETH)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-11-10,0.000425,0.053881,0.915589,0.051294
2017-11-14,0.000122,0.054162,0.989224,0.060453
2017-11-15,0.000030,0.064994,0.661458,0.059433
2017-11-16,0.000100,0.073856,0.566272,0.058852
2017-11-17,0.000000,0.071264,0.500718,0.059203
...,...,...,...,...
2022-08-23,0.441310,0.291467,0.375898,0.376682
2022-08-24,0.437483,0.289347,0.375898,0.375319
2022-08-25,0.445092,0.292627,0.384878,0.384722
2022-08-26,0.412490,0.271261,0.375898,0.339695


In [33]:
df_scaled['Day_of_Week'] = df_scaled.index.dayofweek
df_scaled['Month'] = df_scaled.index.month
df_scaled['Year'] = df_scaled.index.year

In [34]:
df_scaled

Unnamed: 0_level_0,Close (BNB),Close (BTC),Close (USDT),Close (ETH),Day_of_Week,Month,Year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-11-10,0.000425,0.053881,0.915589,0.051294,4,11,2017
2017-11-14,0.000122,0.054162,0.989224,0.060453,1,11,2017
2017-11-15,0.000030,0.064994,0.661458,0.059433,2,11,2017
2017-11-16,0.000100,0.073856,0.566272,0.058852,3,11,2017
2017-11-17,0.000000,0.071264,0.500718,0.059203,4,11,2017
...,...,...,...,...,...,...,...
2022-08-23,0.441310,0.291467,0.375898,0.376682,1,8,2022
2022-08-24,0.437483,0.289347,0.375898,0.375319,2,8,2022
2022-08-25,0.445092,0.292627,0.384878,0.384722,3,8,2022
2022-08-26,0.412490,0.271261,0.375898,0.339695,4,8,2022


In [36]:
X = df_scaled.drop(columns=['Close (BNB)', 'Close (BTC)', 'Close (USDT)', 'Close (ETH)'])
y = df_scaled[['Close (BNB)', 'Close (BTC)', 'Close (USDT)', 'Close (ETH)']]

In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [39]:
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

X_train shape: (1176, 3)
y_train shape: (1176, 4)
X_test shape: (295, 3)
y_test shape: (295, 4)


In [43]:
gb_models = []

In [44]:
for col in y_train.columns:
    gb_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
    gb_model.fit(X_train, y_train[col])
    gb_models.append(gb_model)

In [45]:
y_preds = np.column_stack([model.predict(X_test) for model in gb_models])

In [48]:
mse = mean_squared_error(y_test, y_preds)
rmse = np.sqrt(mse)
print("Root Mean Squared Error (RMSE):", rmse)

Root Mean Squared Error (RMSE): 0.08614463985808767
