### Data Cleaning

In [46]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose


In [47]:
# Load the dataset and find columns with a single unique value

# Reload the dataset.
# The path uses a forward slash: a backslash before "c" is an invalid escape
# sequence in a normal string literal (it triggers a warning on current Python
# versions) and is only understood as a path separator on Windows.
file_path = "data/crypto_with_indicators.csv"
df = pd.read_csv(file_path)

# Find constant columns: those with exactly one distinct value.
# nunique() skips NaN by default, so a column holding one value plus
# missing entries is also reported here.
constant_columns = [col for col in df.columns if df[col].nunique() == 1]

# Display constant columns
constant_columns


[]

In [48]:
# Pairwise Pearson correlation between the numeric columns.
# numeric_only=True is required because df also holds the string column
# "Open Time"; without it, pandas >= 2.0 raises instead of silently skipping
# non-numeric columns.
corr_matrix = df.corr(numeric_only=True)
corr_matrix

Unnamed: 0,BTCUSDT_Close,BTCUSDT_Volume,ETHUSDT_Close,ETHUSDT_Volume,XRPUSDT_Close,XRPUSDT_Volume,SOLUSDT_Close,SOLUSDT_Volume,BNBUSDT_Close,BNBUSDT_Volume,...,RENDERUSDT_Close,RENDERUSDT_Volume,JUPUSDT_Close,JUPUSDT_Volume,FILUSDT_Close,FILUSDT_Volume,ARBUSDT_Close,ARBUSDT_Volume,FDUSDUSDT_Close,FDUSDUSDT_Volume
BTCUSDT_Close,1.000000,-0.002957,0.832764,0.361629,0.714717,0.265570,0.960768,-0.033769,0.927343,-0.038057,...,0.715743,0.340315,0.383546,-0.049117,0.328020,0.321688,-0.244070,0.387898,-0.194336,0.334372
BTCUSDT_Volume,-0.002957,1.000000,0.063976,0.756872,-0.056116,0.562818,0.010845,0.683196,-0.081910,0.633128,...,0.084596,0.723904,0.034079,0.407016,0.252690,0.486658,0.288940,0.590572,-0.175674,0.552229
ETHUSDT_Close,0.832764,0.063976,1.000000,0.297856,0.384353,0.270316,0.863949,-0.019022,0.860334,0.095299,...,0.900422,0.312879,0.484258,0.009921,0.635291,0.365796,0.094157,0.357519,-0.085960,0.496311
ETHUSDT_Volume,0.361629,0.756872,0.297856,1.000000,0.289200,0.620526,0.345857,0.631726,0.230825,0.544814,...,0.133871,0.683075,0.108353,0.372845,0.245603,0.646719,0.099732,0.827390,-0.263663,0.505106
XRPUSDT_Close,0.714717,-0.056116,0.384353,0.289200,1.000000,0.226815,0.549967,0.037366,0.487907,-0.039501,...,0.608285,0.190230,0.111677,0.109254,0.108703,0.231752,-0.208587,0.234985,-0.146810,0.112390
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FILUSDT_Volume,0.321688,0.486658,0.365796,0.646719,0.231752,0.598350,0.310670,0.452647,0.183023,0.566334,...,0.484542,0.711700,0.039757,0.372989,0.528321,1.000000,0.340458,0.688362,-0.238886,0.553287
ARBUSDT_Close,-0.244070,0.288940,0.094157,0.099732,-0.208587,0.156630,-0.186924,0.258532,-0.322396,0.494068,...,0.957878,0.298141,-0.029769,0.512118,0.709557,0.340458,1.000000,0.160161,0.200683,0.479128
ARBUSDT_Volume,0.387898,0.590572,0.357519,0.827390,0.234985,0.638263,0.410675,0.573504,0.279445,0.547516,...,0.317349,0.737695,0.260913,0.291208,0.315528,0.688362,0.160161,1.000000,-0.260276,0.478186
FDUSDUSDT_Close,-0.194336,-0.175674,-0.085960,-0.263663,-0.146810,-0.229365,-0.186870,-0.154342,-0.136312,0.000506,...,-0.179080,-0.377261,-0.106627,0.012804,0.040343,-0.238886,0.200683,-0.260276,1.000000,0.000759


In [49]:
# Display the loaded DataFrame to inspect its columns and the first/last rows
df

Unnamed: 0,Open Time,BTCUSDT_Close,BTCUSDT_Volume,ETHUSDT_Close,ETHUSDT_Volume,XRPUSDT_Close,XRPUSDT_Volume,SOLUSDT_Close,SOLUSDT_Volume,BNBUSDT_Close,...,RENDERUSDT_Close,RENDERUSDT_Volume,JUPUSDT_Close,JUPUSDT_Volume,FILUSDT_Close,FILUSDT_Volume,ARBUSDT_Close,ARBUSDT_Volume,FDUSDUSDT_Close,FDUSDUSDT_Volume
0,2023-07-01,30585.90,17501.75075,1924.50,178373.3688,0.4732,232549409.0,18.70,3515091.930,247.90,...,,,,,4.107,6623940.12,1.1514,40352979.6,,
1,2023-07-02,30617.03,23286.41019,1937.48,255852.7832,0.4845,337155089.0,19.45,4872775.080,246.50,...,,,,,4.109,4937993.31,1.1511,36723276.4,,
2,2023-07-03,31156.20,43761.64311,1955.54,322102.4295,0.4890,357020349.0,19.26,3065186.840,246.50,...,,,,,4.734,28961489.98,1.1694,50849349.9,,
3,2023-07-04,30766.51,33206.11943,1936.20,205525.0984,0.4872,299439491.0,19.13,3096292.180,242.50,...,,,,,4.546,7988349.41,1.1488,46515719.2,,
4,2023-07-05,30504.81,33215.67122,1910.36,267633.4783,0.4777,292620010.0,19.00,2415908.510,238.90,...,,,,,4.600,14798522.19,1.1117,43694827.3,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
586,2025-02-06,96554.35,23515.20405,2686.64,719459.9571,2.3250,322896937.0,189.54,3444767.147,571.72,...,4.189,5844305.33,0.8274,35259694.7,3.121,9132598.18,0.4455,87332007.5,0.9992,401886647.0
587,2025-02-07,96506.80,31794.22065,2622.10,695467.3612,2.3968,447487945.0,191.99,3925596.773,577.71,...,4.134,6064457.98,0.8007,41987209.7,3.188,8734086.21,0.4326,102282933.0,0.9989,515209741.0
588,2025-02-08,96444.74,10147.24294,2632.46,379685.1509,2.4190,154717204.0,199.32,1967596.752,612.39,...,4.257,3004873.80,0.8403,25710852.7,3.306,4382412.59,0.4500,47660997.6,0.9991,282426481.0
589,2025-02-09,96462.75,14120.91613,2627.18,387166.7911,2.3928,204205603.0,200.47,2879138.460,615.81,...,4.243,3872535.77,0.8327,31213202.3,3.311,4864828.39,0.4490,48100116.0,0.9989,323574754.0


In [61]:
# Flag each row that exactly repeats an earlier row (the first occurrence is
# not flagged), then count how many rows were flagged
duplicates = df.duplicated(keep="first")
duplicates.sum()

0

In [51]:
# Min-max normalization: rescale every float64/int64 column of df to [0, 1]
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns

# Learn each column's min/max, then overwrite those columns in df with the
# scaled values
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df[numeric_cols])
df[numeric_cols] = scaler.transform(df[numeric_cols])

# Show the normalized columns
df[numeric_cols]

Unnamed: 0,BTCUSDT_Close,BTCUSDT_Volume,ETHUSDT_Close,ETHUSDT_Volume,XRPUSDT_Close,XRPUSDT_Volume,SOLUSDT_Close,SOLUSDT_Volume,BNBUSDT_Close,BNBUSDT_Volume,...,RENDERUSDT_Close,RENDERUSDT_Volume,JUPUSDT_Close,JUPUSDT_Volume,FILUSDT_Close,FILUSDT_Volume,ARBUSDT_Close,ARBUSDT_Volume,FDUSDUSDT_Close,FDUSDUSDT_Volume
0,0.066971,0.067436,0.152420,0.043592,0.018693,0.070236,0.004012,0.117545,0.078587,0.162351,...,,,,,0.128293,0.118625,0.393626,0.067390,,
1,0.067355,0.104753,0.157560,0.071772,0.022627,0.112318,0.007083,0.173175,0.076016,0.130901,...,,,,,0.128531,0.085044,0.393461,0.059986,,
2,0.074013,0.236836,0.164712,0.095867,0.024193,0.120310,0.006305,0.099110,0.076016,0.200339,...,,,,,0.202706,0.563551,0.403483,0.088802,,
3,0.069201,0.168743,0.157054,0.053467,0.023567,0.097145,0.005773,0.100385,0.068672,0.103394,...,,,,,0.180394,0.145802,0.392202,0.079962,,
4,0.065969,0.168805,0.146821,0.076056,0.020260,0.094402,0.005241,0.072506,0.062062,0.111225,...,,,,,0.186803,0.281449,0.371885,0.074207,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
586,0.881584,0.106229,0.454235,0.240389,0.663313,0.106582,0.703460,0.114663,0.673167,0.064481,...,0.009141,0.122292,0.281184,0.079995,0.011275,0.168593,0.007064,0.163225,0.264151,0.078723
587,0.880997,0.159635,0.428677,0.231663,0.688307,0.156704,0.713490,0.134365,0.684165,0.036369,...,0.000473,0.128194,0.260225,0.098071,0.019226,0.160655,0.000000,0.193725,0.235849,0.100925
588,0.880231,0.019993,0.432779,0.116810,0.696035,0.038925,0.743501,0.054137,0.747843,0.103871,...,0.019858,0.046170,0.291310,0.054340,0.033230,0.073977,0.009529,0.082298,0.254717,0.055319
589,0.880453,0.045627,0.430688,0.119532,0.686915,0.058834,0.748209,0.091487,0.754122,0.175188,...,0.017652,0.069431,0.285344,0.069123,0.033824,0.083586,0.008981,0.083194,0.235849,0.063381


In [53]:
# Print the DataFrame's row index to inspect its type and range
print(df.index)

RangeIndex(start=0, stop=591, step=1)
