### Cleaning data from "crypto_name_symbol.csv" file

In [4]:
import pandas as pd

In [6]:
# Load the raw symbol/name table from CSV and preview its first rows.
df = pd.read_csv("crypto_name_symbol.csv")
df.head()

Unnamed: 0,Symbol,Image URLS,Name,Price,Change,Change %,Market Cap,Volume,Volume In Currency (24hr),Total Volume All Currencies (24hr),Circulating Supply,52 Wk Change %
0,BTC-USD,https://s2.coinmarketcap.com/static/img/coins/...,Bitcoin USD,102880.3,3494.78,(+3.52%),2.039T,51.401B,51.401B,51.401B,19.816M,139.06%
1,ETH-USD,https://s2.coinmarketcap.com/static/img/coins/...,Ethereum USD,3164.39,90.21,(+2.93%),381.347B,20.954B,20.954B,20.954B,120.512M,38.03%
2,XRP-USD,https://s2.coinmarketcap.com/static/img/coins/...,XRP USD,3.1692,0.2663,(+9.17%),182.676B,9.615B,9.615B,9.615B,57.641B,492.57%
3,USDT-USD,https://s2.coinmarketcap.com/static/img/coins/...,Tether USDt USD,0.9999,0.000275,(+0.03%),139.442B,95.082B,95.082B,95.082B,139.457B,-0.04%
4,SOL-USD,https://s2.coinmarketcap.com/static/img/coins/...,Solana USD,233.8,7.32,(+3.23%),113.784B,5.38B,5.38B,5.38B,486.668M,137.39%


Checking For Null Data

In [11]:
# Count missing values per column (isna is the canonical name for isnull).
df.isna().sum()

Symbol                                0
Image URLS                            0
Name                                  0
Price                                 0
Change                                0
Change %                              0
Market Cap                            0
Volume                                0
Volume In Currency (24hr)             0
Total Volume All Currencies (24hr)    0
Circulating Supply                    0
52 Wk Change %                        0
dtype: int64

Checking for duplicate rows

In [14]:
# Count the rows that are exact duplicates of an earlier row
duplicate_mask = df.duplicated()
print(duplicate_mask.sum())

0


In [15]:
# Remove the trailing " USD" quote-currency suffix from the Name column.
# Matching the suffix explicitly (rather than slicing off the last 4 characters)
# leaves names that lack the suffix untouched and makes this cell safe to re-run:
# a second execution no longer chops real characters off the coin name.
df["Name"] = df["Name"].str.removesuffix(" USD")

In [17]:
# Drop every "(" and ")" character from the "Change %" column,
# e.g. "(+3.52%)" becomes "+3.52%". The values remain strings.
change_pct = df['Change %']
df['Change %'] = change_pct.str.replace(r'[\(\)]', '', regex=True)

In [20]:
# Convert abbreviated values (trillions, billions, millions, thousands) to plain numbers for later analysis
def convert_to_numeric(value):
    """Convert an abbreviated magnitude string such as "2.039T" to a float.

    Recognised suffixes (case-insensitive): T = 1e12, B = 1e9, M = 1e6, K = 1e3.
    Surrounding whitespace and thousands separators (",") are ignored, so
    " 1,234.5M " is parsed the same as "1234.5M".

    Parameters
    ----------
    value : object
        The cell value to convert. Non-string values are returned unchanged,
        which keeps the function safe to apply to already-converted columns.

    Returns
    -------
    float or object
        The numeric value for string input; the original object otherwise.

    Raises
    ------
    ValueError
        If the string (after removing a recognised suffix) is not a valid number.
    """
    if not isinstance(value, str):
        return value

    multipliers = {"T": 1e12, "B": 1e9, "M": 1e6, "K": 1e3}

    # Normalise formatting noise before inspecting the suffix.
    text = value.strip().replace(",", "")

    # Slicing (rather than indexing) keeps an empty string from raising IndexError;
    # it falls through to float("") and raises ValueError as before.
    suffix = text[-1:].upper()
    if suffix in multipliers:
        return float(text[:-1]) * multipliers[suffix]
    return float(text)
# Columns that hold abbreviated magnitudes; each cell is converted element-wise.
magnitude_columns = [
    'Market Cap',
    'Volume',
    'Volume In Currency (24hr)',
    'Total Volume All Currencies (24hr)',
    'Circulating Supply',
]
df[magnitude_columns] = df[magnitude_columns].map(convert_to_numeric)
    



In [21]:
# Preview the first rows of the frame after the cleaning steps.
df.head()

Unnamed: 0,Symbol,Image URLS,Name,Price,Change,Change %,Market Cap,Volume,Volume In Currency (24hr),Total Volume All Currencies (24hr),Circulating Supply,52 Wk Change %
0,BTC-USD,https://s2.coinmarketcap.com/static/img/coins/...,Bitcoin,102880.3,3494.78,+3.52%,2039000000000.0,51401000000.0,51401000000.0,51401000000.0,19816000.0,139.06%
1,ETH-USD,https://s2.coinmarketcap.com/static/img/coins/...,Ethereum,3164.39,90.21,+2.93%,381347000000.0,20954000000.0,20954000000.0,20954000000.0,120512000.0,38.03%
2,XRP-USD,https://s2.coinmarketcap.com/static/img/coins/...,XRP,3.1692,0.2663,+9.17%,182676000000.0,9615000000.0,9615000000.0,9615000000.0,57641000000.0,492.57%
3,USDT-USD,https://s2.coinmarketcap.com/static/img/coins/...,Tether USDt,0.9999,0.000275,+0.03%,139442000000.0,95082000000.0,95082000000.0,95082000000.0,139457000000.0,-0.04%
4,SOL-USD,https://s2.coinmarketcap.com/static/img/coins/...,Solana,233.8,7.32,+3.23%,113784000000.0,5380000000.0,5380000000.0,5380000000.0,486668000.0,137.39%


In [22]:
# Export the preprocessed data; index=True writes the row index as the first column.
preprocessed_path = "crypto_name_symbol_preprocessed.csv"
df.to_csv(preprocessed_path, index=True)

### Cleaning data from "historical_data.csv" file

In [23]:
# Load the historical price table from CSV and preview its first rows.
data = pd.read_csv("historical_data.csv")
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj,Volume,Symbol
0,"Jan 25, 2025",104839.47,105142.84,104133.78,104425.06,104425.06,42543095808,BTC-USD
1,"Jan 24, 2025",103965.67,107098.55,102772.13,104819.48,104819.48,52388229265,BTC-USD
2,"Jan 23, 2025",103657.67,106820.33,101257.8,103960.17,103960.17,104104515428,BTC-USD
3,"Jan 22, 2025",106136.38,106294.34,103360.27,103653.07,103653.07,53878181052,BTC-USD
4,"Jan 21, 2025",102052.58,107180.92,100103.95,106146.27,106146.27,88733878242,BTC-USD


In [25]:
# Inspect column dtypes, non-null counts and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3660 entries, 0 to 3659
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    3660 non-null   object
 1   Open    3660 non-null   object
 2   High    3660 non-null   object
 3   Low     3660 non-null   object
 4   Close   3660 non-null   object
 5   Adj     3660 non-null   object
 6   Volume  3660 non-null   object
 7   Symbol  3660 non-null   object
dtypes: object(8)
memory usage: 228.9+ KB


In [26]:
# Count missing values per column (isna is the canonical name for isnull).
data.isna().sum()

Date      0
Open      0
High      0
Low       0
Close     0
Adj       0
Volume    0
Symbol    0
dtype: int64

In [27]:
# Count the rows that are exact duplicates of an earlier row
duplicate_mask = data.duplicated()
print(duplicate_mask.sum())

0


In [29]:
# List the distinct ticker symbols present in the historical data.
symbols = data["Symbol"].unique()
print(symbols)

['BTC-USD' 'ETH-USD' 'XRP-USD' 'USDT-USD' 'SOL-USD' 'BNB-USD' 'USDC-USD'
 'DOGE-USD' 'ADA-USD' 'STETH-USD']


In [30]:
# Summarise each column. Every column is stored as object dtype (see data.info()),
# so describe() reports count/unique/top/freq instead of numeric statistics.
data.describe()

Unnamed: 0,Date,Open,High,Low,Close,Adj,Volume,Symbol
count,3660,3660.0,3660.0,3660.0,3660.0,3660.0,3660,3660
unique,366,3396.0,3490.0,3464.0,3395.0,3395.0,3659,10
top,"Jan 26, 2024",0.999997,1.001001,0.999593,0.999998,0.999998,41189878063,BTC-USD
freq,10,6.0,4.0,4.0,6.0,6.0,2,366
