Step 1: Import Necessary libraries

In [1]:
import pandas as pd
import requests

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [4]:
url = "http://api.coincap.io/v2/assets"

In [5]:
response = requests.get(url)
response

<Response [200]>

In [6]:
responsedata = response.json()
responsedata

{'data': [{'id': 'bitcoin',
   'rank': '1',
   'symbol': 'BTC',
   'name': 'Bitcoin',
   'supply': '19642150.0000000000000000',
   'maxSupply': '21000000.0000000000000000',
   'marketCapUsd': '1227650565536.9775209764805200',
   'volumeUsd24Hr': '11986817704.8898693295488357',
   'priceUsd': '62500.8242751927625528',
   'changePercent24Hr': '1.6200735325408098',
   'vwap24Hr': '61888.9774167814409747',
   'explorer': 'https://blockchain.info/'},
  {'id': 'ethereum',
   'rank': '2',
   'symbol': 'ETH',
   'name': 'Ethereum',
   'supply': '120142083.8889959300000000',
   'maxSupply': None,
   'marketCapUsd': '412941756547.4896949549995262',
   'volumeUsd24Hr': '6263091681.9759107837191962',
   'priceUsd': '3437.1116529743489174',
   'changePercent24Hr': '2.4781714260341641',
   'vwap24Hr': '3403.0257068531811405',
   'explorer': 'https://etherscan.io/'},
  {'id': 'tether',
   'rank': '3',
   'symbol': 'USDT',
   'name': 'Tether',
   'supply': '98798027729.4258700000000000',
   'maxSupply

Step 2: Transformation (Restructuring and Data Cleaning)

In [7]:
data = pd.json_normalize(responsedata, "data")
data.head()

Unnamed: 0,id,rank,symbol,name,supply,maxSupply,marketCapUsd,volumeUsd24Hr,priceUsd,changePercent24Hr,vwap24Hr,explorer
0,bitcoin,1,BTC,Bitcoin,19642150.0,21000000.0,1227650565536.9775,11986817704.889868,62500.82427519276,1.6200735325408098,61888.97741678144,https://blockchain.info/
1,ethereum,2,ETH,Ethereum,120142083.88899592,,412941756547.4897,6263091681.975911,3437.111652974349,2.478171426034164,3403.025706853181,https://etherscan.io/
2,tether,3,USDT,Tether,98798027729.42589,,98884510857.64842,25261496631.05633,1.0008753527799097,0.0155857019675373,1.0003141333947072,https://www.omniexplorer.info/asset/31
3,binance-coin,4,BNB,BNB,166801148.0,166801148.0,68019227592.96616,368456630.3002041,407.7863276634413,1.773608050095177,405.4560733436613,https://etherscan.io/token/0xB8c77482e45F1F44d...
4,solana,5,SOL,Solana,442825850.6403168,,57954352250.444405,1531722479.3831708,130.87391390236962,3.8194157105810502,132.7515549643321,https://explorer.solana.com/


In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   id                 100 non-null    object
 1   rank               100 non-null    object
 2   symbol             100 non-null    object
 3   name               100 non-null    object
 4   supply             100 non-null    object
 5   maxSupply          52 non-null     object
 6   marketCapUsd       100 non-null    object
 7   volumeUsd24Hr      100 non-null    object
 8   priceUsd           100 non-null    object
 9   changePercent24Hr  100 non-null    object
 10  vwap24Hr           99 non-null     object
 11  explorer           98 non-null     object
dtypes: object(12)
memory usage: 9.5+ KB


### Step 3: Converting categorical data into Numerical data

In [10]:
def convert_columns_to_datatypes(df, column_datatypes):
    for column, datatype in column_datatypes.items():
        if column in df.columns:
            df[column] = df[column].astype(datatype)
    return df

In [11]:
column_datatypes = {
    'rank': int,
    'supply': float,
    'maxSupply': float,
    'marketCapUsd': float,
    'volumeUsd24Hr': float,
    'priceUsd': float,
    'changePercent24Hr':float,
    'vwap24Hr': float,
}
data = convert_columns_to_datatypes(data, column_datatypes)

In [12]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 100 non-null    object 
 1   rank               100 non-null    int64  
 2   symbol             100 non-null    object 
 3   name               100 non-null    object 
 4   supply             100 non-null    float64
 5   maxSupply          52 non-null     float64
 6   marketCapUsd       100 non-null    float64
 7   volumeUsd24Hr      100 non-null    float64
 8   priceUsd           100 non-null    float64
 9   changePercent24Hr  100 non-null    float64
 10  vwap24Hr           99 non-null     float64
 11  explorer           98 non-null     object 
dtypes: float64(7), int64(1), object(4)
memory usage: 9.5+ KB


Step 3: Dealing with missing data
# checking missing data
# check data type
# Transform the data

In [None]:
data["maxSupply"] = data["maxSupply"].fillna(0)
data["vwap24Hr "] = data["vwap24Hr "].fillna(0)
data["explorer"] = data["explorer"].fillna('na')