In [8]:
# Initial imports
import pandas as pd
from pathlib import Path

In [9]:
# Read the crypto CSV into a dataframe, using the first CSV column as the row index,
# then preview the first ten rows.
file_path = Path("Instructions/crypto_data.csv")
df_crypto = pd.read_csv(filepath_or_buffer=file_path, index_col=0)
df_crypto.head(n=10)

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0
1337,EliteCoin,X13,True,PoW/PoS,29279420000.0,314159265359
2015,2015 coin,X11,True,PoW/PoS,,0
BTC,Bitcoin,SHA-256,True,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,True,PoW,107684200.0,0
LTC,Litecoin,Scrypt,True,PoW,63039240.0,84000000


In [10]:
# Show the dtype pandas inferred for each column of df_crypto.
# NOTE(review): in the output rendered below, TotalCoinSupply is reported as
# `object` rather than a numeric dtype — confirm it gets converted to a
# numeric type before the data is handed to a model.
df_crypto.dtypes

CoinName            object
Algorithm           object
IsTrading             bool
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply     object
dtype: object

In [11]:
# Keep only the coins that are being traded, then discard the 'IsTrading'
# flag: once every remaining row is traded the column carries no information.
is_trading_mask = df_crypto["IsTrading"].ne(False)
df_crypto = df_crypto.loc[is_trading_mask].drop(columns="IsTrading")

# Number of coins left after the filter
df_crypto.shape[0]

1144

In [12]:
# Remove every row that has at least one missing value, then report how many rows remain
df_crypto = df_crypto.dropna(axis=0, how="any")
df_crypto.shape[0]

685

In [13]:
# A machine learning algorithm needs numeric inputs, and the coin name does not
# contribute to the analysis, so the 'CoinName' column is removed from the dataframe.
df_crypto = df_crypto.drop(labels="CoinName", axis="columns")
df_crypto.head(n=5)

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,Scrypt,PoW/PoS,41.99995,42
404,Scrypt,PoW/PoS,1055185000.0,532000000
808,SHA-256,PoW/PoS,0.0,0
1337,X13,PoW/PoS,29279420000.0,314159265359
BTC,SHA-256,PoW,17927180.0,21000000


In [15]:
# Encode the two remaining text features, Algorithm and ProofType, as dummy
# (indicator) columns. drop_first=True omits the first level of each feature.
# Only these two columns are passed in, so df_crypto_dummies holds the dummy
# columns alone; the numeric columns stay behind in df_crypto.
# NOTE(review): the rendered output lists both `ProofType_PoW/PoS` and
# `ProofType_PoW/PoS.1`, which suggests two ProofType labels that differ only
# by whitespace or similar — confirm, and normalise the labels first if so.
categorical_features = df_crypto.loc[:, ["Algorithm", "ProofType"]]
df_crypto_dummies = pd.get_dummies(data=categorical_features, drop_first=True)
df_crypto_dummies.head(n=5)

Unnamed: 0,Algorithm_536,Algorithm_Argon2,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,Algorithm_Cloverhash,Algorithm_Counterparty,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
42,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
404,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
808,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1337,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
BTC,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
