In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans

In [10]:
# Read the raw cryptocurrency dataset from disk and preview its first rows.
crypto_df = pd.read_csv(filepath_or_buffer='Resources/crypto_data.csv')
crypto_df.head()

Unnamed: 0         1252
CoinName           1252
Algorithm          1252
IsTrading          1252
ProofType          1252
TotalCoinsMined     744
TotalCoinSupply    1252
dtype: int64

In [9]:
# Retain only the rows whose IsTrading flag equals True, then show the
# number of non-null values left in each column.
is_trading_mask = crypto_df['IsTrading'].eq(True)
trading_only_df = crypto_df[is_trading_mask]
trading_only_df.count()

Unnamed: 0         1144
CoinName           1144
Algorithm          1144
IsTrading          1144
ProofType          1144
TotalCoinsMined     685
TotalCoinSupply    1144
dtype: int64

In [15]:
# Remove the IsTrading column; after the filter above it is True on every
# remaining row and carries no further information.
trading_dropped_df = trading_only_df.drop(labels='IsTrading', axis='columns')
trading_dropped_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,41.99995,42
1,365,365Coin,X11,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,PoW,,611000
4,808,808,SHA-256,PoW/PoS,0.0,0


In [16]:
# Report, column by column, whether any missing values are present.
trading_dropped_df.isnull().any(axis=0)

Unnamed: 0         False
CoinName           False
Algorithm          False
ProofType          False
TotalCoinsMined     True
TotalCoinSupply    False
dtype: bool

In [18]:
# Discard every row that has a missing value in at least one column.
dropped_nulls_df = trading_dropped_df.dropna(axis=0, how='any')

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,41.99995,42
2,404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
4,808,808,SHA-256,PoW/PoS,0.0,0
5,1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
7,BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000


In [22]:
# Verify that no missing values remain after the drop.
# Only the last expression of a cell is displayed, so the null check is
# written as an assertion (a bare expression here would be silently
# discarded); the per-column counts are then shown as the cell output.
assert not dropped_nulls_df.isna().any().any(), "null values remain after dropna()"
dropped_nulls_df.count()

Unnamed: 0         685
CoinName           685
Algorithm          685
ProofType          685
TotalCoinsMined    685
TotalCoinSupply    685
dtype: int64

In [21]:
# Keep only coins with a strictly positive mined supply.
# The boolean mask is built from the same frame that is being filtered, so
# the selection does not rely on pandas aligning a mask taken from another
# DataFrame (which raises if the indexes ever stop matching, e.g. after a
# reset_index upstream).
has_mined_coins = dropped_nulls_df['TotalCoinsMined'] > 0
coins_mined_df = dropped_nulls_df.loc[has_mined_coins]
coins_mined_df.count()

Unnamed: 0         532
CoinName           532
Algorithm          532
ProofType          532
TotalCoinsMined    532
TotalCoinSupply    532
dtype: int64

In [28]:
# Remove the two columns that are not used as features ('CoinName' and
# 'Unnamed: 0') in a single call, then preview the result.
final_df = coins_mined_df.drop(columns=['CoinName', 'Unnamed: 0'])
final_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,41.99995,42
2,Scrypt,PoW/PoS,1055185000.0,532000000
5,X13,PoW/PoS,29279420000.0,314159265359
7,SHA-256,PoW,17927180.0,21000000
8,Ethash,PoW,107684200.0,0


In [29]:
# One-hot encode the two categorical columns; the remaining columns are
# carried over unchanged.
categorical_columns = ['Algorithm', 'ProofType']
X = pd.get_dummies(final_df, columns=categorical_columns)
X

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,4.199995e+01,42,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,1.055185e+09,532000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,2.927942e+10,314159265359,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,1.792718e+07,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,1.076842e+08,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1238,2.000000e+09,2000000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1242,1.493105e+07,250000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1245,9.802226e+08,1400222610,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1246,7.296538e+06,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
# Standardize the feature matrix: learn the per-column scaling parameters
# from X, then apply them to the same data.
scaler = StandardScaler()
scaler.fit(X)
crypto_scaled = scaler.transform(X)

In [32]:
# Reduce the standardized features to their first four principal components.
# random_state is pinned so the projection is reproducible on a fresh
# kernel: with the default svd_solver='auto', scikit-learn may select the
# randomized SVD solver, whose output varies between runs when unseeded.
pca = PCA(n_components=4, random_state=42)

crypto_pcal = pca.fit_transform(crypto_scaled)

In [33]:
# Wrap the principal-component array in a DataFrame with one labelled
# column per component ("PC 1" .. "PC 4") and preview it.
pc_labels = [f"PC {i}" for i in range(1, 5)]
df_crypto_pcal = pd.DataFrame(crypto_pcal, columns=pc_labels)
df_crypto_pcal.head()

Unnamed: 0,PC 1,PC 2,PC 3,PC 4
0,-0.32331,0.906551,-0.597125,-0.007281
1,-0.306605,0.906384,-0.597502,-0.007211
2,2.3059,1.546208,-0.698853,0.008685
3,-0.142643,-1.25916,0.187565,0.009723
4,-0.146475,-2.240822,0.420546,-0.02601
