In [1]:
# import dependencies
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
import matplotlib as plt

In [2]:
# import data
crypto_df = pd.read_csv('Instructions/crypto_data.csv')

In [3]:
# verify data set
crypto_df

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...,...,...
1247,XBC,BitcoinPlus,Scrypt,True,PoS,1.283270e+05,1000000
1248,DVTC,DivotyCoin,Scrypt,False,PoW/PoS,2.149121e+07,100000000
1249,GIOT,Giotto Coin,Scrypt,False,PoW/PoS,,233100000
1250,OPSC,OpenSourceCoin,SHA-256,False,PoW/PoS,,21000000


In [4]:
# remove false IsTrading values
crypto_df = crypto_df.loc[crypto_df['IsTrading'] == True]

In [5]:
# verify they are removed
crypto_df

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...,...,...
1243,SERO,Super Zero,Ethash,True,PoW,,1000000000
1244,UOS,UOS,SHA-256,True,DPoI,,1000000000
1245,BDX,Beldex,CryptoNight,True,PoW,9.802226e+08,1400222610
1246,ZEN,Horizen,Equihash,True,PoW,7.296538e+06,21000000


In [6]:
# remove null values
crypto_df = crypto_df.dropna()
crypto_df = crypto_df.reset_index()

In [7]:
# filter for crypto that has been mined
crypto_df = crypto_df.loc[crypto_df['TotalCoinsMined'] > 0]

In [8]:
# verify >0 values are filtered
crypto_df

Unnamed: 0.1,index,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,0,42,42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
1,2,404,404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
3,5,1337,EliteCoin,X13,True,PoW/PoS,2.927942e+10,314159265359
4,7,BTC,Bitcoin,SHA-256,True,PoW,1.792718e+07,21000000
5,8,ETH,Ethereum,Ethash,True,PoW,1.076842e+08,0
...,...,...,...,...,...,...,...,...
680,1238,ZEPH,ZEPHYR,SHA-256,True,DPoS,2.000000e+09,2000000000
681,1242,GAP,Gapcoin,Scrypt,True,PoW/PoS,1.493105e+07,250000000
682,1245,BDX,Beldex,CryptoNight,True,PoW,9.802226e+08,1400222610
683,1246,ZEN,Horizen,Equihash,True,PoW,7.296538e+06,21000000


In [9]:
# remove unnecessary columns
crypto_df.drop(['index', 'Unnamed: 0', 'CoinName', 'IsTrading'], axis='columns', inplace=True)

In [10]:
# verify columns dropped
crypto_df

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,4.199995e+01,42
1,Scrypt,PoW/PoS,1.055185e+09,532000000
3,X13,PoW/PoS,2.927942e+10,314159265359
4,SHA-256,PoW,1.792718e+07,21000000
5,Ethash,PoW,1.076842e+08,0
...,...,...,...,...
680,SHA-256,DPoS,2.000000e+09,2000000000
681,Scrypt,PoW/PoS,1.493105e+07,250000000
682,CryptoNight,PoW,9.802226e+08,1400222610
683,Equihash,PoW,7.296538e+06,21000000


In [11]:
# convert all values to numerical
crypto_df = pd.get_dummies(crypto_df)

In [14]:
# verify numerical conversion
crypto_df
# notice the dramatic column increase

Unnamed: 0,TotalCoinsMined,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,Algorithm_Cloverhash,...,TotalCoinSupply_91388946,TotalCoinSupply_92000000000,TotalCoinSupply_9354000,TotalCoinSupply_9507271,TotalCoinSupply_9736000,TotalCoinSupply_98000000,TotalCoinSupply_98100000000,TotalCoinSupply_990000000000,TotalCoinSupply_999481516,TotalCoinSupply_9999999
0,4.199995e+01,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1.055185e+09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2.927942e+10,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1.792718e+07,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,1.076842e+08,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
680,2.000000e+09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
681,1.493105e+07,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
682,9.802226e+08,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
683,7.296538e+06,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
scaler = StandardScaler()
data_scale = scaler.fit_transform(crypto_df)