In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path

In [23]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans

In [2]:
# Load the raw cryptocurrency table from the CSV under ./Resources.
df = pd.read_csv(
    filepath_or_buffer=Path('./Resources/crypto_data.csv'),
)
df

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...,...,...
1247,XBC,BitcoinPlus,Scrypt,True,PoS,1.283270e+05,1000000
1248,DVTC,DivotyCoin,Scrypt,False,PoW/PoS,2.149121e+07,100000000
1249,GIOT,Giotto Coin,Scrypt,False,PoW/PoS,,233100000
1250,OPSC,OpenSourceCoin,SHA-256,False,PoW/PoS,,21000000


In [3]:
# Keep only the coins that are currently being traded. After the filter every
# remaining row has IsTrading == True, so the column is dropped from the result.
trading = (
    df.loc[df["IsTrading"].eq(True)]
    .drop(columns=["IsTrading"])
)
trading

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,4.199995e+01,42
1,365,365Coin,X11,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,PoW/PoS,1.055185e+09,532000000
3,611,SixEleven,SHA-256,PoW,,611000
4,808,808,SHA-256,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...,...
1243,SERO,Super Zero,Ethash,PoW,,1000000000
1244,UOS,UOS,SHA-256,DPoI,,1000000000
1245,BDX,Beldex,CryptoNight,PoW,9.802226e+08,1400222610
1246,ZEN,Horizen,Equihash,PoW,7.296538e+06,21000000


In [4]:
# Discard every row that contains one or more missing values.
drop_null = trading.dropna(axis="index", how="any")
drop_null

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,4.199995e+01,42
2,404,404Coin,Scrypt,PoW/PoS,1.055185e+09,532000000
4,808,808,SHA-256,PoW/PoS,0.000000e+00,0
5,1337,EliteCoin,X13,PoW/PoS,2.927942e+10,314159265359
7,BTC,Bitcoin,SHA-256,PoW,1.792718e+07,21000000
...,...,...,...,...,...,...
1238,ZEPH,ZEPHYR,SHA-256,DPoS,2.000000e+09,2000000000
1242,GAP,Gapcoin,Scrypt,PoW/PoS,1.493105e+07,250000000
1245,BDX,Beldex,CryptoNight,PoW,9.802226e+08,1400222610
1246,ZEN,Horizen,Equihash,PoW,7.296538e+06,21000000


In [5]:
# Retain only the coins whose mined total is strictly positive.
mined = drop_null[drop_null["TotalCoinsMined"].gt(0)]
mined

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,4.199995e+01,42
2,404,404Coin,Scrypt,PoW/PoS,1.055185e+09,532000000
5,1337,EliteCoin,X13,PoW/PoS,2.927942e+10,314159265359
7,BTC,Bitcoin,SHA-256,PoW,1.792718e+07,21000000
8,ETH,Ethereum,Ethash,PoW,1.076842e+08,0
...,...,...,...,...,...,...
1238,ZEPH,ZEPHYR,SHA-256,DPoS,2.000000e+09,2000000000
1242,GAP,Gapcoin,Scrypt,PoW/PoS,1.493105e+07,250000000
1245,BDX,Beldex,CryptoNight,PoW,9.802226e+08,1400222610
1246,ZEN,Horizen,Equihash,PoW,7.296538e+06,21000000


In [12]:
# Delete columns not needed for the machine learning: the unnamed first CSV
# column ("Unnamed: 0") and the coin name.
#
# This cell is safe to re-run. The previous version removed the columns with
# `del` and reset the index in place, so a second execution raised KeyError
# because the columns were already gone. errors="ignore" makes the drop a
# no-op when the columns have already been removed.
mined = mined.drop(columns=["Unnamed: 0", "CoinName"], errors="ignore")

# Renumber the rows 0..n-1. The previous row labels are kept in an "index"
# column, as before; the presence of that column also marks that the reset
# has already been applied, so it is not repeated on a re-run.
if "index" not in mined.columns:
    mined = mined.reset_index()
mined

Unnamed: 0,index,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,0,Scrypt,PoW/PoS,4.199995e+01,42
1,2,Scrypt,PoW/PoS,1.055185e+09,532000000
2,5,X13,PoW/PoS,2.927942e+10,314159265359
3,7,SHA-256,PoW,1.792718e+07,21000000
4,8,Ethash,PoW,1.076842e+08,0
...,...,...,...,...,...
527,1238,SHA-256,DPoS,2.000000e+09,2000000000
528,1242,Scrypt,PoW/PoS,1.493105e+07,250000000
529,1245,CryptoNight,PoW,9.802226e+08,1400222610
530,1246,Equihash,PoW,7.296538e+06,21000000


In [13]:
# Split the features by kind: the two numeric columns are set aside as-is,
# and the two text-valued columns are one-hot encoded into indicator columns.
filtered2 = mined.filter(items=["TotalCoinsMined", "TotalCoinSupply"])
filtered1 = mined.filter(items=["Algorithm", "ProofType"])
cleaned_crypto = pd.get_dummies(data=filtered1)
cleaned_crypto

Unnamed: 0,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,Algorithm_Cloverhash,Algorithm_Counterparty,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
527,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
528,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
529,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
530,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
# Scale data: standardize the two numeric columns with StandardScaler.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(filtered2[["TotalCoinsMined", "TotalCoinSupply"]])

# Rebuild a labelled frame from the scaled values. index= carries over
# filtered2's row labels; without it the frame would get a fresh 0..n-1 index,
# and any later label-aligned column assignment would silently mismatch rows
# whenever filtered2 is not already numbered that way.
scaled_crypto = pd.DataFrame(
    scaled_data,
    columns=filtered2.columns,
    index=filtered2.index,
)
scaled_crypto

Unnamed: 0,TotalCoinsMined,TotalCoinSupply
0,-0.117108,-0.152870
1,-0.093970,-0.145009
2,0.524946,4.489424
3,-0.116715,-0.152560
4,-0.114747,-0.152870
...,...,...
527,-0.073251,-0.123317
528,-0.116781,-0.149176
529,-0.095613,-0.132179
530,-0.116948,-0.152560


In [16]:
# Re-attach the scaled numeric columns to the one-hot encoded frame.
# Each assignment aligns the incoming column on the frames' row labels.
for numeric_column in ("TotalCoinsMined", "TotalCoinSupply"):
    cleaned_crypto[numeric_column] = scaled_crypto[numeric_column]
cleaned_crypto

Unnamed: 0,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,Algorithm_Cloverhash,Algorithm_Counterparty,...,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW,TotalCoinsMined,TotalCoinSupply
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,-0.117108,-0.152870
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,-0.093970,-0.145009
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.524946,4.489424
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,-0.116715,-0.152560
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,-0.114747,-0.152870
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
527,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,-0.073251,-0.123317
528,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,-0.116781,-0.149176
529,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,-0.095613,-0.132179
530,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,-0.116948,-0.152560


In [25]:
# Dimensionality reduction with PCA. A fractional n_components tells
# scikit-learn to keep as many components as are needed to preserve that share
# of the explained variance (90% for this project).
pca = PCA(n_components=0.9)
crypto_pca = pca.fit_transform(X=cleaned_crypto)

In [26]:
# Wrap the PCA output in a DataFrame (default integer column labels) and
# preview the first five rows.
df_crypto_pca = pd.DataFrame(crypto_pca)
df_crypto_pca.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,-0.209276,-0.801765,0.172975,-0.672385,-0.028027,-0.02735,0.002161,-0.016859,-0.047423,-0.023043,-0.00785,0.015789
1,-0.18739,-0.804259,0.162555,-0.673029,-0.028782,-0.027213,0.002265,-0.017852,-0.047565,-0.023081,-0.007752,0.015888
2,3.529674,-0.305537,2.915403,0.414815,-0.163323,-0.259278,-0.278472,-0.152434,0.747395,0.360814,-0.233801,-0.07022
3,-0.180157,0.69429,-0.165771,0.239931,-0.27838,-0.614707,0.624124,-0.015063,0.027769,0.009259,-0.026164,-0.026461
4,-0.173289,0.688569,-0.15439,0.131918,-0.212548,-0.104388,-0.242103,0.0028,0.113068,-0.125626,0.058157,-0.129333


In [None]:
# Further reduce the dataset dimensions with t-SNE.
# t-SNE is stochastic, so random_state is pinned to make the embedding
# reproducible across kernel restarts; the seed value itself is arbitrary.
tsne = TSNE(learning_rate=35, random_state=42)
tsne_features = tsne.fit_transform(df_crypto_pca)