# Data Preparation

In [3]:
# Import dependencies
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [4]:
# Read crypto_data.csv into a DataFrame and preview the first rows.
# The path is relative, so the notebook must be run from the directory
# that contains the Resources/ folder.
crypto = pd.read_csv("Resources/crypto_data.csv")
crypto.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [5]:
# Keep only the cryptocurrencies that are currently being traded
# (row mask on IsTrading, all columns retained)
traded = crypto.loc[crypto['IsTrading'] == True, :]

In [6]:
# IsTrading is constant after the filter above, so it carries no information; drop it
traded = traded.drop(columns='IsTrading')
traded.shape

(1144, 6)

In [7]:
# Drop every row that contains one or more missing values
traded = traded.dropna(axis=0, how="any")

In [8]:
# Filter for cryptocurrencies that have been mined (TotalCoinsMined > 0).
# The mask is built from `traded` itself: a mask taken from the unfiltered
# `crypto` frame has a different index and only works through implicit
# index alignment inside .loc, which is fragile and hides the intent.
mined_crypto = traded.loc[traded['TotalCoinsMined'] > 0]
mined_crypto.shape

(532, 6)

In [9]:
# Build the feature frame without CoinName; `mined_crypto` itself is left
# untouched, so the coin names remain available there
crypto_new = mined_crypto.drop(columns='CoinName')
crypto_new.columns

Index(['Unnamed: 0', 'Algorithm', 'ProofType', 'TotalCoinsMined',
       'TotalCoinSupply'],
      dtype='object')

In [10]:
# Drop the 'Unnamed: 0' column as well, leaving only the feature columns
crypto_new = crypto_new.drop(columns='Unnamed: 0')
crypto_new.shape

(532, 4)

In [11]:
# Check for text values: any column reported with `object` dtype is not yet
# numeric and has to be converted or encoded before the data can be scaled
crypto_new.dtypes

Algorithm           object
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply     object
dtype: object

In [12]:
# Convert TotalCoinSupply to numeric.
# astype returns a new DataFrame rather than modifying in place, so the
# result must be assigned back; otherwise the column stays `object` in
# `crypto_new` and in everything derived from it.
crypto_new = crypto_new.astype({'TotalCoinSupply': 'float'})
crypto_new.dtypes

Algorithm           object
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply    float64
dtype: object

In [13]:
# One-hot encode the categorical 'Algorithm' and 'ProofType' columns.
# Empty prefix/separator means each indicator column is named after the
# category value alone.
final_df = pd.get_dummies(
    crypto_new,
    columns=['Algorithm', 'ProofType'],
    prefix='',
    prefix_sep='',
)

In [15]:
# Inspect the (rows, columns) size of the encoded frame
final_df.shape

(532, 98)

# Scaling the data 

In [16]:
# Standardize the dataset: fit the scaler on the encoded features, then
# apply the fitted transformation to the same data
scaler = StandardScaler().fit(final_df)
scaled_crypto = scaler.transform(final_df)
print(scaled_crypto.shape)

(532, 98)


# Dimensionality reduction

### Cluster analysis with k-means

### Recommendation