In [8]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
import pandas as pd

# Train a Gaussian Naive Bayes classifier that predicts a pokemon's 'Order'
# from its base stats and one-hot encoded primary type, over several random
# train/test splits, and report the scores of the best split.

# read in pokemon.csv
pokemon = pd.read_csv('pokemon.csv')

# Remove every row whose 'Order' is 3, then shift the remaining values down
# by one. NOTE(review): this presumably leaves a binary 0/1 target (the scorer
# below asks for labels=[1, 0]) -- confirm against the CSV.
pokemon = pokemon[pokemon['Order'] != 3].copy()
pokemon['Order'] = pokemon['Order'] - 1
pokemon = pokemon.drop(columns='Type_2')

# One-hot encode the primary type. The column names are derived from the
# number of categories the encoder actually finds, so the script keeps working
# if the set of types in the data changes (15 categories still yields
# 'type_01' .. 'type_15').
enc = OneHotEncoder(handle_unknown='ignore')
X = list(zip(pokemon.Type_1))
feature_vectors = enc.fit_transform(X).toarray()
type_columns = [f'type_{k:02d}' for k in range(1, feature_vectors.shape[1] + 1)]
pokemon[type_columns] = feature_vectors
pokemon = pokemon.drop(columns='Type_1')

# Drop rows with a missing value in any remaining column.
pokemon = pokemon.dropna()

# Build the feature matrix and target once; they do not change between trials.
feature_columns = ['Health', 'Attack', 'Defense', 'Speed'] + type_columns
features = pokemon[feature_columns].values
target = pokemon.Order

# Use Gaussian for datasets with quantitative variables
clf = GaussianNB()

n_trials = 20
best_p = 0
best_r = 0
best_f = 0

# Each trial uses a fresh random 75/25 split (no random_state is set, so the
# numbers differ from run to run).
for trial in range(n_trials):
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.25)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # With labels=[1, 0], index 0 of each returned array refers to class 1.
    p, r, f, s = precision_recall_fscore_support(y_test, y_pred, labels=[1, 0])

    # Keep the trial with the highest class-1 F1 and record that same trial's
    # precision and recall. Requiring precision AND recall to both improve
    # would skip trials that are better overall but slightly worse on one of
    # the two.
    if f[0] > best_f:
        best_p = p[0]
        best_r = r[0]
        best_f = f[0]
    print(p, r, f, s)


# NOTE: these are the scores of the single best split out of n_trials, which
# is an optimistic estimate of how the model generalises.
display(f'Best Precision -> {best_p}')
display(f'Best Recall    -> {best_r}')
display(f'Best F1        -> {best_f}')


[0.32258065 0.33333333] [0.83333333 0.04545455] [0.46511628 0.08      ] [12 22]
[0.37931034 0.6       ] [0.84615385 0.14285714] [0.52380952 0.23076923] [13 21]
[0.38461538 0.61904762] [0.38461538 0.61904762] [0.38461538 0.61904762] [13 21]
[0.36666667 0.25      ] [0.78571429 0.05      ] [0.5        0.08333333] [14 20]
[0.         0.46153846] [0.  0.6] [0.         0.52173913] [14 20]
[0.36666667 0.25      ] [0.78571429 0.05      ] [0.5        0.08333333] [14 20]
[0.46875 0.     ] [0.88235294 0.        ] [0.6122449 0.       ] [17 17]
[0.44       0.88888889] [0.91666667 0.36363636] [0.59459459 0.51612903] [12 22]
[0.46666667 0.52631579] [0.4375     0.55555556] [0.4516129  0.54054054] [16 18]
[0.5625 0.5   ] [0.94736842 0.06666667] [0.70588235 0.11764706] [19 15]
[0.33333333 0.5       ] [0.83333333 0.09090909] [0.47619048 0.15384615] [12 22]
[0.     0.5625] [0.  0.9] [0.         0.69230769] [14 20]
[0.3 0.5] [0.81818182 0.08695652] [0.43902439 0.14814815] [11 23]
[0.      0.46875] [0.     

'Best Precision -> 0.5625'

'Best Recall    -> 0.9473684210526315'

'Best F1        -> 0.7058823529411765'