In [None]:
import os
import pandas as pd
import save_and_load
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn import metrics
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

def prepare_data(dataframe, num_classes=None):
    """Split a dataframe into a feature matrix and one-hot encoded targets.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``Type1`` column holding integer class ids (it is
        passed straight to ``keras.utils.to_categorical``). Any of the stat
        columns named in ``feature_names`` that are present become features.
    num_classes : int, optional
        Width of the one-hot target. When ``None`` (the default, matching the
        previous behaviour) the width is inferred as ``max(label) + 1`` from
        the rows that were passed in. Pass the total number of classes
        explicitly when encoding a subset (e.g. a hold-out split) so the
        target width always matches the model's output layer even if the
        highest class id happens to be missing from that subset.

    Returns
    -------
    X : numpy.ndarray
        Feature values, one row per dataframe row.
    y : numpy.ndarray
        One-hot encoded ``Type1`` labels.
    """
    feature_names = (
        'attack', 'defense', 'speed', 'sp_defense',
        'sp_attack', 'weight_kg', 'height_m', 'hp',
    )
    # Iterate over the dataframe's columns (not feature_names) so the
    # feature order follows the dataframe's own column order.
    selected = [column for column in dataframe.columns if column in feature_names]

    # get data (often called X) and target (often called y) and display their shapes
    X = dataframe[selected].values
    labels = dataframe['Type1'].to_numpy()
    print(X.shape)
    print(labels.shape)

    y = keras.utils.to_categorical(labels, num_classes=num_classes)
    return X, y

def show_prediction(model, X, y, show):
    """Predict on ``X`` with ``model``, print a comparison against ``y`` and the accuracy.

    Parameters
    ----------
    model : object exposing ``predict(X)``
        Its output is reduced with ``argmax`` along axis 1 to get class ids.
    X : array-like
        Inputs handed to ``model.predict``.
    y : array-like
        One-hot targets; reduced with ``argmax`` along axis 1 as well.
    show : bool
        When true, additionally print the raw prediction / target shapes and
        the first 20 raw rows of each.

    Returns
    -------
    tuple
        ``(raw predictions, predicted class ids, true class ids)``.
    """
    preview = slice(0, 20)  # only the first 20 rows are ever printed
    class_scores = model.predict(X)

    if show:
        # raw shapes followed by the un-reduced predictions and targets
        print(class_scores.shape, y.shape, sep="\n")
        print("\n Predictions: \n")
        print(np.round(class_scores[preview], 3))
        print("\n Actual values: \n")
        print(y[preview])

    # collapse the per-class scores / one-hot rows into class ids
    predicted_classes = np.argmax(class_scores, axis=1)
    true_classes = np.argmax(y, axis=1)
    for heading, values in (
        ("\n Predictions: \n", predicted_classes),
        ("\n Actual values: \n", true_classes),
    ):
        print(heading)
        print(values[preview])

    # report the accuracy of the model
    accuracy = metrics.accuracy_score(true_classes, predicted_classes)
    print("Accuracy score: {}".format(accuracy))
    return class_scores, predicted_classes, true_classes

path = "."  # absolute or relative path to the folder containing the file;
            # "." means the current folder

# Fixed seed for the row shuffle. Without it every run produces a different
# train / hold-out split, so a model saved by an earlier run could later be
# evaluated on rows it was trained on.
SEED = 42

# import the data from the dataset
filename_read = os.path.join(path, "pokemon.csv")
df = pd.read_csv(filename_read)

# show the order of the first few entries
print(df[0:5]["Type1"])
# shuffle the rows reproducibly and renumber the index from 0
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)
# check that they have been shuffled
print(df[0:5]["Type1"])

# copy the dataframe so the encoded labels do not overwrite the originals
encodeddf = df.copy(deep=True)

In [None]:
# Encode the type names as integer class ids. The encoder is fitted on the
# untouched string column of `df` (same rows, same order as `encodeddf`), so
# re-running this cell yields the same encoding instead of re-encoding ints.
le = LabelEncoder()
encodeddf['Type1'] = le.fit_transform(df['Type1'])

# Row boundaries of the split. Both slices share TRAIN_ROWS so that no row
# falls between them (the previous 0:721 / 722:890 pair dropped row 721).
TRAIN_ROWS = 721
HOLDOUT_END = 890
testingdf = encodeddf[0:TRAIN_ROWS]
holdoutdf = encodeddf[TRAIN_ROWS:HOLDOUT_END]

print("Encodings: ")
# unique() lists values in order of first appearance; df and encodeddf have
# identical row order, so the two arrays line up name-for-code. Iterate over
# every entry (the previous range(..., n - 1) skipped the last type).
type_names = df['Type1'].unique()
type_codes = encodeddf['Type1'].unique()
for type_name, type_code in zip(type_names, type_codes):
    print("Type: " + str(type_name) + ", " + str(type_code))

In [None]:
X, y = prepare_data(testingdf)

N_SPLITS = 5   # number of cross-validation folds
EPOCHS = 50    # training epochs per fit


def build_model(n_features, n_classes):
    """Return a newly initialised, compiled sequential classifier.

    The network has two hidden ReLU layers (512 and 256 units) followed by a
    softmax output layer with ``n_classes`` units.
    """
    net = Sequential()
    net.add(Dense(512, input_dim=n_features, activation='relu'))
    net.add(Dense(256, activation='relu'))
    net.add(Dense(n_classes, activation='softmax'))
    net.compile(loss='categorical_crossentropy', optimizer='adam')
    return net


# Cross-validate with KFold splits. Every fold trains a *fresh* model: reusing
# one model across folds would mean that from the second fold on, the rows
# being tested had already been used for training in an earlier fold, which
# makes the reported accuracies meaningless.
kf = KFold(N_SPLITS)
fold_scores = []

for train, test in kf.split(X):
    X_train = X[train]
    y_train = y[train]
    X_test = X[test]
    y_test = y[test]

    fold_model = build_model(X.shape[1], y.shape[1])
    fold_model.fit(X_train, y_train, verbose=0, epochs=EPOCHS)
    pred = np.argmax(fold_model.predict(X_test), axis=1)
    y_compare = np.argmax(y_test, axis=1)
    score = metrics.accuracy_score(y_compare, pred)
    fold_scores.append(score)
    print("Accuracy score: {}".format(score))

print("Mean cross-validation accuracy: {}".format(np.mean(fold_scores)))

# The model that gets saved is trained once on all of the training rows;
# the hold-out rows are never seen here.
model = build_model(X.shape[1], y.shape[1])
model.fit(X, y, verbose=0, epochs=EPOCHS)

# save the model
save_and_load.save_model(model, path, "seqKFold")

In [None]:
# Score the saved model on the hold-out rows.
X, y = prepare_data(holdoutdf)
model = save_and_load.model_loader(path, "seqKFold")
pred, pred_val, y_compare = show_prediction(model, X, y, False)

# Top-k accuracy: a row counts as correct when its true class is among the
# k classes with the highest predicted scores.
k = 2
topCut = tf.nn.in_top_k(y_compare, pred, k)
hits = [bool(value) for value in topCut]
t = hits.count(True)    # rows whose true class is in the top k
f = hits.count(False)   # rows whose true class is not
print(f"Accuracy if we consider an answer as correct if it belongs to the top {k} most likely predicitons")
print(t / (t + f))

# plot a confusion matrix for the predictions
cm = confusion_matrix(y_compare, pred_val)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.show()