In [1]:
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score

In [2]:
# Load the encoded Pokedex table; the first CSV column becomes the row index.
df = pd.read_csv('data/pokedex_enc.csv', index_col=0)

# Request the full per-column listing rather than the truncated summary.
df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1028 entries, 0 to 1027
Data columns (total 976 columns):
 #    Column                         Dtype  
---   ------                         -----  
 0    pokedex_number                 int64  
 1    name                           object 
 2    generation                     int64  
 3    type_number                    int64  
 4    height_m                       float64
 5    weight_kg                      float64
 6    abilities_number               int64  
 7    total_points                   float64
 8    hp                             float64
 9    attack                         float64
 10   defense                        float64
 11   sp_attack                      float64
 12   sp_defense                     float64
 13   speed                          float64
 14   catch_rate                     float64
 15   base_friendship                float64
 16   base_experience                float64
 17   growth_rate                    

In [3]:
# Columns excluded from the feature set. They are listed by name rather than
# by position so the selection does not depend on the current column layout
# and so re-running this cell cannot remove a different set of columns.
drop = [
    'pokedex_number', 'generation', 'type_number', 'abilities_number',
    'total_points', 'egg_type_number',
    'against_normal', 'against_fire', 'against_water', 'against_electric',
    'against_grass', 'against_ice', 'against_fight', 'against_poison',
    'against_ground', 'against_flying', 'against_psychic', 'against_bug',
    'against_rock', 'against_ghost', 'against_dragon', 'against_dark',
    'against_steel', 'against_fairy',
]
print(drop)

# errors='ignore' makes a second execution of this cell a no-op instead of
# raising a KeyError for columns that are already gone.
df = df.drop(columns=drop, errors='ignore')

['pokedex_number', 'generation', 'type_number', 'abilities_number', 'total_points', 'egg_type_number', 'against_normal', 'against_fire', 'against_water', 'against_electric', 'against_grass', 'against_ice', 'against_fight', 'against_poison', 'against_ground', 'against_flying', 'against_psychic', 'against_bug', 'against_rock', 'against_ghost', 'against_dragon', 'against_dark', 'against_steel', 'against_fairy']


In [4]:
# Re-inspect the schema after the column drop.
df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1028 entries, 0 to 1027
Data columns (total 952 columns):
 #    Column                         Dtype  
---   ------                         -----  
 0    name                           object 
 1    height_m                       float64
 2    weight_kg                      float64
 3    hp                             float64
 4    attack                         float64
 5    defense                        float64
 6    sp_attack                      float64
 7    sp_defense                     float64
 8    speed                          float64
 9    catch_rate                     float64
 10   base_friendship                float64
 11   base_experience                float64
 12   growth_rate                    float64
 13   percentage_male                float64
 14   egg_cycles                     float64
 15   type_Bug                       int64  
 16   type_Dark                      int64  
 17   type_Dragon                    

In [5]:
# Move the four one-hot status columns to the front so that later cells can
# address them (and the remaining features) with simple positional slices.
# The columns are selected by name, not by hard-coded positions, so the cell
# gives the same result when re-run and does not depend on the column count.
status_cols = ['status_Legendary', 'status_Mythical', 'status_Normal', 'status_Sub Legendary']
cols = status_cols + [col for col in df.columns if col not in status_cols]

df = df[cols]

df.head()

Unnamed: 0,status_Legendary,status_Mythical,status_Normal,status_Sub Legendary,name,height_m,weight_kg,hp,attack,defense,...,species_Wish Pokémon,species_Wolf Pokémon,species_Wood Gecko Pokémon,species_Woodpecker Pokémon,species_Wool Pokémon,species_Woolly Crab Pokémon,species_Worm Pokémon,species_Wrestling Pokémon,species_Young Fowl Pokémon,species_Zen Charm Pokémon
0,0.0,0.0,1.0,0.0,Bulbasaur,0.7,6.9,45.0,49.0,49.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,Ivysaur,1.0,13.0,60.0,62.0,63.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,Venusaur,2.0,100.0,80.0,82.0,83.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,Mega Venusaur,2.4,155.5,80.0,100.0,123.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,1.0,0.0,Charmander,0.6,8.5,39.0,52.0,43.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Collapse the four one-hot status columns into one string label per row.
status_labels = {
    'status_Legendary': 'legendary',
    'status_Mythical': 'mythical',
    'status_Normal': 'normal',
    'status_Sub Legendary': 'sublegendary',
}

# Build the target as a fresh one-column, object-dtype frame. Writing strings
# into a slice of `df` (a float column) would trigger SettingWithCopy /
# incompatible-dtype warnings and could overwrite data in `df` itself.
y = pd.DataFrame({'status': pd.Series(index=df.index, dtype='object')})
for column, label in status_labels.items():
    y.loc[df[column] == 1, 'status'] = label

# A row with no status flag set would leave a missing label behind.
assert y['status'].notna().all(), 'some rows have no status flag set'

y

Unnamed: 0,status_Legendary
0,normal
1,normal
2,normal
3,normal
4,normal
...,...
1023,legendary
1024,legendary
1025,legendary
1026,legendary


In [7]:
# Feature table: everything after the four leading status columns. Its first
# column is the name (kept only for reporting); the rest are
# the model inputs that get standardised.
features = df.iloc[:, 4:]
numeric = features.iloc[:, 1:]

# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split, so test-set statistics leak into the scaling. Fitting on the training
# rows only would be cleaner but changes the reported scores.
scaler = StandardScaler().fit(numeric)

# Assemble a new frame instead of assigning floats back into a slice of `df`;
# that assignment raises SettingWithCopy / dtype-upcast warnings because the
# slice contains int64 columns.
scaled = pd.DataFrame(scaler.transform(numeric), index=numeric.index, columns=numeric.columns)
X = pd.concat([features.iloc[:, :1], scaled], axis=1)

X

Unnamed: 0,name,height_m,weight_kg,hp,attack,defense,sp_attack,sp_defense,speed,catch_rate,...,species_Wish Pokémon,species_Wolf Pokémon,species_Wood Gecko Pokémon,species_Woodpecker Pokémon,species_Wool Pokémon,species_Woolly Crab Pokémon,species_Worm Pokémon,species_Wrestling Pokémon,species_Young Fowl Pokémon,species_Zen Charm Pokémon
0,Bulbasaur,-0.197750,-0.415479,-0.931931,-0.961772,-0.814230,-0.236744,-0.254089,-0.790062,-0.649394,...,-0.031204,-0.0541,-0.044151,-0.031204,-0.044151,-0.031204,-0.0541,-0.031204,-0.031204,-0.0625
1,Ivysaur,-0.108952,-0.376906,-0.363168,-0.559999,-0.366775,0.222508,0.280289,-0.286497,-0.649394,...,-0.031204,-0.0541,-0.044151,-0.031204,-0.044151,-0.031204,-0.0541,-0.031204,-0.031204,-0.0625
2,Venusaur,0.187039,0.173240,0.395183,0.058113,0.272446,0.834844,0.992792,0.384924,-0.649394,...,-0.031204,-0.0541,-0.044151,-0.031204,-0.044151,-0.031204,-0.0541,-0.031204,-0.031204,-0.0625
3,Mega Venusaur,0.305435,0.524194,0.395183,0.614415,1.550888,1.508414,1.705296,0.384924,-0.649394,...,-0.031204,-0.0541,-0.044151,-0.031204,-0.044151,-0.031204,-0.0541,-0.031204,-0.031204,-0.0625
4,Charmander,-0.227349,-0.405361,-1.159436,-0.869055,-1.005996,-0.389828,-0.788467,-0.118642,-0.649394,...,-0.031204,-0.0541,-0.044151,-0.031204,-0.044151,-0.031204,-0.0541,-0.031204,-0.031204,-0.0625
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1023,Zacian Hero of Many Battles,0.423831,0.236475,0.850194,1.541583,1.295200,0.222508,1.527170,2.332042,-1.110919,...,-0.031204,-0.0541,-0.044151,-0.031204,-0.044151,-0.031204,-0.0541,-0.031204,-0.031204,-0.0625
1024,Zamazenta Crowned Shield,0.453431,4.504843,0.850194,1.541583,2.254032,0.222508,2.595926,1.996332,-1.110919,...,-0.031204,-0.0541,-0.044151,-0.031204,-0.044151,-0.031204,-0.0541,-0.031204,-0.031204,-0.0625
1025,Zamazenta Hero of Many Battles,0.453431,0.868825,0.850194,1.541583,1.295200,0.222508,1.527170,2.332042,-1.110919,...,-0.031204,-0.0541,-0.044151,-0.031204,-0.044151,-0.031204,-0.0541,-0.031204,-0.031204,-0.0625
1026,Eternatus,5.514876,5.548222,2.670236,0.150830,0.655979,2.212600,0.814666,2.063474,2.119759,...,-0.031204,-0.0541,-0.044151,-0.031204,-0.044151,-0.031204,-0.0541,-0.031204,-0.031204,-0.0625


In [8]:
# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# The first column of X is the name: keep the test-set names for the
# result tables, then strip that column from both feature matrices.
X_info = X_test.iloc[:, :1]
X_train, X_test = X_train.iloc[:, 1:], X_test.iloc[:, 1:]

In [9]:
def predict_scores(model):
    """Evaluate a fitted model on the notebook-level test split.

    Reads the globals ``X_test`` and ``y_test``. Prints, in this order, the
    model's own ``score`` on the test split, the weighted F1 score and the
    confusion matrix of its predictions.

    Parameters
    ----------
    model
        A fitted estimator exposing ``predict`` and ``score``.

    Returns
    -------
    The predictions produced by ``model.predict(X_test)``.
    """
    predictions = model.predict(X_test)

    test_score = model.score(X_test, y_test)
    print(test_score)

    weighted_f1 = f1_score(y_test, predictions, average='weighted')
    print(weighted_f1)

    matrix = confusion_matrix(y_test, predictions)
    print(matrix)

    return predictions
    

In [10]:
# Logistic regression baseline, then its test-set report.
LR = LogisticRegression(solver='sag', max_iter=1500, random_state=42)
LR.fit(X_train, y_train.values.ravel())  # ravel: pass the labels as a flat 1-D array
y_pred_LR = predict_scores(LR)

0.9794117647058823
0.9782351468051186
[[ 12   0   0   0]
 [  2   5   1   1]
 [  0   0 301   0]
 [  2   0   1  15]]


In [11]:
# Support vector classifier with a sigmoid kernel, then its test-set report.
SVM = svm.SVC(kernel='sigmoid', C=10, random_state=42)
SVM.fit(X_train, y_train.values.ravel())  # ravel: pass the labels as a flat 1-D array
y_pred_SVM = predict_scores(SVM)

0.9852941176470589
0.9838012447864414
[[ 12   0   0   0]
 [  1   5   1   2]
 [  0   0 301   0]
 [  0   0   1  17]]


In [12]:
# Random forest with 500 trees, then its test-set report.
RF = RandomForestClassifier(n_estimators=500, random_state=42)
RF.fit(X_train, y_train.values.ravel())  # ravel: pass the labels as a flat 1-D array
y_pred_RF = predict_scores(RF)

0.9941176470588236
0.9938112623158715
[[ 12   0   0   0]
 [  1   7   1   0]
 [  0   0 301   0]
 [  0   0   0  18]]


In [13]:
# Multi-layer perceptron trained with the L-BFGS solver, then its test-set report.
NN = MLPClassifier(solver='lbfgs', random_state=42)
NN.fit(X_train, y_train.values.ravel())  # ravel: pass the labels as a flat 1-D array
y_pred_NN = predict_scores(NN)

0.9705882352941176
0.97016302570194
[[  9   1   0   2]
 [  1   6   2   0]
 [  0   0 300   1]
 [  2   1   0  15]]


In [14]:
# Side-by-side comparison table: the true label, each model's prediction and
# the name of the test row. Everything is positional, so the name column's
# index is reset to line up with the prediction arrays.
output = pd.DataFrame({
    'Expected': y_test.values.ravel(),
    'Predicted LR': y_pred_LR,
    'Predicted SVM': y_pred_SVM,
    'Predicted RF': y_pred_RF,
    'Predicted NN': y_pred_NN,
    'Name': X_info.iloc[:, 0].reset_index(drop=True),
})

output.head()

Unnamed: 0,Expected,Predicted LR,Predicted SVM,Predicted RF,Predicted NN,Name
0,normal,normal,normal,normal,normal,Glalie
1,normal,normal,normal,normal,normal,Skorupi
2,normal,normal,normal,normal,normal,Trapinch
3,normal,normal,normal,normal,normal,Magneton
4,normal,normal,normal,normal,normal,Chimecho


In [15]:
# Test rows that the logistic regression got wrong.
output.loc[output['Expected'].ne(output['Predicted LR'])]

Unnamed: 0,Expected,Predicted LR,Predicted SVM,Predicted RF,Predicted NN,Name
22,mythical,legendary,sublegendary,mythical,legendary,Genesect
44,mythical,normal,normal,normal,normal,Manaphy
68,sublegendary,normal,sublegendary,sublegendary,legendary,Entei
103,sublegendary,legendary,normal,sublegendary,mythical,Type: Null
122,mythical,sublegendary,sublegendary,mythical,normal,Celebi
239,mythical,legendary,legendary,legendary,mythical,Arceus
289,sublegendary,legendary,sublegendary,sublegendary,legendary,Silvally


In [16]:
# Test rows that the support vector classifier got wrong.
output.loc[output['Expected'].ne(output['Predicted SVM'])]

Unnamed: 0,Expected,Predicted LR,Predicted SVM,Predicted RF,Predicted NN,Name
22,mythical,legendary,sublegendary,mythical,legendary,Genesect
44,mythical,normal,normal,normal,normal,Manaphy
103,sublegendary,legendary,normal,sublegendary,mythical,Type: Null
122,mythical,sublegendary,sublegendary,mythical,normal,Celebi
239,mythical,legendary,legendary,legendary,mythical,Arceus


In [17]:
# Test rows that the random forest got wrong.
output.loc[output['Expected'].ne(output['Predicted RF'])]

Unnamed: 0,Expected,Predicted LR,Predicted SVM,Predicted RF,Predicted NN,Name
44,mythical,normal,normal,normal,normal,Manaphy
239,mythical,legendary,legendary,legendary,mythical,Arceus


In [18]:
# Test rows that the neural network got wrong.
output.loc[output['Expected'].ne(output['Predicted NN'])]

Unnamed: 0,Expected,Predicted LR,Predicted SVM,Predicted RF,Predicted NN,Name
22,mythical,legendary,sublegendary,mythical,legendary,Genesect
36,legendary,legendary,legendary,legendary,sublegendary,Yveltal
44,mythical,normal,normal,normal,normal,Manaphy
68,sublegendary,normal,sublegendary,sublegendary,legendary,Entei
103,sublegendary,legendary,normal,sublegendary,mythical,Type: Null
122,mythical,sublegendary,sublegendary,mythical,normal,Celebi
143,legendary,legendary,legendary,legendary,mythical,Dialga
196,legendary,legendary,legendary,legendary,sublegendary,Kyogre
289,sublegendary,legendary,sublegendary,sublegendary,legendary,Silvally
310,normal,normal,normal,normal,sublegendary,Avalugg
