### Install requirements

In [None]:
# ! pip3 install pandas
# ! pip3 install sklearn

### Import libraries

In [3]:
# Imports
import numpy as np
import pandas as pd

### Read in data and glimpse

In [5]:
# read in data - part-1: heros information file
heros_info = pd.read_csv('./data/heroes_information.csv')
heros_info.head()

Unnamed: 0.1,Unnamed: 0,name,Gender,Eye color,Race,Hair color,Height,Publisher,Skin color,Alignment,Weight
0,0,A-Bomb,Male,yellow,Human,No Hair,203.0,Marvel Comics,-,good,441.0
1,1,Abe Sapien,Male,blue,Icthyo Sapien,No Hair,191.0,Dark Horse Comics,blue,good,65.0
2,2,Abin Sur,Male,blue,Ungaran,No Hair,185.0,DC Comics,red,good,90.0
3,3,Abomination,Male,green,Human / Radiation,No Hair,203.0,Marvel Comics,-,bad,441.0
4,4,Abraxas,Male,blue,Cosmic Entity,Black,-99.0,Marvel Comics,-,bad,-99.0


In [6]:
# read in data - part-2: heros superpowers file
heros_power = pd.read_csv('./data/super_hero_powers.csv')
heros_power.head()

Unnamed: 0,hero_names,Agility,Accelerated Healing,Lantern Power Ring,Dimensional Awareness,Cold Resistance,Durability,Stealth,Energy Absorption,Flight,...,Web Creation,Reality Warping,Odin Force,Symbiote Costume,Speed Force,Phoenix Force,Molecular Dissipation,Vision - Cryo,Omnipresent,Omniscient
0,3-D Man,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,A-Bomb,False,True,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,Abe Sapien,True,True,False,False,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,Abin Sur,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,Abomination,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


### Shape data and extract features

In [8]:
# since only binary classification is required, create a feature that defines weather the hero is human or not
heros_info['Human'] = heros_info['Race'].apply(lambda i: 1 if i == 'Human' else 0)
heros_info.drop(['Unnamed: 0'], inplace=True, axis=1)
heros_info.head()

Unnamed: 0,name,Gender,Eye color,Race,Hair color,Height,Publisher,Skin color,Alignment,Weight,Human
0,A-Bomb,Male,yellow,Human,No Hair,203.0,Marvel Comics,-,good,441.0,1
1,Abe Sapien,Male,blue,Icthyo Sapien,No Hair,191.0,Dark Horse Comics,blue,good,65.0,0
2,Abin Sur,Male,blue,Ungaran,No Hair,185.0,DC Comics,red,good,90.0,0
3,Abomination,Male,green,Human / Radiation,No Hair,203.0,Marvel Comics,-,bad,441.0,0
4,Abraxas,Male,blue,Cosmic Entity,Black,-99.0,Marvel Comics,-,bad,-99.0,0


In [9]:
# create model matrix (aka dummies in pyworld) for superpowers data
power_bool_cols = heros_power.columns.drop("hero_names")
heros_power_dummies = pd.get_dummies(heros_power, columns=power_bool_cols)
heros_power_dummies.head()

Unnamed: 0,hero_names,Agility_False,Agility_True,Accelerated Healing_False,Accelerated Healing_True,Lantern Power Ring_False,Lantern Power Ring_True,Dimensional Awareness_False,Dimensional Awareness_True,Cold Resistance_False,...,Phoenix Force_False,Phoenix Force_True,Molecular Dissipation_False,Molecular Dissipation_True,Vision - Cryo_False,Vision - Cryo_True,Omnipresent_False,Omnipresent_True,Omniscient_False,Omniscient_True
0,3-D Man,0,1,1,0,1,0,1,0,1,...,1,0,1,0,1,0,1,0,1,0
1,A-Bomb,1,0,0,1,1,0,1,0,1,...,1,0,1,0,1,0,1,0,1,0
2,Abe Sapien,0,1,0,1,1,0,1,0,0,...,1,0,1,0,1,0,1,0,1,0
3,Abin Sur,1,0,1,0,0,1,1,0,1,...,1,0,1,0,1,0,1,0,1,0
4,Abomination,1,0,0,1,1,0,1,0,1,...,1,0,1,0,1,0,1,0,1,0


In [11]:
# model-matrix for info dataframe
info_bool_cols = ['Gender', 'Eye color', 'Hair color', 'Skin color', 'Alignment']
heros_info_dummies = pd.get_dummies(heros_info, columns=info_bool_cols)
heros_info_dummies.head()

Unnamed: 0,name,Race,Height,Publisher,Weight,Human,Gender_-,Gender_Female,Gender_Male,Eye color_-,...,Skin color_purple,Skin color_red,Skin color_red / black,Skin color_silver,Skin color_white,Skin color_yellow,Alignment_-,Alignment_bad,Alignment_good,Alignment_neutral
0,A-Bomb,Human,203.0,Marvel Comics,441.0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
1,Abe Sapien,Icthyo Sapien,191.0,Dark Horse Comics,65.0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
2,Abin Sur,Ungaran,185.0,DC Comics,90.0,0,0,0,1,0,...,0,1,0,0,0,0,0,0,1,0
3,Abomination,Human / Radiation,203.0,Marvel Comics,441.0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0
4,Abraxas,Cosmic Entity,-99.0,Marvel Comics,-99.0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0


In [12]:
# merge two dataframes
heros = pd.merge(heros_info_dummies, heros_power_dummies, left_on=['name'], right_on=['hero_names'], how='inner')
heros.head()

Unnamed: 0,name,Race,Height,Publisher,Weight,Human,Gender_-,Gender_Female,Gender_Male,Eye color_-,...,Phoenix Force_False,Phoenix Force_True,Molecular Dissipation_False,Molecular Dissipation_True,Vision - Cryo_False,Vision - Cryo_True,Omnipresent_False,Omnipresent_True,Omniscient_False,Omniscient_True
0,A-Bomb,Human,203.0,Marvel Comics,441.0,1,0,0,1,0,...,1,0,1,0,1,0,1,0,1,0
1,Abe Sapien,Icthyo Sapien,191.0,Dark Horse Comics,65.0,0,0,0,1,0,...,1,0,1,0,1,0,1,0,1,0
2,Abin Sur,Ungaran,185.0,DC Comics,90.0,0,0,0,1,0,...,1,0,1,0,1,0,1,0,1,0
3,Abomination,Human / Radiation,203.0,Marvel Comics,441.0,0,0,0,1,0,...,1,0,1,0,1,0,1,0,1,0
4,Abraxas,Cosmic Entity,-99.0,Marvel Comics,-99.0,0,0,0,1,0,...,1,0,1,0,1,0,1,0,1,0


In [42]:
# create input (X) and output (y) for training a model
# drop all non-numeric collumns from input and all rows with one or more NaN/NAs
heros = heros.dropna()
X_columns_drop = ['name', 'Publisher', 'hero_names', 'Race', 'Human']
X, y = heros.drop(X_columns_drop, axis=1), heros['Human']

# split dataframes into train and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=35)
print(X_train.shape, X_test.shape)

(485, 413) (162, 413)


In [72]:
# train with MLP
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, max_iter=2e5,
                    hidden_layer_sizes=(8000, 50), random_state=35)
clf.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(8000, 50), learning_rate='constant',
              learning_rate_init=0.001, max_iter=200000.0, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=35, shuffle=True, solver='lbfgs', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [73]:
y_pred = clf.predict(X_test)
y_pred

array([0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0])

In [74]:
# evaluate the model
from sklearn.metrics import confusion_matrix, accuracy_score
confusion_matrix(y_test, y_pred)

array([[78, 31],
       [42, 11]])

In [75]:
accuracy_score(y_test, y_pred)

0.5493827160493827