In [48]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_score

In [39]:
# Read the three input tables from the working directory, in this order:
# the Pokémon table, the recorded battles, and the battles to predict.
df, combats, test = (
    pd.read_csv(csv_path)
    for csv_path in ('Pokemon.csv', 'combats.csv', 'tests.csv')
)

In [27]:
# Keep only the name plus the six stat columns used further down.
# `filter` silently skips any requested label that is absent from `df`.
name_and_stat_columns = ["Name", "HP", "Attack", "Defense", "Sp.Atk", "Sp.Def", "Speed"]
pokemon = df.filter(items=name_and_stat_columns, axis="columns")

In [69]:
# Columns that describe one battle: both participants and the winner.
cols = ["First_pokemon", "Second_pokemon", "Winner"]

# A Series handed to `replace` is used as a {row label: value} mapping, so every
# numeric entry is swapped for the name stored at that row label of `pokemon`.
# NOTE(review): the lookup is keyed on the row label of `pokemon`, not on an id
# column — confirm the ids in `combats` really refer to those row labels.
name_by_row_label = pokemon["Name"]
battle_ids = combats[cols]
fights = battle_ids.replace(name_by_row_label)
fights.head()

Unnamed: 0,First_pokemon,Second_pokemon,Winner
0,Beautifly,Nosepass,Nosepass
1,Carbink,Dedenne,Dedenne
2,Sunflora,Flabébé,Flabébé
3,Smoochum,Swirlix,Swirlix
4,Chikorita,Donphan,Chikorita


In [74]:
# Save the name-resolved battles; with the default settings the row index is
# written as a leading unnamed column.
fights.to_csv(path_or_buf='fights.csv')

In [23]:
# Encode the target as a binary label: 0 when the first Pokémon won, 1 when the
# second one did. Both masks are computed from the original ids *before* any
# write, so the second comparison never sees the 0s written by the first.
first_won = combats["Winner"] == combats["First_pokemon"]
second_won = (combats["Winner"] == combats["Second_pokemon"]) & ~first_won

# A single `.loc[mask, column]` assignment writes into `combats` itself. The
# chained form `combats.Winner[mask] = ...` assigns through an intermediate
# object and is not guaranteed to modify the frame (it is a no-op under pandas
# Copy-on-Write).
combats.loc[first_won, "Winner"] = 0
combats.loc[second_won, "Winner"] = 1

# NOTE: this overwrites the winner ids in place, so run it once per fresh load
# of `combats`; re-running would compare the 0/1 labels against Pokémon ids.

In [29]:
def _stat_differences(data_df, stats):
    """Return one row per battle holding (first - second) for each column in ``stats``."""
    stat_table = pokemon[stats]
    # Ids are looked up against the row labels of the global `pokemon` frame;
    # an id with no matching row yields NaN for that battle.
    # NOTE(review): confirm the ids in the battle tables are row labels of
    # `pokemon` rather than values of a separate id column.
    first = stat_table.reindex(index=data_df["First_pokemon"].to_numpy()).to_numpy(dtype=float)
    second = stat_table.reindex(index=data_df["Second_pokemon"].to_numpy()).to_numpy(dtype=float)
    # Positional (0..n-1) index, independent of whatever index `data_df` carries.
    return pd.DataFrame(first - second, columns=stats)


def normalization(data_df, reference_df=None):
    """Build min-max scaled stat-difference features for the battles in ``data_df``.

    For every row, the six stats of ``Second_pokemon`` are subtracted from those
    of ``First_pokemon`` (both looked up in the global ``pokemon`` frame), and
    each resulting column is rescaled with ``(x - min) / (max - min)``.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Must provide ``First_pokemon`` and ``Second_pokemon`` columns. Its index
        may be anything; rows are processed by position.
    reference_df : pandas.DataFrame, optional
        Battles whose stat differences supply the min/max used for scaling.
        Defaults to ``data_df`` itself. Pass the training battles here when
        transforming new data so both sets share one scale; values may then
        fall outside ``[0, 1]``.

    Returns
    -------
    pandas.DataFrame
        One row per battle, columns ``HP, Attack, Defense, Sp.Atk, Sp.Def,
        Speed``, with a fresh 0..n-1 index. A column whose min equals its max
        is returned as all zeros instead of NaN. Empty input returns an empty
        frame with the same columns.
    """
    stats = ["HP", "Attack", "Defense", "Sp.Atk", "Sp.Def", "Speed"]
    differences = _stat_differences(data_df, stats)
    if reference_df is None:
        bounds_source = differences
    else:
        bounds_source = _stat_differences(reference_df, stats)

    lowest = bounds_source.min()
    spread = bounds_source.max() - lowest
    # Avoid 0/0 for constant columns: dividing (x - min) == 0 by 1 gives 0.
    spread = spread.where(spread != 0, 1.0)
    return (differences - lowest) / spread

In [30]:
# Scaled stat-difference features for every recorded battle, with the winner
# column of `combats` appended on the right (rows are aligned on the index).
battle_features = normalization(combats)
data = pd.concat([battle_features, combats["Winner"]], axis="columns")

In [32]:
# Separate the modelling table into the target (`y`) and the feature matrix (`X`).
target_column = "Winner"
y = data[target_column]
X = data.drop(columns=target_column)

In [53]:
# Hold out a quarter of the battles for evaluation; the fixed seed keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=40,
)

In [60]:
# Seed the forest so the fitted model, its scores and the downstream
# predictions are reproducible on a fresh kernel.
clf_rfc = RandomForestClassifier(n_estimators=50, random_state=40)
clf_rfc.fit(X_train, y_train)

# 5-fold cross-validated scores, computed on the training split only.
accuracies = cross_val_score(estimator=clf_rfc, X=X_train, y=y_train, cv=5, verbose=1)
y_pred = clf_rfc.predict(X_test)

print('')
print('####### RandomForestClassifier #######')
print('Score : %.4f' % clf_rfc.score(X_test, y_test))
print(accuracies)

# These are regression metrics applied to 0/1 labels: for such labels MSE and
# MAE both equal the misclassification rate.
# NOTE(review): the recorded hold-out score is close to chance for a binary
# target — check that the ids in the battle tables match the rows of `pokemon`.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = mse ** 0.5  # reuse the MSE computed above instead of recomputing it
r2 = r2_score(y_test, y_pred)

print('')
print('MSE    : %0.2f ' % mse)
print('MAE    : %0.2f ' % mae)
print('RMSE   : %0.2f ' % rmse)
# `r2` was computed but never reported, although the saved output shows it.
print('R2     : %0.2f ' % r2)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   12.6s finished



####### RandomForestClassifier #######
Score : 0.5639
[0.54726037 0.56853333 0.55426667 0.5536     0.56514202]

MSE    : 0.44 
MAE    : 0.44 
RMSE   : 0.66 
R2     : -0.75 


In [71]:
# Preview the battles to predict with the numeric entries swapped for the name
# stored at the matching row label of `pokemon`.
matchup_columns = ["First_pokemon", "Second_pokemon"]
matchup_ids = test[matchup_columns]
new_test_data = matchup_ids.replace(pokemon["Name"])
new_test_data.head()

Unnamed: 0,First_pokemon,Second_pokemon
0,Gyarados,Goldeen
1,Fletchling,Scizor
2,Klefki,Horsea
3,Espeon,Mienfoo
4,Sandslash,Frogadier


In [72]:
# Build features for the battles to predict and classify each one.
# NOTE(review): `normalization(test)` scales with the min/max of `test` itself,
# while the model was fitted on features scaled with the min/max of `combats`;
# confirm the two ranges coincide, otherwise the features are on different scales.
final_data = normalization(test)
pred = clf_rfc.predict(final_data)

# Label 0 means the first Pokémon wins, anything else the second. `np.where`
# selects row by row positionally, so it does not depend on `test` having a
# default 0..n-1 index (the previous `test[...][i]` lookup did).
test["Winner"] = np.where(pred == 0, test["First_pokemon"], test["Second_pokemon"])

In [73]:
# Show the predicted battles (participants and predicted winner) with the
# numeric entries swapped for the name at the matching row label of `pokemon`.
predicted_battle_ids = test[cols]
combats_name = predicted_battle_ids.replace(pokemon["Name"])
combats_name.head()

Unnamed: 0,First_pokemon,Second_pokemon,Winner
0,Gyarados,Goldeen,Goldeen
1,Fletchling,Scizor,Fletchling
2,Klefki,Horsea,Horsea
3,Espeon,Mienfoo,Espeon
4,Sandslash,Frogadier,Sandslash
