# Preparing the training dataset

In [1]:
import pandas as pd
import sklearn as skl

from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics

from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import GridSearchCV

import os
parent_dir = os.path.dirname(os.getcwd())

In [2]:
# Importing Pokemon stats
pokemon = pd.read_csv(parent_dir + '/data/preprocessed.csv')
pokemon

Unnamed: 0.1,Unnamed: 0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Bug,Dark,...,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water,Unnamed: 26
0,0,1,45,49,49,65,65,45,0,0,...,1,0,0,0,1,0,0,0,0,0
1,1,2,60,62,63,80,80,60,0,0,...,1,0,0,0,1,0,0,0,0,0
2,2,3,80,82,83,100,100,80,0,0,...,1,0,0,0,1,0,0,0,0,0
3,3,4,80,100,123,122,120,80,0,0,...,1,0,0,0,1,0,0,0,0,0
4,4,5,39,52,43,60,50,65,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,795,796,50,100,150,100,150,50,0,0,...,0,0,0,0,0,0,1,0,0,0
796,796,797,50,160,110,160,110,110,0,0,...,0,0,0,0,0,0,1,0,0,0
797,797,798,80,110,60,150,130,70,0,0,...,0,0,0,0,0,1,0,0,0,0
798,798,799,80,160,60,170,130,80,0,1,...,0,0,0,0,0,1,0,0,0,0


In [3]:
# Removing the extra column at the end of the csv 
pokemon.drop(pokemon.columns[-1], axis=1)

Unnamed: 0.1,Unnamed: 0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Bug,Dark,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
0,0,1,45,49,49,65,65,45,0,0,...,0,1,0,0,0,1,0,0,0,0
1,1,2,60,62,63,80,80,60,0,0,...,0,1,0,0,0,1,0,0,0,0
2,2,3,80,82,83,100,100,80,0,0,...,0,1,0,0,0,1,0,0,0,0
3,3,4,80,100,123,122,120,80,0,0,...,0,1,0,0,0,1,0,0,0,0
4,4,5,39,52,43,60,50,65,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,795,796,50,100,150,100,150,50,0,0,...,0,0,0,0,0,0,0,1,0,0
796,796,797,50,160,110,160,110,110,0,0,...,0,0,0,0,0,0,0,1,0,0
797,797,798,80,110,60,150,130,70,0,0,...,1,0,0,0,0,0,1,0,0,0
798,798,799,80,160,60,170,130,80,0,1,...,0,0,0,0,0,0,1,0,0,0


In [4]:
# Defining functions for getting features from the pokemon dataset according to the index of the Pokemon and a function for renaming the columns

def get_features(id,pokemon):
    return (pokemon.iloc[id - 1, 0:-1:1])

# first_or_second is a bool argument, passing True will return first pokemon and False will return second pokemon
def rename_column(first_or_second, features):
     list_ = features.columns.to_list()
     if first_or_second:
          for i in range(len(list_)):
               list_[i] = 'First_' + list_[i] 

     else:
          for i in range(len(list_)):
               list_[i] = 'Second_' + list_[i] 

     features.columns = list_

In [5]:
# Importing training data

combats = pd.read_csv(parent_dir + '/data/combats.csv')
combats

Unnamed: 0,First_pokemon,Second_pokemon,Winner
0,266,298,298
1,702,701,701
2,191,668,668
3,237,683,683
4,151,231,151
...,...,...,...
49995,707,126,707
49996,589,664,589
49997,303,368,368
49998,109,89,109


In [6]:
# Creating a dataframe which will have all the features of the 'First_Pokemon' from 'combats.csv' with column name renamed

first_pokemon = pokemon.copy()
rename_column(True, first_pokemon)

list_first = combats.iloc[:, 0].to_list()
temp_ = list()

for i in range(len(list_first)):
    temp_.append(get_features(list_first[i], first_pokemon).to_frame().T)

df_first_pokemon = pd.concat(temp_, ignore_index=True)
df_first_pokemon

Unnamed: 0,First_Unnamed: 0,First_#,First_HP,First_Attack,First_Defense,First_Sp. Atk,First_Sp. Def,First_Speed,First_Bug,First_Dark,...,First_Ghost,First_Grass,First_Ground,First_Ice,First_Normal,First_Poison,First_Psychic,First_Rock,First_Steel,First_Water
0,265,266,50,64,50,45,50,41,0,0,...,0,0,1,0,0,0,0,1,0,0
1,701,702,91,90,72,90,129,108,0,0,...,0,1,0,0,0,0,0,0,0,0
2,190,191,55,40,85,80,105,40,0,0,...,0,0,0,0,0,0,0,0,0,0
3,236,237,40,40,40,70,40,20,0,0,...,0,0,0,0,0,0,0,0,0,0
4,150,151,70,60,125,115,70,55,0,0,...,0,0,0,0,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,706,707,100,120,100,150,120,90,0,0,...,0,0,0,0,0,0,0,0,0,0
49996,588,589,60,85,40,30,45,68,0,0,...,0,0,1,0,0,0,0,0,0,0
49997,302,303,60,50,100,85,70,65,0,0,...,0,0,0,0,0,0,0,0,0,1
49998,108,109,40,30,50,55,55,100,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
# Similar as above step, dataframe for 'Second_Pokemon'

temp_.clear()
second_pokemon = pokemon.copy()
rename_column(False, second_pokemon)

list_second = combats.iloc[:, 1].to_list()


for i in range(len(list_second)):
    temp_.append(get_features(list_second[i], second_pokemon).to_frame().T)

df_second_pokemon = pd.concat(temp_, ignore_index=True)
df_second_pokemon

Unnamed: 0,Second_Unnamed: 0,Second_#,Second_HP,Second_Attack,Second_Defense,Second_Sp. Atk,Second_Sp. Def,Second_Speed,Second_Bug,Second_Dark,...,Second_Ghost,Second_Grass,Second_Ground,Second_Ice,Second_Normal,Second_Poison,Second_Psychic,Second_Rock,Second_Steel,Second_Water
0,297,298,70,70,40,60,40,60,0,1,...,0,1,0,0,0,0,0,0,0,0
1,700,701,91,129,90,72,90,108,0,0,...,0,0,0,0,0,0,0,1,0,0
2,667,668,75,75,75,125,95,40,0,0,...,0,0,0,0,0,0,1,0,0,0
3,682,683,77,120,90,60,90,48,0,0,...,0,0,0,0,0,0,0,0,0,0
4,230,231,20,10,230,10,230,5,1,0,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,125,126,30,40,70,70,25,60,0,0,...,0,0,0,0,0,0,0,0,0,1
49996,663,664,35,55,40,45,40,60,0,0,...,0,0,0,0,0,0,0,0,0,0
49997,367,368,73,115,60,60,60,90,0,0,...,0,0,0,0,1,0,0,0,0,0
49998,88,89,25,35,70,95,55,45,0,0,...,0,0,0,0,0,0,0,0,1,0


In [8]:
# Removing Pokemon ID feature which is unnecessary for training

df_first_pokemon = df_first_pokemon.drop(df_first_pokemon.columns[0], axis=1)
df_second_pokemon = df_second_pokemon.drop(df_second_pokemon.columns[0], axis=1)

In [9]:
# Joining the first and second pokemon dataframe to create the final training dataframe

training_df = pd.concat([df_first_pokemon, df_second_pokemon], axis=1)
training_df

Unnamed: 0,First_#,First_HP,First_Attack,First_Defense,First_Sp. Atk,First_Sp. Def,First_Speed,First_Bug,First_Dark,First_Dragon,...,Second_Ghost,Second_Grass,Second_Ground,Second_Ice,Second_Normal,Second_Poison,Second_Psychic,Second_Rock,Second_Steel,Second_Water
0,266,50,64,50,45,50,41,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,702,91,90,72,90,129,108,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,191,55,40,85,80,105,40,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,237,40,40,40,70,40,20,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,151,70,60,125,115,70,55,0,0,0,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,707,100,120,100,150,120,90,0,0,1,...,0,0,0,0,0,0,0,0,0,1
49996,589,60,85,40,30,45,68,0,0,0,...,0,0,0,0,0,0,0,0,0,0
49997,303,60,50,100,85,70,65,0,0,0,...,0,0,0,0,1,0,0,0,0,0
49998,109,40,30,50,55,55,100,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [10]:
# Creating class labels: 1 means first Pokemon wins, 0 means second Pokemon wins

class_labels = []

x = combats.iloc[:,2].to_list()

for i in range(len(x)):
    if (x[i] == list_first[i]):
        class_labels.append(1)
    else:
        class_labels.append(0)
        

# Training Models

In [11]:

seed = 20031 # Seed is selected as per my Roll No. in the course
train_selection_percent = 0.8 # 80% of the data will be used for training, 20% for testing
predicted = {}

"""
Two evaluation criteria are chosen -

(1) F1 - score: The more the f1-score, the better the classifier, macro average is taken to give equal weight to each class
(2) Accuracy: The more the accuracy, the better the classifier
"""

# Scores  will be stored in this dictionary
scores = {}

# Stores confusion matrix for display
confusion_matrix = {}

# Preparing Training  and Testing data
x_train, x_test, y_train, y_test = train_test_split(training_df, class_labels, train_size=train_selection_percent, random_state=seed)

In [12]:
"""
___________________Model_1 - Decision Tree based on the CART algorithm__________________________
"""

decision_tree = tree.DecisionTreeClassifier()
decision_tree.fit(x_train, y_train)

name = 'Decision Tree' 

predicted[name] = decision_tree.predict(x_test)

scores[name] = metrics.classification_report(y_test, predicted[name])
confusion_matrix[name] = metrics.confusion_matrix(y_test, predicted[name])

In [13]:
"""
___________________Model_2 - Logistic Regression algorithm__________________________
"""

logreg = LogisticRegression()
logreg.fit(x_train, y_train)

name = 'Logistic Regression'

predicted[name] = logreg.predict(x_test)

scores[name] = metrics.classification_report(y_test, predicted[name])
confusion_matrix[name] = metrics.confusion_matrix(y_test, predicted[name])

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [14]:
"""
___________________Model_3 - Random Forest algorithm__________________________
"""

random_forest = RandomForestClassifier() #No. of trees will be 200
random_forest.fit(x_train, y_train)

name = 'Random Forest'

predicted[name] =random_forest.predict(x_test)

scores[name] = metrics.classification_report(y_test, predicted[name])
confusion_matrix[name] = metrics.confusion_matrix(y_test, predicted[name])

In [15]:
"""
___________________Model_4 - k-Nearest Neighbours algorithm__________________________
"""

knn = KNeighborsClassifier() # Number of neighbours chosen was 4
knn.fit(x_train, y_train)

name = 'knn'

predicted[name] = knn.predict(x_test)

scores[name] = metrics.classification_report(y_test, predicted[name])
confusion_matrix[name] = metrics.confusion_matrix(y_test, predicted[name])

# Evaluation

In [16]:
# Display confusion matrix and precision, recall, f1 score and accuracy for all 4 models used above

for item in confusion_matrix:
    print('\n\nConfusion matrix for ' + item)
    display(pd.DataFrame(confusion_matrix[item], index=['Loser', 'Winner'], columns=['Loser', 'Winner']))



Confusion matrix for Decision Tree


Unnamed: 0,Loser,Winner
Loser,4976,296
Winner,281,4447




Confusion matrix for Logistic Regression


Unnamed: 0,Loser,Winner
Loser,4711,561
Winner,550,4178




Confusion matrix for Random Forest


Unnamed: 0,Loser,Winner
Loser,4992,280
Winner,252,4476




Confusion matrix for knn


Unnamed: 0,Loser,Winner
Loser,4437,835
Winner,874,3854


In [17]:
for item in scores:
    print(item)
    print(scores[item] + '\n')

Decision Tree
              precision    recall  f1-score   support

           0       0.95      0.94      0.95      5272
           1       0.94      0.94      0.94      4728

    accuracy                           0.94     10000
   macro avg       0.94      0.94      0.94     10000
weighted avg       0.94      0.94      0.94     10000


Logistic Regression
              precision    recall  f1-score   support

           0       0.90      0.89      0.89      5272
           1       0.88      0.88      0.88      4728

    accuracy                           0.89     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.89      0.89      0.89     10000


Random Forest
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      5272
           1       0.94      0.95      0.94      4728

    accuracy                           0.95     10000
   macro avg       0.95      0.95      0.95     10000
weighted avg       0.95 

In [18]:
# Finding the most effective features used for classification, top 10 of them are shown

effective = pd.DataFrame()
effective['feature_name'] = training_df.columns.tolist()
effective['feature_importance'] = random_forest.feature_importances_
effective = effective.sort_values(by='feature_importance', ascending=False)

print('Top 10 effective features for classification')
display(effective.iloc[0:10:1,:])

Top 10 effective features for classification


Unnamed: 0,feature_name,feature_importance
6,First_Speed,0.242088
31,Second_Speed,0.231903
2,First_Attack,0.047473
27,Second_Attack,0.047372
4,First_Sp. Atk,0.039125
29,Second_Sp. Atk,0.03899
1,First_HP,0.032867
5,First_Sp. Def,0.031632
30,Second_Sp. Def,0.030038
26,Second_HP,0.029135


# Improvements

In [None]:
# Decision tree and Random Forest have most accuracy so improving on those two models only

In [19]:
# Prameters grid for Random Forest Classifier Grid search
rf_grid = { 
    'n_estimators': [100, 200, 300, 400, 500, 600], # No. of trees
    'criterion': ['gini', 'entropy'],
    'random_state': [seed]
}

# Prameters grid for Decision Tree Classifier Grid search
dt_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [100, 200, 300, 400, 500],
    'random_state': [seed]
}


In [20]:
rf_cv = GridSearchCV(estimator=RandomForestClassifier(), param_grid=rf_grid, cv=5, scoring='accuracy') #5-fold k-validation based on accuracy score
rf_cv.fit(x_train, y_train)

rf_cv_prediction = rf_cv.predict(x_test)

In [21]:
print('Random Forest')

display(pd.DataFrame(metrics.confusion_matrix(y_test, rf_cv_prediction), index=['Loser', 'Winner'], columns=['Loser', 'Winner']))
print(metrics.classification_report(y_test, rf_cv_prediction))

Random Forest


Unnamed: 0,Loser,Winner
Loser,4996,276
Winner,245,4483


              precision    recall  f1-score   support

           0       0.95      0.95      0.95      5272
           1       0.94      0.95      0.95      4728

    accuracy                           0.95     10000
   macro avg       0.95      0.95      0.95     10000
weighted avg       0.95      0.95      0.95     10000



In [30]:
rf_cv_accuracy = round(metrics.accuracy_score(y_test,rf_cv_prediction) * 100, 2) 
rf_cv_fscore = round(metrics.f1_score(y_test, rf_cv_prediction, average='macro') * 100, 2)

In [23]:
dt_cv = GridSearchCV(estimator=tree.DecisionTreeClassifier(), param_grid=dt_grid, cv=5, scoring='accuracy')
dt_cv.fit(x_train, y_train)

dt_cv_prediction = dt_cv.predict(x_test)

In [31]:
print('Decision Tree')

display(pd.DataFrame(metrics.confusion_matrix(y_test, dt_cv_prediction), index=['Loser', 'Winner'], columns=['Loser', 'Winner']))
print(metrics.classification_report(y_test, dt_cv_prediction))

Decision Tree


Unnamed: 0,Loser,Winner
Loser,4978,294
Winner,279,4449


              precision    recall  f1-score   support

           0       0.95      0.94      0.95      5272
           1       0.94      0.94      0.94      4728

    accuracy                           0.94     10000
   macro avg       0.94      0.94      0.94     10000
weighted avg       0.94      0.94      0.94     10000



In [32]:
dt_cv_accuracy = round(metrics.accuracy_score(y_test, dt_cv_prediction) * 100, 2) 
dt_cv_fscore = round(metrics.f1_score(y_test, dt_cv_prediction, average='macro') * 100, 2)

In [33]:


print('Accuracy:')

print('Random Forest Classifier: ' + str(rf_cv_accuracy))
print('Decision Tree Classifier: ' + str(dt_cv_accuracy))




print('\n\nF1-Score:')

print('Test:')
print('Random Forest Classifier: ' + str(rf_cv_fscore))
print('Decision Tree Classifier: ' + str(dt_cv_fscore))

Accuracy:
Random Forest Classifier: 94.79
Decision Tree Classifier: 94.27


F1-Score:
Test:
Random Forest Classifier: 94.78
Decision Tree Classifier: 94.25


# Result

In [34]:
# Random forest classifier is chosen

# Importing the test data
test = pd.read_csv(parent_dir + '/data/test.csv')
test

Unnamed: 0,First_pokemon,Second_pokemon
0,129,117
1,660,211
2,706,115
3,195,618
4,27,656
...,...,...
9995,216,498
9996,113,404
9997,493,104
9998,643,259


In [35]:
# Creating dataframe from test data to put into the classifier

temp_.clear()
list_first = test.iloc[:, 0].to_list()

for i in range(len(list_first)):
    temp_.append(get_features(list_first[i], first_pokemon).to_frame().T)

df_first_pokemon_test = pd.concat(temp_, ignore_index=True)
df_first_pokemon_test

Unnamed: 0,First_Unnamed: 0,First_#,First_HP,First_Attack,First_Defense,First_Sp. Atk,First_Sp. Def,First_Speed,First_Bug,First_Dark,...,First_Ghost,First_Grass,First_Ground,First_Ice,First_Normal,First_Poison,First_Psychic,First_Rock,First_Steel,First_Water
0,128,129,80,92,65,65,80,68,0,0,...,0,0,0,0,0,0,0,0,0,1
1,659,660,74,94,131,54,116,20,0,0,...,0,1,0,0,0,0,0,0,1,0
2,705,706,79,105,70,145,80,101,0,0,...,0,0,0,0,0,0,0,0,0,0
3,194,195,70,55,55,80,60,45,0,0,...,0,0,0,0,0,0,0,0,0,0
4,26,27,40,60,30,31,31,70,0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,215,216,60,60,60,85,85,85,0,0,...,1,0,0,0,0,0,0,0,0,0
9996,112,113,50,50,95,40,50,35,0,0,...,0,0,1,0,0,0,0,0,0,0
9997,492,493,68,90,65,50,55,82,0,0,...,0,0,1,0,0,0,0,0,0,0
9998,642,643,75,87,63,87,63,98,0,0,...,0,0,0,0,0,0,0,0,0,1


In [36]:
temp_.clear()
list_second = test.iloc[:, 1].to_list()

for i in range(len(list_second)):
    temp_.append(get_features(list_second[i], second_pokemon).to_frame().T)

df_second_pokemon_test = pd.concat(temp_, ignore_index=True)
df_second_pokemon_test

Unnamed: 0,Second_Unnamed: 0,Second_#,Second_HP,Second_Attack,Second_Defense,Second_Sp. Atk,Second_Sp. Def,Second_Speed,Second_Bug,Second_Dark,...,Second_Ghost,Second_Grass,Second_Ground,Second_Ice,Second_Normal,Second_Poison,Second_Psychic,Second_Rock,Second_Steel,Second_Water
0,116,117,90,55,75,60,75,30,0,0,...,0,0,0,0,1,0,0,0,0,0
1,210,211,95,85,85,65,65,35,0,0,...,0,0,1,0,0,0,0,0,0,1
2,114,115,50,120,53,35,110,87,0,0,...,0,0,0,0,0,0,0,0,0,0
3,617,618,75,86,67,106,67,60,0,0,...,0,1,0,0,0,0,0,0,0,0
4,655,656,165,75,80,40,45,65,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,497,498,70,110,70,115,70,90,0,0,...,0,0,0,0,0,0,0,0,1,0
9996,403,404,55,84,105,114,75,52,0,0,...,0,0,0,0,0,0,0,0,0,1
9997,103,104,35,45,160,30,45,70,0,0,...,0,0,1,0,0,0,0,1,0,0
9998,258,259,45,63,37,65,55,95,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
df_first_pokemon_test = df_first_pokemon_test.drop(df_first_pokemon_test.columns[0], axis=1)
df_second_pokemon_test = df_second_pokemon_test.drop(df_second_pokemon_test.columns[0], axis=1)

In [38]:
# Creating final test dataframe to put into the classifier
test_df = pd.concat([df_first_pokemon_test, df_second_pokemon_test], axis=1)
test_df

Unnamed: 0,First_#,First_HP,First_Attack,First_Defense,First_Sp. Atk,First_Sp. Def,First_Speed,First_Bug,First_Dark,First_Dragon,...,Second_Ghost,Second_Grass,Second_Ground,Second_Ice,Second_Normal,Second_Poison,Second_Psychic,Second_Rock,Second_Steel,Second_Water
0,129,80,92,65,65,80,68,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,660,74,94,131,54,116,20,0,0,0,...,0,0,1,0,0,0,0,0,0,1
2,706,79,105,70,145,80,101,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,195,70,55,55,80,60,45,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,27,40,60,30,31,31,70,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,216,60,60,60,85,85,85,0,0,0,...,0,0,0,0,0,0,0,0,1,0
9996,113,50,50,95,40,50,35,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9997,493,68,90,65,50,55,82,0,0,1,...,0,0,1,0,0,0,0,1,0,0
9998,643,75,87,63,87,63,98,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [39]:
# Since Random Forest Classifier had the best f-1 score and accuracy, it is chosen to run on the test data
result_rf = rf_cv.predict(test_df)

result_id_rf = []

for i in range(len(result_rf)):
    if (result_rf[i] == 1):
        result_id_rf.append(list_first[i])
    else:
        result_id_rf.append(list_second[i])


result_id_rf

[129,
 211,
 706,
 618,
 27,
 126,
 436,
 36,
 169,
 302,
 380,
 11,
 747,
 302,
 436,
 381,
 205,
 86,
 681,
 570,
 496,
 36,
 518,
 376,
 542,
 482,
 164,
 773,
 260,
 524,
 275,
 490,
 9,
 100,
 347,
 542,
 40,
 635,
 41,
 482,
 339,
 341,
 702,
 682,
 575,
 682,
 773,
 601,
 260,
 165,
 714,
 130,
 193,
 135,
 42,
 703,
 591,
 137,
 614,
 74,
 82,
 301,
 474,
 132,
 715,
 300,
 340,
 370,
 788,
 264,
 551,
 475,
 704,
 722,
 25,
 53,
 461,
 128,
 711,
 341,
 635,
 313,
 138,
 273,
 363,
 345,
 109,
 689,
 141,
 257,
 246,
 269,
 163,
 106,
 335,
 317,
 103,
 223,
 248,
 535,
 99,
 219,
 177,
 586,
 766,
 64,
 732,
 337,
 306,
 34,
 472,
 724,
 551,
 798,
 124,
 9,
 376,
 110,
 505,
 255,
 303,
 201,
 157,
 325,
 638,
 84,
 735,
 682,
 581,
 744,
 527,
 339,
 567,
 742,
 703,
 369,
 316,
 702,
 327,
 553,
 339,
 387,
 148,
 142,
 431,
 43,
 40,
 453,
 478,
 32,
 495,
 355,
 155,
 411,
 662,
 534,
 631,
 202,
 770,
 426,
 707,
 139,
 40,
 72,
 611,
 298,
 344,
 272,
 269,
 511,
 83,


In [40]:
# Creating the predicted data in the same format as in 'combats.csv' and exporting to a file 'results.csv'

output = pd.DataFrame({'First_pokemon': list_first , 'Second_pokemon': list_second, 'Winner': result_id_rf})
output.to_csv(parent_dir + '/data/result.csv', index=False)
output

Unnamed: 0,First_pokemon,Second_pokemon,Winner
0,129,117,129
1,660,211,211
2,706,115,706
3,195,618,618
4,27,656,27
...,...,...,...
9995,216,498,498
9996,113,404,404
9997,493,104,493
9998,643,259,643
