# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the Pokemon table, give the columns identifier-friendly names (no
# spaces or dots) and drop the "Name" and "#" columns.
data = pd.read_csv("data/pokemon.csv") #Information about pokemon
data = data.rename(
    columns={
        "Type 1": "Type_1",
        "Type 2": "Type_2",
        "Sp. Atk": "Sp_Atk",
        "Sp. Def": "Sp_Def",
    }
)
data = data.drop(["Name", "#"], axis=1)

# NOTE: columns are replaced with `data[col] = ...` rather than
# `data.loc[:, col] = ...`. In pandas 2.x the `.loc[:, col]` form tries to
# write into the existing column in place, so an assignment that changes the
# dtype (bool -> int, int -> float) is not guaranteed to produce the new dtype
# and emits incompatible-dtype warnings on recent versions. Whole-column
# assignment always swaps in the new column together with its dtype.

# Missing values in Type_2 become the literal string "None".
data["Type_2"] = data["Type_2"].fillna("None")
# Legendary is converted with astype(int), so True/False become 1/0.
data["Legendary"] = data["Legendary"].astype(int)

# Standardize each continuous column: subtract its mean, divide by its
# standard deviation (Series.std, i.e. the sample standard deviation).
continuous_feats = ["HP", "Attack", "Defense", "Sp_Atk", "Sp_Def", "Speed"]
for feat in continuous_feats:
    ser = data[feat]
    data[feat] = (ser - ser.mean()) / ser.std()

# Combat records: cdata is the combats training data, tdata the combats
# test data.
cdata, tdata = (
    pd.read_csv(csv_path)
    for csv_path in ("data/combats.csv", "data/tests.csv")
)

# Create new training and test sets from the given datasets: for every combat,
# look up the row of the first and of the second Pokemon and place them side by side.

# Build the training frame: one row per combat holding the first Pokemon's
# columns (suffix "_1"), the second Pokemon's columns (suffix "_2") and,
# as the last column, the label "Output".
first_ids = cdata["First_pokemon"]
second_ids = cdata["Second_pokemon"]

# The combat file refers to Pokemon by number; number - 1 is used as the
# positional row of `data` (assumes numbering starts at 1 and follows the row
# order of the Pokemon table -- confirm against pokemon.csv).
# reset_index(drop=True) gives both halves a fresh 0..n-1 index so that the
# column-wise concat below pairs them row by row.
x1_train = data.iloc[first_ids - 1].add_suffix("_1").reset_index(drop=True)
x2_train = data.iloc[second_ids - 1].add_suffix("_2").reset_index(drop=True)

# Output is 1 when the recorded winner is the first Pokemon, otherwise 0.
first_won = cdata["Winner"] == first_ids
x2_train = x2_train.assign(Output=first_won.astype(int).values)

X_train = pd.concat([x1_train, x2_train], axis=1)

# Build the test frame: one row per test combat holding the first Pokemon's
# columns (suffix "_1") followed by the second Pokemon's columns (suffix "_2").
# No label column is added here.
# Pokemon number - 1 is used as the positional row of `data` (assumes
# numbering starts at 1 and follows the row order of the Pokemon table --
# confirm against pokemon.csv).
x1_test = (
    data.iloc[tdata["First_pokemon"] - 1]
    .add_suffix("_1")
    .reset_index(drop=True)
)
x2_test = (
    data.iloc[tdata["Second_pokemon"] - 1]
    .add_suffix("_2")
    .reset_index(drop=True)
)

# Both halves carry a fresh 0..n-1 index, so concat lines them up row by row.
X_test = pd.concat([x1_test, x2_test], axis=1)

# Write the complete labelled training table (header row included, no index
# column) before it is split.
X_train.to_csv("inputs/all_train_data.csv", index = None)

# Positional split without shuffling: the first 95% of the rows stay in
# X_train, the remaining rows become the evaluation set X_val.
num_train = int(0.95 * len(X_train))
X_train, X_val = X_train.iloc[:num_train], X_train.iloc[num_train:]

# The train, validation and test tables are written without header row and
# without index column.
for frame, out_path in (
    (X_train, "inputs/train_data.csv"),
    (X_val, "inputs/val_data.csv"),
    (X_test, "inputs/test_data.csv"),
):
    frame.to_csv(out_path, header = None, index = None)