# In [None]:
import tensorflow as tf
import pandas as pd
import tempfile

# Read the combined training table; its header defines the CSV column layout
# that the TensorFlow parsers below rely on.
data = pd.read_csv("inputs/all_train_data.csv")
col_list = [*data.columns]
ncolumns = len(col_list)

# The test layout is the training layout without its final column
# (presumably the "Output" label -- confirm against the CSV header).
test_col_list = col_list[:-1]
testcolumns = len(test_col_list)

# Base continuous features: one numeric column per CSV column of the same name.
_numeric = tf.feature_column.numeric_column
HP_1 = _numeric("HP_1")
HP_2 = _numeric("HP_2")
Attack_1 = _numeric("Attack_1")
Attack_2 = _numeric("Attack_2")
Defense_1 = _numeric("Defense_1")
Defense_2 = _numeric("Defense_2")
Sp_Atk_1 = _numeric("Sp_Atk_1")
Sp_Atk_2 = _numeric("Sp_Atk_2")
Sp_Def_1 = _numeric("Sp_Def_1")
Sp_Def_2 = _numeric("Sp_Def_2")
Speed_1 = _numeric("Speed_1")
Speed_2 = _numeric("Speed_2")

# Base categorical features. Each vocabulary is shared by the "_1" and "_2"
# variants of a column; the element order is kept exactly as it was because
# it determines the integer id assigned to every category.
_TYPE_1_VOCAB = [
    'Rock', 'Grass', 'Fairy', 'Fire', 'Bug', 'Psychic', 'Fighting',
    'Water', 'Normal', 'Ground', 'Electric', 'Dark', 'Ice', 'Dragon',
    'Steel', 'Ghost', 'Flying', 'Poison',
]
_TYPE_2_VOCAB = [
    'Ground', 'Fighting', 'Flying', 'None', 'Water', 'Electric', 'Dark',
    'Ice', 'Steel', 'Ghost', 'Rock', 'Fairy', 'Psychic', 'Poison',
    'Dragon', 'Grass', 'Fire', 'Bug', 'Normal',
]
_GENERATION_VOCAB = [2, 5, 1, 3, 4, 6]
_LEGENDARY_VOCAB = [0, 1]

_categorical = tf.feature_column.categorical_column_with_vocabulary_list
Type1_1 = _categorical('Type_1_1', _TYPE_1_VOCAB)
Type1_2 = _categorical('Type_1_2', _TYPE_1_VOCAB)
Type2_1 = _categorical('Type_2_1', _TYPE_2_VOCAB)
Type2_2 = _categorical('Type_2_2', _TYPE_2_VOCAB)
Generation_1 = _categorical('Generation_1', _GENERATION_VOCAB)
Generation_2 = _categorical('Generation_2', _GENERATION_VOCAB)
Legendary_1 = _categorical('Legendary_1', _LEGENDARY_VOCAB)
Legendary_2 = _categorical('Legendary_2', _LEGENDARY_VOCAB)

_numeric_columns = [
    HP_1, HP_2, Attack_1, Attack_2, Defense_1, Defense_2,
    Sp_Atk_1, Sp_Atk_2, Sp_Def_1, Sp_Def_2, Speed_1, Speed_2,
]
_categorical_columns = [
    Type1_1, Type1_2, Type2_1, Type2_2,
    Generation_1, Generation_2, Legendary_1, Legendary_2,
]

# The linear part takes the categorical columns directly; the DNN part gets
# each of them wrapped in an indicator column.
base_columns = _numeric_columns + _categorical_columns
deep_columns = _numeric_columns + [
    tf.feature_column.indicator_column(column) for column in _categorical_columns
]

# Combined linear + DNN classifier; checkpoints are written under model_dir.
model_dir = "model"
model = tf.estimator.DNNLinearCombinedClassifier(
    model_dir=model_dir,
    linear_feature_columns=base_columns,
    dnn_feature_columns=deep_columns,
    dnn_hidden_units=[100, 50, 25],
    dnn_optimizer=tf.train.ProximalAdagradOptimizer(learning_rate=0.1),
)

def input_fn(data_file, num_epochs, shuffle, batch_size, shuffle_buffer_size=10000):
    """Build the (features, labels) input tensors for training or evaluation.

    Each line of ``data_file`` is decoded as a CSV record laid out like the
    module-level ``col_list``. Columns 0, 1, 10 and 11 are decoded as strings,
    columns 8, 9, 18, 19 and 20 as integers, and every other column as a
    float; these dtypes come from the ``record_defaults`` passed to
    ``tf.decode_csv``.

    Args:
        data_file: Path of the CSV file to read.
        num_epochs: Number of times the dataset is repeated.
        shuffle: When true, records are shuffled before parsing. This flag
            was previously accepted but silently ignored.
        batch_size: Number of records per batch.
        shuffle_buffer_size: Size of the buffer used by ``Dataset.shuffle``
            when ``shuffle`` is true. A buffer at least as large as the file
            gives a uniform shuffle.

    Returns:
        A ``(features, labels)`` pair. ``features`` maps every name in
        ``col_list`` except ``"Output"`` to a batched tensor. ``labels`` is a
        boolean tensor that is true where the ``"Output"`` column equals 1.

    Raises:
        AssertionError: If ``data_file`` does not exist.
    """
    # Check once, up front, instead of from inside the per-record parser.
    assert tf.gfile.Exists(data_file), ('%s not found. Please make sure you have either run data_download.py or '
                                        'set both arguments --train_data and --test_data.' % data_file)
    print ("Parsing ", data_file)

    # The defaults fix the dtype of each decoded column: float unless
    # overridden below.
    records_defaults = [[1.0] for _ in range(ncolumns)]
    for index in (0, 1, 10, 11):
        records_defaults[index] = ['']
    for index in (8, 9, 18, 19, 20):
        records_defaults[index] = [1]

    def parse_csv(value):
        # Decode one CSV line and split the label column off the features.
        columns = tf.decode_csv(value, record_defaults=records_defaults)
        features = dict(zip(col_list, columns))
        labels = features.pop("Output")
        return features, tf.equal(labels, 1)

    # Extract lines from input files using the Dataset API.
    dataset = tf.data.TextLineDataset(data_file)
    if shuffle:
        # Shuffle raw lines before repeat() so each epoch is reshuffled and
        # epochs are not mixed together.
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
    dataset = dataset.map(parse_csv, num_parallel_calls=5)

    dataset = dataset.repeat(num_epochs)
    dataset = dataset.batch(batch_size)

    iterator = dataset.make_one_shot_iterator()
    features, labels = iterator.get_next()
    return features, labels

# Model training
# Hyper-parameters: batch size and number of passes over the training file.
batch_size = 500
num_epochs = 100

model.train(
    input_fn=lambda: input_fn("inputs/train_data.csv", num_epochs, True, batch_size))


def _evaluate_and_print(csv_path):
    # Run one unshuffled evaluation pass over csv_path and print each metric
    # as "name: value", ordered by metric name.
    metrics = model.evaluate(
        input_fn=lambda: input_fn(csv_path, 1, False, batch_size))
    for metric_name in sorted(metrics):
        print('%s: %s' % (metric_name, metrics[metric_name]))
    return metrics


# Evaluate the model on the validation set first, then on the training set.
results = _evaluate_and_print("inputs/val_data.csv")

print ("Results on training set")

results = _evaluate_and_print("inputs/train_data.csv")


#Model testing/prediction
def test_input_fn(data_file, num_epochs, batch_size):
    """Build the feature tensors used for prediction on an unlabeled CSV file.

    Each line of ``data_file`` is decoded as a CSV record laid out like the
    module-level ``test_col_list`` (the training layout without its last
    column). Columns 0, 1, 10 and 11 are decoded as strings, columns 8, 9, 18
    and 19 as integers, and every other column as a float.

    Args:
        data_file: Path of the CSV file to read.
        num_epochs: Number of times the dataset is repeated.
        batch_size: Number of records per batch.

    Returns:
        A dict mapping each name in ``test_col_list`` to a batched tensor.
    """
    def parse_csv(value):
        assert tf.gfile.Exists(data_file), ('%s not found. Please make sure you have either run data_download.py or '
                                            'set both arguments --train_data and --test_data.' % data_file)
        print ("Parsing test file: ", data_file)
        # Every column defaults to float; the table below overrides the
        # string and integer columns. There is no entry for the label column
        # because test data does not contain it.
        defaults = [[1.0] for _ in range(testcolumns)]
        overrides = {
            0: [''], 1: [''],
            8: [1], 9: [1],
            10: [''], 11: [''],
            18: [1], 19: [1],
        }
        for position, default in overrides.items():
            defaults[position] = default
        decoded = tf.decode_csv(value, record_defaults=defaults)
        return dict(zip(test_col_list, decoded))

    # Read lines, parse them, repeat and batch using the Dataset API.
    dataset = (
        tf.data.TextLineDataset(data_file)
        .map(parse_csv, num_parallel_calls=5)
        .repeat(num_epochs)
        .batch(batch_size)
    )
    return dataset.make_one_shot_iterator().get_next()

# Predictions for the test file, consumed one row at a time by the loop below.
pred_values = model.predict(
    input_fn=lambda: test_input_fn("inputs/test_data.csv", 1, batch_size))

# Read the tests file and start with the first pokemon as the prediction.
tests_data = pd.read_csv("data/tests.csv")
tests_data = tests_data.assign(Prediction=tests_data["First_pokemon"])

# Class id 0 selects the first pokemon of the row; any other id the second.
for row, prediction in enumerate(pred_values):
    if prediction["class_ids"] == 0:
        chosen_column = "First_pokemon"
    else:
        chosen_column = "Second_pokemon"
    tests_data.loc[row, "Prediction"] = tests_data.loc[row, chosen_column]

tests_data.to_csv("outputs/predictions.csv", index=None)