## Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras as kr
from tensorflow.keras.layers import Normalization, IntegerLookup, StringLookup

## Data

In [2]:
# Load the Pokémon table from CSV; the path is relative to the notebook's working directory.
dataframe = pd.read_csv("06_Data/pokemon.csv")

Shape:

In [3]:
# (number of rows, number of columns) of the loaded table.
dataframe.shape

(801, 41)

Visualization:

In [4]:
# Peek at one raw cell: the `abilities` entry at index label 0.
# Note that the value is a single string that merely looks like a list, not an actual list.
dataframe.abilities[0]

"['Overgrow', 'Chlorophyll']"

In [5]:
# List every available column so features and the target can be chosen from them.
dataframe.columns

Index(['abilities', 'against_bug', 'against_dark', 'against_dragon',
       'against_electric', 'against_fairy', 'against_fight', 'against_fire',
       'against_flying', 'against_ghost', 'against_grass', 'against_ground',
       'against_ice', 'against_normal', 'against_poison', 'against_psychic',
       'against_rock', 'against_steel', 'against_water', 'attack',
       'base_egg_steps', 'base_happiness', 'base_total', 'capture_rate',
       'classfication', 'defense', 'experience_growth', 'height_m', 'hp',
       'japanese_name', 'name', 'percentage_male', 'pokedex_number',
       'sp_attack', 'sp_defense', 'speed', 'type1', 'type2', 'weight_kg',
       'generation', 'is_legendary'],
      dtype='object')

Definimos qué queremos predecir y a partir de qué variables.  
Intentaremos predecir si un Pokémon es __legendario__ (columna `is_legendary`) a partir de __base_egg_steps__, __base_total__ y __experience_growth__.

Dividimos los datos en un conjunto de entrenamiento y otro de validación.

In [6]:
# Hold out a reproducible 20% random sample for validation; every row whose
# index label was not sampled stays in the training set.
df_val = dataframe.sample(frac=0.2, random_state=413)
df_train = dataframe.drop(index=df_val.index)

n_train, n_val = len(df_train), len(df_val)
print("Using %d samples for training and %d for validation" % (n_train, n_val))

Using 641 samples for training and 160 for validation


Convertimos los datos a un formato que TensorFlow pueda consumir (`tf.data.Dataset`).

In [7]:
def dataframe_to_dataset(
    dataframe,
    feature_columns=("base_egg_steps", "base_total", "experience_growth"),
    label_column="is_legendary",
):
    """Turn a pandas DataFrame into a shuffled ``tf.data.Dataset`` of (features, label) pairs.

    Args:
        dataframe: Source table. It must contain every name in
            ``feature_columns`` as well as ``label_column``. It is not modified.
        feature_columns: Names of the columns fed to the model as inputs.
            Defaults to the three columns this notebook trains on.
        label_column: Name of the column holding the value to predict.

    Returns:
        An unbatched dataset whose elements are
        ``(dict mapping feature name -> value, label)``, shuffled with a
        buffer as large as the number of rows.
    """
    # Work on an explicit copy of just the needed columns so that removing the
    # label column below cannot affect the caller's frame.
    selected = dataframe[[*feature_columns, label_column]].copy()
    labels = selected.pop(label_column)  # the value we want to predict
    ds = tf.data.Dataset.from_tensor_slices((dict(selected), labels))
    # A buffer covering every row shuffles the whole dataset; keep it >= 1 so
    # an empty frame does not request a zero-sized buffer.
    ds = ds.shuffle(buffer_size=max(len(selected), 1))
    return ds

# Convert the training and validation frames (in that order) into datasets.
ds_train, ds_val = (dataframe_to_dataset(frame) for frame in (df_train, df_val))


In [8]:
# Pull a single (features, label) element to sanity-check structure and dtypes.
for features, target in ds_train.take(1):
    print("Input:", features)
    print("Target:", target)

Input: {'base_egg_steps': <tf.Tensor: shape=(), dtype=int64, numpy=10240>, 'base_total': <tf.Tensor: shape=(), dtype=int64, numpy=360>, 'experience_growth': <tf.Tensor: shape=(), dtype=int64, numpy=1250000>}
Target: tf.Tensor(0, shape=(), dtype=int64)


Agrupamos los datasets en lotes (batches).

In [9]:
# Number of examples grouped into each batch.
BATCH_SIZE = 32

# Rebuild each dataset from its source frame before batching. Batching the
# existing `ds_train` / `ds_val` in place would batch already-batched data
# whenever this cell is run a second time.
ds_train = dataframe_to_dataset(df_train).batch(BATCH_SIZE)
ds_val = dataframe_to_dataset(df_val).batch(BATCH_SIZE)

## Procesamiento con Keras

In [10]:
def encode_numerical_feature(feature, name, dataset):
    """Normalize one numeric input using statistics learned from ``dataset``.

    Args:
        feature: The Keras input (symbolic tensor) to normalize.
        name: Key of this feature inside the dataset's feature dict.
        dataset: Dataset yielding ``(features dict, label)`` elements, used
            only to learn the normalization statistics.

    Returns:
        The output of the adapted normalization layer applied to ``feature``.
    """
    # Reduce the dataset to this feature alone, with a trailing axis added.
    column_ds = dataset.map(lambda inputs, _label: tf.expand_dims(inputs[name], -1))

    # Fit the layer on that column, then apply it to the model input.
    scaler = kr.layers.Normalization()
    scaler.adapt(column_ds)
    return scaler(feature)

def encode_categorical_feature(feature, name, dataset, is_string):
    """Encode one categorical input as a binary indicator vector.

    Args:
        feature: The Keras input (symbolic tensor) to encode.
        name: Key of this feature inside the dataset's feature dict.
        dataset: Dataset yielding ``(features dict, label)`` elements, used
            only to learn the vocabulary of possible values.
        is_string: ``True`` to use ``StringLookup`` (string categories),
            ``False`` to use ``IntegerLookup`` (integer categories).

    Returns:
        The output of the adapted lookup layer applied to ``feature``.
    """
    lookup_class = StringLookup if is_string else IntegerLookup
    # "multi_hot" is the current name of the mode formerly spelled "binary":
    # the layer emits an indicator vector over the vocabulary rather than
    # integer indices.
    lookup = lookup_class(output_mode="multi_hot")

    # Reduce the dataset to this feature alone, with a trailing axis added.
    feature_ds = dataset.map(lambda x, y: tf.expand_dims(x[name], -1))

    # Learn the set of possible values and give each one a fixed position.
    lookup.adapt(feature_ds)

    # Apply the learned vocabulary to the model input.
    encoded_feature = lookup(feature)
    return encoded_feature

## Modelo

In [11]:
# Datos numéricos
# Numeric model inputs, one value per example.
base_egg_steps = kr.Input(shape=(1,), name="base_egg_steps")
base_total = kr.Input(shape=(1,), name="base_total")
experience_growth = kr.Input(shape=(1,), name="experience_growth")

# Every input, in the order handed to the model.
all_inputs = [base_egg_steps, base_total, experience_growth]

# Normalized numeric inputs; statistics are learned from the training dataset.
base_egg_steps_encoded = encode_numerical_feature(base_egg_steps, "base_egg_steps", ds_train)
base_total_encoded = encode_numerical_feature(base_total, "base_total", ds_train)
experience_growth_encoded = encode_numerical_feature(experience_growth, "experience_growth", ds_train)

all_features = kr.layers.concatenate(
    [base_egg_steps_encoded, base_total_encoded, experience_growth_encoded]
)

# One hidden layer with dropout, then a sigmoid unit for the binary target.
hidden = kr.layers.Dense(32, activation="relu")(all_features)
hidden = kr.layers.Dropout(0.3)(hidden)
output = kr.layers.Dense(1, activation="sigmoid")(hidden)

model = kr.Model(inputs=all_inputs, outputs=output)
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

## Entrenamiento del Modelo

In [12]:
# Train for 100 epochs on the training batches, evaluating on the validation
# batches after each epoch.
model.fit(
    ds_train,
    validation_data=ds_val,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x1ed3ba01c10>

In [13]:
# Select the row(s) for one Pokémon by name to use as a prediction example.
pok = dataframe.loc[dataframe.name == "Tyranitar"]
if pok.empty:
    # Fail with a clear message instead of an opaque shape error inside predict().
    raise ValueError("No Pokemon named 'Tyranitar' was found in the dataframe.")

# Same three features the model was trained on.
sample = {
    "base_egg_steps": pok.base_egg_steps,
    "base_total": pok.base_total,
    "experience_growth": pok.experience_growth,
}

# Wrap each selected value in a list so every input tensor gets a leading batch axis.
input_dict = {name: tf.convert_to_tensor([value]) for name, value in sample.items()}
predictions = model.predict(input_dict)

# The sigmoid output is scaled by 100, so the message needs a literal percent sign.
print(
    "Hay un %.1f%% de posibilidades de que este Pokemon sea legendario." % (100 * predictions[0][0],)
)

Hay un 26.5 de posibilidades de que este Pokemon sea legendario.
