In [50]:
import os
import numpy as np 
import pandas as pd

from sklearn.model_selection import train_test_split

import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model

In [5]:
df_train = pd.read_csv("data/type_encodings.csv")
print(df_train.shape)
df_train.head()

(8447, 19)


Unnamed: 0,file,type_Bug,type_Dark,type_Dragon,type_Electric,type_Fairy,type_Fighting,type_Fire,type_Flying,type_Ghost,type_Grass,type_Ground,type_Ice,type_Normal,type_Poison,type_Psychic,type_Rock,type_Steel,type_Water
0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
2,3,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
3,4,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,5,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [7]:
target_cols = df_train.drop(['file'], axis=1).columns.to_list()

number_classes = len(target_cols)
image_size = 200
number_epochs = 5

batch_size = 16

target_cols

['type_Bug',
 'type_Dark',
 'type_Dragon',
 'type_Electric',
 'type_Fairy',
 'type_Fighting',
 'type_Fire',
 'type_Flying',
 'type_Ghost',
 'type_Grass',
 'type_Ground',
 'type_Ice',
 'type_Normal',
 'type_Poison',
 'type_Psychic',
 'type_Rock',
 'type_Steel',
 'type_Water']

In [39]:
def build_decoder(with_labels=True, target_size=(image_size, image_size)):
    def decode(path):
        try:
            file_bytes = tf.io.read_file(path)

            image = tf.image.decode_image(file_bytes, channels=3)
            image = tf.cast(image, tf.float32) / 255.0
            image = tf.image.resize(image, target_size)
            image = tf.keras.applications.efficientnet.preprocess_input(image)
        except:
            print(path)
        
        return image
    
    
    def decode_with_labels(path, label):
        return decode(path), label
    
    
    return decode_with_labels if with_labels else decode

In [40]:
def build_dataset(paths, labels=None, bsize=32, decode_function=None):
    if decode_function is None:
        decode_function = build_decoder(labels is not None)
    
    AUTO = tf.data.experimental.AUTOTUNE
    slices = paths if labels is None else (paths, labels)
    
    dataset = tf.data.Dataset.from_tensor_slices(slices)  
    dataset = dataset.map(decode_function, num_parallel_calls=AUTO)

    dataset = dataset.batch(bsize).prefetch(AUTO) 

    return dataset

In [70]:
load_dir = "data/images/"

paths = load_dir + df_train['file'] + '.png'

label_cols = df_train.columns[1:]
labels = df_train[label_cols].values

labels

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [71]:
train_paths, test_paths, train_labels, test_labels = train_test_split(paths[:1000], labels[:1000], test_size=0.3)

In [72]:
decoder = build_decoder(with_labels=True, target_size=(image_size, image_size))

data_train = build_dataset(train_paths, train_labels, bsize=batch_size, decode_function=decoder)
data_test = build_dataset(test_paths, test_labels, bsize=batch_size, decode_function=decoder)

Tensor("args_0:0", shape=(), dtype=string)
Tensor("args_0:0", shape=(), dtype=string)


In [73]:
net = EfficientNetB0(include_top=False, input_shape=(image_size, image_size, 3), weights='imagenet')

output = net.output
output = GlobalAveragePooling2D()(output)
output = Dropout(0.5)(output)
output = Dense(number_classes, activation='sigmoid')(output)

model = Model(inputs=net.input, outputs=output)
model.compile(Adam(lr=0.0005), loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(multi_label=True, name='auc')])

In [74]:
steps_per_epoch = train_paths.shape[0] // batch_size

In [75]:
history = model.fit(data_train,                      
                    validation_data=data_test,                                       
                    epochs=number_epochs,
                    steps_per_epoch=steps_per_epoch,
                    shuffle=True,
                    verbose=1)

InvalidArgumentError:  Cannot add tensor to the batch: number of elements does not match. Shapes are: [tensor]: [120,120,3], [batch]: [215,215,3]
	 [[node IteratorGetNext (defined at <ipython-input-75-873fbb50d89f>:1) ]] [Op:__inference_train_function_66790]

Function call stack:
train_function


In [57]:
plt.rcParams.update({'font.size': 16})
hist = pd.DataFrame(history.history)
fig, (ax1, ax2) = plt.subplots(figsize=(12,12),nrows=2, ncols=1)

hist['loss'].plot(ax=ax1,c='k',label='training loss')
hist['val_loss'].plot(ax=ax1,c='r',linestyle='--', label='validation loss')
ax1.legend()

hist['auc'].plot(ax=ax2,c='k',label='training AUC')
hist['val_auc'].plot(ax=ax2,c='r',linestyle='--',label='validation AUC')
ax2.legend()

plt.show()

NameError: name 'history' is not defined