In [1]:
%matplotlib inline

In [107]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os

import tensorflow as tf

from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Conv2D, MaxPool2D, Flatten, Add, Embedding, LSTM, GlobalAvgPool2D, Concatenate, UpSampling2D, Convolution2DTranspose

from tensorflow.io import read_file
from tensorflow.image import decode_jpeg, resize

from tensorflow.keras.applications import vgg19
from tensorflow.keras.callbacks import TensorBoard

# Exam Preparation

In [3]:
# Constants
BASE_DIR = os.path.join(".", "data")
IMAGE_SIZE = (240, 240)
IMAGE_SIZE_CHANNELS = (240, 240, 3)
BATCH_SIZE = 4
NUM_CLASSES = 12

VOCABULARY_SIZE = 14000
MAX_SEQUENCE_LENGTH = 10

## 1. Data preparation

In [4]:
for dir_name, directory_names, filenames in os.walk("./data"):
    print(dir_name, "|||", directory_names, "<<<", filenames)

./data ||| ['cats', 'dogs'] <<< []
./data\cats ||| [] <<< ['cat.1.jpg', 'cat.10.jpg', 'cat.100.jpg', 'cat.1000.jpg', 'cat.1001.jpg', 'cat.1002.jpg', 'cat.1003.jpg', 'cat.1004.jpg', 'cat.1005.jpg', 'cat.1006.jpg', 'cat.1007.jpg', 'cat.1008.jpg', 'cat.1009.jpg', 'cat.101.jpg', 'cat.1010.jpg', 'cat.1011.jpg', 'cat.1012.jpg', 'cat.1013.jpg', 'cat.1014.jpg', 'cat.1015.jpg', 'cat.1016.jpg', 'cat.1017.jpg', 'cat.1018.jpg', 'cat.1019.jpg', 'cat.102.jpg', 'cat.1020.jpg', 'cat.1021.jpg', 'cat.1022.jpg', 'cat.1023.jpg', 'cat.1024.jpg', 'cat.1025.jpg', 'cat.1026.jpg', 'cat.1027.jpg', 'cat.1028.jpg', 'cat.1029.jpg', 'cat.103.jpg', 'cat.1030.jpg', 'cat.1031.jpg', 'cat.1032.jpg', 'cat.1033.jpg', 'cat.1034.jpg', 'cat.1035.jpg', 'cat.1036.jpg', 'cat.1037.jpg', 'cat.1038.jpg', 'cat.1039.jpg', 'cat.104.jpg', 'cat.1040.jpg', 'cat.1041.jpg', 'cat.1042.jpg', 'cat.1043.jpg', 'cat.1044.jpg', 'cat.1045.jpg', 'cat.1046.jpg', 'cat.1047.jpg', 'cat.1048.jpg', 'cat.1049.jpg', 'cat.105.jpg', 'cat.1050.jpg', 'cat.105

In [5]:
BASE_DIR = os.path.join(".", "data")
image_classes = os.listdir(BASE_DIR)

all_filenames = []
all_classses = []

for image_class in image_classes:
    filenames = os.listdir(os.path.join(BASE_DIR, image_class))
    filenames = [os.path.join(BASE_DIR, image_class, filename) for filename in filenames]
    
    all_filenames.extend(filenames)
    all_classses.extend([image_class[:-1]] * len(filenames))

In [6]:
all_filenames

['.\\data\\cats\\cat.1.jpg',
 '.\\data\\cats\\cat.10.jpg',
 '.\\data\\cats\\cat.100.jpg',
 '.\\data\\cats\\cat.1000.jpg',
 '.\\data\\cats\\cat.1001.jpg',
 '.\\data\\cats\\cat.1002.jpg',
 '.\\data\\cats\\cat.1003.jpg',
 '.\\data\\cats\\cat.1004.jpg',
 '.\\data\\cats\\cat.1005.jpg',
 '.\\data\\cats\\cat.1006.jpg',
 '.\\data\\cats\\cat.1007.jpg',
 '.\\data\\cats\\cat.1008.jpg',
 '.\\data\\cats\\cat.1009.jpg',
 '.\\data\\cats\\cat.101.jpg',
 '.\\data\\cats\\cat.1010.jpg',
 '.\\data\\cats\\cat.1011.jpg',
 '.\\data\\cats\\cat.1012.jpg',
 '.\\data\\cats\\cat.1013.jpg',
 '.\\data\\cats\\cat.1014.jpg',
 '.\\data\\cats\\cat.1015.jpg',
 '.\\data\\cats\\cat.1016.jpg',
 '.\\data\\cats\\cat.1017.jpg',
 '.\\data\\cats\\cat.1018.jpg',
 '.\\data\\cats\\cat.1019.jpg',
 '.\\data\\cats\\cat.102.jpg',
 '.\\data\\cats\\cat.1020.jpg',
 '.\\data\\cats\\cat.1021.jpg',
 '.\\data\\cats\\cat.1022.jpg',
 '.\\data\\cats\\cat.1023.jpg',
 '.\\data\\cats\\cat.1024.jpg',
 '.\\data\\cats\\cat.1025.jpg',
 '.\\data\\cats\

In [7]:
list(zip(all_filenames, all_classses))

[('.\\data\\cats\\cat.1.jpg', 'cat'),
 ('.\\data\\cats\\cat.10.jpg', 'cat'),
 ('.\\data\\cats\\cat.100.jpg', 'cat'),
 ('.\\data\\cats\\cat.1000.jpg', 'cat'),
 ('.\\data\\cats\\cat.1001.jpg', 'cat'),
 ('.\\data\\cats\\cat.1002.jpg', 'cat'),
 ('.\\data\\cats\\cat.1003.jpg', 'cat'),
 ('.\\data\\cats\\cat.1004.jpg', 'cat'),
 ('.\\data\\cats\\cat.1005.jpg', 'cat'),
 ('.\\data\\cats\\cat.1006.jpg', 'cat'),
 ('.\\data\\cats\\cat.1007.jpg', 'cat'),
 ('.\\data\\cats\\cat.1008.jpg', 'cat'),
 ('.\\data\\cats\\cat.1009.jpg', 'cat'),
 ('.\\data\\cats\\cat.101.jpg', 'cat'),
 ('.\\data\\cats\\cat.1010.jpg', 'cat'),
 ('.\\data\\cats\\cat.1011.jpg', 'cat'),
 ('.\\data\\cats\\cat.1012.jpg', 'cat'),
 ('.\\data\\cats\\cat.1013.jpg', 'cat'),
 ('.\\data\\cats\\cat.1014.jpg', 'cat'),
 ('.\\data\\cats\\cat.1015.jpg', 'cat'),
 ('.\\data\\cats\\cat.1016.jpg', 'cat'),
 ('.\\data\\cats\\cat.1017.jpg', 'cat'),
 ('.\\data\\cats\\cat.1018.jpg', 'cat'),
 ('.\\data\\cats\\cat.1019.jpg', 'cat'),
 ('.\\data\\cats\\cat.1

In [8]:
dataset = tf.data.Dataset.from_tensor_slices([all_filenames, all_classses])

In [9]:
for item in dataset.take(4):
    print(item)

tf.Tensor(
[b'.\\data\\cats\\cat.1.jpg' b'.\\data\\cats\\cat.10.jpg'
 b'.\\data\\cats\\cat.100.jpg' ... b'.\\data\\dogs\\dog.998.jpg'
 b'.\\data\\dogs\\dog.999.jpg' b'.\\data\\dogs\\_DS_Store'], shape=(8007,), dtype=string)
tf.Tensor([b'cat' b'cat' b'cat' ... b'dog' b'dog' b'dog'], shape=(8007,), dtype=string)


In [10]:
dataset = tf.data.Dataset.from_tensors([all_filenames, all_classses])

In [11]:
for item in dataset.take(4):
    print(item)

tf.Tensor(
[[b'.\\data\\cats\\cat.1.jpg' b'.\\data\\cats\\cat.10.jpg'
  b'.\\data\\cats\\cat.100.jpg' ... b'.\\data\\dogs\\dog.998.jpg'
  b'.\\data\\dogs\\dog.999.jpg' b'.\\data\\dogs\\_DS_Store']
 [b'cat' b'cat' b'cat' ... b'dog' b'dog' b'dog']], shape=(2, 8007), dtype=string)


In [12]:
dataset = tf.data.Dataset.from_tensor_slices(all_filenames)

In [13]:
for item in dataset.take(4):
    print(item)

tf.Tensor(b'.\\data\\cats\\cat.1.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.10.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.100.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.1000.jpg', shape=(), dtype=string)


In [14]:
BASE_DIR = os.path.join(".", "data")
image_classes = os.listdir(BASE_DIR)

all_filenames = []

for image_class in image_classes:
    filenames = os.listdir(os.path.join(BASE_DIR, image_class))
    filenames = [os.path.join(BASE_DIR, image_class, filename) for filename in filenames]
    
    all_filenames.extend(filenames)

In [15]:
dataset = tf.data.Dataset.from_tensor_slices(all_filenames)

In [16]:
for item in dataset.take(40):
    print(item)

tf.Tensor(b'.\\data\\cats\\cat.1.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.10.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.100.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.1000.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.1001.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.1002.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.1003.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.1004.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.1005.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.1006.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.1007.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.1008.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.1009.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.101.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\data\\cats\\cat.1010.jpg', shape=(), dtype=string)
tf.Tensor(b'.\\da

In [17]:
def process_image(image_filename):
    print(image_filename)
    file = read_file(image_filename)
#    image = decode_jpeg(file)
    return file

In [18]:
dataset = tf.data.Dataset.from_tensor_slices(all_filenames)
dataset = dataset.map(process_image)

Tensor("args_0:0", shape=(), dtype=string)


In [19]:
for item in dataset.take(1):
    print(item)

tf.Tensor(b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\n\x07\x07\x08\x07\x06\n\x08\x08\x08\x0b\n\n\x0b\x0e\x18\x10\x0e\r\r\x0e\x1d\x15\x16\x11\x18#\x1f%$"\x1f"!&+7/&)4)!"0A149;>>>%.DIC<H7=>;\xff\xdb\x00C\x01\n\x0b\x0b\x0e\r\x0e\x1c\x10\x10\x1c;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\xff\xc0\x00\x11\x08\x01\x18\x01,\x03\x01"\x00\x02\x11\x01\x03\x11\x01\xff\xc4\x00\x1f\x00\x00\x01\x05\x01\x01\x01\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\xff\xc4\x00\xb5\x10\x00\x02\x01\x03\x03\x02\x04\x03\x05\x05\x04\x04\x00\x00\x01}\x01\x02\x03\x00\x04\x11\x05\x12!1A\x06\x13Qa\x07"q\x142\x81\x91\xa1\x08#B\xb1\xc1\x15R\xd1\xf0$3br\x82\t\n\x16\x17\x18\x19\x1a%&\'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz\x83\x84\x85\x86\x87\x88\x89\x8a\x92\x93\x94\x95\x96\x97\x98\x99\x9a\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9

In [20]:
BASE_DIR = os.path.join(".", "data")
image_classes = os.listdir(BASE_DIR)
class_id_to_class_name = dict(enumerate(image_classes))
class_name_to_class_id = {v: k for k, v in class_id_to_class_name.items()}

In [21]:
class_id_to_class_name, class_name_to_class_id

({0: 'cats', 1: 'dogs'}, {'cats': 0, 'dogs': 1})

In [22]:
all_filenames = []
all_classes = []

for image_class in image_classes:
    filenames = os.listdir(os.path.join(BASE_DIR, image_class))
    filenames = [os.path.join(BASE_DIR, image_class, filename) for filename in filenames]
    
    all_filenames.extend(filenames)
    all_classes.extend([class_name_to_class_id[image_class]] * len(filenames))

In [23]:
all_classes[-10:]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [24]:
def process_image(image_filename, image_class):
    file = read_file(image_filename)
    image = decode_jpeg(file)
    
    image = resize(image, IMAGE_SIZE)
    image /= 256.0
    
    return image, image_class

In [25]:
dataset = tf.data.Dataset.from_tensor_slices((all_filenames, all_classes))
dataset = dataset.shuffle(buffer_size = len(all_filenames), seed = 42)
dataset = dataset.map(process_image)
dataset = dataset.batch(BATCH_SIZE)
dataset = dataset.prefetch(tf.data.AUTOTUNE)
dataset = dataset.repeat()

In [26]:
image_filename = ".\\data\\cats\\cat.1.jpg"

In [27]:
os.path.split(image_filename)

('.\\data\\cats', 'cat.1.jpg')

In [28]:
os.path.split(image_filename)[1]

'cat.1.jpg'

In [29]:
os.path.split(image_filename)[1].split(".")

['cat', '1', 'jpg']

In [30]:
os.path.split(image_filename)[1].split(".")[0]

'cat'

In [31]:
image_filename.split(os.sep)[-1].split(".")[0]

'cat'

In [32]:
list(zip(all_filenames, all_classes))

[('.\\data\\cats\\cat.1.jpg', 0),
 ('.\\data\\cats\\cat.10.jpg', 0),
 ('.\\data\\cats\\cat.100.jpg', 0),
 ('.\\data\\cats\\cat.1000.jpg', 0),
 ('.\\data\\cats\\cat.1001.jpg', 0),
 ('.\\data\\cats\\cat.1002.jpg', 0),
 ('.\\data\\cats\\cat.1003.jpg', 0),
 ('.\\data\\cats\\cat.1004.jpg', 0),
 ('.\\data\\cats\\cat.1005.jpg', 0),
 ('.\\data\\cats\\cat.1006.jpg', 0),
 ('.\\data\\cats\\cat.1007.jpg', 0),
 ('.\\data\\cats\\cat.1008.jpg', 0),
 ('.\\data\\cats\\cat.1009.jpg', 0),
 ('.\\data\\cats\\cat.101.jpg', 0),
 ('.\\data\\cats\\cat.1010.jpg', 0),
 ('.\\data\\cats\\cat.1011.jpg', 0),
 ('.\\data\\cats\\cat.1012.jpg', 0),
 ('.\\data\\cats\\cat.1013.jpg', 0),
 ('.\\data\\cats\\cat.1014.jpg', 0),
 ('.\\data\\cats\\cat.1015.jpg', 0),
 ('.\\data\\cats\\cat.1016.jpg', 0),
 ('.\\data\\cats\\cat.1017.jpg', 0),
 ('.\\data\\cats\\cat.1018.jpg', 0),
 ('.\\data\\cats\\cat.1019.jpg', 0),
 ('.\\data\\cats\\cat.102.jpg', 0),
 ('.\\data\\cats\\cat.1020.jpg', 0),
 ('.\\data\\cats\\cat.1021.jpg', 0),
 ('.\\dat

In [33]:
filenames_dataset = tf.data.Dataset.from_tensor_slices(all_filenames[:5000]).shuffle(5000)

In [34]:
from sklearn.model_selection import train_test_split

In [35]:
train_test_split(all_filenames, stratify = all_classes)

[['.\\data\\dogs\\dog.2877.jpg',
  '.\\data\\cats\\cat.3268.jpg',
  '.\\data\\dogs\\dog.3654.jpg',
  '.\\data\\cats\\cat.2352.jpg',
  '.\\data\\dogs\\dog.369.jpg',
  '.\\data\\dogs\\dog.3044.jpg',
  '.\\data\\dogs\\dog.2574.jpg',
  '.\\data\\dogs\\dog.3258.jpg',
  '.\\data\\dogs\\dog.3479.jpg',
  '.\\data\\dogs\\dog.1198.jpg',
  '.\\data\\cats\\cat.250.jpg',
  '.\\data\\cats\\_DS_Store',
  '.\\data\\cats\\cat.2267.jpg',
  '.\\data\\dogs\\dog.3307.jpg',
  '.\\data\\cats\\cat.914.jpg',
  '.\\data\\cats\\cat.3130.jpg',
  '.\\data\\dogs\\dog.3607.jpg',
  '.\\data\\dogs\\dog.1563.jpg',
  '.\\data\\cats\\cat.2775.jpg',
  '.\\data\\dogs\\dog.3595.jpg',
  '.\\data\\dogs\\dog.3067.jpg',
  '.\\data\\dogs\\dog.831.jpg',
  '.\\data\\cats\\cat.158.jpg',
  '.\\data\\dogs\\dog.118.jpg',
  '.\\data\\cats\\cat.878.jpg',
  '.\\data\\cats\\cat.3182.jpg',
  '.\\data\\dogs\\dog.2046.jpg',
  '.\\data\\dogs\\dog.396.jpg',
  '.\\data\\cats\\cat.1523.jpg',
  '.\\data\\dogs\\dog.3955.jpg',
  '.\\data\\cats\\cat

## 2. Models

In [36]:
def conv_pooling_block(input_layer, num_filters, filter_size = (3, 3), name = "block1"):
    batch_norm = BatchNormalization(name = name + "_bn")(input_layer)
    conv_layer1 = Conv2D(filters = num_filters, kernel_size = filter_size, padding = "same", activation = "relu", name = name + "_conv1")(batch_norm)
    conv_layer2 = Conv2D(filters = num_filters, kernel_size = filter_size, padding = "same", activation = "relu", name = name + "_conv2")(conv_layer1)
    max_pooling = MaxPool2D(name = name + "_pool")(conv_layer2)
    
    return max_pooling

In [37]:
input_layer = Input(shape = IMAGE_SIZE_CHANNELS)
block1 = conv_pooling_block(input_layer, 32, name = "first_block")
block2 = conv_pooling_block(block1, 16, name = "second_block")

In [38]:
cnn = Model(inputs = input_layer, outputs = block2)

In [39]:
cnn.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 240, 240, 3)]     0         
                                                                 
 first_block_bn (BatchNormal  (None, 240, 240, 3)      12        
 ization)                                                        
                                                                 
 first_block_conv1 (Conv2D)  (None, 240, 240, 32)      896       
                                                                 
 first_block_conv2 (Conv2D)  (None, 240, 240, 32)      9248      
                                                                 
 first_block_pool (MaxPoolin  (None, 120, 120, 32)     0         
 g2D)                                                            
                                                                 
 second_block_bn (BatchNorma  (None, 120, 120, 32)     128   

In [85]:
pooling = GlobalAvgPool2D()(cnn.output)
classification = Dense(1, activation = "sigmoid")(pooling)

In [88]:
model = Model(inputs = cnn.input, outputs = classification).summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 240, 240, 3)]     0         
                                                                 
 first_block_bn (BatchNormal  (None, 240, 240, 3)      12        
 ization)                                                        
                                                                 
 first_block_conv1 (Conv2D)  (None, 240, 240, 32)      896       
                                                                 
 first_block_conv2 (Conv2D)  (None, 240, 240, 32)      9248      
                                                                 
 first_block_pool (MaxPoolin  (None, 120, 120, 32)     0         
 g2D)                                                            
                                                                 
 second_block_bn (BatchNorma  (None, 120, 120, 32)     128 

In [90]:
model = Model(inputs = cnn.input, outputs = classification)

In [92]:
model.compile(loss = "binary_crossentropy", optimizer = "adam")

In [95]:
for _ in dataset.take(5):
    print(_)

(<tf.Tensor: shape=(4, 240, 240, 3), dtype=float32, numpy=
array([[[[0.9005534 , 0.81852216, 0.7521159 ],
         [0.9080098 , 0.8259786 , 0.7595723 ],
         [0.9034831 , 0.82145184, 0.7550456 ],
         ...,
         [0.87890625, 0.80078125, 0.7578125 ],
         [0.8828125 , 0.8046875 , 0.76171875],
         [0.8828125 , 0.8046875 , 0.76171875]],

        [[0.9005534 , 0.81852216, 0.7521159 ],
         [0.90297854, 0.8209473 , 0.75454104],
         [0.90021163, 0.8181804 , 0.75177413],
         ...,
         [0.87890625, 0.80078125, 0.7578125 ],
         [0.8828125 , 0.8046875 , 0.76171875],
         [0.8828125 , 0.8046875 , 0.76171875]],

        [[0.9015062 , 0.81947494, 0.7530687 ],
         [0.89835614, 0.8163249 , 0.74991864],
         [0.8941244 , 0.81209314, 0.7456869 ],
         ...,
         [0.87890625, 0.80078125, 0.7578125 ],
         [0.87890625, 0.80078125, 0.7578125 ],
         [0.87890625, 0.80078125, 0.7578125 ]],

        ...,

        [[0.30053714, 0.3278809 ,

In [110]:
model.fit(dataset, steps_per_epoch = 10, epochs = 10, callbacks = [TensorBoard()])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x259a53ff7c0>

In [115]:
with tf.summary.create_file_writer(logdir = "logs").as_default():
    tf.summary.image()

TypeError: image() missing 2 required positional arguments: 'name' and 'data'

In [104]:
len(filenames) // 4

1001

In [101]:
input_layer = Input(shape = IMAGE_SIZE_CHANNELS)
conv_input = Conv2D(filters = 32, kernel_size = (3, 3), padding = "same")(input_layer)

def residual_block(input_layer):
    conv1 = Conv2D(filters = 32, kernel_size = (3, 3), padding = "same", activation = "relu")(input_layer)
    conv2 = Conv2D(filters = 32, kernel_size = (3, 3), padding = "same", activation = "relu")(conv1)
    conv3 = Conv2D(filters = 32, kernel_size = (3, 3), padding = "same", activation = "relu")(conv2)

    residual_add = Add()([conv_input, conv3])
    
    return residual_add

In [41]:
residual_block(conv_input)

<KerasTensor: shape=(None, 240, 240, 32) dtype=float32 (created by layer 'add')>

## 3. Heterogenous data and outputs

In [42]:
image_input = np.random.normal(size = (4, 240, 240, 3))

In [43]:
text_input = []
for i in range(4):
    random_length = np.random.choice(range(3, 9))
    text_input.append(np.random.choice(range(14000), size = random_length))

In [44]:
text_input_padded = []
for el in text_input:
    pad_length = (0, MAX_SEQUENCE_LENGTH - len(el))
    text_input_padded.append(np.pad(el, pad_length))
text_input_padded = np.array(text_input_padded)    

In [45]:
image_input.shape, text_input_padded.shape

((4, 240, 240, 3), (4, 10))

In [46]:
tf.keras.backend.clear_session()

In [47]:
image_part = Sequential([
    Input(shape = image_input.shape[1:]),
    Conv2D(32, 3, padding = "same", activation = "relu"),
    MaxPool2D(),
    Conv2D(16, 3, padding = "same", activation = "relu"),
    MaxPool2D(),
    Conv2D(16, 3, padding = "same", activation = "relu"),
    MaxPool2D()
])

In [48]:
text_part = Sequential([
    Input(shape = (MAX_SEQUENCE_LENGTH, )),
    Embedding(input_dim = 14000, output_dim = 32),
    LSTM(8, return_sequences = True),
    LSTM(4, return_sequences = True)
])

In [49]:
image_part.output_shape, text_part.output_shape

((None, 30, 30, 16), (None, 10, 4))

In [50]:
img_pooling = GlobalAvgPool2D()(image_part.output)

In [51]:
text_flatten = Flatten()(text_part.output)

In [52]:
concat = Concatenate()([img_pooling, text_flatten])

In [53]:
dense1 = Dense(10, activation = "relu")(concat)
dense2 = Dense(5, activation = "relu")(dense1)
output_layer = Dense(NUM_CLASSES, activation = "softmax")(dense2)

In [54]:
output_layer

<KerasTensor: shape=(None, 12) dtype=float32 (created by layer 'dense_2')>

In [55]:
heterogenous_model = Model(inputs = [image_part.input, text_part.input], outputs = [output_layer])

In [56]:
heterogenous_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 240, 240, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 240, 240, 32  896         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 max_pooling2d (MaxPooling2D)   (None, 120, 120, 32  0           ['conv2d[0][0]']                 
                                )                                                             

In [57]:
heterogenous_model.input_shape

[(None, 240, 240, 3), (None, 10)]

In [58]:
image_autoencoder = Sequential([
    Input(shape = image_input.shape[1:]),
    Conv2D(32, 3, padding = "same", activation = "relu"),
    MaxPool2D(),
    Conv2D(16, 3, padding = "same", activation = "relu"),
    MaxPool2D(),
    Conv2D(16, 3, padding = "same", activation = "relu"),
    MaxPool2D(),
    
    UpSampling2D(),
    Convolution2DTranspose(16, 3, padding = "same", activation = "relu"),
    UpSampling2D(),
    Convolution2DTranspose(16, 3, padding = "same", activation = "relu"),
    UpSampling2D(),
    Convolution2DTranspose(32, 3, padding = "same", activation = "relu"),
    Conv2D(3, 3, padding = "same", activation = "sigmoid")
])

In [59]:
image_autoencoder.compile(loss = "mse")
image_autoencoder.fit(image_input, image_input)



<keras.callbacks.History at 0x2599df91e50>

In [60]:
list(enumerate(image_autoencoder.layers))

[(0, <keras.layers.convolutional.conv2d.Conv2D at 0x2599df3f520>),
 (1, <keras.layers.pooling.max_pooling2d.MaxPooling2D at 0x2599df402b0>),
 (2, <keras.layers.convolutional.conv2d.Conv2D at 0x2599df3fd30>),
 (3, <keras.layers.pooling.max_pooling2d.MaxPooling2D at 0x2599df3f5e0>),
 (4, <keras.layers.convolutional.conv2d.Conv2D at 0x2599df4c2e0>),
 (5, <keras.layers.pooling.max_pooling2d.MaxPooling2D at 0x2599df3fc70>),
 (6, <keras.layers.reshaping.up_sampling2d.UpSampling2D at 0x2599df4c3a0>),
 (7,
  <keras.layers.convolutional.conv2d_transpose.Conv2DTranspose at 0x2599df4cdf0>),
 (8, <keras.layers.reshaping.up_sampling2d.UpSampling2D at 0x2599df4cdc0>),
 (9,
  <keras.layers.convolutional.conv2d_transpose.Conv2DTranspose at 0x2599df4e640>),
 (10, <keras.layers.reshaping.up_sampling2d.UpSampling2D at 0x2599df4e610>),
 (11,
  <keras.layers.convolutional.conv2d_transpose.Conv2DTranspose at 0x2599df4ee50>),
 (12, <keras.layers.convolutional.conv2d.Conv2D at 0x2599df4ef10>)]

In [61]:
Model(inputs = image_autoencoder.input, outputs = image_autoencoder.layers[5].output).summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 240, 240, 3)]     0         
                                                                 
 conv2d_3 (Conv2D)           (None, 240, 240, 32)      896       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 120, 120, 32)     0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 120, 120, 16)      4624      
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 60, 60, 16)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 60, 60, 16)        2320

In [62]:
class CustomLoss(tf.keras.losses.Loss):
    def __init__(self, regression_loss_coef, classification_loss_coef):
        self.regression_loss_coef = regression_loss_coef
        self.classification_loss_coef = classification_loss_coef
        
    def call(self, y_true, y_pred):
        regression_loss = tf.keras.losses.MSE(y_true[:5], y_pred[:5])
        classification_loss = tf.keras.losses.SparseCategoricalCrossentropy(y_true[6:], y_pred[6:])
        
        return self.regression_loss_coef * regression_loss + self.classification_loss_coef * classification_loss

## 4. Model monitoring

In [63]:
tf.keras.backend.clear_session()

In [64]:
model = vgg19.VGG19()

In [65]:
[tf.io.decode_jpeg(tf.io.read_file(f"./data/cats/cat.{i}.jpg")) for i in (42, 380, 1555, 12, 140)]

[<tf.Tensor: shape=(173, 237, 3), dtype=uint8, numpy=
 array([[[213, 211, 222],
         [215, 213, 224],
         [217, 217, 225],
         ...,
         [ 84,  82,  87],
         [ 73,  71,  76],
         [ 63,  61,  66]],
 
        [[214, 212, 223],
         [214, 214, 224],
         [215, 215, 223],
         ...,
         [ 63,  62,  67],
         [ 71,  70,  75],
         [ 86,  84,  89]],
 
        [[218, 218, 226],
         [218, 218, 226],
         [219, 219, 227],
         ...,
         [ 51,  52,  56],
         [ 60,  61,  65],
         [ 68,  67,  73]],
 
        ...,
 
        [[228, 233, 239],
         [228, 233, 239],
         [226, 231, 237],
         ...,
         [190, 198, 209],
         [191, 199, 210],
         [193, 201, 212]],
 
        [[226, 231, 237],
         [226, 231, 237],
         [225, 230, 236],
         ...,
         [185, 193, 204],
         [185, 193, 204],
         [187, 195, 206]],
 
        [[225, 229, 238],
         [225, 229, 238],
         [224,

In [71]:
model.input

<KerasTensor: shape=(None, 224, 224, 3) dtype=float32 (created by layer 'input_1')>

In [76]:
cats = [tf.io.decode_jpeg(tf.io.read_file(f"./data/cats/cat.{i}.jpg")) for i in (42, 380, 1555, 12, 140)]
cats = [resize(cat, (224, 224)) for cat in cats]
cats = [tf.cast(cat, tf.uint8) for cat in cats]
cats = [vgg19.preprocess_input(cat) for cat in cats]
cats

[<tf.Tensor: shape=(224, 224, 3), dtype=uint8, numpy=
 array([[[119,  95,  90],
         [121,  97,  92],
         [122, 101,  94],
         ...,
         [239, 221, 216],
         [229, 211, 206],
         [219, 201, 196]],
 
        [[119,  95,  90],
         [120,  97,  91],
         [121, 100,  92],
         ...,
         [226, 209, 203],
         [228, 210, 204],
         [233, 215, 210]],
 
        [[121,  98,  92],
         [121,  99,  92],
         [122, 101,  93],
         ...,
         [215, 198, 191],
         [222, 205, 198],
         [234, 216, 210]],
 
        ...,
 
        [[134, 115, 103],
         [134, 115, 103],
         [133, 114, 102],
         ...,
         [103,  79,  64],
         [103,  79,  64],
         [105,  81,  66]],
 
        [[134, 114, 102],
         [134, 114, 102],
         [133, 113, 101],
         ...,
         [100,  76,  61],
         [ 99,  75,  60],
         [101,  77,  62]],
 
        [[135, 113, 102],
         [134, 112, 101],
         [133,

In [80]:
model.predict(tf.stack(cats, axis = 0))



array([[4.30174829e-08, 6.46426124e-05, 1.23978708e-07, ...,
        1.75667651e-07, 1.27641442e-06, 5.94823796e-05],
       [3.24881511e-11, 6.30933386e-08, 1.62066147e-08, ...,
        2.33592659e-11, 9.91965337e-08, 1.57813265e-04],
       [1.41573313e-08, 7.20247219e-04, 3.25471910e-06, ...,
        5.18910337e-09, 1.40754003e-06, 1.11382338e-04],
       [3.46929811e-11, 8.34373353e-08, 1.90765412e-08, ...,
        1.54994229e-11, 6.40622932e-09, 1.02741515e-05],
       [2.90286721e-07, 5.47995005e-05, 6.15489262e-06, ...,
        2.45058203e-07, 4.38198913e-06, 8.62603265e-05]], dtype=float32)