In [76]:
import numpy as np
import pydot
import graphviz
from tensorflow.keras.applications.efficientnet import EfficientNetB0
from tensorflow.keras.preprocessing import image
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import AveragePooling2D, GlobalAveragePooling2D, Dense, Input
from tensorflow.keras.layers import Embedding, LSTM, Add

In [72]:
!pip install pydot

Collecting pydot
  Downloading pydot-1.4.2-py2.py3-none-any.whl (21 kB)
Installing collected packages: pydot
Successfully installed pydot-1.4.2


In [73]:
!pip install graphviz

Collecting graphviz
  Using cached graphviz-0.20-py3-none-any.whl (46 kB)
Installing collected packages: graphviz
Successfully installed graphviz-0.20


## Build CNN model with Pooling and Dense layers

### Test on one image

In [45]:
# Relative path of the single sample image that is loaded in the next cell.
img_path = '../raw_data/images/10815824_2997e03d76.jpg'

In [46]:
# Load the sample image resized to 256x256 (the spatial size of the CNN input)
# and convert the PIL image into a NumPy array.
# target_size takes (img_height, img_width) only -- the channel count is not
# part of it.
img = image.load_img(img_path, target_size=(256, 256))
x = image.img_to_array(img)

In [47]:
# Add the leading batch axis: (256, 256, 3) -> (1, 256, 256, 3).
# Guarded on the rank so that re-running this cell does not keep stacking
# extra leading axes onto x.
if x.ndim == 3:
    x = np.expand_dims(x, axis=0)
x.shape

(1, 256, 256, 3)

### CNN Model layers

In [37]:
# Image-branch input: one 256x256, 3-channel image per sample (channels last).
inputs1 = Input(shape=(256, 256, 3))

In [38]:
# EfficientNetB0 used as a convolutional feature extractor.
efficientnet_backbone = EfficientNetB0(
    weights='imagenet',         # start from the ImageNet pre-trained weights
    include_top=False,          # drop the fully-connected classification head
    input_shape=(256, 256, 3),  # must have exactly 3 input channels
    input_tensor=None,
    pooling=None,               # no built-in pooling: the 4D feature map is returned
)
# NOTE: despite its name, CNN_model holds the backbone's output tensor for
# inputs1 (the backbone called on the input), not a Model object.
CNN_model = efficientnet_backbone(inputs1)

In [63]:
# Collapse the spatial feature map to one vector per image, project it to 256
# features, and wrap the image branch as a standalone model for inspection.
pooling = GlobalAveragePooling2D()(CNN_model)
cnn_dense = Dense(units=256, activation='relu')(pooling)
model1 = Model(inputs1, cnn_dense)

In [64]:
# Print the layer-by-layer summary (output shapes, parameter counts) of the
# image branch.
model1.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 efficientnetb0 (Functional)  (None, 8, 8, 1280)       4049571   
                                                                 
 global_average_pooling2d_5   (None, 1280)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_4 (Dense)             (None, 256)               327936    
                                                                 
Total params: 4,377,507
Trainable params: 4,335,484
Non-trainable params: 42,023
_________________________________________________________________


## Combine with LSTM sequence model

### LSTM Model layers

In [55]:
# Sizes used by the caption input, the embedding and the output layer.
# NOTE(review): the meaning of the +1 / +2 offsets is not visible here
# (presumably padding / special tokens) -- confirm.
# NOTE(review): the recorded model summary lists the caption input as
# (None, 34), which does not match 32 + 1 -- re-run on a fresh kernel to verify.
max_caption_length = 32 + 1
vocab_size = 8763 + 2

In [59]:
# Caption branch: sequence of token ids -> 256-d embeddings -> one 256-d LSTM
# output vector per sample.
# mask_zero=True makes the embedding treat id 0 as padding to be masked.
inputs2 = Input(shape=(max_caption_length,))
embed_layer = Embedding(input_dim=vocab_size, output_dim=256, mask_zero=True)(inputs2)
lstm_layer = LSTM(units=256)(embed_layer)

### Combine CNN and LSTM

In [78]:
# Fuse the two 256-d branch outputs by element-wise addition, then map the
# fused vector to a softmax distribution over the vocabulary.
decoder1 = Add()([cnn_dense, lstm_layer])
decoder2 = Dense(units=256, activation='relu')(decoder1)
outputs = Dense(units=vocab_size, activation='softmax')(decoder2)

### Build and compile the combined model, then inspect it

In [79]:
# Two-input model: (image, caption token ids) -> distribution over the vocabulary.
model = Model([inputs1, inputs2], outputs)
# categorical_crossentropy expects one-hot encoded targets.
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [83]:
# Print the layer-by-layer summary of the combined image + caption model.
model.summary()

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_6 (InputLayer)           [(None, 256, 256, 3  0           []                               
                                )]                                                                
                                                                                                  
 efficientnetb0 (Functional)    (None, 8, 8, 1280)   4049571     ['input_6[0][0]']                
                                                                                                  
 input_9 (InputLayer)           [(None, 34)]         0           []                               
                                                                                                  
 global_average_pooling2d_5 (Gl  (None, 1280)        0           ['efficientnetb0[0][0]']   

In [82]:
# Render the model graph annotated with tensor shapes.
# Requires pydot plus a system-level Graphviz install (the `dot` executable);
# without them Keras only emits an installation hint, as in the recorded output.
plot_model(model, show_shapes=True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.
