# Base Model
---
This model is taken from the Recursive Recurrent Nets with Attention Modeling for OCR in the Wild paper by Lee et al. In their paper, Lee construct a recursive recurrent neural network with attention modeling. For our project we want to first understand this model architecture, and then try to improve upon it. Later we will provide an ethical analysis for OCR technology.

In [11]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import string

import tensorflow as tf
from tensorflow import keras
from keras.utils.vis_utils import plot_model
from keras import layers

From the paper, the base model: 
> has 8 convolutional layer with 64, 64, 128, 128, 256, 256, 512 and 512 channels, and each convolutional layer uses kernel with a 3 × 3 spatial extent. Convo- lutions are performed with stride 1, zero padding, and ReLU activation function. 2 × 2 max pooling follows the second, fourth, and sixth convolutional layers. The two fully con- nected layers have 4096 units. The input is a resized 32 × 100 gray scale image.

In [12]:
softmax_classes = len(string.printable)

In [13]:
if 'base_cnn_model' in locals():
  del base_cnn_model

input_shape = (100, 32, 1)

base_cnn_model = keras.Sequential(
    [
     keras.Input(shape = input_shape),
     layers.Conv2D(64, (3, 3), padding = "same", activation = "relu", name = "cov_layer1"),
     layers.Conv2D(64, (3, 3), padding = "same", activation = "relu", name = "cov_layer2"),
     layers.MaxPooling2D((2, 2), name = "pool1"),
     layers.Conv2D(64, (3, 3), padding = "same", activation = "relu", name = "cov_layer3"),
     layers.Conv2D(64, (3, 3), padding = "same", activation = "relu", name = "cov_layer4"),
     layers.MaxPooling2D((2, 2), name = "pool2"),
     layers.Conv2D(64, (3, 3), padding = "same", activation = "relu", name = "cov_layer5"),
     layers.Conv2D(64, (3, 3), padding = "same", activation = "relu", name = "cov_layer6"),
     layers.MaxPooling2D((2, 2), name = "pool3"),
     layers.Conv2D(64, (3, 3), padding = "same", activation = "relu", name = "cov_layer7"),
     layers.Conv2D(64, (3, 3), padding = "same", activation = "relu", name = "cov_layer8"),
     layers.Flatten(),
     layers.Dense(units = 4096),
     layers.Dense(units = 4096),
     layers.Dense(softmax_classes, activation = "softmax")
    ],
    name = 'base_cnn_model'
)

base_cnn_model.summary()

Model: "base_cnn_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 cov_layer1 (Conv2D)         (None, 100, 32, 64)       640       
                                                                 
 cov_layer2 (Conv2D)         (None, 100, 32, 64)       36928     
                                                                 
 pool1 (MaxPooling2D)        (None, 50, 16, 64)        0         
                                                                 
 cov_layer3 (Conv2D)         (None, 50, 16, 64)        36928     
                                                                 
 cov_layer4 (Conv2D)         (None, 50, 16, 64)        36928     
                                                                 
 pool2 (MaxPooling2D)        (None, 25, 8, 64)         0         
                                                                 
 cov_layer5 (Conv2D)         (None, 25, 8, 64)      

Lee et al continue saying:
> For the character-level language modeling, we use RNNs with 1024 hidden units equipped with hyperbolic tangent activation function.

I believe it is within the RNN that the activation layer is called, not the CNN like I initially believed.

In [17]:
from keras.layers.dense_attention import Attention
if 'base_rnn_model' in locals():
  del base_rnn_model

input_shape = (100, 32, 1)

base_rnn_model = keras.Sequential(
    [
     keras.Input(shape = input_shape),
     layers.Dense(units = 1024, activation = 'tanh', name='rnn_layer1'),
     layers.Attention(['rnn_layer1']),
     layers.Dense(units = 1024, activation = 'tanh'),
     layers.Dense(softmax_classes, activation = "softmax")
    ],
    name = 'base_rnn_model'
)

base_rnn_model.summary()

ValueError: ignored