# EfficientNet in TensorFlow

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers as ly
import math



In [2]:
def bn_act(x, bn=True, act=True):
    """Optionally apply batch normalization, then optionally a swish activation.

    Args:
        x: Keras tensor to transform.
        bn: When true, pass ``x`` through a new ``BatchNormalization`` layer.
        act: When true, pass the result through a new swish ``Activation`` layer.

    Returns:
        The transformed tensor (``x`` itself when both flags are false).
    """
    if bn:
        # NOTE(review): reference EfficientNet implementations appear to use
        # momentum=0.99 -- confirm that 0.999 is intentional.
        normalizer = ly.BatchNormalization(momentum=0.999, epsilon=1e-3)
        x = normalizer(x)
    if act:
        swish = ly.Activation(tf.nn.swish)
        x = swish(x)
    return x

In [3]:
def SEBlock(x, c, r=24):
    """Squeeze-and-excitation: rescale the channels of ``x`` by learned gates.

    The input is globally average-pooled, passed through a 1x1 bottleneck
    convolution with swish, expanded back to ``c`` channels with a sigmoid,
    and the resulting per-channel gates are multiplied into ``x``.

    Args:
        x: Input feature map. It is reshaped/broadcast as if it had ``c``
            channels on the last axis -- presumably (batch, H, W, c).
        c: Number of channels of ``x``.
        r: Reduction ratio; the bottleneck uses ``c // r`` filters.

    Returns:
        Tensor with the same shape as ``x``, scaled channel-wise.
    """
    # A Conv2D with zero filters is invalid, so never let the bottleneck
    # collapse to 0 when c < r.
    reduced = max(1, c // r)

    squeeze = ly.GlobalAveragePooling2D()(x)
    squeeze = ly.Reshape((1, 1, c))(squeeze)

    ex = ly.Conv2D(reduced, (1, 1), padding='same')(squeeze)
    ex = ly.Activation(tf.nn.swish)(ex)
    ex = ly.Conv2D(c, (1, 1), padding='same')(ex)
    ex = ly.Activation(tf.nn.sigmoid)(ex)

    # The (1, 1, c) gates broadcast over the spatial dimensions of x.
    x = ly.multiply([x, ex])
    return x

In [4]:
def MBConv(x, n_in, n_out, expansion, ks=3, strides=1, dropout=0.2, r=24):
    """Mobile inverted bottleneck block with squeeze-and-excitation.

    Pipeline: optional 1x1 expansion -> depthwise conv -> SE block ->
    1x1 projection (no activation) -> optional dropout + residual add.

    Args:
        x: Input feature map; expected to have ``n_in`` channels.
        n_in: Number of input channels.
        n_out: Number of output channels.
        expansion: Channel multiplier for the expansion conv; the expansion
            stage is skipped entirely when this is 1.
        ks: Depthwise kernel size.
        strides: Depthwise stride. Stride 2 uses explicit zero padding of
            ``(ks - 1) // 2`` followed by a 'valid' convolution; stride 1 uses
            'same' padding.
        dropout: Dropout rate applied to the block output before the residual
            add. The default of 0.2 is the rate this block previously
            hard-coded, so existing callers see no change.
        r: Reduction ratio forwarded to ``SEBlock``.

    Returns:
        Output feature map with ``n_out`` channels.
    """
    residual = x
    # The identity shortcut is only valid when the output has the same
    # spatial size and channel count as the input.
    skip_connection = (strides == 1) and (n_in == n_out)
    expanded = expansion * n_in

    if expansion != 1:
        # Expand pointwise
        x = ly.Conv2D(expanded, kernel_size=1, padding='same', use_bias=False,
                      activation=None)(x)
        x = bn_act(x)

    # Depthwise
    if strides == 2:
        x = ly.ZeroPadding2D(padding=(ks - 1) // 2)(x)
    x = ly.DepthwiseConv2D(kernel_size=ks, strides=strides, activation=None, use_bias=False,
                           padding='same' if strides == 1 else 'valid')(x)
    x = bn_act(x)

    # Squeeze-and-excitation on the (possibly expanded) channels
    x = SEBlock(x, expanded, r=r)

    # Project pointwise, linear (no activation)
    x = ly.Conv2D(n_out, (1, 1), padding='same', activation=None, use_bias=False)(x)
    x = bn_act(x, act=False)

    if skip_connection:
        # Honour the caller-supplied rate instead of a hard-coded constant.
        x = ly.Dropout(dropout)(x)
        x = ly.add([x, residual])
    return x

In [5]:
### Baseline (unscaled) architecture tables, obtained from the paper ###

# Channel counts: the first entry feeds the stem conv, the last entry feeds
# the final 1x1 conv, and consecutive pairs in between are the (in, out)
# widths of each stage.
widths = [
    32,
    16, 24, 40, 80, 112, 192, 320,
    1280,
]

# Number of MBConv blocks in each stage.
depths = [
    1, 2, 2, 3, 3, 4, 1,
]

# Depthwise kernel size used by each stage.
kernel_sizes = [
    3, 3, 5, 3, 5, 5, 3,
]

# Stride of the first block in each stage.
strides = [
    1, 2, 2, 2, 1, 2, 1,
]

In [6]:
def scale_width(w, w_factor):
    """Scale a channel count and snap it to a multiple of 8.

    Args:
        w: Baseline channel count.
        w_factor: Width multiplier.

    Returns:
        ``w * w_factor`` rounded to the nearest multiple of 8, never below 8.
        If rounding removed more than 10% of the scaled value, the result is
        bumped up by one more multiple of 8.
    """
    divisor = 8
    half = divisor // 2
    target = w * w_factor

    rounded = max(divisor, int(target + half) // divisor * divisor)
    # Do not let rounding down shrink the layer by more than 10%.
    if rounded < 0.9 * target:
        rounded += divisor
    return int(rounded)

In [7]:
def efficientnet_scaler(w_factor=1, d_factor=1):
    """Apply the width and depth multipliers to the baseline tables.

    Args:
        w_factor: Width multiplier, applied to every entry of ``widths``
            through ``scale_width``.
        d_factor: Depth multiplier; every entry of ``depths`` is multiplied
            by it and rounded up.

    Returns:
        Tuple ``(scaled_widths, scaled_depths)`` of two lists.
    """
    scaled_widths = []
    for base_width in widths:
        scaled_widths.append(scale_width(base_width, w_factor))

    scaled_depths = []
    for base_depth in depths:
        scaled_depths.append(math.ceil(d_factor * base_depth))

    return scaled_widths, scaled_depths

In [8]:
def EfficientNet(w_factor=1, d_factor=1, n_classes=1000, input_shape=(224, 224, 3)):
    """Build an EfficientNet classifier as a Keras functional model.

    The network is a strided 3x3 stem convolution, one stage of MBConv blocks
    per entry of the module-level stage tables (``depths``, ``strides``,
    ``kernel_sizes``), a 1x1 head convolution, global average pooling and a
    dense layer.

    Args:
        w_factor: Width multiplier passed to ``efficientnet_scaler``.
        d_factor: Depth multiplier passed to ``efficientnet_scaler``.
        n_classes: Number of units in the final dense layer.
        input_shape: Shape of one input sample, without the batch dimension.
            Defaults to ``(224, 224, 3)``.

    Returns:
        A ``keras.Model`` named "efficientnet". The final dense layer has no
        activation, so the outputs are unnormalized scores.
    """
    scaled_widths, scaled_depths = efficientnet_scaler(w_factor=w_factor, d_factor=d_factor)
    inputs = keras.Input(shape=input_shape)

    # Stem. The convolution consumes `inputs` directly with 'same' padding, so
    # no separate ZeroPadding2D layer is needed in front of it.
    x = ly.Conv2D(scaled_widths[0], (3, 3), strides=(2, 2), padding='same', use_bias=False)(inputs)
    x = bn_act(x)
    print(x.shape)

    stage_specs = zip(scaled_depths, strides, kernel_sizes)
    for i, (depth, stride, ks) in enumerate(stage_specs):
        w_in = scaled_widths[i]
        w_out = scaled_widths[i + 1]
        # The first stage is not expanded and uses a smaller SE reduction.
        expansion = 1 if i == 0 else 6
        r = 4 if i == 0 else 24

        # Only the first block of a stage changes width and applies the stride.
        x = MBConv(x, w_in, w_out, expansion=expansion, ks=ks, strides=stride, r=r)
        for _ in range(1, depth):
            x = MBConv(x, w_out, w_out, expansion=expansion, ks=ks, r=r)

    # Head
    x = ly.Conv2D(scaled_widths[-1], kernel_size=1, use_bias=False)(x)
    x = bn_act(x)
    x = ly.GlobalAveragePooling2D()(x)
    x = ly.Dense(n_classes)(x)
    return keras.Model(inputs=inputs, outputs=x, name="efficientnet")

In [9]:
def efficientnet_b0(n_classes=1000, builder = EfficientNet):
    """Build the B0 variant: ``builder`` is called with its default scaling.

    Args:
        n_classes: Forwarded to ``builder`` as ``n_classes``.
        builder: Callable that constructs the model.

    Returns:
        Whatever ``builder`` returns.
    """
    model = builder(n_classes=n_classes)
    return model

In [10]:
def efficientnet_b1(n_classes=1000, builder = EfficientNet):
    """Build the B1 variant.

    Passes 1 and 1.1 positionally to ``builder``; the default ``EfficientNet``
    builder reads them as the width and depth factors.

    Args:
        n_classes: Forwarded to ``builder`` as ``n_classes``.
        builder: Callable that constructs the model.

    Returns:
        Whatever ``builder`` returns.
    """
    width_factor, depth_factor = 1, 1.1
    return builder(width_factor, depth_factor, n_classes=n_classes)

In [11]:
def efficientnet_b2(n_classes=1000, builder = EfficientNet):
    """Build the B2 variant.

    Passes 1.1 and 1.2 positionally to ``builder``; the default
    ``EfficientNet`` builder reads them as the width and depth factors.

    Args:
        n_classes: Forwarded to ``builder`` as ``n_classes``.
        builder: Callable that constructs the model.

    Returns:
        Whatever ``builder`` returns.
    """
    width_factor, depth_factor = 1.1, 1.2
    return builder(width_factor, depth_factor, n_classes=n_classes)

In [12]:
def efficientnet_b3(n_classes=1000, builder = EfficientNet):
    """Build the B3 variant.

    Passes 1.2 and 1.4 positionally to ``builder``; the default
    ``EfficientNet`` builder reads them as the width and depth factors.

    Args:
        n_classes: Forwarded to ``builder`` as ``n_classes``.
        builder: Callable that constructs the model.

    Returns:
        Whatever ``builder`` returns.
    """
    width_factor, depth_factor = 1.2, 1.4
    return builder(width_factor, depth_factor, n_classes=n_classes)

In [13]:
def efficientnet_b4(n_classes=1000, builder = EfficientNet):
    """Build the B4 variant.

    Passes 1.4 and 1.8 positionally to ``builder``; the default
    ``EfficientNet`` builder reads them as the width and depth factors.

    Args:
        n_classes: Forwarded to ``builder`` as ``n_classes``.
        builder: Callable that constructs the model.

    Returns:
        Whatever ``builder`` returns.
    """
    width_factor, depth_factor = 1.4, 1.8
    return builder(width_factor, depth_factor, n_classes=n_classes)

In [14]:
def efficientnet_b5(n_classes=1000, builder = EfficientNet):
    """Build the B5 variant.

    Passes 1.6 and 2.2 positionally to ``builder``; the default
    ``EfficientNet`` builder reads them as the width and depth factors.

    Args:
        n_classes: Forwarded to ``builder`` as ``n_classes``.
        builder: Callable that constructs the model.

    Returns:
        Whatever ``builder`` returns.
    """
    width_factor, depth_factor = 1.6, 2.2
    return builder(width_factor, depth_factor, n_classes=n_classes)

In [15]:
def efficientnet_b6(n_classes=1000, builder = EfficientNet):
    """Build the B6 variant.

    Passes 1.8 and 2.6 positionally to ``builder``; the default
    ``EfficientNet`` builder reads them as the width and depth factors.

    Args:
        n_classes: Forwarded to ``builder`` as ``n_classes``.
        builder: Callable that constructs the model.

    Returns:
        Whatever ``builder`` returns.
    """
    width_factor, depth_factor = 1.8, 2.6
    return builder(width_factor, depth_factor, n_classes=n_classes)

In [16]:
def efficientnet_b7(n_classes=1000, builder = EfficientNet):
    """Build the B7 variant.

    Passes 2 and 3.1 positionally to ``builder``; the default ``EfficientNet``
    builder reads them as the width and depth factors.

    Args:
        n_classes: Forwarded to ``builder`` as ``n_classes``.
        builder: Callable that constructs the model.

    Returns:
        Whatever ``builder`` returns.
    """
    width_factor, depth_factor = 2, 3.1
    return builder(width_factor, depth_factor, n_classes=n_classes)

In [17]:
# Instantiate every variant, B0 through B7, with the default 1000-way head.
# The factories are called in order, one model per name on the left.
b0, b1, b2, b3, b4, b5, b6, b7 = (
    make_model()
    for make_model in (
        efficientnet_b0,
        efficientnet_b1,
        efficientnet_b2,
        efficientnet_b3,
        efficientnet_b4,
        efficientnet_b5,
        efficientnet_b6,
        efficientnet_b7,
    )
)

Metal device set to: Apple M1
(None, 112, 112, 32)
(None, 112, 112, 32)
(None, 112, 112, 32)
(None, 112, 112, 40)
(None, 112, 112, 48)
(None, 112, 112, 48)
(None, 112, 112, 56)
(None, 112, 112, 64)


In [18]:
%%time
inp = np.random.random((1, 224, 224, 3))
b0(inp).shape, b3(inp).shape, b6(inp).shape, 

CPU times: user 878 ms, sys: 171 ms, total: 1.05 s
Wall time: 2.38 s


(TensorShape([1, 1000]), TensorShape([1, 1000]), TensorShape([1, 1000]))

In [19]:
def fmat(n):
    """Format a count as millions with two decimals, e.g. 5330000 -> '5.33M'."""
    millions = n / 1e6
    return f"{millions:.2f}M"

In [20]:
def params(model, f = True):
    """Count the scalar elements held in all of ``model.variables``.

    Args:
        model: Object exposing a ``variables`` iterable whose items have a
            ``shape`` attribute.
        f: When true, return the count formatted by ``fmat``; otherwise
            return the raw integer.

    Returns:
        A formatted string when ``f`` is true, else an ``int``.
    """
    sizes = [np.prod(variable.shape) for variable in model.variables]
    count = int(np.sum(sizes))
    if f:
        return fmat(count)
    return count

In [21]:
# Formatted variable counts for every variant, B0 through B7.
tuple(params(net) for net in (b0, b1, b2, b3, b4, b5, b6, b7))

('5.33M', '7.86M', '9.18M', '12.32M', '19.47M', '30.56M', '43.27M', '66.66M')

**Untested**