## ADVANCED CNN ARCHITECTURES

##### Q1 Image Recognition using VGG16
Take a color image, resize it to a 224x224 square, feed it to the pre-trained VGG16 network, and determine the name of the object in the image.

In [25]:
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input,decode_predictions
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import array_to_img
from keras.applications.imagenet_utils import preprocess_input
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [26]:
# Instantiate VGG16 with its default arguments and print the layer-by-layer summary.
model=VGG16()
model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [27]:
# Read cat.jpg from disk, resized to the 224x224 target size, and show
# which type load_img returns.
img = load_img('cat.jpg', target_size=(224, 224))
print(type(img))

<class 'PIL.Image.Image'>


In [28]:
# Convert the loaded image to a NumPy array and report its dtype and shape
# (one value per line).
img_array = img_to_array(img)
print(img_array.dtype, img_array.shape, sep="\n")

float32
(224, 224, 3)


In [29]:
# Convert the array back to an image and confirm the type of the *converted*
# object (img_pil), not of the originally loaded img.
img_pil = array_to_img(img_array)
print(type(img_pil))

<class 'PIL.Image.Image'>


In [30]:
# Prepend a batch axis so the array becomes (1, height, width, channels),
# run it through preprocess_input, and display the resulting batch.
image = preprocess_input(img_array[np.newaxis, ...])
image

array([[[[-11.939003 , -36.779    , -45.68     ],
         [-11.939003 , -36.779    , -45.68     ],
         [-11.939003 , -36.779    , -45.68     ],
         ...,
         [-37.939003 , -64.779    , -77.68     ],
         [-37.939003 , -64.779    , -77.68     ],
         [-38.939003 , -63.779    , -74.68     ]],

        [[-11.939003 , -36.779    , -45.68     ],
         [-11.939003 , -36.779    , -45.68     ],
         [-11.939003 , -36.779    , -45.68     ],
         ...,
         [-34.939003 , -61.779    , -74.68     ],
         [-34.939003 , -61.779    , -74.68     ],
         [-35.939003 , -62.779    , -75.68     ]],

        [[-10.939003 , -35.779    , -44.68     ],
         [-11.939003 , -36.779    , -45.68     ],
         [-10.939003 , -35.779    , -44.68     ],
         ...,
         [-31.939003 , -60.779    , -73.68     ],
         [-31.939003 , -60.779    , -73.68     ],
         [-33.939003 , -59.779    , -74.68     ]],

        ...,

        [[ 35.060997 ,   8.221001 ,   

In [31]:
# Run the model on the preprocessed batch and decode the five highest-scoring
# classes into (class id, class name, score) tuples.
yhat = model.predict(image)
label = decode_predictions(yhat, top=5)
label



[[('n02127052', 'lynx', 0.23655625),
  ('n02124075', 'Egyptian_cat', 0.18337671),
  ('n02123597', 'Siamese_cat', 0.13331616),
  ('n02123045', 'tabby', 0.039415386),
  ('n03958227', 'plastic_bag', 0.030638352)]]

##### Q2 IMAGE RECOGNITION USING RESNET50

In [32]:
import numpy as np
from keras.preprocessing import image
from keras.applications import resnet50

In [33]:
# Load Keras's ResNet50 model that is pre-trained against the ImageNet database
# (weights="imagenet" is the Keras default, spelled out here for clarity).
model = resnet50.ResNet50(weights="imagenet")

In [34]:
# Print the layer-by-layer summary of the model currently bound to `model`.
model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_5 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 230, 230, 3)          0         ['input_5[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 112, 112, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 112, 112, 64)         256       ['conv1_conv[0][0]']          
 on)                                                                                       

In [35]:
# Load the image file and resize it to the 224x224 pixels required by the ResNet50 model
img = image.load_img("cat.jpg", target_size=(224, 224))

In [36]:
# Convert the image to a NumPy array and add a fourth (batch) dimension,
# since Keras expects a batch of images rather than a single image.
x = np.expand_dims(image.img_to_array(img), axis=0)
# Scale the input image to the range used in the trained network
x = resnet50.preprocess_input(x)

In [37]:
# Run the model on the preprocessed batch `x`
predictions = model.predict(x)



In [38]:
# Decode with the helper that belongs to the resnet50 module, so this section
# does not depend on the decode_predictions name imported for VGG16 earlier.
# [0] selects the predictions for the first (and only) image in the batch.
decoded_predictions = resnet50.decode_predictions(predictions, top=3)[0]
print("This is an image of :")
for rank, (imagenet_id, label, score) in enumerate(decoded_predictions, start=1):
    print(f"{rank}: {label} ({score:.2f})")

This is an image of :
1: Persian_cat (0.78)
2: lynx (0.10)
3: Siamese_cat (0.03)


##### Image Generation

In [39]:
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array

In [40]:
# Random augmentation settings handed to ImageDataGenerator.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_options)

In [41]:
# Load cat.jpg without a target_size and convert it to an array
pic = load_img('cat.jpg')
pic_array = img_to_array(pic)
pic_array.shape # Image dimensions; the output below shows three axes

(793, 626, 3)

In [42]:
# Add a leading batch axis, turning the image into a 4-dimensional array
pic_array = np.expand_dims(pic_array, axis=0)
pic_array.shape

(1, 793, 626, 3)

In [43]:
import os

# Generate exactly 10 augmented images.
# batch_size is the maximum number of images per generated batch; pic_array
# holds a single image, so each batch yields (and saves) one augmented picture.
NUM_IMAGES = 10
OUTPUT_DIR = "catimages/"

# Make sure the target folder exists before the generator tries to write to it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

count = 0
for batch in datagen.flow(pic_array, batch_size=5, save_to_dir=OUTPUT_DIR, save_prefix='cat', save_format='jpeg'):
    count += 1
    # flow() loops forever, so stop as soon as the requested number of batches
    # has been produced (the previous `count > 10` check let an 11th through).
    if count >= NUM_IMAGES:
        break

##### Q3 Construct a GoogleNet / InceptionNet. GoogleNet is a convolutional neural network (CNN) designed for image classification tasks and developed for the ImageNet Large Scale Visual Recognition Challenge (ILSVRC).

In [44]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Dense, Dropout, Flatten, Concatenate, Input
from tensorflow.keras.models import Model

In [46]:
# Inception Module
def inception_module(x, f1, f3_in, f3, f5_in, f5, pool_proj):
    """Build one Inception block made of four parallel branches.

    Every branch receives the same input ``x`` and uses ``padding='same'``
    (with stride 1 pooling), so the branch outputs can be concatenated.

    Args:
        x: Input tensor fed to all four branches.
        f1: Number of filters in the plain 1x1 convolution branch.
        f3_in: Number of filters in the 1x1 reduction before the 3x3 convolution.
        f3: Number of filters in the 3x3 convolution.
        f5_in: Number of filters in the 1x1 reduction before the 5x5 convolution.
        f5: Number of filters in the 5x5 convolution.
        pool_proj: Number of filters in the 1x1 convolution applied after the
            3x3 max pooling (the pool projection).

    Returns:
        The output of ``Concatenate()`` applied to the four branch outputs, in
        the order 1x1, 3x3, 5x5, pool projection.
    """
    # 1x1 Convolution
    branch_1x1 = Conv2D(f1, (1, 1), padding='same', activation='relu')(x)

    # 1x1 Convolution followed by 3x3 Convolution
    branch_3x3 = Conv2D(f3_in, (1, 1), padding='same', activation='relu')(x)
    branch_3x3 = Conv2D(f3, (3, 3), padding='same', activation='relu')(branch_3x3)

    # 1x1 Convolution followed by 5x5 Convolution
    branch_5x5 = Conv2D(f5_in, (1, 1), padding='same', activation='relu')(x)
    branch_5x5 = Conv2D(f5, (5, 5), padding='same', activation='relu')(branch_5x5)

    # 3x3 MaxPooling followed by 1x1 Convolution (pool projection).
    # The tensor gets its own name so the `pool_proj` filter count is not
    # overwritten, and the projection uses `pool_proj` filters rather than `f5`.
    branch_pool = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = Conv2D(pool_proj, (1, 1), padding='same', activation='relu')(branch_pool)

    # Concatenate the outputs
    output = Concatenate()([branch_1x1, branch_3x3, branch_5x5, branch_pool])

    return output

In [47]:
# GoogleNet Model
def googlenet(input_shape=(224, 224, 3), num_classes=1000):
    """Assemble a GoogLeNet-style classifier from stacked Inception modules.

    The network is a convolutional stem, nine Inception modules separated by
    stride-2 max pooling, a 7x7 average pooling, dropout and a softmax
    classifier. No auxiliary classifiers are built.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
            The fixed 7x7 average pooling matches the default 224x224 input:
            five stride-2 stages with 'same' padding reduce 224 to 7.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled ``Model`` mapping the input layer to the class
        probabilities.
    """
    input_layer = Input(shape=input_shape)

    # Initial layers (stem)
    x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', activation='relu')(input_layer)
    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = Conv2D(64, (1, 1), padding='same', activation='relu')(x)
    x = Conv2D(192, (3, 3), padding='same', activation='relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

    # Inception modules, first group
    x = inception_module(x, 64, 96, 128, 16, 32, 32)
    x = inception_module(x, 128, 128, 192, 32, 96, 64)
    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

    # Inception modules, second group
    x = inception_module(x, 192, 96, 208, 16, 48, 64)
    x = inception_module(x, 160, 112, 224, 24, 64, 64)
    x = inception_module(x, 128, 128, 256, 24, 64, 64)
    x = inception_module(x, 112, 144, 288, 32, 64, 64)
    x = inception_module(x, 256, 160, 320, 32, 128, 128)
    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

    # Inception modules, third group
    x = inception_module(x, 256, 160, 320, 32, 128, 128)
    x = inception_module(x, 384, 192, 384, 48, 128, 128)

    # Average Pooling Layer
    x = AveragePooling2D((7, 7), strides=(1, 1))(x)

    # Dropout Layer
    x = Dropout(0.4)(x)

    # Fully Connected Layer: sized by num_classes instead of a hard-coded 1000,
    # so the argument actually controls the number of outputs.
    x = Flatten()(x)
    x = Dense(num_classes, activation='softmax')(x)

    # Build Model
    model = Model(input_layer, x)

    return model

In [49]:
# Build the GoogLeNet model for 224x224 RGB inputs and 1000 classes, then
# compile it with the Adam optimizer, categorical cross-entropy loss and an
# accuracy metric.
model = googlenet(input_shape=(224, 224, 3), num_classes=1000)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [50]:
# Print the layer-by-layer summary (layer, output shape, parameter count,
# connections) of the model built above
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_7 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv2d_66 (Conv2D)          (None, 112, 112, 64)         9472      ['input_7[0][0]']             
                                                                                                  
 max_pooling2d_16 (MaxPooli  (None, 56, 56, 64)           0         ['conv2d_66[0][0]']           
 ng2D)                                                                                            
                                                                                                  
 conv2d_67 (Conv2D)          (None, 56, 56, 64)           4160      ['max_pooling2d_16[0][0]