In [1]:

from IPython.display import Image

image_url = 'https://media.geeksforgeeks.org/wp-content/uploads/20200429201549/Inceptionv1_architecture.png'
print("Google Net architecture")
Image(url=image_url,height=1000)


Google Net architecture


## With Tensorflow Keras

In [2]:
import numpy as np
import keras
import tensorflow as tf
from keras.optimizers import SGD 
from keras.callbacks import LearningRateScheduler
from keras.datasets import cifar10
from keras.layers import Conv2D, MaxPool2D,Dropout, Dense, Input, concatenate, GlobalAveragePooling2D, AveragePooling2D,Flatten,  Average
import tensorflow_datasets as tfds

2024-08-06 09:00:26.321096: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-06 09:00:26.321201: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-06 09:00:26.440933: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## Inception Module

In [3]:
class InceptionModule(keras.Model):
    def __init__(self, filters_1x1, filters_3x3_reduce, filters_3x3, filters_5x5_reduce, filters_5x5, filters_pool_proj, name=None):
        super().__init__()
        self.conv_1x1 = Conv2D(filters_1x1, (1,1), padding="same", activation="relu")
        self.conv_3x3_red = Conv2D(filters_3x3_reduce, (3,3), padding="same", activation="relu")
        self.conv_3x3 = Conv2D(filters_3x3, (3,3), padding="same", activation="relu")
        self.conv_5x5_red = Conv2D(filters_5x5_reduce, (5,5), padding="same", activation="relu")
        self.conv_5x5 = Conv2D(filters_5x5, (5,5), padding="same", activation="relu")
        self.pool_proj_red = MaxPool2D((3,3), strides=(1,1), padding="same")
        self.pool = Conv2D(filters_pool_proj, (1,1), padding="same", activation="relu")
        self.name = name
    
    def call(self, X):
        conv_1 = self.conv_1x1(X)
        conv_3_red = self.conv_3x3_red(X)
        conv_3 = self.conv_3x3(conv_3_red)
        conv_5_red = self.conv_5x5_red(X)
        conv_5 = self.conv_5x5(conv_5_red)
        pool_red = self.pool_proj_red(X)
        pool = self.pool(pool_red)
        output = concatenate([conv_1, conv_3, conv_5, pool], axis=3, name = self.name)
        return output          

## Test InceptionModule

In [4]:
inception_module = InceptionModule(
    filters_1x1=64,
    filters_3x3_reduce=96,
    filters_3x3=128,
    filters_5x5_reduce=16,
    filters_5x5=32,
    filters_pool_proj=32,
    name="inception_block_1"
)

input_tensor = keras.Input(shape=(224, 224, 3))

output_tensor = inception_module(input_tensor)
print(output_tensor)

<KerasTensor shape=(None, 224, 224, 256), dtype=float32, sparse=False, name=keras_tensor_2>


In [5]:
inception_module.summary()

## Google Net architecture

In [6]:
class GoogLeNet(keras.Model):
    def __init__(self, num_classes):
        super(GoogLeNet, self).__init__()
        
        
        self.conv_1_7x7 = Conv2D(64, (7,7),   padding="same", strides=(2,2), activation="relu")
        self.max_pool_1 = MaxPool2D((3,3),    padding="same", strides=(2,2))
        self.conv_2_1x1 = Conv2D(64, (1,1),   padding="same", strides=(1,1), activation="relu")
        self.conv_3_3x3 = Conv2D(192, (3,3),  padding="same", strides=(1,1), activation="relu")
        self.max_pool_2 = MaxPool2D((3,3),    padding="same", strides=(2,2))
        
        self.inception_1 = InceptionModule(filters_1x1=64, filters_3x3_reduce=96, filters_3x3=128,filters_5x5_reduce=16, filters_5x5=32,filters_pool_proj=32, name="inception_3a")
        self.inception_2 = InceptionModule(filters_1x1=128,filters_3x3_reduce=128, filters_3x3=192,filters_5x5_reduce=32,  filters_5x5=96,filters_pool_proj=64,  name="inception_3b")
        
        self.max_pool_3 = MaxPool2D((3,3), padding="same", strides = (2,2))
        
        self.inception_3 = InceptionModule(filters_1x1=192,filters_3x3_reduce=96, filters_3x3=208,filters_5x5_reduce=16,filters_5x5=48,filters_pool_proj=64,  name="inception_4a")
        
        self.flatten =  Flatten()
        
        # Output 1
        self.x1_avg = AveragePooling2D((5,5), strides=3)
        self.x1_conv = Conv2D(128, (1,1), padding="same", activation="relu")
        self.x1_dense1 = Dense(1024, activation="relu")
        self.x1_drop = Dropout(0.7)
        self.x1_out = Dense(num_classes, activation="softmax")
        
        self.inception_4 = InceptionModule(filters_1x1=160,filters_3x3_reduce=112, filters_3x3=224,filters_5x5_reduce=24,filters_5x5=64, filters_pool_proj=64,name="inception_4b")
        self.inception_5 = InceptionModule(filters_1x1=128,filters_3x3_reduce=128, filters_3x3=256,filters_5x5_reduce=24,filters_5x5=64, filters_pool_proj=64,name="inception_4c")
        self.inception_6 = InceptionModule(filters_1x1=112,filters_3x3_reduce=144, filters_3x3=288,filters_5x5_reduce=32,filters_5x5=64, filters_pool_proj=64,  name="inception_4d")
        
        self.x2_avg = AveragePooling2D((5,5), strides=3)
        self.x2_conv = Conv2D(128,(1,1), padding="same", activation="relu")
        self.x2_dense1 = Dense(1024, activation="relu")
        self.x2_drop = Dropout(0.7)
        self.x2_out = Dense(num_classes, activation="softmax")
        
        self.inception_7 = InceptionModule(filters_1x1=256,filters_3x3_reduce=160, filters_3x3=320,filters_5x5_reduce=32,filters_5x5=128,  filters_pool_proj=128,  name="inception_4e")
        
        self.max_pool_4 = MaxPool2D((3,3), padding="same", strides=(2,2))
        
        self.inception_8 = InceptionModule(filters_1x1=256,filters_3x3_reduce=160, filters_3x3=320,filters_5x5_reduce=32,filters_5x5=128,  filters_pool_proj=128,  name="inception_5a")
        self.inception_9 = InceptionModule(filters_1x1=384,filters_3x3_reduce=192, filters_3x3=384,filters_5x5_reduce=48,filters_5x5=128, filters_pool_proj=128, name="inception_5b")
        
        self.x3_avg = GlobalAveragePooling2D()
        self.x3_drop = Dropout(0.3)
        self.x3_out = Dense(num_classes, activation="softmax")
        
        self.avg_out = Average()
        
    def call(self, X):
            
        X = self.conv_1_7x7(X)
        X = self.max_pool_1(X)
        X = self.conv_2_1x1(X)
        X = self.conv_3_3x3(X)
        X = self.max_pool_2(X)
        
        X = self.inception_1(X)
        X = self.inception_2(X)
        X = self.max_pool_3(X)
        X = self.inception_3(X)
        
        X1 = self.x1_avg(X)
        X1 = self.x1_conv(X1)
        X1 = self.flatten(X1)
        X1 = self.x1_dense1(X1)
        X1 = self.x1_drop(X1)
        X1 = self.x1_out(X1)
        
        X = self.inception_4(X)
        X = self.inception_5(X)
        X = self.inception_6(X)
        
        X2 = self.x2_avg(X)
        X2 = self.x2_conv(X2)
        X2 = self.flatten(X2)
        X2 = self.x2_dense1(X2)
        X2 = self.x2_drop(X2)
        X2 = self.x2_out(X2)
                                 
        X = self.inception_7(X)
        X = self.inception_8(X)
        X = self.max_pool_4(X)
        X = self.inception_9(X)
        
        X = self.x3_avg(X)
        X = self.x3_drop(X)
        X = self.x3_out(X)
        out = self.avg_out([X, X1, X2])
        return   out

## Test GoogLeNet

In [7]:
google_net = GoogLeNet(10)

input_tensor = keras.Input(shape=(224, 224, 3))

output_tensor = google_net(input_tensor)
print(output_tensor)

<KerasTensor shape=(None, 10), dtype=float32, sparse=False, name=keras_tensor_14>


In [8]:
google_net.summary()

## Train with cats_vs_dogs dataset

In [9]:
dataset = tfds.load('cats_vs_dogs', split='train[:70%]', as_supervised=True)

train_dataset = dataset.take(int(len(dataset) * 0.8))
test_dataset = dataset.skip(int(len(dataset) * 0.8))


def preprocess(image, label):
    image = tf.image.resize(image, [224, 224])
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

train_dataset = train_dataset.map(preprocess).cache().prefetch(buffer_size=tf.data.AUTOTUNE)
test_dataset = test_dataset.map(preprocess).cache().prefetch(buffer_size=tf.data.AUTOTUNE)

[1mDownloading and preparing dataset 786.67 MiB (download: 786.67 MiB, generated: 1.04 GiB, total: 1.81 GiB) to /root/tensorflow_datasets/cats_vs_dogs/4.0.1...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/23262 [00:00<?, ? examples/s]

Corrupt JPEG data: 239 extraneous bytes before marker 0xd9
Corrupt JPEG data: 214 extraneous bytes before marker 0xd9
Corrupt JPEG data: 128 extraneous bytes before marker 0xd9
Corrupt JPEG data: 99 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1153 extraneous bytes before marker 0xd9
Corrupt JPEG data: 396 extraneous bytes before marker 0xd9
Corrupt JPEG data: 228 extraneous bytes before marker 0xd9
Corrupt JPEG data: 162 extraneous bytes before marker 0xd9
Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9
Corrupt JPEG data: 252 extraneous bytes before marker 0xd9
Corrupt JPEG data: 2226 extraneous bytes before marker 0xd9
Corrupt JPEG data: 65 extraneous bytes before marker 0xd9


Shuffling /root/tensorflow_datasets/cats_vs_dogs/4.0.1.incomplete43AZF5/cats_vs_dogs-train.tfrecord*...:   0%|…

[1mDataset cats_vs_dogs downloaded and prepared to /root/tensorflow_datasets/cats_vs_dogs/4.0.1. Subsequent calls will reuse this data.[0m


In [10]:
train_dataset = train_dataset.batch(batch_size=32)

In [11]:
num_classes = 2
input_shape = (224,224,3)

In [12]:
model = GoogLeNet(num_classes)

In [13]:
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

In [14]:
model.fit(train_dataset, epochs=10)

Epoch 1/10


I0000 00:00:1722934983.570086      70 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1722934983.629095      70 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1722934983.629839      70 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1722934983.630432      70 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m161s[0m 224ms/step - accuracy: 0.5061 - loss: 0.6936
Epoch 2/10


W0000 00:00:1722935074.915162      70 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1722935074.922194      70 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1722935074.931025      70 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 151ms/step - accuracy: 0.5076 - loss: 0.6931
Epoch 3/10
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 150ms/step - accuracy: 0.5076 - loss: 0.6931
Epoch 4/10
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 150ms/step - accuracy: 0.5076 - loss: 0.6931
Epoch 5/10
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 148ms/step - accuracy: 0.5078 - loss: 0.6931
Epoch 6/10
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 148ms/step - accuracy: 0.5079 - loss: 0.6931
Epoch 7/10
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 148ms/step - accuracy: 0.5080 - loss: 0.6931
Epoch 8/10
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 148ms/step - accuracy: 0.5080 - loss: 0.6931
Epoch 9/10
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 148ms/step - accuracy: 0.5080 - loss: 0.6931
Epoch 10/10
[1m408/408[0m

<keras.src.callbacks.history.History at 0x7fcea0475db0>

## With Pytorch

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F


In [16]:
class InceptionModule(nn.Module):
    def __init__(self, filters_1x1, filters_3x3_reduce, filters_3x3, filters_5x5_reduce, filters_5x5, filters_pool_proj):
        super(InceptionModule, self).__init__()
        
        self.conv_1x1 = nn.Conv2d(in_channels=filters_1x1, out_channels=filters_1x1, kernel_size=1, padding='same')
        self.conv_3x3_red = nn.Conv2d(in_channels=filters_3x3_reduce, out_channels=filters_3x3_reduce, kernel_size=1, padding='same')
        self.conv_3x3 = nn.Conv2d(in_channels=filters_3x3_reduce, out_channels=filters_3x3, kernel_size=3, padding='same')
        self.conv_5x5_red = nn.Conv2d(in_channels=filters_5x5_reduce, out_channels=filters_5x5_reduce, kernel_size=1, padding='same')
        self.conv_5x5 = nn.Conv2d(in_channels=filters_5x5_reduce, out_channels=filters_5x5, kernel_size=5, padding='same')
        self.pool_proj = nn.Conv2d(in_channels=filters_pool_proj, out_channels=filters_pool_proj, kernel_size=1, padding='same')
        
    def forward(self, x):
        conv_1 = F.relu(self.conv_1x1(x))
        conv_3_red = F.relu(self.conv_3x3_red(x))
        conv_3 = F.relu(self.conv_3x3(conv_3_red))
        conv_5_red = F.relu(self.conv_5x5_red(x))
        conv_5 = F.relu(self.conv_5x5(conv_5_red))
        pool_red = F.max_pool2d(x, kernel_size=3, stride=1, padding=1)
        pool = F.relu(self.pool_proj(pool_red))
        
        output = torch.cat([conv_1, conv_3, conv_5, pool], dim=1)
        return output

In [17]:
class GoogLeNet(nn.Module):
    def __init__(self, num_classes):
        super(GoogLeNet, self).__init__()
        
        self.conv_1_7x7 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding='same')
        self.max_pool_1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv_2_1x1 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1, padding='same')
        self.conv_3_3x3 = nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding='same')
        self.max_pool_2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.inception_3a = InceptionModule(filters_1x1=64, filters_3x3_reduce=96, filters_3x3=128, filters_5x5_reduce=16, filters_5x5=32, filters_pool_proj=32)
        self.inception_3b = InceptionModule(filters_1x1=128, filters_3x3_reduce=128, filters_3x3=192, filters_5x5_reduce=32, filters_5x5=96, filters_pool_proj=64)
        
        self.max_pool_3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.inception_4a = InceptionModule(filters_1x1=192, filters_3x3_reduce=96, filters_3x3=208, filters_5x5_reduce=16, filters_5x5=48, filters_pool_proj=64)
        
        # Output 1
        self.x1_avg = nn.AvgPool2d(kernel_size=5, stride=3)
        self.x1_conv = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, padding='same')
        self.x1_fc1 = nn.Linear(in_features=128*4*4, out_features=1024)
        self.x1_drop = nn.Dropout(p=0.7)
        self.x1_out = nn.Linear(in_features=1024, out_features=num_classes)
        
        self.inception_4b = InceptionModule(filters_1x1=160, filters_3x3_reduce=112, filters_3x3=224, filters_5x5_reduce=24, filters_5x5=64, filters_pool_proj=64)
        self.inception_4c = InceptionModule(filters_1x1=128, filters_3x3_reduce=128, filters_3x3=256, filters_5x5_reduce=24, filters_5x5=64, filters_pool_proj=64)
        self.inception_4d = InceptionModule(filters_1x1=112, filters_3x3_reduce=144, filters_3x3=288, filters_5x5_reduce=32, filters_5x5=64, filters_pool_proj=64)
        
        # Output 2
        self.x2_avg = nn.AvgPool2d(kernel_size=5, stride=3)
        self.x2_conv = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1, padding='same')
        self.x2_fc1 = nn.Linear(in_features=128*4*4, out_features=1024)
        self.x2_drop = nn.Dropout(p=0.7)
        self.x2_out = nn.Linear(in_features=1024, out_features=num_classes)
        
        self.inception_4e = InceptionModule(filters_1x1=256, filters_3x3_reduce=160, filters_3x3=320, filters_5x5_reduce=32, filters_5x5=128, filters_pool_proj=128)
        
        self.max_pool_4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.inception_5a = InceptionModule(filters_1x1=256, filters_3x3_reduce=160, filters_3x3=320, filters_5x5_reduce=32, filters_5x5=128, filters_pool_proj=128)
        self.inception_5b = InceptionModule(filters_1x1=384, filters_3x3_reduce=192, filters_3x3=384, filters_5x5_reduce=48, filters_5x5=128, filters_pool_proj=128)
        
        # Output 3
        self.x3_avg = nn.AdaptiveAvgPool2d((1,1))
        self.x3_drop = nn.Dropout(p=0.3)
        self.x3_out = nn.Linear(in_features=1024, out_features=num_classes)
        
    def forward(self, x):
        x = F.relu(self.conv_1_7x7(x))
        x = self.max_pool_1(x)
        x = F.relu(self.conv_2_1x1(x))
        x = F.relu(self.conv_3_3x3(x))
        x = self.max_pool_2(x)
        
        x = self.inception_3a(x)
        x = self.inception_3b(x)
        x = self.max_pool_3(x)
        x = self.inception_4a(x)
        
        x1 = self.x1_avg(x)
        x1 = F.relu(self.x1_conv(x1))
        x1 = torch.flatten(x1, 1)
        x1 = F.relu(self.x1_fc1(x1))
        x1 = self.x1_drop(x1)
        x1 = self.x1_out(x1)
        
        x = self.inception_4b(x)
        x = self.inception_4c(x)
        x = self.inception_4d(x)
        
        x2 = self.x2_avg(x)
        x2 = F.relu(self.x2_conv(x2))
        x2 = torch.flatten(x2, 1)
        x2 = F.relu(self.x2_fc1(x2))
        x2 = self.x2_drop(x2)
        x2 = self.x2_out(x2)
        
        x = self.inception_4e(x)
        x = self.max_pool_4(x)
        x = self.inception_5a(x)
        x = self.inception_5b(x)
        
        x3 = self.x3_avg(x)
        x3 = torch.flatten(x3, 1)
        x3 = self.x3_drop(x3)
        x3 = self.x3_out(x3)
        
        out = (x1 + x2 + x3) / 3
        return out