<a href="https://colab.research.google.com/github/R4HUL-ROY/Multimodal_feature_extraction/blob/main/MobileNetV2_transferLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import pathlib
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras import regularizers
from tensorflow.keras.models import Model, load_model
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import seaborn as sns
sns.set_style('darkgrid')
from sklearn.metrics import confusion_matrix, classification_report
from IPython.core.display import display, HTML

In [2]:
data_root = pathlib.Path('/content/drive/MyDrive/Tobacco3482-jpg/')

print(data_root)
for item in data_root.iterdir():
  print(item)

/content/drive/MyDrive/Tobacco3482-jpg
/content/drive/MyDrive/Tobacco3482-jpg/Tobacco3482-jpg
/content/drive/MyDrive/Tobacco3482-jpg/ADVE
/content/drive/MyDrive/Tobacco3482-jpg/Form
/content/drive/MyDrive/Tobacco3482-jpg/Note
/content/drive/MyDrive/Tobacco3482-jpg/Email
/content/drive/MyDrive/Tobacco3482-jpg/News
/content/drive/MyDrive/Tobacco3482-jpg/Resume
/content/drive/MyDrive/Tobacco3482-jpg/Scientific
/content/drive/MyDrive/Tobacco3482-jpg/Memo
/content/drive/MyDrive/Tobacco3482-jpg/Report
/content/drive/MyDrive/Tobacco3482-jpg/Letter


In [3]:
def get_file_paths_and_labels(data_root):
     img_paths = [str(path) for path in data_root.glob('*/*.jpg')]
     labels = [p.split("/")[-2] for p in img_paths]
     return img_paths, labels

img_paths, labels = get_file_paths_and_labels(data_root)
print(img_paths)
print(labels)
print(len(img_paths))
print(len(labels))

['/content/drive/MyDrive/Tobacco3482-jpg/ADVE/0000136188.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/0000435350.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/0030049569.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/0000556056.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/03496270.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/0030048095.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/03722789.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/03567810.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/0030048989.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/04412344.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/04233037_04233039.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/04102204.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/04106546.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/1002760819.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/1002761179.jpg', '/content/drive/MyDrive/Tobacco3482-jpg/ADVE/1002325458.jpg', '/content/dr

In [4]:
df = pd.DataFrame(list(zip(img_paths, labels)),
               columns =['image_path', 'data_label'])
df.head()

Unnamed: 0,image_path,data_label
0,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/00...,ADVE
1,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/00...,ADVE
2,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/00...,ADVE
3,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/00...,ADVE
4,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/03...,ADVE


In [5]:
balance=df['data_label'].value_counts()
print (balance)

Memo          620
Email         599
Letter        567
Form          431
Report        265
Scientific    261
ADVE          230
Note          201
News          188
Resume        120
Name: data_label, dtype: int64


In [6]:
train_df, dummy_df=train_test_split(df, test_size=0.2, shuffle=True, random_state=42)
test_df, valid_df= train_test_split(dummy_df, test_size=0.5, shuffle=True, random_state=42)
print (f"train size: {len(train_df)} test size: {len(test_df)}  valid size: {len(valid_df)}")
length=len(test_df)

train size: 2785 test size: 348  valid size: 349


In [None]:
# filepaths = df['image_path']
# labels = df['data_label']

In [8]:
batch_size=40
def scalar(x):
    return x/127.5-1 # rescales pixels to range -1 to +1

trgen=tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=scalar, horizontal_flip=True)

train_gen=trgen.flow_from_dataframe(train_df, x_col='image_path', y_col='data_label', target_size=(224,224), class_mode='categorical',batch_size=batch_size, shuffle=True, seed=42)

tvgen=tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=scalar)

valid_gen=tvgen.flow_from_dataframe(valid_df, x_col='image_path', y_col='data_label', target_size=(224,224), class_mode='categorical',batch_size=batch_size, shuffle=False)

# determine test generator batch size and steps to go through the test set exactly once for predictions
test_batch_size=sorted([int(length/n) for n in range(1,length+1) if length % n ==0 and length/n<=batch_size],reverse=True)[0]  

test_steps=int(length/test_batch_size)

test_gen=tvgen.flow_from_dataframe(test_df, x_col='image_path', y_col='data_label', target_size=(224,224), class_mode='categorical',batch_size=test_batch_size, shuffle=False)

test_labels=test_gen.labels

Found 2785 validated image filenames belonging to 10 classes.
Found 349 validated image filenames belonging to 10 classes.
Found 348 validated image filenames belonging to 10 classes.


In [9]:
img_shape=(224,224,3)
class_count = len(set(df['data_label']))
neurons=1024
dropout=.3
lr= 0.001
freeze=True


base_model=tf.keras.applications.MobileNetV2( include_top=False, input_shape=img_shape, pooling='max', weights='imagenet') 
if freeze:
    base_model.trainable=False

x=base_model.output
x=tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99 , epsilon=0.001 )(x)
x =tf.keras.layers.Dense(neurons, kernel_regularizer = regularizers.l2(l = 0.016),activity_regularizer=regularizers.l1(0.006), bias_regularizer=regularizers.l1(0.006) ,activation='relu', kernel_initializer= tf.keras.initializers.HeUniform(seed=42))(x)

x=tf.keras.layers.Dropout(rate=dropout, seed=42)(x)

x =tf.keras.layers.Dense(128, kernel_regularizer = regularizers.l2(l = 0.016),activity_regularizer=regularizers.l1(0.006), bias_regularizer=regularizers.l1(0.006) ,activation='relu', kernel_initializer= tf.keras.initializers.HeUniform(seed=42))(x)

x=tf.keras.layers.Dropout(rate=dropout, seed=42)(x)

output=tf.keras.layers.Dense(class_count, activation='softmax',kernel_initializer=tf.keras.initializers.HeUniform(seed=42))(x)

model=Model(inputs=base_model.input, outputs=output)

model.compile(Adamax(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy']) 

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


In [10]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 112, 112, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 112, 112, 32  128         ['Conv1[0][0]']                  
                                )                                                             

In [11]:
history=model.fit(x=train_gen,  epochs=20, verbose=2, validation_data=valid_gen,
               validation_steps=None,  shuffle=False,  initial_epoch=0)

Epoch 1/20
70/70 - 913s - loss: 35.1224 - accuracy: 0.4923 - val_loss: 30.0730 - val_accuracy: 0.5244 - 913s/epoch - 13s/step
Epoch 2/20
70/70 - 95s - loss: 24.8815 - accuracy: 0.6244 - val_loss: 21.3175 - val_accuracy: 0.5989 - 95s/epoch - 1s/step
Epoch 3/20
70/70 - 96s - loss: 18.1090 - accuracy: 0.6765 - val_loss: 15.5523 - val_accuracy: 0.6676 - 96s/epoch - 1s/step
Epoch 4/20
70/70 - 97s - loss: 13.3314 - accuracy: 0.7178 - val_loss: 11.5161 - val_accuracy: 0.6877 - 97s/epoch - 1s/step
Epoch 5/20
70/70 - 95s - loss: 9.9088 - accuracy: 0.7508 - val_loss: 8.6935 - val_accuracy: 0.7163 - 95s/epoch - 1s/step
Epoch 6/20
70/70 - 111s - loss: 7.5099 - accuracy: 0.7655 - val_loss: 6.7118 - val_accuracy: 0.7106 - 111s/epoch - 2s/step
Epoch 7/20
70/70 - 99s - loss: 5.8440 - accuracy: 0.7713 - val_loss: 5.3257 - val_accuracy: 0.7135 - 99s/epoch - 1s/step
Epoch 8/20
70/70 - 96s - loss: 4.6547 - accuracy: 0.7878 - val_loss: 4.3399 - val_accuracy: 0.7192 - 96s/epoch - 1s/step
Epoch 9/20
70/70 - 

In [12]:
epochs = 20
base_model.trainable=True
fine_tune_epochs=10
total_epochs=epochs + fine_tune_epochs
history=model.fit(x=train_gen,  epochs=total_epochs, verbose=2, validation_data=valid_gen,
               validation_steps=None,  shuffle=False,  initial_epoch=epochs)

Epoch 21/30
70/70 - 97s - loss: 1.2265 - accuracy: 0.8765 - val_loss: 1.5647 - val_accuracy: 0.7135 - 97s/epoch - 1s/step
Epoch 22/30
70/70 - 95s - loss: 1.1897 - accuracy: 0.8697 - val_loss: 1.5234 - val_accuracy: 0.7163 - 95s/epoch - 1s/step
Epoch 23/30
70/70 - 95s - loss: 1.1425 - accuracy: 0.8794 - val_loss: 1.4896 - val_accuracy: 0.7393 - 95s/epoch - 1s/step
Epoch 24/30
70/70 - 96s - loss: 1.1175 - accuracy: 0.8794 - val_loss: 1.4795 - val_accuracy: 0.7364 - 96s/epoch - 1s/step
Epoch 25/30
70/70 - 95s - loss: 1.0918 - accuracy: 0.8844 - val_loss: 1.4653 - val_accuracy: 0.7278 - 95s/epoch - 1s/step
Epoch 26/30
70/70 - 95s - loss: 1.0572 - accuracy: 0.8937 - val_loss: 1.4571 - val_accuracy: 0.7278 - 95s/epoch - 1s/step
Epoch 27/30
70/70 - 97s - loss: 1.0319 - accuracy: 0.8969 - val_loss: 1.4616 - val_accuracy: 0.7307 - 97s/epoch - 1s/step
Epoch 28/30
70/70 - 96s - loss: 1.0110 - accuracy: 0.8980 - val_loss: 1.4128 - val_accuracy: 0.7393 - 96s/epoch - 1s/step
Epoch 29/30
70/70 - 96s 

In [13]:
model.evaluate( test_gen, batch_size=test_batch_size, verbose=1, steps=test_steps, return_dict=True)



{'accuracy': 0.732758641242981, 'loss': 1.3931958675384521}