<a href="https://colab.research.google.com/github/PIYALI-bhunia/MultimodelDataPreprocessing/blob/main/Resnet_50_Storing_Result_in_CSV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Import necessary modules

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import pathlib
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras import regularizers
from tensorflow.keras.models import Model, load_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


##Load the dataset

In [3]:
data_root = pathlib.Path('/content/drive/MyDrive/Tobacco3482-jpg')

print(data_root)
for item in data_root.iterdir():
  print(item)

/content/drive/MyDrive/Tobacco3482-jpg
/content/drive/MyDrive/Tobacco3482-jpg/Letter
/content/drive/MyDrive/Tobacco3482-jpg/Tobacco3482-jpg
/content/drive/MyDrive/Tobacco3482-jpg/Scientific
/content/drive/MyDrive/Tobacco3482-jpg/Memo
/content/drive/MyDrive/Tobacco3482-jpg/Note
/content/drive/MyDrive/Tobacco3482-jpg/Report
/content/drive/MyDrive/Tobacco3482-jpg/News
/content/drive/MyDrive/Tobacco3482-jpg/Email
/content/drive/MyDrive/Tobacco3482-jpg/Form
/content/drive/MyDrive/Tobacco3482-jpg/Resume
/content/drive/MyDrive/Tobacco3482-jpg/ADVE


In [4]:
def get_file_paths_and_labels(data_root):
     img_paths = [str(path) for path in data_root.glob('*/*.jpg')]
     labels = [p.split("/")[-2] for p in img_paths]
     return img_paths, labels

img_paths, labels = get_file_paths_and_labels(data_root)
# print(img_paths)
# print(labels)
print(len(img_paths))
print(len(labels))

3482
3482


##Create the dataframe

In [5]:
df = pd.DataFrame(list(zip(img_paths, labels)),
               columns =['image_path', 'data_label'])
df.head()

Unnamed: 0,image_path,data_label
0,/content/drive/MyDrive/Tobacco3482-jpg/Letter/...,Letter
1,/content/drive/MyDrive/Tobacco3482-jpg/Letter/...,Letter
2,/content/drive/MyDrive/Tobacco3482-jpg/Letter/...,Letter
3,/content/drive/MyDrive/Tobacco3482-jpg/Letter/...,Letter
4,/content/drive/MyDrive/Tobacco3482-jpg/Letter/...,Letter


In [6]:
balance=df['data_label'].value_counts()
print (balance)

Memo          620
Email         599
Letter        567
Form          431
Report        265
Scientific    261
ADVE          230
Note          201
News          188
Resume        120
Name: data_label, dtype: int64


##Split the dataframe into train , test and validation

In [7]:
train_df, dummy_df=train_test_split(df, test_size=0.3, shuffle=True,random_state=0)
test_df, valid_df= train_test_split(dummy_df, test_size=0.5, shuffle=True, random_state=0)
print (f"train size: {len(train_df)} test size: {len(test_df)}  valid size: {len(valid_df)}")
length=len(test_df)

train size: 2437 test size: 522  valid size: 523


##Image data generator

In [8]:
batch_size=32
seed_value = 42
def scalar(x):
    return x/127.5-1 # rescales pixels to range -1 to +1

trgen=tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=scalar)

train_gen=trgen.flow_from_dataframe(train_df, x_col='image_path', y_col='data_label', target_size=(224,224), class_mode='categorical',batch_size=batch_size, shuffle=True, seed=seed_value)

tvgen=tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=scalar)

valid_gen=tvgen.flow_from_dataframe(valid_df, x_col='image_path', y_col='data_label', target_size=(224,224), class_mode='categorical',batch_size=batch_size, shuffle=False, seed=seed_value)

# determine test generator batch size and steps to go through the test set exactly once for predictions
test_batch_size=sorted([int(length/n) for n in range(1,length+1) if length % n ==0 and length/n<=batch_size],reverse=True)[0]  

test_steps=int(length/test_batch_size)

test_gen=tvgen.flow_from_dataframe(test_df, x_col='image_path', y_col='data_label', target_size=(224,224), class_mode='categorical',batch_size=test_batch_size, shuffle=False, seed=seed_value)

test_labels=test_gen.labels

Found 2437 validated image filenames belonging to 10 classes.
Found 523 validated image filenames belonging to 10 classes.
Found 522 validated image filenames belonging to 10 classes.


##Transfer learning model creation using ResNet-50 as base model

In [9]:
img_shape=(224,224,3)
class_count = len(set(df['data_label']))
dropout=0.2
lr= 0.001
seed_value = 42


base_model= tf.keras.applications.ResNet50(include_top=False,
                   input_shape=(224,224,3),
                   pooling='avg',classes=10,
                   weights='imagenet')

base_model.trainable=False

inputs = keras.Input(shape=img_shape)
x = base_model(inputs, training=False)

# x=tf.keras.layers.Dropout(rate=dropout, seed=seed_value)(x)
x =tf.keras.layers.Dense(2048, kernel_regularizer = regularizers.l2(l = 0.016),activity_regularizer=regularizers.l1(0.006), bias_regularizer=regularizers.l1(0.006) ,activation='relu', kernel_initializer= tf.keras.initializers.GlorotUniform(seed=seed_value))(x)
# x=tf.keras.layers.Dropout(rate=dropout, seed=seed_value)(x)
outputs=tf.keras.layers.Dense(class_count, activation='softmax',kernel_initializer=tf.keras.initializers.GlorotUniform(seed=seed_value))(x)
model=Model(inputs, outputs)

model.compile(Adamax(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [10]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 resnet50 (Functional)       (None, 2048)              23587712  
                                                                 
 dense (Dense)               (None, 2048)              4196352   
                                                                 
 dense_1 (Dense)             (None, 10)                20490     
                                                                 
Total params: 27,804,554
Trainable params: 4,216,842
Non-trainable params: 23,587,712
_________________________________________________________________


In [11]:
y_true=test_gen.classes
print(y_true)

[1, 1, 3, 4, 5, 3, 4, 4, 1, 1, 0, 1, 6, 7, 3, 1, 3, 2, 1, 1, 9, 2, 1, 4, 4, 0, 2, 1, 4, 7, 0, 0, 7, 4, 1, 3, 4, 1, 3, 1, 4, 9, 1, 0, 4, 2, 2, 0, 3, 3, 1, 3, 3, 4, 4, 5, 6, 2, 3, 7, 0, 0, 2, 1, 4, 4, 3, 3, 9, 7, 2, 9, 4, 8, 3, 1, 7, 5, 4, 1, 4, 1, 0, 1, 3, 8, 6, 3, 4, 0, 9, 6, 2, 0, 4, 1, 3, 6, 6, 3, 0, 1, 4, 9, 9, 2, 7, 2, 2, 1, 0, 6, 4, 3, 1, 9, 5, 1, 5, 1, 0, 6, 3, 5, 0, 4, 9, 5, 5, 4, 6, 6, 7, 4, 3, 5, 2, 4, 4, 9, 4, 4, 8, 2, 4, 0, 1, 1, 1, 3, 1, 2, 1, 3, 0, 2, 4, 4, 4, 6, 2, 3, 4, 4, 6, 4, 1, 9, 1, 3, 4, 9, 1, 4, 1, 4, 2, 3, 9, 2, 3, 9, 5, 8, 3, 2, 0, 0, 2, 3, 6, 9, 4, 4, 9, 2, 3, 2, 2, 6, 1, 4, 2, 9, 9, 8, 3, 2, 1, 3, 0, 1, 3, 7, 4, 8, 3, 9, 5, 2, 1, 0, 4, 3, 4, 3, 1, 3, 4, 6, 4, 3, 7, 6, 3, 3, 2, 2, 6, 4, 4, 4, 2, 4, 4, 6, 1, 5, 1, 1, 3, 8, 7, 2, 3, 1, 3, 6, 1, 4, 3, 7, 4, 4, 9, 5, 9, 4, 3, 3, 6, 1, 2, 4, 0, 2, 7, 4, 7, 3, 4, 3, 4, 5, 2, 6, 9, 2, 2, 4, 1, 4, 3, 1, 7, 8, 4, 1, 2, 7, 4, 6, 6, 0, 1, 1, 3, 1, 3, 1, 6, 4, 1, 4, 7, 7, 3, 5, 4, 4, 4, 1, 1, 4, 0, 2, 9, 3, 4, 6, 3, 1, 3, 

##Model Training keeping the base_model as non trainable

In [12]:
from keras.callbacks import ModelCheckpoint
checkpoint = ModelCheckpoint("/model-{epoch:03d}-{accuracy:03f}-{val_accuracy:03f}.h5", verbose=1, monitor='val_loss',save_best_only=True, mode='auto') 

history=model.fit(x=train_gen,  epochs=3, verbose=2, validation_data=valid_gen, callbacks=[checkpoint],
               validation_steps=None,  shuffle=False,  initial_epoch=0)


Epoch 1/3

Epoch 1: val_loss improved from inf to 2.86214, saving model to /model-001-0.382027-0.411090.h5
77/77 - 918s - loss: 8.9739 - accuracy: 0.3820 - val_loss: 2.8621 - val_accuracy: 0.4111 - 918s/epoch - 12s/step
Epoch 2/3

Epoch 2: val_loss improved from 2.86214 to 2.31078, saving model to /model-002-0.448913-0.491396.h5
77/77 - 80s - loss: 2.5092 - accuracy: 0.4489 - val_loss: 2.3108 - val_accuracy: 0.4914 - 80s/epoch - 1s/step
Epoch 3/3

Epoch 3: val_loss improved from 2.31078 to 2.09154, saving model to /model-003-0.491178-0.478011.h5
77/77 - 78s - loss: 2.1719 - accuracy: 0.4912 - val_loss: 2.0915 - val_accuracy: 0.4780 - 78s/epoch - 1s/step


In [13]:
epochs = 3
base_model.trainable=True
fine_tune_epochs= 2
total_epochs=epochs + fine_tune_epochs

model.compile(Adamax(learning_rate=lr/10), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 resnet50 (Functional)       (None, 2048)              23587712  
                                                                 
 dense (Dense)               (None, 2048)              4196352   
                                                                 
 dense_1 (Dense)             (None, 10)                20490     
                                                                 
Total params: 27,804,554
Trainable params: 27,751,434
Non-trainable params: 53,120
_________________________________________________________________


In [14]:
history_fine = model.fit(x=train_gen,  epochs=total_epochs, verbose=2, validation_data=valid_gen,
               validation_steps=None,  shuffle=False,  initial_epoch=epochs)

Epoch 4/5
77/77 - 114s - loss: 2.4134 - accuracy: 0.3250 - val_loss: 2.0360 - val_accuracy: 0.4054 - 114s/epoch - 1s/step
Epoch 5/5
77/77 - 86s - loss: 1.9140 - accuracy: 0.4661 - val_loss: 1.8899 - val_accuracy: 0.4551 - 86s/epoch - 1s/step


In [15]:
model.evaluate( test_gen, batch_size=test_batch_size, verbose=1, steps=test_steps, return_dict=True)



{'accuracy': 0.4444444477558136, 'loss': 1.8917964696884155}

In [16]:
y_pred = model.predict(test_gen)
y_pred = np.argmax(y_pred, axis=1)
print(y_pred)


[1 1 3 3 5 3 3 1 1 1 0 1 1 3 3 1 3 4 1 4 4 2 1 3 3 0 4 1 3 3 0 0 3 3 1 3 3
 1 3 1 3 3 3 0 4 2 3 0 3 3 1 3 1 2 3 5 4 5 3 3 0 0 2 1 3 3 3 3 3 4 2 5 3 3
 3 1 9 0 1 1 1 1 3 1 4 3 1 3 3 0 5 1 3 0 4 1 3 6 4 3 0 1 3 5 1 5 3 0 2 3 0
 1 3 1 1 0 5 1 5 1 0 3 3 5 0 3 5 0 5 3 1 1 9 3 3 3 2 3 3 1 3 1 3 2 3 0 1 1
 1 3 1 4 1 3 0 3 3 3 9 1 3 3 3 3 5 3 3 3 1 3 3 1 1 1 1 3 3 3 3 5 3 9 0 3 1
 3 0 0 0 1 1 3 3 3 3 2 6 3 2 6 3 3 2 3 9 3 3 0 1 3 0 1 4 3 2 2 3 3 0 3 1 0
 3 1 4 3 3 3 3 6 3 3 3 1 1 3 2 3 0 1 3 1 0 3 3 4 1 1 1 1 3 3 4 3 1 1 3 1 1
 3 3 1 3 3 5 0 3 3 3 6 3 1 2 3 0 2 3 3 0 3 1 3 3 0 0 1 3 3 6 3 1 5 6 4 3 4
 3 1 3 4 2 3 6 0 1 1 3 1 3 1 1 4 1 3 3 9 3 5 3 3 1 1 1 3 0 0 3 6 3 3 3 1 3
 0 3 3 1 1 1 1 3 1 3 3 2 0 5 2 3 3 3 3 4 3 1 4 3 3 1 1 0 3 3 1 0 3 3 3 2 4
 3 1 3 1 3 3 3 3 3 9 3 3 3 2 3 3 0 1 1 3 3 5 3 1 3 3 1 3 3 3 1 3 3 1 0 3 1
 2 4 3 1 4 3 3 1 3 4 9 5 3 1 3 3 1 3 1 1 9 1 2 3 1 2 3 3 2 2 1 3 1 0 3 9 3
 3 4 9 1 3 3 3 0 1 0 0 3 3 3 9 3 3 3 2 3 1 3 3 1 2 1 3 3 1 3 1 5 3 0 5 3 3
 3 3 1 0 1 3 0 4 0 1 3 1 

In [17]:
y_true=test_gen.classes
print(y_true)

[1, 1, 3, 4, 5, 3, 4, 4, 1, 1, 0, 1, 6, 7, 3, 1, 3, 2, 1, 1, 9, 2, 1, 4, 4, 0, 2, 1, 4, 7, 0, 0, 7, 4, 1, 3, 4, 1, 3, 1, 4, 9, 1, 0, 4, 2, 2, 0, 3, 3, 1, 3, 3, 4, 4, 5, 6, 2, 3, 7, 0, 0, 2, 1, 4, 4, 3, 3, 9, 7, 2, 9, 4, 8, 3, 1, 7, 5, 4, 1, 4, 1, 0, 1, 3, 8, 6, 3, 4, 0, 9, 6, 2, 0, 4, 1, 3, 6, 6, 3, 0, 1, 4, 9, 9, 2, 7, 2, 2, 1, 0, 6, 4, 3, 1, 9, 5, 1, 5, 1, 0, 6, 3, 5, 0, 4, 9, 5, 5, 4, 6, 6, 7, 4, 3, 5, 2, 4, 4, 9, 4, 4, 8, 2, 4, 0, 1, 1, 1, 3, 1, 2, 1, 3, 0, 2, 4, 4, 4, 6, 2, 3, 4, 4, 6, 4, 1, 9, 1, 3, 4, 9, 1, 4, 1, 4, 2, 3, 9, 2, 3, 9, 5, 8, 3, 2, 0, 0, 2, 3, 6, 9, 4, 4, 9, 2, 3, 2, 2, 6, 1, 4, 2, 9, 9, 8, 3, 2, 1, 3, 0, 1, 3, 7, 4, 8, 3, 9, 5, 2, 1, 0, 4, 3, 4, 3, 1, 3, 4, 6, 4, 3, 7, 6, 3, 3, 2, 2, 6, 4, 4, 4, 2, 4, 4, 6, 1, 5, 1, 1, 3, 8, 7, 2, 3, 1, 3, 6, 1, 4, 3, 7, 4, 4, 9, 5, 9, 4, 3, 3, 6, 1, 2, 4, 0, 2, 7, 4, 7, 3, 4, 3, 4, 5, 2, 6, 9, 2, 2, 4, 1, 4, 3, 1, 7, 8, 4, 1, 2, 7, 4, 6, 6, 0, 1, 1, 3, 1, 3, 1, 6, 4, 1, 4, 7, 7, 3, 5, 4, 4, 4, 1, 1, 4, 0, 2, 9, 3, 4, 6, 3, 1, 3, 

In [19]:
from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.62      0.94      0.75        36
           1       0.59      0.86      0.70        87
           2       0.88      0.41      0.55        69
           3       0.30      0.81      0.44        86
           4       0.14      0.04      0.06       101
           5       0.46      0.48      0.47        23
           6       0.44      0.12      0.20        32
           7       0.00      0.00      0.00        28
           8       0.00      0.00      0.00        18
           9       0.50      0.14      0.22        42

    accuracy                           0.44       522
   macro avg       0.39      0.38      0.34       522
weighted avg       0.42      0.44      0.38       522



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
