<a href="https://colab.research.google.com/github/R4HUL-ROY/Multimodal_Results/blob/main/VGG16_Stroring_result_in_CSV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import pathlib
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras import regularizers
from tensorflow.keras.models import Model, load_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from keras.callbacks import ModelCheckpoint

In [2]:
data_root = pathlib.Path('/content/drive/MyDrive/Tobacco3482-jpg/')

print(data_root)
for item in data_root.iterdir():
  print(item)

/content/drive/MyDrive/Tobacco3482-jpg
/content/drive/MyDrive/Tobacco3482-jpg/Tobacco3482-jpg
/content/drive/MyDrive/Tobacco3482-jpg/ADVE
/content/drive/MyDrive/Tobacco3482-jpg/Form
/content/drive/MyDrive/Tobacco3482-jpg/Note
/content/drive/MyDrive/Tobacco3482-jpg/Email
/content/drive/MyDrive/Tobacco3482-jpg/News
/content/drive/MyDrive/Tobacco3482-jpg/Resume
/content/drive/MyDrive/Tobacco3482-jpg/Scientific
/content/drive/MyDrive/Tobacco3482-jpg/Memo
/content/drive/MyDrive/Tobacco3482-jpg/Report
/content/drive/MyDrive/Tobacco3482-jpg/Letter


In [3]:
def get_file_paths_and_labels(data_root):
     img_paths = [str(path) for path in data_root.glob('*/*.jpg')]
     labels = [p.split("/")[-2] for p in img_paths]
     return img_paths, labels

img_paths, labels = get_file_paths_and_labels(data_root)
# print(img_paths)
# print(labels)
print(len(img_paths))
print(len(labels))

3482
3482


In [4]:
df = pd.DataFrame(list(zip(img_paths, labels)),
               columns =['image_path', 'data_label'])
df.head()

Unnamed: 0,image_path,data_label
0,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/00...,ADVE
1,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/00...,ADVE
2,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/00...,ADVE
3,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/00...,ADVE
4,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/03...,ADVE


In [5]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
df['data_label']= le.fit_transform(df['data_label'])
df.head()

Unnamed: 0,image_path,data_label
0,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/00...,0
1,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/00...,0
2,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/00...,0
3,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/00...,0
4,/content/drive/MyDrive/Tobacco3482-jpg/ADVE/03...,0


In [14]:
df['data_label'] = [str(i) for i in df['data_label']]

In [15]:
labels_dict = {}
for idx, this_path in enumerate(df['image_path']):
    labels_dict[this_path.split("/")[-2]] = df['data_label'][idx]
    
print(labels_dict)

{'ADVE': '0', 'Form': '2', 'Note': '6', 'Email': '1', 'News': '5', 'Resume': '8', 'Scientific': '9', 'Memo': '4', 'Report': '7', 'Letter': '3'}


In [16]:
balance=df['data_label'].value_counts()
print (balance)

4    620
1    599
3    567
2    431
7    265
9    261
0    230
6    201
5    188
8    120
Name: data_label, dtype: int64


In [17]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import random
random.seed(56)

In [18]:
batch_size=32
epochs = 2
fine_tune_epochs = 2
def scalar(x):
    return x/127.5-1

d = {
    "precision" : [],
     "recall" : [],
     "f1_score" : [],
     "accuracy" : [],
     "seed_value" : []
}
seed = []


for i in range(2):
    print('#Run: ', i+1)
    sd = random.randint(0, 100)
    seed.append(sd)

    train_df, dummy_df=train_test_split(df, test_size=0.3, shuffle=True, random_state = sd)
    test_df, valid_df= train_test_split(dummy_df, test_size=0.5, shuffle=True, random_state = sd)
    length=len(test_df)

    ######## Keras datagenerator #########
    trgen=tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=scalar)
    train_gen=trgen.flow_from_dataframe(train_df, x_col='image_path', y_col='data_label', target_size=(224,224), 
                                        class_mode='categorical',batch_size=batch_size, shuffle=True, seed=sd)

    tvgen=tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=scalar)
    valid_gen=tvgen.flow_from_dataframe(valid_df, x_col='image_path', y_col='data_label', target_size=(224,224), 
                                        class_mode='categorical',batch_size=batch_size, shuffle=False, seed=sd)

    # determine test generator batch size and steps to go through the test set exactly once for predictions
    test_batch_size=sorted([int(length/n) for n in range(1,length+1) if length % n ==0 and length/n<=batch_size],reverse=True)[0]  
    test_steps=int(length/test_batch_size)

    ttgen=tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=scalar)
    test_gen=ttgen.flow_from_dataframe(test_df, x_col='image_path', y_col='data_label', target_size=(224,224), 
                                    class_mode='categorical',batch_size=test_batch_size, shuffle=False, seed=sd)

    test_labels=test_gen.labels

    img_shape=(224,224,3)
    class_count = 10
    dropout=0.2
    lr= 0.001

    base_model= VGG16(include_top=False,
                   input_shape=(224,224,3),
                   pooling='avg',classes=10,
                   weights='imagenet')

    base_model.trainable= False

    inputs = keras.Input(shape=img_shape)
    x = base_model(inputs, training=False)

    # x=tf.keras.layers.Dropout(rate=dropout, seed=seed_value)(x)
    x =tf.keras.layers.Dense(512, kernel_regularizer = regularizers.l2(l = 0.016),activity_regularizer=regularizers.l1(0.006), 
                            bias_regularizer=regularizers.l1(0.006) ,activation='relu', kernel_initializer= tf.keras.initializers.GlorotUniform(seed=sd))(x)
    # x=tf.keras.layers.Dropout(rate=dropout, seed=seed_value)(x)
    outputs=tf.keras.layers.Dense(class_count, activation='softmax',kernel_initializer=tf.keras.initializers.GlorotUniform(seed=sd))(x)
    model=Model(inputs, outputs)

    model.compile(Adamax(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy']) 

    checkpoint = ModelCheckpoint("/model-{epoch:03d}-{accuracy:03f}-{val_accuracy:03f}.h5", verbose=1, monitor='val_loss',save_best_only=True, mode='auto') 

    history=model.fit(x=train_gen,  epochs=epochs, verbose=2, validation_data=valid_gen, callbacks=[checkpoint],
                validation_steps=None,  shuffle=False,  initial_epoch=0)
    

    ####### Fine-Tune ######
    base_model.trainable=True
    tune_epochs = fine_tune_epochs
    total_epochs= epochs + tune_epochs

    model.compile(Adamax(learning_rate=lr/100), loss='categorical_crossentropy', metrics=['accuracy'])
    
    history_fine = model.fit(x=train_gen,  epochs=total_epochs, verbose=2, validation_data=valid_gen,
               validation_steps=None,  shuffle=False,  initial_epoch=epochs)
    

    y_pred = model.predict(test_gen)
    y_pred = np.argmax(y_pred, axis=1)

    y_true = test_gen.classes

    d["precision"].append(float(precision_score(y_true, y_pred, average='weighted')))
    d["recall"].append(float(recall_score(y_true, y_pred, average='weighted')))
    d["f1_score"].append(float(f1_score(y_true, y_pred, average='weighted')))
    d['accuracy'].append(float(accuracy_score(y_true, y_pred)))

#Run:  1
Found 2437 validated image filenames belonging to 10 classes.
Found 523 validated image filenames belonging to 10 classes.
Found 522 validated image filenames belonging to 10 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/2

Epoch 1: val_loss improved from inf to 2.97558, saving model to /model-001-0.360279-0.481836.h5
77/77 - 964s - loss: 5.4579 - accuracy: 0.3603 - val_loss: 2.9756 - val_accuracy: 0.4818 - 964s/epoch - 13s/step
Epoch 2/2

Epoch 2: val_loss improved from 2.97558 to 2.22831, saving model to /model-002-0.480098-0.508604.h5
77/77 - 77s - loss: 2.5034 - accuracy: 0.4801 - val_loss: 2.2283 - val_accuracy: 0.5086 - 77s/epoch - 1s/step
Epoch 3/4
77/77 - 91s - loss: 2.0713 - accuracy: 0.5433 - val_loss: 1.9890 - val_accuracy: 0.5851 - 91s/epoch - 1s/step
Epoch 4/4
77/77 - 84s - loss: 1.9364 - accuracy: 0.6016 - val_loss: 1.9044 - val_accuracy: 0.6310 - 84s/epo

  _warn_prf(average, modifier, msg_start, len(result))


#Run:  2
Found 2437 validated image filenames belonging to 10 classes.
Found 523 validated image filenames belonging to 10 classes.
Found 522 validated image filenames belonging to 10 classes.
Epoch 1/2

Epoch 1: val_loss improved from inf to 2.96767, saving model to /model-001-0.374641-0.500956.h5
77/77 - 77s - loss: 5.4411 - accuracy: 0.3746 - val_loss: 2.9677 - val_accuracy: 0.5010 - 77s/epoch - 998ms/step
Epoch 2/2

Epoch 2: val_loss improved from 2.96767 to 2.18337, saving model to /model-002-0.516619-0.518164.h5
77/77 - 74s - loss: 2.4966 - accuracy: 0.5166 - val_loss: 2.1834 - val_accuracy: 0.5182 - 74s/epoch - 966ms/step
Epoch 3/4
77/77 - 85s - loss: 2.0367 - accuracy: 0.5659 - val_loss: 1.9172 - val_accuracy: 0.6214 - 85s/epoch - 1s/step
Epoch 4/4
77/77 - 84s - loss: 1.9062 - accuracy: 0.6262 - val_loss: 1.8417 - val_accuracy: 0.6272 - 84s/epoch - 1s/step


  _warn_prf(average, modifier, msg_start, len(result))


In [19]:
from google.colab import files
print(seed)
d['seed_value'] = seed
result = pd.DataFrame(d, index = ["run-"+str(i+1) for i in range(2)])
print(result)
result.to_csv("VGG16_result.csv")

# files.download('VGG16_result.csv')

[71, 1]
       precision    recall  f1_score  accuracy  seed_value
run-1   0.576105  0.616858  0.564930  0.616858          71
run-2   0.537816  0.593870  0.529264  0.593870           1
