<a href="https://colab.research.google.com/github/ataSeyfi/comp4902/blob/main/vgg19_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import math
import itertools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import confusion_matrix, classification_report

from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [None]:
# Target image resolution; 224x224 matches the input_shape the VGG19 base is built with.
IMAGE_SIZE = (224, 224)

# Class labels; they match the class folder names found in each dataset split.
CLASSES = ['COVID-19', 'Non-COVID', 'Normal']
NUM_CLASSES = len(CLASSES)


In [None]:
# Mount Google Drive so the dataset stored under 'My Drive' is reachable from the Colab VM.
from google.colab import drive
drive.mount('/content/drive')
# List the Drive root as a quick sanity check that the mount worked.
!ls '/content/drive/My Drive'
# Make the Drive root the working directory; relative paths used later
# (e.g. the saved model file) resolve against it.
%cd '/content/drive/My Drive'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
 20220327_173301.jpg  'Colab Notebooks'   dataset   digitalTransformation.gslides   Quiz6.zip
/content/drive/My Drive


In [None]:

# All three splits live under the same (doubly nested) dataset folder on Drive,
# so the common prefix is kept in one place.
DATASET_ROOT = '/content/drive/My Drive/dataset/Infection Segmentation Data/Infection Segmentation Data'

# Trailing slashes are kept so the resulting values are unchanged.
train_dir = f'{DATASET_ROOT}/Train/'
test_dir = f'{DATASET_ROOT}/Test/'
val_dir = f'{DATASET_ROOT}/Val/'




In [None]:
train_dir

'/content/drive/My Drive/dataset/Infection Segmentation Data/Infection Segmentation Data/Train/'

In [None]:
def create_dataframe_from_directory(data_dir):
    """Index the images of one dataset split into a DataFrame.

    Expects the layout ``<data_dir>/<class folder>/images/<image file>``.

    Args:
        data_dir: Path of the split directory to scan.

    Returns:
        pd.DataFrame with one row per image and two columns: ``filename``
        (path relative to ``data_dir``) and ``label`` (the class folder name).
        Rows are ordered by class folder name, then by file name.
    """
    filenames, labels = [], []
    # os.listdir returns entries in arbitrary order; sorting makes the frame
    # reproducible across runs and file systems.
    for folder_name in sorted(os.listdir(data_dir)):
        folder_path = os.path.join(data_dir, folder_name, 'images')
        # Skip stray entries (plain files, folders without an 'images'
        # sub-directory) instead of crashing on them.
        if not os.path.isdir(folder_path):
            continue
        class_files = [os.path.join(folder_name, 'images', filename)
                       for filename in sorted(os.listdir(folder_path))]
        filenames.extend(class_files)
        labels.extend([folder_name] * len(class_files))
    return pd.DataFrame({
        'filename': filenames,
        'label': labels
    })


# Index every split with the same helper, in train / val / test order.
train_df, val_df, test_df = (
    create_dataframe_from_directory(split_dir)
    for split_dir in (train_dir, val_dir, test_dir)
)

In [None]:
train_df

Unnamed: 0,filename,label
0,COVID-19/images/covid_4149.png,COVID-19
1,COVID-19/images/covid_4037.png,COVID-19
2,COVID-19/images/covid_4048.png,COVID-19
3,COVID-19/images/covid_3451.png,COVID-19
4,COVID-19/images/covid_3683.png,COVID-19
...,...,...
3723,Normal/images/Normal (2082).png,Normal
3724,Normal/images/Normal (2097).png,Normal
3725,Normal/images/Normal (2093).png,Normal
3726,Normal/images/Normal (2086).png,Normal


In [None]:
NUM_TRAINING_IMAGES = len(train_df.index)
NUM_VALIDATION_IMAGES = len(val_df.index)
NUM_TEST_IMAGES = len(test_df.index)


def label_counts(df):
    """Return the number of rows per label in ``df`` as a Series.

    Labels listed in CLASSES come first, in CLASSES order, so every split is
    reported the same way regardless of how ties in the counts are ordered.
    Any label that is not in CLASSES is appended afterwards rather than
    dropped, so unexpected folders remain visible.
    """
    # value_counts() is indexed by the label strings themselves, so no
    # integer-to-name renaming is needed.
    counts = df['label'].value_counts()
    known = [name for name in CLASSES if name in counts.index]
    unexpected = [name for name in counts.index if name not in CLASSES]
    return counts.reindex(known + unexpected)


def print_split_summary(title, total, counts):
    """Print a split's total image count followed by its per-label counts."""
    print(title, total)
    print(", ".join(f"{label}: {count}" for label, count in counts.items()))


train_label_counts = label_counts(train_df)
val_label_counts = label_counts(val_df)
test_label_counts = label_counts(test_df)

print_split_summary("Training images:", NUM_TRAINING_IMAGES, train_label_counts)
print()
print_split_summary("Validation images:", NUM_VALIDATION_IMAGES, val_label_counts)
print()
print_split_summary("Test images:", NUM_TEST_IMAGES, test_label_counts)

Training images: 3728
COVID-19: 1864, Non-COVID: 932, Normal: 932

Validation images: 932
COVID-19: 466, Non-COVID: 233, Normal: 233

Test images: 1166
COVID-19: 583, Non-COVID: 292, Normal: 291


In [None]:
NUM_TRAINING_IMAGES

3728

In [None]:
NUM_VALIDATION_IMAGES

932

In [None]:
NUM_TEST_IMAGES

1166

In [None]:


# Base paths of the three splits. They alias the split directories defined
# earlier so the long Drive path is not maintained in two places.
base_path_train = train_dir
base_path_test = test_dir
base_path_val = val_dir


def load_image(base_path, file_path, target_size=IMAGE_SIZE):
    """Load one image from disk and prepare it as VGG19 input.

    Args:
        base_path: Directory of the split the image belongs to.
        file_path: Image path relative to ``base_path``.
        target_size: Size the image is resized to while loading; defaults to
            IMAGE_SIZE instead of a literal repeated inside the function.

    Returns:
        The image as an array after VGG19's ``preprocess_input`` was applied.
    """
    img = load_img(os.path.join(base_path, file_path), target_size=target_size)
    img = img_to_array(img)
    return preprocess_input(img)


# Load images: one preprocessed array per row, stored in a new 'image' column.
train_df['image'] = train_df['filename'].apply(lambda x: load_image(base_path_train, x))
val_df['image'] = val_df['filename'].apply(lambda x: load_image(base_path_val, x))
test_df['image'] = test_df['filename'].apply(lambda x: load_image(base_path_test, x))


In [None]:
train_df.head(5)

Unnamed: 0,filename,label,image
0,COVID-19/images/covid_4149.png,COVID-19,"[[[-101.939, -114.779, -121.68], [-101.939, -1..."
1,COVID-19/images/covid_4037.png,COVID-19,"[[[-8.939003, -21.779, -28.68], [-24.939003, -..."
2,COVID-19/images/covid_4048.png,COVID-19,"[[[40.060997, 27.221, 20.32], [40.060997, 27.2..."
3,COVID-19/images/covid_3451.png,COVID-19,"[[[113.061, 100.221, 93.32], [109.061, 96.221,..."
4,COVID-19/images/covid_3683.png,COVID-19,"[[[117.061, 104.221, 97.32], [114.061, 101.221..."


In [None]:

def one_hot_labels(labels, classes=CLASSES):
    """One-hot encode a label Series with one column per entry of ``classes``.

    Pinning the categories gives every split the same columns in the same
    order even if a split contains no image of some class, and the explicit
    float32 dtype keeps the targets independent of the pandas version's
    default dummy dtype.

    Raises:
        ValueError: if ``labels`` contains a value that is not in ``classes``
            (it would otherwise be encoded as an all-zero row).
    """
    unknown = set(labels.dropna().unique()) - set(classes)
    if unknown:
        raise ValueError(f"Labels not present in classes: {sorted(unknown)}")
    categorical = labels.astype(pd.CategoricalDtype(categories=list(classes)))
    return pd.get_dummies(categorical, dtype='float32')


# Stack the per-row image arrays into one array per split and build the
# matching one-hot targets (the splits themselves come from the dataset folders).
X_train = np.stack(train_df['image'].to_numpy())
y_train = one_hot_labels(train_df['label'])
X_val = np.stack(val_df['image'].to_numpy())
y_val = one_hot_labels(val_df['label'])
X_test = np.stack(test_df['image'].to_numpy())
y_test = one_hot_labels(test_df['label'])

In [None]:
# Convolutional base: VGG19 with ImageNet weights and without its classifier top.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze every layer of the VGG19 base so only the new head below is trainable.
for layer in base_model.layers:
    layer.trainable = False

# Custom head: global average pooling -> 1024-unit ReLU layer -> softmax with
# one output per one-hot label column.
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(1024, activation='relu')(x)
predictions = Dense(len(y_train.columns), activation='softmax')(x)

# Final model: VGG19 input through to the new softmax output.
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Train the model. The History object returned by fit() is kept in `history`
# (instead of being discarded as the cell's output) so the per-epoch metrics
# can be inspected or plotted afterwards.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=32,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x794a407c1120>

In [None]:
# Score the trained model on the test split; evaluate() yields the loss
# followed by the compiled metric (accuracy).
test_metrics = model.evaluate(X_test, y_test)
test_loss, test_accuracy = test_metrics
print("Test accuracy:", test_accuracy)

Test accuracy: 0.9202401638031006


In [None]:
# Persist the trained model. The path is relative, so the file is written to
# the current working directory (the earlier `%cd` cell points it at the Drive root).
model.save('model_v1.keras')

In [None]:
# NOTE(review): disabled experiment kept as a bare string literal, so nothing
# here executes; if re-enabled it would rebind `model` to the stock VGG19
# classifier and replace the fine-tuned model built above.
'''model = VGG19(weights='imagenet', include_top=True, input_shape=(224, 224, 3))'''

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels.h5


In [None]:
# NOTE(review): disabled call kept as a bare string literal; the summary in
# the saved output presumably comes from an earlier run when this was live
# code -- confirm, then clear the stale output or delete the cell.
'''model.summary()'''

Model: "vgg19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     