# Import Data

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import os

# file_path = '/content/drive/MyDrive/NNDL_HW2/HW2_Dataset/Train/'

file_path  = '/content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Train/'
file1_path = '/content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Train/Cats/'
file2_path = '/content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Train/Dogs/'

test_path = '/content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Test/'


def _regular_files(directory):
    """Return the names of the entries directly inside ``directory`` that are files.

    Sub-directories are skipped, so a folder that only contains class
    sub-folders yields an empty list.
    """
    names = []
    for entry in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, entry)):
            names.append(entry)
    return names


# Files sitting directly in the training root (class sub-folders are not counted).
all_files = _regular_files(file_path)

# Per-class training files.
files1 = _regular_files(file1_path)
files2 = _regular_files(file2_path)

# Files sitting directly in the test root.
test_files = _regular_files(test_path)


# Report the counts found on disk before any augmentation is run.
print(f'All Files Count in {file_path} : {len(all_files)}')
print('--------------------------------------------------')
print(f"Number of files in {file1_path} Before Augmentation : {len(files1)}")
print(f"Number of files in {file2_path} Before Augmentation : {len(files2)}")
print('-------------------------------------------------')
print(f'Number of files for testing model in {test_path} : {len(test_files)}')

Mounted at /content/drive
All Files Count in /content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Train/ : 0
--------------------------------------------------
Number of files in /content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Train/Cats/ Before Augmentation : 0
Number of files in /content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Train/Dogs/ Before Augmentation : 0
-------------------------------------------------
Number of files for testing model in /content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Test/ : 0


# Preprocessing images

In [None]:
from PIL import Image
import os
import random

# Input locations on the mounted Drive: the training root and its two
# class sub-folders (named Cats and Dogs), holding the pre-augmentation images.
file_path  = '/content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Train/'
file1_path = '/content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Train/Cats'
file2_path = '/content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Train/Dogs'

# Test-set root (not read by the augmentation code in this cell).
test_path = '/content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Test/'

# Output directory that receives every augmented image.
save_path = '/content/drive/MyDrive/NNDL_HW2_After_Augment/'

# Define the augmentation parameters
# rotation_angle: the random rotation is drawn from [-rotation_angle, rotation_angle].
rotation_angle = 30
# zoom_range: (min, max) bounds of the random factor applied to image width and height.
zoom_range = (0.75, 1.25)

# Function to perform image augmentation
def augment_image(image_path, save_path):
    """Write three augmented variants of one image into ``save_path``.

    The variants are saved next to each other, named after the source file
    with a prefix: ``flipped_`` (horizontal mirror), ``rotated_`` (random
    rotation within +/- ``rotation_angle``) and ``scaled_`` (resized by a
    random factor drawn from ``zoom_range``).

    Args:
        image_path: Path of the source image file.
        save_path: Existing directory that receives the three output files.

    Reads the module-level ``rotation_angle`` and ``zoom_range`` settings.
    """
    base_name = os.path.basename(image_path)

    # Use a context manager so the source file handle is released after each
    # image; otherwise handles pile up when this runs over a whole folder.
    with Image.open(image_path) as image:
        # Horizontal flipping
        flipped_image = image.transpose(Image.FLIP_LEFT_RIGHT)
        flipped_image.save(os.path.join(save_path, f"flipped_{base_name}"))

        # Rotation
        angle = random.randint(-rotation_angle, rotation_angle)
        rotated_image = image.rotate(angle)
        rotated_image.save(os.path.join(save_path, f"rotated_{base_name}"))

        # Scaling or zooming
        zoom_factor = random.uniform(zoom_range[0], zoom_range[1])
        # Clamp to at least one pixel: int() truncation can hit 0 on very
        # small images, and a zero-sized resize is invalid.
        new_size = (
            max(1, int(image.width * zoom_factor)),
            max(1, int(image.height * zoom_factor)),
        )
        scaled_image = image.resize(new_size)
        scaled_image.save(os.path.join(save_path, f"scaled_{base_name}"))


# Make sure the output directory exists before any image is written to it.
os.makedirs(save_path, exist_ok=True)

# Extensions treated as images; compared case-insensitively so files such as
# "IMG_1.JPG" are not silently skipped.
image_extensions = (".jpg", ".png")

# Iterate over all image files of both class directories.
# NOTE(review): every variant lands in the same flat `save_path`, so the class
# of an augmented image is only recoverable from its file name — confirm this
# matches how the augmented set is consumed later.
for class_dir in (file1_path, file2_path):
    for file_name in os.listdir(class_dir):
        if file_name.lower().endswith(image_extensions):
            # Use a dedicated name here: reusing `file_path` would overwrite
            # the training-root setting defined at the top of the cell.
            image_file = os.path.join(class_dir, file_name)
            augment_image(image_file, save_path)

# Re-count the source folders (augmentation writes elsewhere, so these are
# the original images) and the augmented output folder.
files1 = [f for f in os.listdir(file1_path) if os.path.isfile(os.path.join(file1_path, f))]
files2 = [f for f in os.listdir(file2_path) if os.path.isfile(os.path.join(file2_path, f))]

augmented_files = [f for f in os.listdir(save_path) if os.path.isfile(os.path.join(save_path , f))]

print(f"Number of files in {file1_path} After Augmentation : {len(files1)}")
print(f"Number of files in {file2_path} After Augmentation : {len(files2)}")
print('-----------------------------------------------')

print(f"Number of files in {save_path} After Augmentation : {len(augmented_files)}")

Number of files in /content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Train/Cats After Augmentation : 350
Number of files in /content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Train/Dogs After Augmentation : 352
-----------------------------------------------
Number of files in /content/drive/MyDrive/NNDL_HW2_After_Augment/ After Augmentation : 2106


# ResNet Model

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define hyperparameters
initial_lr = 0.001
lr_decay_rate = 0.1
momentum = 0.9
batch_size = 32
epochs = 10

# The ImageNet weights were trained on inputs transformed by the ResNet50
# `preprocess_input` function, not on pixels rescaled to [0, 1]. Feeding
# 1/255-rescaled images to the frozen backbone gives it inputs it was never
# trained on.
resnet_preprocess = tf.keras.applications.resnet50.preprocess_input

# Load pre-trained ResNet50 model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Replace the FC layers with new ones for binary classification (dog vs cat)
x = Flatten()(base_model.output)
output = Dense(2, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

# Freeze all layers except the new FC layers
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
opt = SGD(learning_rate=initial_lr, momentum=momentum)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Data augmentation for training images
train_datagen = ImageDataGenerator(preprocessing_function=resnet_preprocess,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   validation_split=0.2)  # Splitting the dataset into training and validation

# Validation images get the same preprocessing and the same split fraction
# but no random augmentation, so validation metrics reflect unmodified images.
validation_datagen = ImageDataGenerator(preprocessing_function=resnet_preprocess,
                                        validation_split=0.2)

all_train_path  = '/content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Train/'

train_generator = train_datagen.flow_from_directory(all_train_path,
                                                    target_size=(224, 224),
                                                    batch_size=batch_size,
                                                    class_mode='categorical',
                                                    subset='training')  # Training set

validation_generator = validation_datagen.flow_from_directory(all_train_path,
                                                              target_size=(224, 224),
                                                              batch_size=batch_size,
                                                              class_mode='categorical',
                                                              subset='validation')  # Validation set

# Fail early with a readable message instead of an opaque error from inside
# `fit` when the Drive folder is missing or empty.
if train_generator.samples == 0 or validation_generator.samples == 0:
    raise ValueError(f"No training/validation images found under {all_train_path}")

# Stage 1: train only the new classification head on top of the frozen backbone.
model.fit(train_generator, epochs=epochs, validation_data=validation_generator)

# Unfreeze the last CONV block
# In Keras' ResNet50 the layers of the final stage are named "conv5_...".
# BatchNormalization layers stay frozen so their ImageNet statistics are not
# disturbed by the small dataset.
for layer in base_model.layers:
    if layer.name.startswith('conv5_') and not isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = True

# Recompile the model to apply the changes
# A fresh optimizer is used because the set of trainable variables changed.
# The learning rate is reduced by `lr_decay_rate` for fine-tuning
# (assumed intent of that otherwise unused hyperparameter — confirm).
fine_tune_opt = SGD(learning_rate=initial_lr * lr_decay_rate, momentum=momentum)
model.compile(optimizer=fine_tune_opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Stage 2: fine-tune the unfrozen block together with the head; without this
# second fit the unfreezing above would have no effect on the evaluation.
model.fit(train_generator, epochs=epochs, validation_data=validation_generator)

# all_test_path = '/content/drive/MyDrive/NNDL_HW2/HW2_Dataset/Test/'
all_test_path = '/content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Test/'

# Test data generator for evaluation (same preprocessing as training, no augmentation)
test_datagen = ImageDataGenerator(preprocessing_function=resnet_preprocess)
test_generator = test_datagen.flow_from_directory(all_test_path ,
                                                  target_size=(224, 224),
                                                  batch_size=batch_size,
                                                  class_mode='categorical')

# Evaluate the model on the test set
model.evaluate(test_generator)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Found 562 images belonging to 2 classes.
Found 140 images belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Found 100 images belonging to 2 classes.


[21.56424331665039, 0.5]

# VGG16

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define hyperparameters
# NOTE(review): a learning rate of 0.1 with momentum is very large for
# training a head on frozen VGG16 features — confirm it is intended.
initial_lr = 0.1
lr_decay_rate = 0.002
momentum = 0.9
batch_size = 10
epochs = 50

# The ImageNet weights were trained on inputs transformed by the VGG16
# `preprocess_input` function, not on pixels rescaled to [0, 1].
vgg_preprocess = tf.keras.applications.vgg16.preprocess_input

# Load pre-trained VGG16 model
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Replace the FC layers with new ones for binary classification (dog vs cat)
x = Flatten()(base_model.output)
output = Dense(2, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

# Freeze all layers except the new FC layers
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
opt = SGD(learning_rate=initial_lr, momentum=momentum)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Data augmentation for training images
train_datagen = ImageDataGenerator(preprocessing_function=vgg_preprocess,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   validation_split=0.2)  # Splitting the dataset into training and validation

# Validation images get the same preprocessing and the same split fraction
# but no random augmentation, so validation metrics reflect unmodified images.
validation_datagen = ImageDataGenerator(preprocessing_function=vgg_preprocess,
                                        validation_split=0.2)


all_train_path  = '/content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Train/'

train_generator = train_datagen.flow_from_directory(all_train_path,
                                                    target_size=(224, 224),
                                                    batch_size=batch_size,
                                                    class_mode='categorical',
                                                    subset='training')  # Training set

validation_generator = validation_datagen.flow_from_directory(all_train_path,
                                                              target_size=(224, 224),
                                                              batch_size=batch_size,
                                                              class_mode='categorical',
                                                              subset='validation')  # Validation set

# Fail early with a readable message instead of
# "Asked to retrieve element 0, but the Sequence has length 0" from inside
# `fit` when the Drive folder is missing or empty.
if train_generator.samples == 0 or validation_generator.samples == 0:
    raise ValueError(f"No training/validation images found under {all_train_path}")

# Stage 1: train only the new classification head on top of the frozen backbone.
model.fit(train_generator, epochs=epochs, validation_data=validation_generator)

# Unfreeze the last CONV block
# In Keras' VGG16 the final convolutional block's layers are named "block5_...";
# everything before it stays frozen.
for layer in base_model.layers:
    if layer.name.startswith('block5'):
        layer.trainable = True

# Recompile the model to apply the changes
# A fresh optimizer is used because the set of trainable variables changed.
# The learning rate is reduced by `lr_decay_rate` for fine-tuning
# (assumed intent of that otherwise unused hyperparameter — confirm).
fine_tune_opt = SGD(learning_rate=initial_lr * lr_decay_rate, momentum=momentum)
model.compile(optimizer=fine_tune_opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Stage 2: fine-tune the unfrozen block together with the head; without this
# second fit the unfreezing above would have no effect on the evaluation.
model.fit(train_generator, epochs=epochs, validation_data=validation_generator)

all_test_path = '/content/drive/MyDrive/NNDL_HW2_Before_Augmentation/HW2_Dataset/Test/'

# Test data generator for evaluation (same preprocessing as training, no augmentation)
test_datagen = ImageDataGenerator(preprocessing_function=vgg_preprocess)
test_generator = test_datagen.flow_from_directory(all_test_path ,
                                                  target_size=(224, 224),
                                                  batch_size=batch_size,
                                                  class_mode='categorical')

# Evaluate the model on the test set
model.evaluate(test_generator)

Found 0 images belonging to 2 classes.
Found 0 images belonging to 2 classes.


ValueError: Asked to retrieve element 0, but the Sequence has length 0