In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tqdm
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import random
import torch.optim as optim
import torch.nn.functional as F
from shutil import copyfile
from torchvision import datasets
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
import os

In [None]:
def _pick_device():
    """Return the preferred torch device: CUDA first, then Apple MPS, otherwise CPU."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    if torch.backends.mps.is_available():
        return torch.device('mps')
    return torch.device('cpu')


# Device shared by the model and every batch moved with .to(device)
device = _pick_device()

In [None]:
# Read-only source folders of the raw images, one folder per class (Cat / Dog).
# These are passed as `main_dir` to split_data, which copies them into the working tree.
CAT_DIR = '/kaggle/input/microsoft-catsvsdogs-dataset/PetImages/Cat'
DOG_DIR = '/kaggle/input/microsoft-catsvsdogs-dataset/PetImages/Dog'

In [None]:
# Build the training/validation tree with one sub-folder per class.
# os.makedirs creates the missing parents, and exist_ok=True makes re-running this cell
# a no-op instead of aborting at the first directory that already exists.
_BASE_DIR = '/kaggle/working/tmp/cats-v-dogs'
for _split in ('training', 'validation'):
    for _label in ('cats', 'dogs'):
        _target = os.path.join(_BASE_DIR, _split, _label)
        try:
            os.makedirs(_target, exist_ok=True)
        except OSError as err:
            # Keep the cell best-effort, but say which directory failed and why
            print(f'Error failed to make directory {_target}: {err}')

In [None]:
def split_data(main_dir, training_dir, validation_dir, split_size):
    """
    Randomly splits the non-empty files of a directory into training and validation copies

    Args:
    main_dir (string):  path containing the images
    training_dir (string):  existing directory that receives the training files
    validation_dir (string):  existing directory that receives the validation files
    split_size (float): fraction (0.0 - 1.0) of the files copied to training_dir;
        the remaining files are copied to validation_dir

    Raises:
    ValueError: if split_size is outside the range [0, 1]
    """
    if not 0.0 <= split_size <= 1.0:
        raise ValueError(f"split_size must be between 0 and 1, got {split_size}")

    # Sorted listing so that a seeded shuffle produces the same split on every run.
    # Zero-byte entries and non-files are skipped: they cannot be copied or used as images.
    files = [
        name
        for name in sorted(os.listdir(main_dir))
        if os.path.isfile(os.path.join(main_dir, name))
        and os.path.getsize(os.path.join(main_dir, name)) > 0
    ]

    shuffled_files = random.sample(files, len(files))  # shuffled copy of the file names
    split = int(split_size * len(shuffled_files))  # int() truncates to a whole number of files
    train = shuffled_files[:split]
    validation = shuffled_files[split:]

    for element in train:
        copyfile(os.path.join(main_dir, element), os.path.join(training_dir, element))

    for element in validation:
        copyfile(os.path.join(main_dir, element), os.path.join(validation_dir, element))
        
# Seed the shuffle before splitting: with an unseeded shuffle every re-run of this cell
# copies a different split into the same folders, so validation images end up in the
# training folder as well. (Repeatability assumes the directory listing order is stable.)
random.seed(42)
split_data(CAT_DIR, '/kaggle/working/tmp/cats-v-dogs/training/cats','/kaggle/working/tmp/cats-v-dogs/validation/cats', 0.9)
split_data(DOG_DIR, '/kaggle/working/tmp/cats-v-dogs/training/dogs', '/kaggle/working/tmp/cats-v-dogs/validation/dogs', 0.9)

In [None]:
def _build_transform():
    """Return a fresh pipeline: resize to 224x224, convert to tensor, normalize each channel."""
    steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
    return transforms.Compose(steps)


# Training and validation use the same preprocessing (no augmentation on either split).
# NOTE(review): the pretrained VGG16 backbone is presumably trained with ImageNet
# mean/std rather than 0.5/0.5 - confirm whether these statistics are intended.
train_transform = _build_transform()
valid_transform = _build_transform()

In [None]:
# Settings shared by both loaders
_loader_kwargs = {'batch_size': 64}

# ImageFolder takes the class of each image from the sub-folder it sits in
train_dataset = datasets.ImageFolder(
    root='/kaggle/working/tmp/cats-v-dogs/training',
    transform=train_transform,
)
valid_dataset = datasets.ImageFolder(
    root='/kaggle/working/tmp/cats-v-dogs/validation',
    transform=valid_transform,
)

# Only the training batches are shuffled
train_loader = DataLoader(dataset=train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(dataset=valid_dataset, shuffle=False, **_loader_kwargs)

In [None]:
def unnormalize(img, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """
    Undo per-channel normalization by computing img * std + mean for each channel

    Args:
    img (Tensor): image tensor with the channels on dim 0; it is left unmodified
    mean (sequence of float): per-channel mean used for normalization; the default
        matches the transforms.Normalize call of the data pipeline
    std (sequence of float): per-channel std used for normalization; same default source

    Returns:
    Tensor: a new tensor holding the unnormalized image
    """
    # Integer images would truncate the 0.5 statistics, so compute in float for them
    dtype = img.dtype if img.is_floating_point() else torch.float32
    # Shape (C, 1, 1, ...) so the statistics broadcast over the remaining dims
    stat_shape = (-1,) + (1,) * (img.dim() - 1)
    mean_t = torch.as_tensor(mean, dtype=dtype, device=img.device).reshape(stat_shape)
    std_t = torch.as_tensor(std, dtype=dtype, device=img.device).reshape(stat_shape)
    # Out-of-place on purpose: img is usually a view into a batch that is still in use
    return img * std_t + mean_t
    
def imshow(img):
    """Unnormalize a channel-first image tensor with unnormalize() and display it."""
    restored = unnormalize(img)
    # matplotlib expects the channel axis last, so move dim 0 to the end
    channels_last = restored.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    plt.show()

In [None]:
def _show_random_sample(images, labels):
    """
    Display one randomly chosen image of a batch, titled with its label

    Args:
    images (Tensor): batch of normalized, channel-first images indexed along dim 0
    labels (Tensor): labels aligned with images

    Returns:
    tuple: (index, image, label) of the sample that was displayed
    """
    index = np.random.randint(images.size(0))
    image, label = images[index], labels[index]
    # Undo Normalize(mean=0.5, std=0.5) from the transforms before plotting; the raw
    # normalized tensor has negative values that matplotlib would clip.
    pixels = (image * 0.5 + 0.5).clamp(0, 1)
    plt.imshow(pixels.permute(1, 2, 0).numpy())  # channel axis last for matplotlib
    plt.title(f"Label: {label.item()}")
    plt.axis('off')
    plt.show()
    return index, image, label


# One batch from each loader
train_batch = next(iter(train_loader))
images_train, labels_train = train_batch

val_batch = next(iter(val_loader))
images_val, labels_val = val_batch

# One random sample per split
random_index_train, random_image_train, random_label_train = _show_random_sample(images_train, labels_train)
random_index_val, random_image_val, random_label_val = _show_random_sample(images_val, labels_val)


In [None]:
data_iter = iter(train_loader)
first_batch = next(data_iter)

# first_batch is the (images, labels) pair of a training batch, so the second shape
# belongs to the labels of that batch - it is not a validation batch.
print("Image Batch Shape = ", first_batch[0].shape, "| Label Batch Shape = ", first_batch[1].shape)

# Extracting Features Using Transfer Learning

In [None]:
# `pretrained=True` is deprecated in favour of the `weights` argument;
# IMAGENET1K_V1 selects the ImageNet weights that the old flag loaded.
conv_base = torchvision.models.vgg16(weights=torchvision.models.VGG16_Weights.IMAGENET1K_V1)

# The backbone is only used as a fixed feature extractor (the optimizer never receives
# its parameters), so put it in eval mode and stop autograd from tracking its weights.
conv_base = conv_base.eval()
for _param in conv_base.parameters():
    _param.requires_grad_(False)

In [None]:
# Feature-extraction part of the pretrained model, shown as the cell output.
# This is the sub-module the training loop runs the images through.
conv_base.features

In [None]:
# Classifier part of the pretrained model, shown as the cell output for inspection.
# The training loop only calls conv_base.features, so this head is not used there.
conv_base.classifier

In [None]:
@torch.no_grad()
def extract_features(conv_base, images):
    """
    Run images through the feature-extraction part of a model without tracking gradients

    Args:
    conv_base (nn.Module): model exposing a `features` sub-module; it is switched to eval mode
    images (Tensor): input batch passed to conv_base.features

    Returns:
    Tensor: output of conv_base.features(images), detached from autograd
    """
    conv_base.eval()
    return conv_base.features(images)

# Implementing Classifier Part with Trainable Parameters

In [None]:
class Net(nn.Module):
    """
    Binary classification head applied to the extracted feature maps

    The input is flattened per sample, passed through a hidden layer with ReLU and a
    single-output layer with sigmoid, and returned as one probability per sample.

    Args:
    in_features (int): flattened feature size per sample (default 25088; presumably
        the size of conv_base.features output for 224x224 inputs - confirm)
    hidden_features (int): width of the hidden layer (default 512)
    """

    def __init__(self, in_features=25088, hidden_features=512):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return a 1-D tensor of probabilities, one per sample of the batch x."""
        x = torch.flatten(x, 1)  # keep the batch dim, flatten everything else

        # Without a non-linearity fc1 and fc2 collapse into a single linear map,
        # so the hidden layer would add parameters but no capacity.
        x = F.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))

        # (batch, 1) -> (batch,) so the output lines up with the 1-D label tensor
        return x.reshape(-1)

In [None]:
# Trainable classifier head, placed on the selected device
model = Net().to(device)

# Binary cross-entropy on the sigmoid probabilities returned by the head
criterion = nn.BCELoss()

# Only the head's parameters are handed to the optimizer
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
num_epochs = 5

# Per-epoch history of the metrics printed below
train_acc = []
train_loss = []
val_acc = []
val_loss = []

# The backbone must sit on the same device as the batches that are fed through it
conv_base.to(device)

for epoch in range(num_epochs):
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    model.train()  # Set the classifier head to training mode

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device).to(torch.float32)  # BCELoss compares against float targets

        # The head is trained on backbone features, not on raw pixels; extract_features
        # runs the backbone in eval mode without building an autograd graph.
        features = extract_features(conv_base, images)

        optimizer.zero_grad()

        # Forward pass
        outputs = model(features)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Calculate training accuracy (probabilities rounded to 0 / 1)
        predicted = torch.round(outputs)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    training_loss = running_loss / len(train_loader)
    training_accuracy = 100 * correct_train / total_train

    # Evaluation on the validation set
    model.eval()  # Set the classifier head to evaluation mode
    test_loss = 0.0
    correct_test = 0
    total_test = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device).to(torch.float32)

            # Validation batches need the same feature extraction as training batches:
            # the head expects backbone features, and raw images do not fit fc1.
            features = extract_features(conv_base, images)

            outputs = model(features)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            # Calculate validation accuracy
            predicted = torch.round(outputs)
            total_test += labels.size(0)
            correct_test += (predicted == labels).sum().item()

    test_loss /= len(val_loader)
    test_accuracy = 100 * correct_test / total_test

    train_acc.append(training_accuracy)
    train_loss.append(training_loss)
    val_acc.append(test_accuracy)
    val_loss.append(test_loss)

    # Print the average loss and accuracy for this epoch
    print(f"Epoch {epoch+1}:")
    print(f"  Train Loss: {training_loss:.4f} | Train Accuracy: {training_accuracy:.2f}%")
    print(f"  Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.2f}%")
    print("*************************")

In [None]:
# final_state = model.state_dict()
# torch.save(final_state, 'model_state.pth')

In [None]:
# f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
# t = f.suptitle(' Vanilla CNN Performance', fontsize=12)
# f.subplots_adjust(top=0.85, wspace=0.3)

# epoch_list = list(range(1,num_epochs+1))
# ax1.plot(epoch_list, train_acc, label='Train Accuracy')
# ax1.plot(epoch_list, val_acc, label='Validation Accuracy')
# ax1.set_xticks(np.arange(0, 60, 5))
# ax1.set_ylabel('Accuracy Value')
# ax1.set_xlabel('Epoch')
# ax1.set_title('Accuracy')
# l1 = ax1.legend(loc="best")

# ax2.plot(epoch_list, train_loss, label='Train Loss')
# ax2.plot(epoch_list, val_loss, label='Validation Loss')
# ax2.set_xticks(np.arange(0, 60, 5))
# ax2.set_ylabel('Loss Value')
# ax2.set_xlabel('Epoch')
# ax2.set_title('Loss')
# l2 = ax2.legend(loc="best")