In [1]:
import zipfile
import glob
import time
import multiprocessing
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
from PIL import Image
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, Subset
import torch.nn.functional as F
import torchvision.models as models
import numpy as np
import pandas as pd
# from torchviz import make_dot
import random

from dataclasses import make_dataclass

In [2]:
SEED = 1
os.environ["PL_GLOBAL_SEED"] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)


In [3]:
BATCH_SIZE = 100
NUM_EPOCHS = 10

NUM_CLASSES = 2

TRAIN_DIR = './train/train'
TEST_DIR = './test1/test1'

train_list = glob.glob(os.path.join(TRAIN_DIR,'*.jpg'))
test_list = glob.glob(os.path.join(TEST_DIR, '*.jpg'))

len(train_list), len(test_list)

(25000, 12500)

In [4]:
if torch.cuda.is_available():
    print(torch.backends.cudnn.benchmark , torch.backends.cudnn.deterministic)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    
device = 'cuda' if torch.cuda.is_available() else 'cpu'

False False


In [5]:
trlist, valist = train_test_split(train_list, test_size=0.2)

In [6]:
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.1, hue=0.1),
        transforms.RandomAffine(degrees=40, translate=None, scale=(1, 2), shear=15),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]),
    'test': transforms.Compose([
        transforms.Resize([256,256]),
        transforms.ToTensor(), 
    ])
}

In [7]:
class dataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        img_path = self.data[idx]
        img = Image.open(img_path)
        if self.transform:
            img_aug = self.transform(img)
            
        label = img_path.split('\\')[-1].split('.')[0]
        if label == 'dog':
            label = 1
        elif label == 'cat':
            label = 0
        
        return img_aug, label 

In [8]:
trlist, valist = train_test_split(train_list, test_size=0.25)

training_dataset= dataset(trlist, transform=data_transforms['train'])
validation_dataset = dataset(valist, transform=data_transforms['test'])
testing_dataset = dataset(test_list, transform=data_transforms['test'])

num_workers = multiprocessing.cpu_count()
training_dataloader = DataLoader(training_dataset, batch_size=BATCH_SIZE, shuffle=True, pin_memory = True)
validation_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False, pin_memory = True)


In [9]:
print(len(training_dataloader.dataset))
print(len(validation_dataloader.dataset))

18750
6250


In [10]:
vgg16 = models.vgg16( weights='DEFAULT' )

In [11]:
for param in vgg16.features.parameters():
    param.require_grad = False


num_features = vgg16.classifier[6].in_features
features = list(vgg16.classifier.children())[:-1] # Remove last layer
features.extend([nn.Linear(num_features, 2)]) # Add our layer with 4 outputs
vgg16.classifier = nn.Sequential(*features) # Replace the model classifier
print(vgg16)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [12]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg16.parameters(), lr=0.001, momentum=0.9)

In [13]:
from tqdm import tqdm
import copy
def train_model(net, dataloader_dict, criterion, optimizer, num_epoch):
    
    since = time.time()
    best_model_wts = copy.deepcopy(net.state_dict())
    best_acc = 0.0
    net = net.to(device)
    
    for epoch in range(num_epoch):
        print('Epoch {}/{}'.format(epoch + 1, num_epoch))
        print('-'*20)
        
        for phase in ['train', 'val']:
            
            if phase == 'train':
                net.train()
                
            else:
                net.eval()
                
            epoch_loss = 0.0
            epoch_corrects = 0
            
            for inputs, labels in tqdm(dataloader_dict):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = net(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        
                    epoch_loss += loss.item() * inputs.size(0)
                    epoch_corrects += torch.sum(preds == labels.data)
                    
            epoch_loss = epoch_loss / len(dataloader_dict.dataset)
            epoch_acc = epoch_corrects.double() / len(dataloader_dict.dataset)
            
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            
            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(net.state_dict())
                
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    net.load_state_dict(best_model_wts)
    return net

In [14]:
vgg16 = train_model(vgg16 , training_dataloader , criterion , optimizer ,  10)

Epoch 1/10
--------------------


100%|██████████| 188/188 [12:37<00:00,  4.03s/it]


train Loss: 0.1414 Acc: 0.9380


100%|██████████| 188/188 [03:34<00:00,  1.14s/it]


val Loss: 0.0757 Acc: 0.9701
Epoch 2/10
--------------------


100%|██████████| 188/188 [13:46<00:00,  4.40s/it]


train Loss: 0.0784 Acc: 0.9676


100%|██████████| 188/188 [03:34<00:00,  1.14s/it]


val Loss: 0.0608 Acc: 0.9750
Epoch 3/10
--------------------


100%|██████████| 188/188 [13:46<00:00,  4.40s/it]


train Loss: 0.0705 Acc: 0.9714


100%|██████████| 188/188 [03:34<00:00,  1.14s/it]


val Loss: 0.0560 Acc: 0.9774
Epoch 4/10
--------------------


100%|██████████| 188/188 [13:46<00:00,  4.40s/it]


train Loss: 0.0646 Acc: 0.9731


100%|██████████| 188/188 [03:34<00:00,  1.14s/it]


val Loss: 0.0501 Acc: 0.9804
Epoch 5/10
--------------------


100%|██████████| 188/188 [13:46<00:00,  4.40s/it]


train Loss: 0.0563 Acc: 0.9772


100%|██████████| 188/188 [03:34<00:00,  1.14s/it]


val Loss: 0.0472 Acc: 0.9812
Epoch 6/10
--------------------


100%|██████████| 188/188 [13:46<00:00,  4.39s/it]


train Loss: 0.0537 Acc: 0.9792


100%|██████████| 188/188 [03:33<00:00,  1.14s/it]


val Loss: 0.0472 Acc: 0.9825
Epoch 7/10
--------------------


100%|██████████| 188/188 [13:46<00:00,  4.39s/it]


train Loss: 0.0476 Acc: 0.9807


100%|██████████| 188/188 [03:34<00:00,  1.14s/it]


val Loss: 0.0414 Acc: 0.9842
Epoch 8/10
--------------------


100%|██████████| 188/188 [13:46<00:00,  4.40s/it]


train Loss: 0.0475 Acc: 0.9810


100%|██████████| 188/188 [03:34<00:00,  1.14s/it]


val Loss: 0.0390 Acc: 0.9848
Epoch 9/10
--------------------


100%|██████████| 188/188 [13:46<00:00,  4.39s/it]


train Loss: 0.0433 Acc: 0.9824


100%|██████████| 188/188 [03:34<00:00,  1.14s/it]


val Loss: 0.0390 Acc: 0.9847
Epoch 10/10
--------------------


100%|██████████| 188/188 [13:46<00:00,  4.40s/it]


train Loss: 0.0437 Acc: 0.9842


100%|██████████| 188/188 [03:34<00:00,  1.14s/it]

val Loss: 0.0388 Acc: 0.9855
Training complete in 172m 19s
Best val Acc: 0.985493





In [15]:
torch.save(vgg16.state_dict(), './vgg16transferLearning')

VGG 16 Evaluation

In [23]:
from sklearn.metrics import precision_score, recall_score, f1_score

def evaluate_model(model, dataloader, device):
    correct = 0
    total = 0
    predicted_labels = []
    true_labels = []

    model.to(device)
    model.eval()  # Set the model to evaluation mode
    
    with torch.no_grad():
        for data in dataloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            predicted_labels.extend(predicted.cpu().numpy())  # Convert predictions to numpy array
            true_labels.extend(labels.cpu().numpy())  # Convert true labels to numpy array
    
    accuracy = 100 * correct / total
    precision = precision_score(true_labels, predicted_labels, average='weighted')
    recall = recall_score(true_labels, predicted_labels, average='weighted')
    f1 = f1_score(true_labels, predicted_labels, average='weighted')
    
    return accuracy, precision, recall, f1

In [24]:
accuracy, precision, recall, f1 = evaluate_model(vgg16, validation_dataloader, device)
print('Accuracy: %.2f%%' % accuracy)
print('Precision: %.2f' % precision)
print('Recall: %.2f' % recall)
print('F1-score: %.2f' % f1)

Accuracy: 94.70%
Precision: 0.95
Recall: 0.95
F1-score: 0.95
