CNN Dog Breed Classification

In [2]:
import zipfile
import os

# Extract every member of the dataset archive into the current working directory.
with zipfile.ZipFile("archive.zip", mode="r") as archive:
    archive.extractall()

!ls

annotations  archive.zip  images  sample_data


In [3]:
from genericpath import exists
import os
import shutil
from sklearn.model_selection import train_test_split

# Split every class folder under images/Images into train/val/test and copy
# the files into dogs/<split>/<class>/.
assert os.path.isdir("images/Images")

# Create the three destination roots up front.
for split in ["train", "val", "test"]:
    os.makedirs(os.path.join("dogs", split), exist_ok=True)

classes = sorted(os.listdir("images/Images"))

train_ratio = 0.8

# The remainder after the train split ("temp") is divided between val and test.
val_ratio = 0.1
test_ratio = 0.1


def copy_images(file_list, src_dir, dst_dir):
    """Copy each file named in ``file_list`` from ``src_dir`` into ``dst_dir``.

    Args:
        file_list: File names (not paths) located directly inside ``src_dir``.
        src_dir: Directory the files are read from.
        dst_dir: Directory the files are copied to; created if missing.
    """
    os.makedirs(dst_dir, exist_ok=True)
    for file in file_list:
        shutil.copy(os.path.join(src_dir, file), os.path.join(dst_dir, file))


for cls in classes:
    cls_dir = os.path.join("images/Images", cls)

    # os.listdir returns entries in arbitrary order. Sorting makes the seeded
    # split below reproducible, so re-running this cell copies the same files
    # to the same splits instead of leaking an image into a second split.
    # NOTE(review): a "dogs" folder produced by an earlier, unsorted run should
    # be deleted before re-running, since copies are additive.
    imgs = sorted(os.listdir(cls_dir))

    # splits train vs temp
    train_imgs, temp_imgs = train_test_split(imgs, test_size=(1.0 - train_ratio), random_state=42)

    # splits temp into val and test
    val_size = val_ratio / (val_ratio + test_ratio)
    val_imgs, test_imgs = train_test_split(temp_imgs, test_size=(1.0 - val_size), random_state=42)

    for split, split_imgs in (("train", train_imgs), ("val", val_imgs), ("test", test_imgs)):
        copy_images(split_imgs, cls_dir, os.path.join("dogs", split, cls))


In [27]:
import torch
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader, Subset

batch = 32
epochs = 4

# Seed torch's global RNG so DataLoader shuffling and the initialisation of the
# new classification layer are repeatable between runs.
torch.manual_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# transforms
# Normalisation constants shared by every split.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Deterministic pipeline used for both validation and test images.
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

data_transforms = {
    # Training images additionally get random crop / flip / colour augmentation.
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        normalize,
    ]),
    'val': eval_transform,
    'test': eval_transform,
}

splits = ['train', 'val', 'test']

# One ImageFolder per split, reading from dogs/<split>/<class>/.
img_datasets = {
    x: datasets.ImageFolder(root=os.path.join("dogs", x), transform=data_transforms[x])
    for x in splits}

# Only the training loader is shuffled; val/test keep a fixed order.
dataloaders = {
    x: DataLoader(img_datasets[x], batch_size=batch, shuffle=(x == "train"), num_workers=2)
    for x in splits}

dataset_sizes = {x: len(img_datasets[x]) for x in splits}
class_names = img_datasets['train'].classes

print(class_names)

# NOTE(review): `pretrained=` is deprecated in torchvision >= 0.13 in favour of
# `weights=`; kept here so the cell still runs on older torchvision releases.
model = models.resnet18(pretrained=True)

# Freeze the pretrained backbone; only the replacement head below is trained.
for param in model.parameters():
    param.requires_grad = False

# A freshly constructed Linear layer has trainable parameters by default, so
# replacing `fc` after freezing leaves exactly this layer trainable.
in_features = model.fc.in_features
model.fc = torch.nn.Linear(in_features, len(class_names))

model = model.to(device)

criterion = torch.nn.CrossEntropyLoss()
# Give the optimizer only the parameters that actually receive gradients.
optimizer = torch.optim.Adam(model.fc.parameters(), lr=1e-3)

# train function
def train(model, dataloaders, dataset_sizes, device, num_epochs=25,
          loss_fn=None, optim=None):
    """Train ``model`` and return it loaded with the best-validation weights.

    Each epoch runs a "train" phase followed by a "val" phase. After every
    validation phase the weights are snapshotted if validation accuracy is
    strictly better than any seen so far; the best snapshot is restored before
    returning.

    Args:
        model: Module to optimise (modified in place).
        dataloaders: Mapping with "train" and "val" iterables yielding
            ``(inputs, labels)`` batches.
        dataset_sizes: Mapping with the number of samples for "train" and "val",
            used to average loss and accuracy.
        device: Device the batches are moved to.
        num_epochs: Number of epochs to run.
        loss_fn: Loss callable ``loss_fn(outputs, labels)``. Defaults to the
            notebook-level ``criterion``.
        optim: Optimizer stepping the model's parameters. Defaults to the
            notebook-level ``optimizer``.

    Returns:
        The same ``model`` object, holding the best validation weights.
    """
    # Imported locally so it cannot collide with a notebook-level name `copy`.
    import copy

    if loss_fn is None:
        loss_fn = criterion
    if optim is None:
        optim = optimizer

    best_acc = 0.0
    # state_dict() returns references to the live tensors, so it must be deep
    # copied; otherwise the "best" snapshot silently follows later updates.
    best_weights = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print(f"Epoch {epoch}/{num_epochs - 1}")
        print("-" * 10)

        for phase in ["train", "val"]:
            is_train = phase == "train"
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                if is_train:
                    optim.zero_grad()

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)

                    _, preds = torch.max(outputs, 1)
                    loss = loss_fn(outputs, labels)

                    if is_train:
                        loss.backward()
                        optim.step()

                # loss is a batch mean; weight it by batch size before summing.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

            if phase == "val" and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_weights = copy.deepcopy(model.state_dict())

    model.load_state_dict(best_weights)
    return model

# evaluate CNN
def evaluate(model, dataloaders, dataset_sizes, device, split="test"):
    """Return the classification accuracy of ``model`` on one data split.

    Args:
        model: Module to evaluate; switched to eval mode and moved to ``device``.
        dataloaders: Mapping from split name to an iterable of
            ``(inputs, labels)`` batches.
        dataset_sizes: Mapping from split name to its number of samples.
        device: Device used for the forward passes.
        split: Key of the split to score. Defaults to "test".

    Returns:
        A 0-dim float64 tensor: correct predictions divided by
        ``dataset_sizes[split]``.
    """
    model.eval()
    model.to(device)

    # Start from a tensor so `.double()` below also works for an empty loader.
    running_corrects = torch.zeros((), dtype=torch.long, device=device)

    with torch.no_grad():
        for inputs, labels in dataloaders[split]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            # Predicted class is the index of the largest output per sample.
            _, preds = torch.max(outputs, 1)

            running_corrects += torch.sum(preds == labels)

    test_acc = running_corrects.double() / dataset_sizes[split]

    return test_acc

# Train for the configured number of epochs, then score the returned model
# on the test split and print the result.
model = train(
    model=model,
    dataloaders=dataloaders,
    dataset_sizes=dataset_sizes,
    device=device,
    num_epochs=epochs,
)
test_acc = evaluate(
    model=model,
    dataloaders=dataloaders,
    dataset_sizes=dataset_sizes,
    device=device,
)
print("Test Accuracy: {:.4f}".format(test_acc))







['n02085620-Chihuahua', 'n02085782-Japanese_spaniel', 'n02085936-Maltese_dog', 'n02086079-Pekinese', 'n02086240-Shih-Tzu', 'n02086646-Blenheim_spaniel', 'n02086910-papillon', 'n02087046-toy_terrier', 'n02087394-Rhodesian_ridgeback', 'n02088094-Afghan_hound', 'n02088238-basset', 'n02088364-beagle', 'n02088466-bloodhound', 'n02088632-bluetick', 'n02089078-black-and-tan_coonhound', 'n02089867-Walker_hound', 'n02089973-English_foxhound', 'n02090379-redbone', 'n02090622-borzoi', 'n02090721-Irish_wolfhound', 'n02091032-Italian_greyhound', 'n02091134-whippet', 'n02091244-Ibizan_hound', 'n02091467-Norwegian_elkhound', 'n02091635-otterhound', 'n02091831-Saluki', 'n02092002-Scottish_deerhound', 'n02092339-Weimaraner', 'n02093256-Staffordshire_bullterrier', 'n02093428-American_Staffordshire_terrier', 'n02093647-Bedlington_terrier', 'n02093754-Border_terrier', 'n02093859-Kerry_blue_terrier', 'n02093991-Irish_terrier', 'n02094114-Norfolk_terrier', 'n02094258-Norwich_terrier', 'n02094433-Yorkshire_t