In [1]:
import os
import csv
import numpy as np
from PIL import Image
from skimage import io, transform

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torch.utils.data import Dataset, SubsetRandomSampler, DataLoader
from torchvision import transforms, utils

device = torch.device("cuda:0")

import warnings
warnings.filterwarnings("ignore")

In [2]:
train_folder = "your_path_to_train_folder"
test_folder = "your_path_to_test_folder"

In [3]:
#create custom class to prepare data for DataLoader
class CatOrDog(Dataset):
    def __init__(self, folder, transform=None):
        self.transform = transform
        self.folder = folder
        
    def __len__(self):
        count = 0
        for filename in os.listdir(self.folder):
            count+=1
        return count
    
    def __getitem__(self, index):        

        if torch.is_tensor(index):
            index = idx.tolist()
        
        path = os.path.join(self.folder, os.listdir(self.folder)[index])

        images_name = os.listdir(self.folder)
        img_name = images_name[index]
        image = io.imread(path)
        
        if self.transform:
            image = transforms.functional.to_pil_image(image)
            image = self.transform(image)
            image = np.asarray(image)

        if ('cat' in img_name):
            y = 0
            y = torch.tensor(y, dtype=torch.long)
        else:
            y = 1
            y = torch.tensor(y, dtype=torch.long)
        
        return (image, y)

In [4]:
train_dataset = CatOrDog(train_folder, 
                       transform=transforms.Compose([
                           transforms.Resize((224, 224)),
                           transforms.ToTensor(),
                           # Use mean and std for pretrained models
                           # https://pytorch.org/docs/stable/torchvision/models.html
                           transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])                         
                       ])
                      )
test_dataset = CatOrDog(test_folder, 
                       transform=transforms.Compose([
                           transforms.Resize((224, 224)),
                           transforms.ToTensor(),
                           # Use mean and std for pretrained models
                           # https://pytorch.org/docs/stable/torchvision/models.html
                           transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])                         
                       ])
                      )

In [5]:
batch_size = 64
data_size = len(train_dataset)
validation_fraction = .2
#split data on train and validation part
val_split = int(np.floor((validation_fraction) * data_size))
indices = list(range(data_size))
np.random.seed(42)
np.random.shuffle(indices)

val_indices, train_indices = indices[:val_split], indices[val_split:]

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, 
                                           sampler=train_sampler)
val_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                         sampler=val_sampler)

test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

In [6]:
def train_model(model, train_loader, val_loader, loss, optimizer, num_epochs):
    """
    The main function for the training model.
    
    Returns: history of training    
    """
    loss_history = []
    train_history = []
    val_history = []
    for epoch in range(num_epochs):
        model.train()
        
        loss_accum = 0
        correct_samples = 0
        total_samples = 0

        for i_step, (x, y) in enumerate(train_loader):
             
            x_gpu = x.to(device)
            y_gpu = y.to(device)
            prediction = model(x_gpu)    
            loss_value = loss(prediction, y_gpu)
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            
            _, indices = torch.max(prediction, 1)
            correct_samples += torch.sum(indices == y_gpu)
            total_samples += y.shape[0]
            
            loss_accum += loss_value

        ave_loss = loss_accum / i_step
        train_accuracy = float(correct_samples) / total_samples
        val_accuracy = compute_accuracy(model, val_loader)
        
        loss_history.append(float(ave_loss))
        train_history.append(train_accuracy)
        val_history.append(val_accuracy)
        
        print("Average loss: %f, Train accuracy: %f, Val accuracy: %f" % (ave_loss, train_accuracy, val_accuracy))
        
    return loss_history, train_history, val_history
        
def compute_accuracy(model, loader):
    """
    Computes accuracy on the validation part of the dataset
    
    Returns: accuracy as a float value between 0 and 1
    """
    model.eval() 
    
    correct_samples = 0
    total_samples = 0
    for i_step, (x, y) in enumerate(val_loader):
            x_gpu = x.to(device)
            y_gpu = y.to(device)
            prediction = model(x_gpu)      
            _, indices = torch.max(prediction, 1)
            correct_samples += torch.sum(indices == y_gpu)
            total_samples += y.shape[0]
    val_accuracy = float(correct_samples) / total_samples
    return val_accuracy

In [7]:
#Load the pretrained model and reset final layer
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)

model = model.to(device)

In [8]:
parameters = model.parameters()
loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(parameters, lr=0.001, momentum=0.9)

loss_history, train_history, val_history = train_model(model, train_loader, val_loader, loss, optimizer, 5)

Average loss: 0.577004, Train accuracy: 0.730673, Val accuracy: 0.945000
Average loss: 0.193776, Train accuracy: 0.960100, Val accuracy: 0.975000
Average loss: 0.094560, Train accuracy: 0.978803, Val accuracy: 0.970000
Average loss: 0.058930, Train accuracy: 0.990025, Val accuracy: 0.970000
Average loss: 0.046579, Train accuracy: 0.996259, Val accuracy: 0.980000


In [24]:
def test_model(model, test_loader):
    """
    Checks the model on the test data
    
    Returns: list with predicted values 'cat' or 'dog'
    """
    output = []     
    model.eval() 
    
    for i_step, (x, _) in enumerate(test_loader):
        x_gpu = x.to(device)
        prediction = model(x_gpu)
    for i in range(len(prediction)):
        out = prediction[i].cpu().data.numpy().argmax()
        if out == 0:
            output.append('cat')
        else:
            output.append('dog')
    return output

In [25]:
test_model(model, test_loader)

['dog', 'dog', 'cat', 'dog', 'cat', 'dog', 'cat', 'cat', 'cat', 'cat']

We have good validation accuracy and correct prediction on test data, let's save the model.

In [13]:
torch.save(model.state_dict(), "your_path_to_work_folder")