In [1]:
! pip install torch torchvision



In [2]:
import torch
from torch import nn
import numpy as np
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
import os

In [3]:
if torch.cuda.is_available():
    dev = "cuda:0"
elif torch.backends.mps.is_available():
    dev = "mps"
else:
    dev = "cpu"
device = torch.device(dev)
device

device(type='mps')

In [4]:
train_dir = 'dogs-vs-cats/train/'
dog_dir = train_dir + 'dog/'
cat_dir = train_dir + 'cat/'
os.makedirs(dog_dir, exist_ok=True)
os.makedirs(cat_dir, exist_ok=True)

In [5]:
for filename in os.listdir(train_dir):
    if 'dog.' in filename.lower():
        os.rename(os.path.join(train_dir, filename), os.path.join(dog_dir, filename))
    if 'cat.' in filename.lower():
        os.rename(os.path.join(train_dir, filename), os.path.join(cat_dir, filename))

In [6]:
data_transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
])

In [7]:
train_ds = ImageFolder(root=train_dir, transform=data_transforms)

In [8]:
train_ds, valid_ds = torch.utils.data.random_split(train_ds, [0.8, 0.2])

In [9]:
mini_batch_size = 512
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=mini_batch_size, shuffle=True, drop_last=False)
valid_dl = torch.utils.data.DataLoader(valid_ds, batch_size=mini_batch_size)

In [10]:
class WrappedDataLoader:
    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        return len(self.dl)

    def __iter__(self):
        for b in self.dl:
            yield (self.func(*b))


def put_to_gpu(x, y):
    return x.to(device), y.to(device)

In [11]:
# model = models.alexnet(pretrained=True)
model = models.mobilenet_v3_small(pretrained=True)



In [12]:
for param in model.parameters():
    param.requires_grad = False

In [13]:
model

MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): SqueezeExcitation(
          (avgpool): AdaptiveAvgPool2d(output_size=1)
          (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
          (fc2): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1))
          (activation): ReLU()
          (scale_activation): Hardsigmoid()
        )
        (2): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), 

In [14]:
# model.classifier[6].out_features = 2
model.classifier[3].out_features = 2

In [15]:
for param in model.classifier.parameters():
    param.requires_grad = True

In [16]:
sum(p.numel() for p in model.parameters() if p.requires_grad)

1615848

In [17]:
model = model.to(device)

In [18]:
optimizer = torch.optim.Adam(model.parameters())

In [19]:
class EarlyStopping:
    def __init__(self, patience=5, delta=0):
        self.patience = patience
        self.delta = delta
        self.best_score = None
        self.early_stop = False
        self.counter = 0
        self.best_model_state = None

    def __call__(self, val_loss, model):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.best_model_state = model.state_dict()
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.best_model_state = model.state_dict()
            self.counter = 0

    def load_best_model(self, model):
        model.load_state_dict(self.best_model_state)

In [20]:
early_stopping = EarlyStopping(patience=3, delta=0.01)

In [21]:
def fit(epochs, model, optimizer, train_dl, valid_dl=None):
    loss_func = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        model.train()

        for X_mb, y_mb in train_dl:
            y_hat = model(X_mb)

            loss = loss_func(y_hat, y_mb)
            loss.backward()

            optimizer.step()
            optimizer.zero_grad()

        model.eval()
        with torch.no_grad():
            train_loss = sum(loss_func(model(X_mb), y_mb) for X_mb, y_mb in train_dl)
            valid_loss = sum(loss_func(model(X_mb), y_mb) for X_mb, y_mb in valid_dl)
        print('epoch {}, training loss {}'.format(epoch + 1, train_loss / len(train_dl)))
        print('epoch {}, validation loss {}'.format(epoch + 1, valid_loss / len(valid_dl)))

        early_stopping(valid_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    print('Finished training')

    return model

In [22]:
epochs = 10

model = fit(epochs, model, optimizer, WrappedDataLoader(train_dl, put_to_gpu), WrappedDataLoader(valid_dl, put_to_gpu))

epoch 1, training loss 0.1630764901638031
epoch 1, validation loss 0.17329683899879456
epoch 2, training loss 0.1508682519197464
epoch 2, validation loss 0.1546304076910019
epoch 3, training loss 0.15421108901500702
epoch 3, validation loss 0.16263660788536072
epoch 4, training loss 0.16193102300167084
epoch 4, validation loss 0.17654253542423248
epoch 5, training loss 0.14107489585876465
epoch 5, validation loss 0.15625977516174316
Early stopping
Finished training


In [23]:
early_stopping.load_best_model(model)
model = model.cpu()

In [24]:
def evaluate(model, data_loader):    
    model.eval()
    accuracy = 0
    with torch.no_grad():
        for X, y in data_loader:
            y_hat = model(X).cpu().numpy()
            y_hat = np.argmax(y_hat, axis=1)
            accuracy += (y_hat == y.cpu().numpy()).mean()
    accuracy /= len(data_loader)

    return accuracy

In [25]:
evaluate(model.to(device), WrappedDataLoader(train_dl, put_to_gpu))

np.float64(0.941943359375)

In [26]:
evaluate(model.to(device), WrappedDataLoader(valid_dl, put_to_gpu))

np.float64(0.9380779655612244)