In [1]:
import torch
import os
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torchvision

In [3]:
!curl -O https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip



  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  786M  100  786M    0     0  2519k      0  0:05:19  0:05:19 --:--:-- 1742k  0  3850k      0  0:03:29  0:00:40  0:02:49 4490k 0:02:46 2739k    0     0  3594k      0  0:03:44  0:01:03  0:02:41 3244k   0     0  3479k      0  0:03:51  0:01:10  0:02:41 2131k04:14  0:01:41  0:02:33 3467k0     0  3170k      0  0:04:14  0:02:24  0:01:50 3441k  0:01:47 3600k0     0  3157k      0  0:04:15  0:03:13  0:01:02 2681k0     0  2944k      0  0:04:33  0:03:37  0:00:56     0662M    0     0  2793k      0  0:04:48  0:04:02  0:00:46 2827k:03 1338k


In [4]:
!unzip -q kagglecatsanddogs_5340.zip
!ls

 CDLA-Permissive-2.0.pdf       kagglecatsanddogs_5340.zip  'readme[1].txt'
 image_classif_scratch.ipynb   PetImages


In [5]:
num_skipped = 0
for folder_name in ("Cat", "Dog"):
    folder_path = os.path.join("PetImages", folder_name)
    for fname in os.listdir(folder_path):
        fpath = os.path.join(folder_path, fname)
        try:
            fobj = open(fpath, "rb")
            is_jfif = b"JFIF" in fobj.peek(10)
        finally:
            fobj.close()

        if not is_jfif:
            num_skipped += 1
            # Delete corrupted image
            os.remove(fpath)

print(f"Deleted {num_skipped} images.")

Deleted 1560 images.


In [6]:
def train_val_split(dataset, split_rate=0.2, seed=None):
    num_samples = len(dataset)
    num_val = int(split_rate * num_samples)
    num_train = num_samples - num_val

    generator = torch.Generator().manual_seed(seed) if seed is not None else None

    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [num_train, num_val], generator=generator)
    return train_dataset, val_dataset


In [7]:
TRAIN_DATA_PATH = "PetImages/"
image_size = (180, 180)
batch_size = 128

data_transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
])

dataset = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform = data_transform)

In [8]:
train_dataset, val_dataset = train_val_split(dataset, split_rate = 0.2, seed = 42)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


mean = 0.0
std = 0.0
total_samples = 0

for data, _ in train_loader:
    batch_samples = data.size(0)
    data = data.view(batch_samples, data.size(1), -1)
    mean += data.mean(2).sum(0)
    std += data.std(2).sum(0)
    total_samples += batch_samples

mean /= total_samples
std /= total_samples

print("Mean:", mean.tolist())
print("Std:", std.tolist())

Mean: [0.48789748549461365, 0.4552934169769287, 0.41715604066848755]
Std: [0.22483637928962708, 0.22028988599777222, 0.22069817781448364]


In [None]:
plt.figure(figsize=(10, 10))
for images, labels in train_loader:
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(np.transpose(images[i], (1, 2, 0)))
        plt.title(int(labels[i]))
        plt.axis("off")

In [18]:
augmentation_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p = 0.5),
    transforms.RandomRotation((-20, 20)),
    transforms.ToTensor()
])

full_transform = transforms.Compose([
    data_transform,
    augmentation_transform,
    transforms.Normalize(mean = mean, std = std)
    
])

dataset = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform = full_transform)


In [34]:
import torch.nn.functional as F
class SeparableConv2d(nn.Module):
  def __init__(self, in_channels, out_channels):
    super(SeparableConv2d, self).__init__()
    self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1, groups=in_channels)
    self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1)
    self.bn = nn.BatchNorm2d(out_channels)

    self.depthwise2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, groups=out_channels)
    self.pointwise2 = nn.Conv2d(out_channels, out_channels, kernel_size=1)
    self.bn2 = nn.BatchNorm2d(out_channels)


  def forward(self, x):
    x = F.relu(x)
    x = self.depthwise(x)
    x = self.pointwise(x)
    x = self.bn(x)
    
    x = F.relu(x)
    x = self.depthwise2(x)
    x = self.pointwise2(x)
    self.bn2(x)

  
    return x
    

class ClassificationModel(nn.Module):
  def __init__(self, num_classes):
    super(ClassificationModel, self).__init__()
    self.c1 = nn.Conv2d(3, 128, kernel_size=3, stride = 2, padding=1)
    self.bn1 = nn.BatchNorm2d(128)
    self.relu1 = nn.ReLU()
    self.conv_block = nn.ModuleList([
      SeparableConv2d(128, 256),
      SeparableConv2d(256, 512),
      SeparableConv2d(512, 728),
    ])
    self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
    self.residual = nn.Conv2d(128, 1024, kernel_size=1, stride=1, padding=1)

    self.sep_conv = SeparableConv2d(728, 1024)
    self.bn_sep = nn.BatchNorm2d(1024)

    self.relu2 = nn.ReLU()
    self.gavgpool = nn.AdaptiveAvgPool2d(1)
    self.dropout = nn.Dropout(0.25)
    if num_classes == 2:
      self.fc = nn.Linear(1024, 1)
    else:
      self.fc = nn.Linear(1024, num_classes)


  def forward(self, x):
    x = self.c1(x)
    x = self.bn1(x)
    x = F.relu(x)
    prev_block_residual = x
    for block in self.conv_block:
      x = block(x)
      x = self.maxpool(x)
      residual = self.residual(x)(prev_block_residual)
      x += residual
      prev_block_residual = x
    x = self.sep_conv(x)
    x = self.bn_sep(x)
    x = F.relu(x)
    x = self.gavgpool(x)
    x = self.dropout(x)
    x = self.fc(x)


In [35]:
model = ClassificationModel(2)
epochs = 25
learning_rate = 3e-4

model_checkpoint_path = "save_at_{epoch}.pth"

critetion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size = batch_size, shuffle=False)

In [36]:
from tqdm import tqdm
for epoch in tqdm(range(epochs)):
  for inputs, labels in train_loader:
    outputs = model(inputs)
    loss = critetion(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()



  0%|          | 0/25 [00:00<?, ?it/s]

In [None]:
model.eval()

with torch.no_grad():
  val_loss = 0
  total_samples = 0
  correct_pred = 0

  for inputs, labels in val_loader:
    outputs = model(inputs)
    batch_loss = critetion(outputs, labels)
    val_loss += batch_loss.item()

  avg_val_loss = val_loss / len(val_loader)

  torch.save(model.state_dict(), model_checkpoint_path.format(epoch=epoch + 1))
