In [186]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as func
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision import datasets
import matplotlib.pyplot as plt
import numpy as np
import kagglehub
import os

In [187]:
path = kagglehub.dataset_download("tongpython/cat-and-dog")

print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/tongpython/cat-and-dog/versions/1


## Import library
    - Pytorch
    - Pytorch model (pytorch.nn)
        + Import function (ReLU, tanh, ...)
    - Optimization
    - DataLoader
## Create the model
    - Init
        ! Super(NN) to properly initialize
        + Create conv layer (3 channels with 3x3 kernels)
        + Max pooling layer inbetween
        + create hidden layer (linear)
    - Forward
        

In [188]:
num_classes = 2
batch_size = 32
learning_rate = 0.00012
num_epochs = 15

In [189]:
class NN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NN, self).__init__()
        self.dropout = nn.Dropout(p=0.25)
        self.hlayer1 = nn.Linear(input_size, 128)
        self.hlayer2 = nn.Linear(128, 128)
        self.hlayer3 = nn.Linear(128, num_classes)
    def forward(self, x):
        x = func.relu(self.hlayer1(x))
        x = self.dropout(x)
        x = func.relu(self.hlayer2(x))
        return x
device = ('cuda' if torch.cuda.is_available() else 'cpu')

In [190]:
class CNN(nn.Module):
  def __init__(self, in_channel = 3, num_classes = 2):
      super(CNN, self).__init__()
      self.relu = nn.ReLU()
      self.linear_dropout = nn.Dropout(p=0.25)
      self.convd_dropout = nn.Dropout2d(p=0.2)
      self.conv = nn.Conv2d(in_channels = in_channel, out_channels = 64, kernel_size=3,stride=1,padding=1)  #convolutional layer 64 layers with kernel 3x3
      self.bn1 = nn.BatchNorm2d(64)
      self.bn2 = nn.BatchNorm2d(128)
      self.bn3 = nn.BatchNorm2d(256)
      self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  #pooling layer help reduce size from 128x128 -> 64x64
      self.conv2 = nn.Conv2d(in_channels= 64, out_channels= 128, kernel_size=3, stride=1,padding=1)
      self.conv3 = nn.Conv2d(in_channels= 128, out_channels= 256, kernel_size=3, stride=1,padding=1)
      self.fc1 = nn.Linear(256 * 16 * 16 , 128)  # Adjusted input size for Linear layer
      self.fc2 = nn.Linear(128, num_classes)  # Added another fully connected layer


  def forward(self, x):
    x = self.pool(self.relu(self.bn1(self.conv(x))))
    x = self.pool(self.relu(self.bn2(self.conv2(x))))
    x = self.pool(self.relu(self.bn3(self.conv3(x))))
    x = self.convd_dropout(x)
    x = x.reshape(x.shape[0],-1)
    x = func.relu(self.fc1(x))
    x = self.linear_dropout(x)
    x = self.fc2(x)
    return x
device = ('cuda' if torch.cuda.is_available() else 'cpu')

In [191]:
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),  # Augment data
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
# Load dataset
train_dataset = datasets.ImageFolder(root=os.path.join(path,"training_set","training_set"), transform=transform)
test_dataset = datasets.ImageFolder(root=os.path.join(path,"test_set","test_set"), transform=transform)

# Create DataLoader
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [192]:
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr= learning_rate,betas=(0.9,0.999))


In [193]:
for epoch in range(num_epochs):
  total_loss = 0
  model.train()
  for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.to(device=device)
        targets = targets.to(device=device)

        scores = model(data)
        loss = criterion(scores, targets)
        total_loss += loss
        average_loss = total_loss / (batch_idx + 1)
        if batch_idx % 5 == 0:
          print(f'epoch: {epoch}, batch: {batch_idx} with loss: {loss} and average loss:{average_loss}')
          print(f"Epoch {epoch+1}: LR={optimizer.param_groups[0]['lr']:.6f}")
        optimizer.zero_grad()
        loss.backward()

        optimizer.step()

epoch: 0, batch: 0 with loss: 0.7017897963523865 and average loss:0.7017897963523865
Epoch 1: LR=0.000120
epoch: 0, batch: 5 with loss: 1.1800613403320312 and average loss:1.5168883800506592
Epoch 1: LR=0.000120
epoch: 0, batch: 10 with loss: 0.8124530911445618 and average loss:1.210898518562317
Epoch 1: LR=0.000120
epoch: 0, batch: 15 with loss: 0.7448480725288391 and average loss:1.0582395792007446
Epoch 1: LR=0.000120
epoch: 0, batch: 20 with loss: 0.5829296708106995 and average loss:0.9576705694198608
Epoch 1: LR=0.000120
epoch: 0, batch: 25 with loss: 0.5910103917121887 and average loss:0.9085599184036255
Epoch 1: LR=0.000120
epoch: 0, batch: 30 with loss: 0.6702612638473511 and average loss:0.8712359666824341
Epoch 1: LR=0.000120
epoch: 0, batch: 35 with loss: 0.7536793947219849 and average loss:0.8422365188598633
Epoch 1: LR=0.000120
epoch: 0, batch: 40 with loss: 0.5886200666427612 and average loss:0.8188891410827637
Epoch 1: LR=0.000120
epoch: 0, batch: 45 with loss: 0.6628890

In [194]:
def check_accuracy(loader,model):
    num_correct = 0
    num_samples = 0
    model.eval()   #evaluation mode

    with torch.no_grad():
        for x,y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)   #max(1) finds the highest value and predictions to store the indices of the value (like from 0-9 then just need the highest value to find the number, the value itself is not needed)
            num_correct += (predictions==y).sum()
            num_samples += predictions.size(0)

        print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}')
        model.train()

check_accuracy(train_loader, model)
check_accuracy(test_loader, model)

Got 7016 / 8005 with accuracy 87.65
Got 1695 / 2023 with accuracy 83.79


In [196]:
PATH = 'cnn.pt'
torch.save(model.state_dict(), PATH)