<a href="https://colab.research.google.com/github/Nithya07shree/colab-notes-aiml/blob/main/ImageClassifierResnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms

In [36]:
# dataset transform logic
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # ImageNet stats (mean adn sd, colored images unlike mnist grayscale)
])

get the cats and dogs dataset

In [3]:
import kagglehub
path = kagglehub.dataset_download("marquis03/cats-and-dogs")
print("path to dataset files: ", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/marquis03/cats-and-dogs?dataset_version_number=2...


100%|██████████| 9.75M/9.75M [00:00<00:00, 11.8MB/s]

Extracting files...





path to dataset files:  /root/.cache/kagglehub/datasets/marquis03/cats-and-dogs/versions/2


In [37]:
train_dir = path + "/train"
test_dir = path + "/val"
train_dataset = datasets.ImageFolder(train_dir, data_transforms)
test_dataset = datasets.ImageFolder(test_dir, data_transforms)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)

# load pretrained model
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# freeze backbone
for param in model.parameters():
    param.requires_grad = False

model.fc = nn.Linear(512,2)
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [38]:
def train(epochs=5):
  model.train()
  for epoch in range(epochs):
    total_loss = 0
    for batch_idx, (data, target) in enumerate(train_loader):
            optimizer.zero_grad() # reset gradients
            output = model(data) # train model
            loss = criterion(output, target) # calculate loss
            loss.backward() # calculate gradient
            optimizer.step() # update weights
            total_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader)}")

def test():
  model.eval()
  correct=0
  with torch.no_grad():
    for images, labels in test_loader:
      outputs = model(images) # train model
      pred = outputs.argmax(dim=1, keepdim=True)
      correct += pred.eq(labels.view_as(pred)).sum().item()
  accuracy = 100. * correct / len(test_loader.dataset)
  print(f"Test Accuracy: {accuracy}%")

In [8]:
train(3)

Epoch 1/3, Loss: 0.6148517098691728
Epoch 2/3, Loss: 0.37327496541870964
Epoch 3/3, Loss: 0.28703515397177803


In [9]:
test()

Test Accuracy: 88.57142857142857%


In [39]:
train(8)

Epoch 1/8, Loss: 0.6212421258290609
Epoch 2/8, Loss: 0.40520697832107544
Epoch 3/8, Loss: 0.296100421084298
Epoch 4/8, Loss: 0.2457369863986969
Epoch 5/8, Loss: 0.18628277546829647
Epoch 6/8, Loss: 0.19267796145545113
Epoch 7/8, Loss: 0.1744388457801607
Epoch 8/8, Loss: 0.16138765547010633


In [43]:
test()

Test Accuracy: 98.57142857142857%


In [71]:
from PIL import Image
img = Image.open("/image5.jpg").convert('RGB')
img_tensor = data_transforms(img).unsqueeze(0)

In [72]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
img_tensor = img_tensor.to(device)

model.eval()
with torch.no_grad():
    output = model(img_tensor)
    probs = torch.nn.functional.softmax(output, dim=1)
    conf, pred = torch.max(probs, 1)

if conf < 0.90: # Only trust predictions above 90% confidence
    print("Unknown Object")
print(conf)
classes = ['Cat', 'Dog']
print(f"Prediction: {classes[pred.item()]}")

Unknown Object
tensor([0.8426])
Prediction: Cat
