# Dependencies, Loading Dataset

In [None]:
!pip install kagglehub



In [None]:
import kagglehub
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [None]:
# Fetch the Stanford Dogs dataset through kagglehub and keep the local
# folder it reports; later cells read the images from there.
dataset_handle = "jessicali9530/stanford-dogs-dataset"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/stanford-dogs-dataset


In [None]:
import os

# Deterministic preprocessing used for validation, testing and inference.
# NOTE(review): no mean/std normalization is applied in either pipeline; if it
# is ever added here, the inference transform further down must be kept in sync.
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
])

# Training preprocessing: random crop/flip/affine/colour jitter as augmentation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.6, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.8, 1.2)),
    transforms.ColorJitter(0.3, 0.3, 0.3),
    transforms.ToTensor(),
])

# Build the image root from the folder kagglehub reported instead of a
# hard-coded Kaggle path, so the notebook also runs where the dataset is
# downloaded to a different location.
images_root = os.path.join(path, "images", "Images")

# Two views over the same files: one with augmentation (training) and one
# without (validation / test).
train_dataset = datasets.ImageFolder(root=images_root, transform=train_transform)
dataset = datasets.ImageFolder(root=images_root, transform=transform)

# Readable class names: drop the "<id>-" prefix of each folder name and turn
# underscores into spaces. maxsplit=1 keeps breed names that themselves contain
# a hyphen (e.g. "Shih-Tzu") intact instead of truncating them at the hyphen.
class_names = [name.split('-', 1)[1].replace('_', ' ') for name in dataset.classes]


# Sanity check: show the label of the first sample.
image, label = dataset[0]
print("Label Index: ", label)
print("Label Name: ", class_names[label])

Label Index:  0
Label Name:  Chihuahua


In [None]:
import torch
from torch.utils.data import Subset

# Fixed seed so the train/val/test membership is reproducible across runs.
SPLIT_SEED = 42

total_size = len(dataset)
train_size = int(0.8 * total_size)
val_size = int(0.1 * total_size)
# The test split takes the remainder so the three sizes always sum to total_size.
test_size = total_size - train_size - val_size

# Shuffle the sample indices exactly ONCE and slice that single permutation.
# Splitting the augmented and the plain dataset with two independent random
# permutations would let training images reappear in validation/test.
shuffled_indices = torch.randperm(
    total_size, generator=torch.Generator().manual_seed(SPLIT_SEED)
).tolist()
train_indices = shuffled_indices[:train_size]
val_indices = shuffled_indices[train_size:train_size + val_size]
test_indices = shuffled_indices[train_size + val_size:]

# If this cell is re-run, train_dataset is already a Subset; unwrap it so the
# indices are always applied to the full augmented ImageFolder.
augmented_dataset = train_dataset.dataset if isinstance(train_dataset, Subset) else train_dataset

train_dataset = Subset(augmented_dataset, train_indices)  # augmented transform
val_dataset = Subset(dataset, val_indices)                # plain transform
test_dataset = Subset(dataset, test_indices)              # plain transform

train_loader = DataLoader(train_dataset, batch_size= 32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size= 32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size= 32, shuffle=False)

# Building and Training the Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import json

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# `pretrained=True` is deprecated in torchvision; the weights enum requests the
# same ImageNet checkpoint explicitly.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Freeze the whole backbone first ...
for param in model.parameters():
  param.requires_grad = False

# ... then unfreeze the last two residual stages (layer3 and layer4) for fine-tuning.
for stage in (model.layer3, model.layer4):
  for param in stage.parameters():
    param.requires_grad = True

# Replace the ImageNet classifier with a dropout + linear head sized for the
# dog breeds; freshly created layers are trainable by default.
num_classes = len(dataset.classes)
model.fc = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(model.fc.in_features, num_classes)
)

model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 192MB/s]


In [None]:
# Loss function and optimizer configuration

loss_function = nn.CrossEntropyLoss()

# One parameter group per trainable part of the network, each with its own
# learning rate; weight decay is shared by all groups.
param_groups = [
    {'params': model.layer3.parameters(), 'lr': 1e-5},  # earlier stage: smallest updates
    {'params': model.layer4.parameters(), 'lr': 1e-4},  # last backbone stage
    {'params': model.fc.parameters(), 'lr': 1e-4},      # new classifier head (same rate as layer4)
]
optimizer = torch.optim.Adam(param_groups, weight_decay=1e-4)

In [None]:

epochs = 10

for epoch in range(epochs):
  # ---- Training phase ----
  model.train()  # training mode: dropout active, batch-norm uses batch statistics

  running_loss = 0.0  # sum of per-sample losses over the epoch
  correct = 0
  total = 0

  for images, labels in train_loader:
    images, labels = images.to(device), labels.to(device)
    optimizer.zero_grad()  # clear gradients left over from the previous step

    outputs = model(images)
    loss = loss_function(outputs, labels)
    loss.backward()
    optimizer.step()

    batch_size = labels.size(0)
    # loss is the batch mean; weight it by batch size so a smaller final batch
    # does not skew the epoch average.
    running_loss += loss.item() * batch_size
    correct += (outputs.argmax(dim=1) == labels).sum().item()
    total += batch_size

  # ---- Validation phase ----
  model.eval()

  correct_val_top1 = 0
  correct_val_top3 = 0
  total_val = 0

  with torch.no_grad():
    for images, labels in val_loader:
      images, labels = images.to(device), labels.to(device)

      outputs = model(images)

      # Indices of the three highest-scoring classes per sample
      _, top3 = torch.topk(outputs, k=3, dim=1)
      predicted = outputs.argmax(dim=1)

      correct_val_top1 += (predicted == labels).sum().item()
      # A sample is a top-3 hit when its label matches any of its three
      # candidates; compared for the whole batch at once instead of per sample.
      correct_val_top3 += (top3 == labels.unsqueeze(1)).any(dim=1).sum().item()

      total_val += labels.size(0)

  val_acc_top3 = correct_val_top3/total_val
  val_acc_top1 = correct_val_top1/total_val

  print(f"Epoch {epoch+1}/{epochs}")
  print(f"Train Loss: {running_loss/total:.4f}")
  print(f"Train Accuracy: {correct/total:.4f}")
  print(f"Top 1 Validation Accuracy: {val_acc_top1:.4f}")
  print(f"Top 3 Validation Accuracy: {val_acc_top3:.4f}")

Epoch 1/10
Train Accuracy: 0.5166
Top 1 Validation Accuracy: 0.7362
Top 3 Validation Accuracy: 0.9329
Epoch 2/10
Train Accuracy: 0.7157
Top 1 Validation Accuracy: 0.8042
Top 3 Validation Accuracy: 0.9587
Epoch 3/10
Train Accuracy: 0.7650
Top 1 Validation Accuracy: 0.8392
Top 3 Validation Accuracy: 0.9694
Epoch 4/10
Train Accuracy: 0.7980
Top 1 Validation Accuracy: 0.8343
Top 3 Validation Accuracy: 0.9742
Epoch 5/10
Train Accuracy: 0.8172
Top 1 Validation Accuracy: 0.8513
Top 3 Validation Accuracy: 0.9742
Epoch 6/10
Train Accuracy: 0.8381
Top 1 Validation Accuracy: 0.8727
Top 3 Validation Accuracy: 0.9786
Epoch 7/10
Train Accuracy: 0.8475
Top 1 Validation Accuracy: 0.8858
Top 3 Validation Accuracy: 0.9820
Epoch 8/10
Train Accuracy: 0.8661
Top 1 Validation Accuracy: 0.8926
Top 3 Validation Accuracy: 0.9825
Epoch 9/10
Train Accuracy: 0.8763
Top 1 Validation Accuracy: 0.8950
Top 3 Validation Accuracy: 0.9845
Epoch 10/10
Train Accuracy: 0.8870
Top 1 Validation Accuracy: 0.9018
Top 3 Validat

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Collect predictions and ground-truth labels for the whole test split.
all_preds, all_labels = [], []

model.eval()
with torch.no_grad():
  for images, labels in test_loader:
    # Only the images are needed on the device; labels are just recorded.
    predicted = model(images.to(device)).argmax(dim=1)

    all_preds.extend(predicted.cpu().tolist())
    all_labels.extend(labels.tolist())

# Pin the label set so matrix and report always cover every class, even one
# that happens to be absent from this split, and label rows by class name.
class_indices = list(range(len(class_names)))

print(confusion_matrix(all_labels, all_preds, labels=class_indices))
print(classification_report(
    all_labels,
    all_preds,
    labels=class_indices,
    target_names=class_names,
    zero_division=0,  # classes with no predictions score 0 instead of emitting warnings
))

[[13  0  0 ...  0  0  0]
 [ 0 18  0 ...  0  0  0]
 [ 0  0 23 ...  0  0  0]
 ...
 [ 0  0  0 ... 19  0  0]
 [ 0  0  0 ...  1 14  0]
 [ 0  0  0 ...  0  0 20]]
              precision    recall  f1-score   support

           0       0.59      0.93      0.72        14
           1       1.00      0.95      0.97        19
           2       0.82      1.00      0.90        23
           3       1.00      0.83      0.91        12
           4       1.00      0.71      0.83        14
           5       1.00      1.00      1.00        22
           6       0.96      1.00      0.98        26
           7       1.00      0.96      0.98        23
           8       0.86      1.00      0.93        19
           9       1.00      0.96      0.98        24
          10       1.00      0.86      0.92        21
          11       0.74      0.95      0.83        21
          12       1.00      0.92      0.96        13
          13       1.00      0.94      0.97        16
          14       0.77      0.94

In [None]:
# Persist only the parameters (state dict); the architecture is rebuilt in code
# before these weights are loaded again.
trained_weights = model.state_dict()
torch.save(trained_weights, 'model_weights.pth')

# Testing the model on my own samples


In [None]:
import torch.nn.functional as F
from PIL import Image


# Rebuild the architecture (uninitialised ResNet-50 with the dropout + linear
# head) and load the saved weights onto the CPU.
model = models.resnet50()
head_inputs = model.fc.in_features
model.fc = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(head_inputs, num_classes),
)
saved_state = torch.load("model_weights.pth", map_location="cpu")
model.load_state_dict(saved_state)
model.eval()


# Raw folder names; predict_top3 turns them into readable breed names itself.
class_names = dataset.classes

# Preprocessing for inference: resize any image to 224x224, then to tensor.
inference_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
]
transform = transforms.Compose(inference_steps)


def predict_top3(image_path_or_pil):
    """Print the three most probable breeds for one image.

    Args:
        image_path_or_pil: Path to an image file (``str`` or path-like), or an
            already opened PIL image.

    Prints a list of ``(breed, probability)`` tuples ordered from most to
    least likely. Uses the module-level ``model``, ``transform`` and
    ``class_names``. Returns nothing.
    """
    import os

    if isinstance(image_path_or_pil, (str, os.PathLike)):
        # convert() loads the pixel data, so the file can be closed right after.
        with Image.open(image_path_or_pil) as opened:
            image = opened.convert("RGB")
    else:
        image = image_path_or_pil.convert("RGB")

    # Preprocess and add the batch dimension the model expects
    input_tensor = transform(image).unsqueeze(0)

    with torch.no_grad():
        outputs = model(input_tensor)
        probabilities = F.softmax(outputs, dim=1)
        top3_probs, top3_indices = torch.topk(probabilities, k=3)

    # Format predictions
    results = []
    for confidence, class_index in zip(top3_probs[0].tolist(), top3_indices[0].tolist()):
        label = class_names[class_index]
        # maxsplit=1 strips only the leading "<id>-" prefix, so breed names
        # containing a hyphen are not truncated.
        breed = label.split('-', 1)[1].replace('_', ' ').title()
        results.append((breed, confidence))

    print(results)


[('Blenheim Spaniel', 0.9996905326843262), ('Japanese Spaniel', 0.00010945724352495745), ('Brittany Spaniel', 0.00010513477900531143)]
[('Chihuahua', 0.9975343942642212), ('Toy Terrier', 0.0014673855621367693), ('Miniature Pinscher', 0.0006605515372939408)]
[('Labrador Retriever', 0.4983520805835724), ('Golden Retriever', 0.2927256226539612), ('Kuvasz', 0.09886112809181213)]
[('Vizsla', 0.49344974756240845), ('Redbone', 0.3451319932937622), ('Rhodesian Ridgeback', 0.08466936647891998)]
[('Doberman', 0.4211139380931854), ('Redbone', 0.17528751492500305), ('Rhodesian Ridgeback', 0.12169906497001648)]
[('Redbone', 0.7397857904434204), ('Bloodhound', 0.0725398138165474), ('Beagle', 0.039827797561883926)]
[('Toy Poodle', 0.6697661876678467), ('Maltese Dog', 0.14959900081157684), ('Standard Poodle', 0.05148473381996155)]


# Uploading to Hugging Face

In [None]:
!pip install -q huggingface_hub

In [None]:
from huggingface_hub import login

# Authenticate with the Hugging Face Hub. Called without arguments, so no token
# is written into the notebook source; in a notebook this renders a widget
# (see the cell output) where the token is presumably entered.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Redefining the model

import torch
import torch.nn as nn
from torchvision import models

# Same architecture as used in training: ResNet-50 backbone with a
# dropout + linear classification head.
model = models.resnet50()

classifier_inputs = model.fc.in_features
model.fc = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(classifier_inputs, num_classes),
)

# Load the saved parameters onto the CPU and switch to inference mode.
checkpoint = torch.load("model_weights.pth", map_location="cpu")
model.load_state_dict(checkpoint)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
import os
import torch

# Local folder that mirrors the Hub repository contents.
repo_name = "resnet50-dog-breeds"
os.makedirs(repo_name, exist_ok=True)

# Write the weights into that folder so upload_folder picks them up.
weights_file = f"{repo_name}/pytorch_model.bin"
torch.save(model.state_dict(), weights_file)

In [None]:
from huggingface_hub import create_repo, upload_folder

# exist_ok=True keeps this cell re-runnable once the repository exists.
# create_repo returns the repository URL object, whose repo_id already includes
# the namespace of the logged-in account, so no username is hard-coded here.
repo_url = create_repo(repo_name, private=False, exist_ok=True)

upload_folder(
    folder_path=repo_name,
    repo_id=repo_url.repo_id,
    commit_message="Upload resnet50 dog breed classifier"
)

Uploading...:   0%|          | 0.00/95.3M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/markosbont/resnet50-dog-breeds/commit/e345c25a40ff72ecbe6b4169a2c9fa9425f86bd0', commit_message='Upload resnet50 dog breed classifier', commit_description='', oid='e345c25a40ff72ecbe6b4169a2c9fa9425f86bd0', pr_url=None, repo_url=RepoUrl('https://huggingface.co/markosbont/resnet50-dog-breeds', endpoint='https://huggingface.co', repo_type='model', repo_id='markosbont/resnet50-dog-breeds'), pr_revision=None, pr_num=None)