In [1]:
!pip install -q kaggle timm albumentations opencv-python matplotlib grad-cam tqdm


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/7.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/7.8 MB[0m [31m62.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m7.8/7.8 MB[0m [31m133.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m94.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for grad-cam (pyproject.toml) ... [?25l[?25hdone


In [2]:
from google.colab import files
files.upload()  # Upload kaggle.json when prompted


Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"ashwinipanada","key":"27a6c02a4501e2c0f7c59026c6778477"}'}

In [3]:
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


In [4]:
!kaggle datasets download -d tongpython/cat-and-dog
!unzip -q cat-and-dog.zip -d data/


Dataset URL: https://www.kaggle.com/datasets/tongpython/cat-and-dog
License(s): CC0-1.0
Downloading cat-and-dog.zip to /content
 71% 155M/218M [00:00<00:00, 1.62GB/s]
100% 218M/218M [00:00<00:00, 983MB/s] 


In [8]:
import os

print(os.listdir("data/training_set/training_set"))
print("Cats images:", len(os.listdir("data/training_set/training_set/cats")))
print("Dogs images:", len(os.listdir("data/training_set/training_set/dogs")))

print(os.listdir("data/test_set/test_set"))



['dogs', 'cats']
Cats images: 4001
Dogs images: 4006
['dogs', 'cats']


In [11]:
import os
import torch
import timm
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

# ------------------- Config -------------------
DEVICE    = "cuda" if torch.cuda.is_available() else "cpu"
IMG_SIZE  = 320
BATCH     = 32
EPOCHS    = 8
DATA_ROOT = "data/training_set/training_set"

# ------------------- Dataset Class -------------------
class CatDogDataset(Dataset):
    def __init__(self, image_paths, labels, transform):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = Image.open(self.image_paths[idx]).convert("RGB")
        img = self.transform(img)
        label = torch.tensor(self.labels[idx])
        return img, label

# ------------------- Load Images -------------------
def load_data():
    image_paths = []
    labels = []

    for folder_name, class_id in [("cats", 0), ("dogs", 1)]:
        folder_path = os.path.join(DATA_ROOT, folder_name)
        for file in os.listdir(folder_path):
            if file.lower().endswith((".jpg", ".jpeg", ".png")):
                image_paths.append(os.path.join(folder_path, file))
                labels.append(class_id)

    return image_paths, labels

# ------------------- Transforms -------------------
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],
                         [0.229,0.224,0.225])
])

val_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],
                         [0.229,0.224,0.225])
])

# ------------------- Prepare Dataset -------------------
paths, labels = load_data()
print("Total Images:", len(paths))

X_train, X_val, y_train, y_val = train_test_split(
    paths, labels, test_size=0.15, stratify=labels
)

train_dataset = CatDogDataset(X_train, y_train, train_transform)
val_dataset   = CatDogDataset(X_val, y_val, val_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=BATCH)

# ------------------- Model -------------------
model = timm.create_model("efficientnet_b0", pretrained=True, num_classes=2)
model.to(DEVICE)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

# ------------------- Training Loop -------------------
for epoch in range(EPOCHS):
    model.train()
    progress = tqdm(train_loader)

    for images, targets in progress:
        images, targets = images.to(DEVICE), targets.to(DEVICE)

        outputs = model(images)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        progress.set_description(f"Epoch {epoch+1}")
        progress.set_postfix(loss=loss.item())

    # ---------- Validation ----------
    model.eval()
    correct = 0

    with torch.no_grad():
        for images, targets in val_loader:
            images, targets = images.to(DEVICE), targets.to(DEVICE)
            preds = model(images).argmax(1)
            correct += (preds == targets).sum().item()

    accuracy = correct / len(val_dataset)
    print(f"Epoch {epoch+1} Validation Accuracy: {accuracy:.4f}")

# ------------------- Save Model -------------------
torch.save(model.state_dict(), "cat_dog_model.pth")
print("✅ Model saved as cat_dog_model.pth")


Total Images: 8005


Epoch 1: 100%|██████████| 213/213 [01:39<00:00,  2.14it/s, loss=0.57]


Epoch 1 Validation Accuracy: 0.9833


Epoch 2: 100%|██████████| 213/213 [01:37<00:00,  2.19it/s, loss=0.675]


Epoch 2 Validation Accuracy: 0.9842


Epoch 3: 100%|██████████| 213/213 [01:38<00:00,  2.16it/s, loss=0.456]


Epoch 3 Validation Accuracy: 0.9842


Epoch 4: 100%|██████████| 213/213 [01:37<00:00,  2.18it/s, loss=0.0123]


Epoch 4 Validation Accuracy: 0.9808


Epoch 5: 100%|██████████| 213/213 [01:37<00:00,  2.17it/s, loss=0.109]


Epoch 5 Validation Accuracy: 0.9850


Epoch 6: 100%|██████████| 213/213 [01:37<00:00,  2.18it/s, loss=0.101]


Epoch 6 Validation Accuracy: 0.9892


Epoch 7: 100%|██████████| 213/213 [01:37<00:00,  2.18it/s, loss=0.0977]


Epoch 7 Validation Accuracy: 0.9942


Epoch 8: 100%|██████████| 213/213 [01:37<00:00,  2.18it/s, loss=0.00376]


Epoch 8 Validation Accuracy: 0.9892
✅ Model saved as cat_dog_model.pth


In [12]:
!pip install -q grad-cam


In [23]:
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
import cv2

def predict(image_path):
    model.eval()

    img = Image.open(image_path).convert("RGB")
    input_tensor = val_tf(img).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        outputs = model(input_tensor)
        probs = torch.nn.functional.softmax(outputs, dim=1)
        class_id = torch.argmax(probs).item()
        confidence = probs[0][class_id].item()

    # Class label
    label = "Cat" if class_id == 0 else "Dog"

    # ✅ ADD THIS PART (confidence threshold)
    if confidence < 0.85:
        label = "Unknown"

    # Print result
    print(f"Prediction: {label}")
    print(f"Confidence: {confidence:.4f}")

    target_layer = model.conv_head if hasattr(model, "conv_head") else list(model.children())[-2]
    cam = GradCAM(model=model, target_layers=[target_layer])

    targets = [ClassifierOutputTarget(class_id)]
    grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0]

    rgb_img = np.array(img.resize((IMG_SIZE, IMG_SIZE))).astype(np.float32) / 255.0
    cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)

    cv2.imwrite("cam_output.jpg", cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR))
    print("✅ Heatmap saved as cam_output.jpg")


In [24]:
from google.colab import files
uploaded = files.upload()

for name in uploaded.keys():
    print("Uploaded:", name)


Saving 1.jpg to 1.jpg
Uploaded: 1.jpg


In [25]:
predict("1.jpg")


Prediction: Cat
Confidence: 0.8878
✅ Heatmap saved as cam_output.jpg
