In [2]:
!pip install torch torchvision timm --quiet


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m101.8 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m78.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m55.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m35.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# Kaggle already has torch & torchvision installed correctly
!pip install timm --quiet


**Start Here**

In [4]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms, models

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)


Device: cuda


In [5]:
# Per-channel statistics passed to Normalize in both pipelines.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Training pipeline: resize, then random crop / flip / rotation / colour jitter
# before converting to a normalised tensor.
train_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224, scale=(0.6, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

# Validation pipeline: deterministic resize + centre crop, same normalisation.
val_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
])


In [6]:
dataset_root = "/kaggle/input/caltech/caltech-101/101_ObjectCategories/101_ObjectCategories"

# No transform here: CaltechDataset applies the split-specific transform itself.
full_dataset = datasets.ImageFolder(dataset_root)

# Index of the "BACKGROUND_Google" folder, or None when the folder is absent.
bg_idx = full_dataset.class_to_idx.get("BACKGROUND_Google")

# Positions (into full_dataset.samples) of every sample outside that folder.
valid_indices = [
    position
    for position, (_, target) in enumerate(full_dataset.samples)
    if target != bg_idx
]

# Remap the surviving ImageFolder labels onto a contiguous 0..K-1 range,
# preserving the order in which ImageFolder lists the classes.
kept_old_indices = [
    original
    for name, original in full_dataset.class_to_idx.items()
    if name != "BACKGROUND_Google"
]
class_map = {original: remapped for remapped, original in enumerate(kept_old_indices)}

# Human-readable names, in the same order as the remapped labels.
class_names = [name for name in full_dataset.classes if name != "BACKGROUND_Google"]

class CaltechDataset(Dataset):
    """Subset view of ``base_dataset`` that transforms images and remaps labels.

    Item ``i`` is taken from ``base_dataset[indices[i]]``; the image is passed
    through ``transform`` and the label is translated through the module-level
    ``class_map``.
    """

    def __init__(self, base_dataset, indices, transform):
        self.base, self.indices, self.transform = base_dataset, indices, transform

    def __len__(self):
        # The view is exactly as long as the list of selected positions.
        return len(self.indices)

    def __getitem__(self, idx):
        source_position = self.indices[idx]
        raw_image, original_label = self.base[source_position]
        return self.transform(raw_image), class_map[original_label]

# 80/20 train/validation split over the retained sample positions.
SPLIT_SEED = 42
train_size = int(0.8 * len(valid_indices))
val_size = len(valid_indices) - train_size

# Seed the split so the same samples land in train/val on every run; without a
# generator the partition changes each time the cell is executed.
split_rng = torch.Generator().manual_seed(SPLIT_SEED)
train_idx, val_idx = random_split(
    valid_indices, [train_size, val_size], generator=split_rng
)

# Same underlying images, different transforms: augmentation for training,
# deterministic preprocessing for validation.
train_ds = CaltechDataset(full_dataset, train_idx, train_tf)
val_ds = CaltechDataset(full_dataset, val_idx, val_tf)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=2)
# NOTE(review): no visible cell consumes val_loader — confirm whether a
# validation pass was intended.
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False, num_workers=2)

num_classes = len(class_names)
print("Classes:", num_classes)


Classes: 101


In [7]:
# Start from the ImageNet-pretrained ResNet-18 checkpoint.
pretrained_weights = models.ResNet18_Weights.IMAGENET1K_V1
model = models.resnet18(weights=pretrained_weights)

# Freeze backbone
for backbone_param in model.parameters():
    backbone_param.requires_grad = False

# Replace the classification head after freezing, so the new layer's
# parameters keep requires_grad=True and are the only trainable ones.
head_in_features = model.fc.in_features
model.fc = nn.Linear(head_in_features, num_classes)
model = model.to(device)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 197MB/s]


In [8]:
# Cross-entropy over the class logits; only the new head (model.fc) is optimised here.
criterion = nn.CrossEntropyLoss()
head_params = model.fc.parameters()
optimizer = optim.Adam(head_params, lr=1e-3)

def train_epoch(loader):
    """Run one optimisation pass over ``loader`` and report its metrics.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and ``device``.

    Returns:
        A ``(mean_loss, accuracy)`` tuple, both averaged over every sample seen.
    """
    model.train()
    seen = 0
    hits = 0
    running_loss = 0

    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        # The criterion returns a per-batch mean, so weight it by the batch
        # size to obtain a correct per-sample average at the end.
        running_loss += batch_loss.item() * inputs.size(0)
        predictions = logits.argmax(1)
        hits += (predictions == targets).sum().item()
        seen += targets.size(0)

    return running_loss / seen, hits / seen

# Stage 1: train for eight epochs with the optimizer built over model.fc only.
for epoch in range(8):
    epoch_loss, epoch_acc = train_epoch(train_loader)
    print(f"[Classifier] Epoch {epoch+1} - Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")


[Classifier] Epoch 1 - Loss: 2.3517 Acc: 0.5202
[Classifier] Epoch 2 - Loss: 0.9941 Acc: 0.7881
[Classifier] Epoch 3 - Loss: 0.7003 Acc: 0.8440
[Classifier] Epoch 4 - Loss: 0.5660 Acc: 0.8644
[Classifier] Epoch 5 - Loss: 0.5034 Acc: 0.8755
[Classifier] Epoch 6 - Loss: 0.4339 Acc: 0.8908
[Classifier] Epoch 7 - Loss: 0.4002 Acc: 0.8973
[Classifier] Epoch 8 - Loss: 0.3654 Acc: 0.9040


In [9]:
# Stage 2: unfreeze layer4 so it is fine-tuned alongside whatever is already trainable.
for param in model.layer4.parameters():
    param.requires_grad = True

# Hand the optimizer only the parameters that can receive gradients. Frozen
# tensors never get a gradient, so leaving them out does not change the
# updates; it just keeps the optimizer's parameter groups honest.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=1e-4)

# train_epoch reads the module-level `optimizer`, so rebinding it above is
# what switches training to the lower fine-tuning learning rate.
for epoch in range(10):
    loss, acc = train_epoch(train_loader)
    print(f"[Fine-tune] Epoch {epoch+1} - Loss: {loss:.4f} Acc: {acc:.4f}")


[Fine-tune] Epoch 1 - Loss: 0.2589 Acc: 0.9255
[Fine-tune] Epoch 2 - Loss: 0.1514 Acc: 0.9568
[Fine-tune] Epoch 3 - Loss: 0.1101 Acc: 0.9719
[Fine-tune] Epoch 4 - Loss: 0.0896 Acc: 0.9735
[Fine-tune] Epoch 5 - Loss: 0.0729 Acc: 0.9817
[Fine-tune] Epoch 6 - Loss: 0.0549 Acc: 0.9867
[Fine-tune] Epoch 7 - Loss: 0.0628 Acc: 0.9839
[Fine-tune] Epoch 8 - Loss: 0.0483 Acc: 0.9869
[Fine-tune] Epoch 9 - Loss: 0.0448 Acc: 0.9872
[Fine-tune] Epoch 10 - Loss: 0.0338 Acc: 0.9922


In [10]:
# Bundle the weights with the class-name list so the file is self-describing
# when it is loaded for inference.
checkpoint_path = "resnet18_caltech101_generalized.pth"
checkpoint_payload = {"model": model.state_dict(), "classes": class_names}
torch.save(checkpoint_payload, checkpoint_path)

print("Model saved ✅")


Model saved ✅


In [11]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image

# -------------------------
# Device
# -------------------------
# Run inference on the GPU when CUDA is available; otherwise use the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

# -------------------------
# Load trained model
# -------------------------
# The checkpoint written by this notebook holds only a state_dict and a list of
# class-name strings, so it can be read with weights_only=True, which restricts
# unpickling to tensors and plain containers instead of arbitrary objects.
checkpoint = torch.load(
    "resnet18_caltech101_generalized.pth",
    map_location=device,
    weights_only=True,
)

class_names = checkpoint["classes"]
num_classes = len(class_names)

# Rebuild the same architecture (ResNet-18 with a num_classes-wide head) without
# pretrained weights; every tensor is then overwritten by the saved state_dict.
model = models.resnet18(weights=None)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.load_state_dict(checkpoint["model"])

model.to(device)
# eval() switches layers such as BatchNorm to inference behaviour.
model.eval()

print("Model loaded successfully")
print("Number of classes:", num_classes)

# -------------------------
# Image Transform (MUST MATCH TRAINING)
# -------------------------
# Deterministic preprocessing: resize, centre crop, tensor conversion, normalisation.
normalize_step = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize_step,
]
transform = transforms.Compose(preprocessing_steps)

# -------------------------
# Prediction Function
# -------------------------
def predict_image(image_path, topk=5):
    """Print the highest-probability class predictions for one image file.

    Uses the module-level ``model``, ``transform``, ``device`` and
    ``class_names``.

    Args:
        image_path: Path of the image to classify.
        topk: How many predictions to print. Values larger than the number of
            model outputs are clamped to that number instead of raising.

    Returns:
        None. The predictions are written to stdout.
    """
    # The context manager closes the file handle; convert() loads the pixel
    # data into a new image, so `rgb` stays usable after the file is closed.
    with Image.open(image_path) as raw:
        rgb = raw.convert("RGB")
    batch = transform(rgb).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(batch)
        probs = torch.softmax(logits, dim=1)
        # Tensor.topk raises when k exceeds the size of the class dimension.
        k = min(topk, probs.size(1))
        top_probs, top_idxs = probs.topk(k)

    print(f"\nImage: {image_path}")
    print("Top predictions:")
    # Only one image is in the batch, so row 0 holds all the results.
    for prob, class_idx in zip(top_probs[0].tolist(), top_idxs[0].tolist()):
        print(f"{class_names[class_idx]} : {prob*100:.2f}%")

# -------------------------
# Example Usage
# -------------------------
sample_image_path = "/kaggle/input/infer-image/CelingFan.jpg"
predict_image(sample_image_path)


Device: cuda
Model loaded successfully
Number of classes: 101

Image: /kaggle/input/infer-image/CelingFan.jpg
Top predictions:
ceiling_fan : 100.00%
dragonfly : 0.00%
anchor : 0.00%
octopus : 0.00%
starfish : 0.00%


In [12]:
import os

# Show what has been written to the Kaggle working directory.
working_dir = "/kaggle/working"
print(os.listdir(working_dir))

['resnet18_caltech101_generalized.pth', '.virtual_documents']


In [13]:
from IPython.display import FileLink

# Leaving the FileLink as the cell's final expression makes the notebook render
# a clickable link to the file in the output area below the cell.
checkpoint_link = FileLink("resnet18_caltech101_generalized.pth")
checkpoint_link