Import necessary libraries

In [None]:
import os
import glob
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator


Download and extract the Caltech-256 dataset

In [None]:
# Fetch and unpack the Caltech-256 archive.
# `-c` makes the cell safe to re-run: a partial download is resumed and an
# already-complete archive is not downloaded again (plain `wget` would fetch
# the whole file a second time and save it under a `.1` suffix).
!wget -c https://caltech256-bucket.s3.amazonaws.com/256_ObjectCategories.tar
!tar -xf 256_ObjectCategories.tar

--2023-05-25 13:26:47--  https://caltech256-bucket.s3.amazonaws.com/256_ObjectCategories.tar
Resolving caltech256-bucket.s3.amazonaws.com (caltech256-bucket.s3.amazonaws.com)... 3.5.25.92, 54.231.134.105, 52.217.39.60, ...
Connecting to caltech256-bucket.s3.amazonaws.com (caltech256-bucket.s3.amazonaws.com)|3.5.25.92|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1183006720 (1.1G) [application/x-tar]
Saving to: ‘256_ObjectCategories.tar’


2023-05-25 13:28:14 (13.1 MB/s) - ‘256_ObjectCategories.tar’ saved [1183006720/1183006720]



Fine-tune a pretrained ResNeXt-50 (32x4d) on Caltech-256

In [None]:
import os
import glob
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from PIL import Image
import torch.nn.functional as F

# Set the seed for reproducibility
np.random.seed(42)
torch.manual_seed(42)

# Define dataset_path and load the data
dataset_path = './256_ObjectCategories'
# glob returns matches in arbitrary (filesystem-dependent) order; sort them so
# the seeded split below selects the same files on every run and machine.
all_images = sorted(glob.glob(f'{dataset_path}/*/*.jpg'))
if not all_images:
    # Fail early with a clear message instead of an opaque error from the split.
    raise FileNotFoundError(
        f"No .jpg files found under {dataset_path!r}; download and extract the dataset first."
    )
# The label of an image is the name of the directory that contains it.
all_labels = [os.path.basename(os.path.dirname(img)) for img in all_images]

# Stratified train-test split (4:1, i.e. test_size=0.2) so every category
# keeps the same proportion in both splits.
train_images, test_images, train_labels, test_labels = train_test_split(
    all_images,
    all_labels,
    test_size=0.2,
    random_state=42,
    stratify=all_labels,
)

# Define custom dataset
class CustomDataset(Dataset):
    """Map-style dataset that pairs image file paths with integer class ids.

    Args:
        images: Sequence of image file paths.
        labels: Sequence of label strings, one per image. The first three
            characters of each string are parsed as a 1-based class number
            and stored as a 0-based integer id.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, images, labels, transform=None):
        self.transform = transform
        self.images = images
        # "NNN..." -> NNN - 1, so class numbers starting at 1 become ids starting at 0.
        self.labels = [int(name[:3]) - 1 for name in labels]

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, class_id)``."""
        path, target = self.images[idx], self.labels[idx]
        # Force three channels so greyscale / palette files match the rest.
        picture = Image.open(path).convert('RGB')
        if self.transform:
            picture = self.transform(picture)
        return picture, target

# Preprocessing applied identically to the train and test splits:
# resize to a fixed square, convert to a tensor, then normalize each channel.
# NOTE(review): torchvision's ResNeXt ends in adaptive average pooling, so 299
# is a choice rather than a hard requirement of the model - confirm the intended size.
img_size = 299
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transform = transforms.Compose(
    [
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        normalize,
    ]
)

# Wrap each split in a CustomDataset sharing the same preprocessing
train_dataset = CustomDataset(images=train_images, labels=train_labels, transform=transform)
test_dataset = CustomDataset(images=test_images, labels=test_labels, transform=transform)

# Batch the datasets; only the training data is reshuffled each epoch
batch_size = 8
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# One output unit per distinct label string found in the dataset
num_classes = len(np.unique(all_labels))

# Load the pretrained ResNeXt-50 (32x4d) and replace its final fully connected
# layer with a fresh one sized for this dataset's classes
model = models.resnext50_32x4d(weights='ResNeXt50_32X4D_Weights.IMAGENET1K_V2')
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)
model = model.to(device)

# Cross-entropy loss; Adam updates every parameter of the network
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=5e-5)

# Train the model, evaluating on the test split after every epoch
epochs = 10
for epoch in range(epochs):
    # ---- Training pass ----
    train_loss = 0.0      # running sum of per-batch mean losses
    train_accuracy = 0.0  # running count of correctly classified samples
    model.train()

    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        # The predicted class is the index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        train_accuracy += (predicted == batch_y).sum().item()

    # Loss is averaged over batches, accuracy over individual samples.
    train_loss /= len(train_loader)
    train_accuracy /= len(train_dataset)

    # Report the epoch header and train metrics once, as soon as they are known.
    print(f"Epoch {epoch+1}/{epochs}")
    print(f"Train Loss: {train_loss:.4f} | Train Accuracy: {train_accuracy:.4f}")

    # ---- Evaluation pass ----
    model.eval()
    test_loss = 0.0
    test_accuracy = 0.0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)

            test_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            test_accuracy += (predicted == batch_y).sum().item()

    test_loss /= len(test_loader)
    test_accuracy /= len(test_dataset)

    print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f}")


Epoch 1/10
Train Loss: 2.1581 | Train Accuracy: 0.5855
Epoch 1/10
Train Loss: 2.1581 | Train Accuracy: 0.5855
Test Loss: 0.6250 | Test Accuracy: 0.8500
Epoch 2/10
Train Loss: 0.4676 | Train Accuracy: 0.9004
Epoch 2/10
Train Loss: 0.4676 | Train Accuracy: 0.9004
Test Loss: 0.4453 | Test Accuracy: 0.8873
Epoch 3/10
Train Loss: 0.1710 | Train Accuracy: 0.9631
Epoch 3/10
Train Loss: 0.1710 | Train Accuracy: 0.9631
Test Loss: 0.4359 | Test Accuracy: 0.8919
Epoch 4/10
Train Loss: 0.0784 | Train Accuracy: 0.9835
Epoch 4/10
Train Loss: 0.0784 | Train Accuracy: 0.9835
Test Loss: 0.4541 | Test Accuracy: 0.8894
Epoch 5/10
Train Loss: 0.0522 | Train Accuracy: 0.9887
Epoch 5/10
Train Loss: 0.0522 | Train Accuracy: 0.9887
Test Loss: 0.4936 | Test Accuracy: 0.8896
Epoch 6/10
Train Loss: 0.0431 | Train Accuracy: 0.9903
Epoch 6/10
Train Loss: 0.0431 | Train Accuracy: 0.9903
Test Loss: 0.4957 | Test Accuracy: 0.8878
Epoch 7/10
Train Loss: 0.0366 | Train Accuracy: 0.9910
Epoch 7/10
Train Loss: 0.0366 | T