In [None]:
import os
import random
import shutil

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset, Subset
from torchvision import transforms
from tqdm import tqdm

In [None]:
def delete_all_in_folder(folder_path):
    """Delete every entry inside ``folder_path`` while keeping the folder itself.

    Regular files and symbolic links are unlinked; sub-directories are removed
    recursively with ``shutil.rmtree``. Deletion is best-effort: a failure on
    one entry is reported on stdout and the remaining entries are still
    processed.

    Args:
        folder_path: Path of the directory to empty. If it does not exist a
            message is printed and nothing else happens.
    """
    if not os.path.exists(folder_path):
        print(f'The folder {folder_path} does not exist.')
        return
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        try:
            # Links are tested before directories: os.path.isdir follows
            # symlinks, and a link must be unlinked rather than walked.
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except OSError as e:
            # Only filesystem errors are treated as "best effort"; programming
            # errors (e.g. a missing import) are allowed to surface.
            print(f'Failed to delete {file_path}. Reason: {e}')
# Empty the Kaggle working directory; this deletes everything inside /kaggle/working/.
delete_all_in_folder('/kaggle/working/')

In [None]:
def set_seed(seed_value: int):
    """
    Seed every random number generator this notebook relies on.

    Python's ``random`` module, NumPy's global generator and PyTorch (CPU,
    the current CUDA device and all CUDA devices) receive the same seed.
    cuDNN is then put in deterministic mode with benchmarking turned off.
    Args:
    - seed_value (int): Seed handed to each generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed_value)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
# Seed used for this notebook run; applied immediately through set_seed.
seed_value = 42
set_seed(seed_value)

In [None]:
class CIFAR100Dataset(Dataset):
    """Image dataset read from a folder that holds one sub-directory per class.

    The sorted sub-directory names become ``classes``; the integer label of a
    sample is the position of its class in that list. Files are collected in
    sorted order so that a given index always refers to the same image,
    which keeps index-based train/validation splits reproducible.

    Args:
        folder: Root directory containing the class sub-directories.
        transform: Optional callable applied to each loaded RGB PIL image.
    """

    # File extensions (compared case-insensitively) that count as images.
    IMAGE_EXTENSIONS = (".jpg", ".png")

    def __init__(self, folder, transform=None):
        self.folder = folder
        self.transform = transform
        self.image_paths = []
        self.labels = []
        # Only directories are classes; stray files in the root are ignored
        # instead of crashing the per-class listing below.
        self.classes = sorted(
            entry for entry in os.listdir(folder)
            if os.path.isdir(os.path.join(folder, entry))
        )
        for label, class_name in enumerate(self.classes):
            class_folder = os.path.join(folder, class_name)
            # os.listdir returns entries in arbitrary order; sort for determinism.
            for file in sorted(os.listdir(class_folder)):
                if file.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.image_paths.append(os.path.join(class_folder, file))
                    self.labels.append(label)

    def __len__(self):
        """Return the number of images found under ``folder``."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; the image is RGB, transformed if a transform was given."""
        image_path = self.image_paths[index]
        label = self.labels[index]
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(image_path) as handle:
            image = handle.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label
class ResNet50ForCIFAR10(nn.Module):
    """ResNet-50 backbone (randomly initialised) with a five-layer MLP head.

    The stock ``fc`` layer of torchvision's ResNet-50 is replaced by a stack
    of Linear/ReLU layers narrowing 1024 -> 512 -> 256 -> 128 and ending in
    ``num_classes`` outputs.

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # No weights argument: the backbone is built without pretrained weights.
        self.resnet = models.resnet50()
        in_features = self.resnet.fc.in_features
        # Layer positions inside the Sequential are kept as they were so
        # existing state_dict checkpoints keep the same parameter names.
        self.resnet.fc = nn.Sequential(
            nn.Linear(in_features, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return the class logits produced by the modified ResNet for batch ``x``."""
        return self.resnet(x)

In [None]:
def plot_random_images_from_dataset(dataset, num_images=10):
    """Show a grid of randomly chosen samples titled with their class names.

    Args:
        dataset: Indexable dataset yielding ``(image, label)`` pairs and
            exposing a ``classes`` list that maps labels to names.
        num_images: How many samples to draw. Values larger than the dataset
            are clamped to ``len(dataset)``.

    The grid uses at most five columns and as many rows as needed, so the
    default of 10 images produces the 2x5 layout on a 15x8 inch figure.
    """
    num_images = min(num_images, len(dataset))
    if num_images <= 0:
        print("No images to plot.")
        return
    indices = random.sample(range(len(dataset)), num_images)
    class_names = dataset.classes

    cols = min(5, num_images)
    rows = (num_images + cols - 1) // cols  # ceiling division
    fig = plt.figure(figsize=(3 * cols, 4 * rows))
    for i, idx in enumerate(indices):
        image, label = dataset[idx]
        # Tensors are reordered from (C, H, W) to (H, W, C) for imshow;
        # anything else (e.g. a PIL image from a transform-less dataset) is
        # passed through unchanged.
        if torch.is_tensor(image):
            image = image.permute(1, 2, 0)
        ax = fig.add_subplot(rows, cols, i + 1)
        ax.imshow(image)
        ax.set_title(f"Class: {class_names[label]}")
        ax.axis('off')
    fig.tight_layout()
    plt.show()
def plot_predictions(model, dataset, num_images=10):
    """Show random samples titled with their true and predicted class names.

    The model is switched to eval mode and left in that mode. Inputs are moved
    to whichever device the model's parameters live on, so the function does
    not depend on a notebook-level ``device`` variable.

    Args:
        model: Classifier returning one row of logits per input image.
        dataset: Indexable dataset yielding ``(image_tensor, label)`` pairs
            and exposing a ``classes`` list that maps labels to names.
        num_images: How many samples to draw. Values larger than the dataset
            are clamped to ``len(dataset)``.
    """
    model.eval()
    num_images = min(num_images, len(dataset))
    if num_images <= 0:
        print("No images to plot.")
        return
    indices = random.sample(range(len(dataset)), num_images)
    class_names = dataset.classes

    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    cols = min(5, num_images)
    rows = (num_images + cols - 1) // cols  # ceiling division
    fig = plt.figure(figsize=(3 * cols, 4 * rows))
    with torch.no_grad():
        for i, idx in enumerate(indices):
            image, true_label = dataset[idx]
            # Add a batch dimension for the forward pass.
            outputs = model(image.unsqueeze(0).to(model_device))
            predicted_label = outputs.argmax(dim=1).item()
            ax = fig.add_subplot(rows, cols, i + 1)
            # (C, H, W) -> (H, W, C) for imshow.
            ax.imshow(image.permute(1, 2, 0))
            ax.set_title(f"True: {class_names[true_label]}\nPred: {class_names[predicted_label]}")
            ax.axis('off')
    fig.tight_layout()
    plt.show()

In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None, desc="", loss_key="Loss", acc_key="Accuracy"):
    """Run one pass over ``loader``: train if ``optimizer`` is given, else evaluate.

    Returns:
        ``(mean_loss, accuracy)`` as Python floats, averaged over the samples
        actually seen in this pass (``(0.0, 0.0)`` for an empty loader).
    """
    training = optimizer is not None
    model.train(training)
    total_loss = 0.0
    total_correct = 0
    seen = 0
    bar = tqdm(loader, desc=desc, leave=False)
    with torch.set_grad_enabled(training):
        for inputs, labels in bar:
            inputs, labels = inputs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()
            batch_size = inputs.size(0)
            # criterion returns a batch mean, so weight it by the batch size.
            total_loss += loss.item() * batch_size
            total_correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += batch_size
            # Running averages over the samples processed so far.
            bar.set_postfix(**{
                loss_key: f"{total_loss / seen:.4f}",
                acc_key: f"{total_correct / seen:.4f}",
            })
    if seen == 0:
        return 0.0, 0.0
    return total_loss / seen, total_correct / seen


def train_and_val(model, train_loader, val_loader, criterion, optimizer, dataset, num_epochs=25, save_path='best_model.pth'):
    """Train ``model`` and keep the weights with the best validation accuracy.

    Each epoch performs one training pass and one validation pass, printing
    loss and accuracy for both. Whenever validation accuracy improves on the
    best value so far, ``model.state_dict()`` is written to ``save_path``; the
    first epoch always writes a checkpoint.

    Args:
        model: Network to optimise; batches are moved to its parameters' device.
        train_loader: DataLoader yielding ``(inputs, labels)`` for training.
        val_loader: DataLoader yielding ``(inputs, labels)`` for validation.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        dataset: Not read by this function; kept so existing calls still work.
        num_epochs: Number of epochs to run.
        save_path: Destination file for the best ``state_dict``.
    """
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # -inf guarantees a checkpoint exists after the first epoch, even if the
    # validation accuracy is 0.
    best_val_acc = float("-inf")
    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")

        epoch_loss, epoch_acc = _run_epoch(
            model, train_loader, criterion, model_device,
            optimizer=optimizer,
            desc=f"Training Epoch {epoch+1}/{num_epochs}",
        )
        print(f"Training Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

        val_loss, val_acc = _run_epoch(
            model, val_loader, criterion, model_device,
            desc=f"Validation Epoch {epoch+1}/{num_epochs}",
            loss_key="Val_Loss",
            acc_key="Val_Accuracy",
        )
        print(f"Validation Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), save_path)
            print(f"Best model saved with accuracy: {best_val_acc:.4f}")
def test(model, test_loader, criterion, model_path='best_model.pth'):
    """Load a saved ``state_dict`` and report loss and accuracy on ``test_loader``.

    Metrics are averaged over the samples yielded by ``test_loader`` itself,
    so the function gives correct numbers for any loader passed in and does
    not rely on notebook-level ``test_dataset`` or ``device`` variables.

    Args:
        model: Network whose architecture matches the checkpoint.
        test_loader: DataLoader yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        model_path: Path of the checkpoint written by ``torch.save``.
    """
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(model_path, map_location=model_device))
    model.eval()

    total_loss = 0.0
    total_correct = 0
    seen = 0
    bar = tqdm(test_loader, desc="Testing", leave=False)
    with torch.no_grad():
        for inputs, labels in bar:
            inputs, labels = inputs.to(model_device), labels.to(model_device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            batch_size = inputs.size(0)
            # criterion returns a batch mean, so weight it by the batch size.
            total_loss += loss.item() * batch_size
            total_correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += batch_size
            bar.set_postfix(
                Test_Loss=f"{total_loss / seen:.4f}",
                Test_Accuracy=f"{total_correct / seen:.4f}",
            )

    denom = max(seen, 1)  # avoid ZeroDivisionError on an empty loader
    test_loss = total_loss / denom
    test_acc = total_correct / denom
    print(f"Test Loss: {test_loss:.4f} Acc: {test_acc:.4f}")

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Every image is resized and converted to a tensor; no augmentation is applied.
# NOTE(review): 225x225 is an unusual input size (224 is the common choice
# for ResNet) — confirm it is intentional.
transform = transforms.Compose([
    transforms.Resize((225, 225)),
    transforms.ToTensor(),
])

train_dataset = CIFAR100Dataset(folder='/kaggle/input/cifar10/cifar10/train', transform=transform)
test_dataset = CIFAR100Dataset(folder='/kaggle/input/cifar10/cifar10/test', transform=transform)

# Stratified 80/20 split of the training indices. random_state is passed
# explicitly so re-running this cell yields the same split instead of
# depending on how far the global NumPy RNG has advanced.
train_idx, val_idx = train_test_split(
    list(range(len(train_dataset))),
    test_size=0.20,
    stratify=train_dataset.labels,
    random_state=seed_value,
)
train_set = Subset(train_dataset, train_idx)
val_set = Subset(train_dataset, val_idx)
print(f'Training set size: {len(train_set)}')
print(f'Validation set size: {len(val_set)}')
print(f'Test set size: {len(test_dataset)}')

batch_size = 64
# Never ask for more loader workers than the machine reports CPUs.
num_workers = min(8, os.cpu_count() or 1)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

model = ResNet50ForCIFAR10().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()

In [None]:
# Preview a random sample of training images with their class names (10 by default).
plot_random_images_from_dataset(train_dataset)

In [None]:
# Train for 50 epochs; the weights with the best validation accuracy are saved to
# /kaggle/working/best_model.pth. train_dataset fills the `dataset` argument, which
# train_and_val does not read.
train_and_val(model, train_loader, val_loader, criterion, optimizer, train_dataset, num_epochs=50, save_path='/kaggle/working/best_model.pth')

In [None]:
# Reload the checkpoint saved during training and print loss/accuracy on the test loader.
test(model, test_loader, criterion, model_path='/kaggle/working/best_model.pth')