In [1]:
!pip install torch==2.3.1 torchvision==0.18.1

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.3.1)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.3.1)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.3.1)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.3.1)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.3.1)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.3.1)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch==2.3.1)


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR10
from torchvision.transforms import Compose, ToTensor, Resize
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import torch.nn.functional as F

# Define transforms
transform = Compose([
    Resize((128, 128)),  # Resize images to a more manageable size
    ToTensor()
])

# Load CIFAR-10 dataset
dataset = CIFAR10(root='data/cifar10', train=True, download=True, transform=transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=2)  # Use multiple workers

# Define YOLO-like model for CIFAR-10
class C2f(nn.Module):
    def __init__(self, in_channels, out_channels, shortcut=True):
        super(C2f, self).__init__()
        self.shortcut = shortcut
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        identity = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if self.shortcut:
            out += identity
        return F.relu(out)

class YOLOv8(nn.Module):
    def __init__(self, num_classes=10):
        super(YOLOv8, self).__init__()
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            C2f(128, 128),
            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            C2f(256, 256),
            nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            C2f(512, 512),
        )

        self.head = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            C2f(256, 256),
            nn.Conv2d(256, num_classes, kernel_size=1)  # Output channels should be num_classes
        )

    def forward(self, x):
        x = self.backbone(x)
        x = self.head(x)
        x = x.view(x.size(0), -1)  # Flatten the output for classification
        return x

# Initialize model
model = YOLOv8(num_classes=10)  # CIFAR-10 has 10 classes
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train(model, dataloader, criterion, optimizer, epochs=5):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        all_labels = []
        all_preds = []
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)

            # Compute loss
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(torch.argmax(outputs, dim=1).cpu().numpy())

        # Print average loss and accuracy
        avg_loss = running_loss / len(dataloader)
        accuracy = accuracy_score(all_labels, all_preds)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}')

# Train the model
train(model, dataloader, criterion, optimizer)


Files already downloaded and verified
Epoch [1/5], Loss: 1.7194, Accuracy: 0.3774
Epoch [2/5], Loss: 1.1789, Accuracy: 0.5776
Epoch [3/5], Loss: 0.8534, Accuracy: 0.7012
Epoch [4/5], Loss: 0.6738, Accuracy: 0.7639
Epoch [5/5], Loss: 0.5363, Accuracy: 0.8126


Below is Much Better

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR10
from torchvision.transforms import Compose, ToTensor, Resize, Normalize
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import accuracy_score
import torch.nn.functional as F

# Define transforms
transform = Compose([
    Resize((128, 128)),  # Resize images to a more manageable size
    ToTensor(),
    Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize images
])

# Load CIFAR-10 dataset
dataset = CIFAR10(root='data/cifar10', train=True, download=True, transform=transform)

# Split dataset into training and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=2)
val_dataloader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=2)

# Define YOLO-like model for CIFAR-10
class C2f(nn.Module):
    def __init__(self, in_channels, out_channels, shortcut=True):
        super(C2f, self).__init__()
        self.shortcut = shortcut
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        identity = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if self.shortcut:
            out += identity
        return F.relu(out)

class YOLOv8(nn.Module):
    def __init__(self, num_classes=10):
        super(YOLOv8, self).__init__()
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            C2f(128, 128),
            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            C2f(256, 256),
            nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            C2f(512, 512),
        )

        self.head = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            C2f(256, 256),
            nn.Conv2d(256, num_classes, kernel_size=1)  # Output channels should be num_classes
        )

    def forward(self, x):
        x = self.backbone(x)
        x = self.head(x)
        x = x.view(x.size(0), -1)  # Flatten the output for classification
        return x

# Initialize model
model = YOLOv8(num_classes=10)  # CIFAR-10 has 10 classes
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Define learning rate scheduler
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

def train(model, train_dataloader, val_dataloader, criterion, optimizer, scheduler, epochs=5):
    best_accuracy = 0.0
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        all_labels = []
        all_preds = []
        for images, labels in train_dataloader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)

            # Compute loss
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(torch.argmax(outputs, dim=1).cpu().numpy())

        # Update learning rate
        scheduler.step()

        # Validate model
        model.eval()
        val_labels = []
        val_preds = []
        with torch.no_grad():
            for images, labels in val_dataloader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                val_labels.extend(labels.cpu().numpy())
                val_preds.extend(torch.argmax(outputs, dim=1).cpu().numpy())

        # Calculate accuracy
        avg_loss = running_loss / len(train_dataloader)
        train_accuracy = accuracy_score(all_labels, all_preds)
        val_accuracy = accuracy_score(val_labels, val_preds)

        print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, '
              f'Train Accuracy: {train_accuracy:.4f}, Val Accuracy: {val_accuracy:.4f}')

        # Save the model with the best validation accuracy
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            torch.save(model.state_dict(), 'best_yolov8_cifar10.pth')
            print('Model saved with best validation accuracy')

# Train the model
train(model, train_dataloader, val_dataloader, criterion, optimizer, scheduler)


Files already downloaded and verified
Epoch [1/5], Loss: 1.7701, Train Accuracy: 0.3539, Val Accuracy: 0.5200
Model saved with best validation accuracy
Epoch [2/5], Loss: 1.2001, Train Accuracy: 0.5675, Val Accuracy: 0.6104
Model saved with best validation accuracy
Epoch [3/5], Loss: 0.9336, Train Accuracy: 0.6692, Val Accuracy: 0.6507
Model saved with best validation accuracy
Epoch [4/5], Loss: 0.7540, Train Accuracy: 0.7341, Val Accuracy: 0.7321
Model saved with best validation accuracy
Epoch [5/5], Loss: 0.6165, Train Accuracy: 0.7847, Val Accuracy: 0.7399
Model saved with best validation accuracy


In [11]:
import torch

def save_model(model, model_path):
    torch.save(model.state_dict(), model_path)
    print(f"Model saved as {model_path}")

# Example usage after training the model
model = YOLOv8(num_classes=10)
# Assuming you've trained the model and it's ready to be saved
save_model(model, 'yolov8_cifar10_new.pth')


Model saved as yolov8_cifar10_new.pth


In [18]:
import torch
import torchvision.transforms as transforms
from PIL import Image

# Define CIFAR-10 class labels
class_labels = [
    'Airplane', 'Automobile', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck'
]

def load_and_preprocess_image(image_path):
    # Define preprocessing transformations
    transform = transforms.Compose([
        transforms.Resize((128, 128)),  # Resize to 128x128
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize images
    ])

    # Load image
    image = Image.open(image_path).convert('RGB')  # Ensure image is in RGB format
    # Apply transformations
    image = transform(image)
    # Add batch dimension
    image = image.unsqueeze(0)
    return image

def load_model(model_path, num_classes=10):
    model = YOLOv8(num_classes=num_classes)
    model.load_state_dict(torch.load(model_path))
    model.eval()  # Set the model to evaluation mode
    return model

def predict_image(image_path, model, device):
    image = load_and_preprocess_image(image_path)
    image = image.to(device)
    model.to(device)
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        outputs = model(image)
        pred = torch.argmax(outputs, dim=1)
    return pred.item()

# Example usage
if __name__ == "__main__":
    image_path = '/content/ship.jpg'
    model_path = 'best_yolov8_cifar10.pth'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load the model
    model = load_model(model_path, num_classes=10)

    # Predict
    predicted_class_index = predict_image(image_path, model, device)

    # Get class label
    predicted_class_label = class_labels[predicted_class_index]

    print(f"Predicted class index: {predicted_class_index}")
    print(f"Predicted class label: {predicted_class_label}")


Predicted class index: 8
Predicted class label: Ship
