In [4]:
!pip install pillow



In [211]:
import numpy as np
from PIL import Image

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim

import torch.optim as optim
import torchvision
import torchvision.transforms as transforms


In [212]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [213]:
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    # Define mean and std for normalization
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

In [214]:
train_data = torchvision.datasets.CIFAR10(root='/data',train = True ,transform=transform,download = True)
test_data = torchvision.datasets.CIFAR10(root='/data',train = False ,transform=transform,download = True)
train_loader = torch.utils.data.DataLoader(train_data, batch_size = 10, shuffle=True, num_workers = 2)
test_loader = torch.utils.data.DataLoader(test_data, batch_size = 10, shuffle=False, num_workers = 2)

In [215]:
image, label = train_data[0]

In [216]:
image.shape

torch.Size([3, 32, 32])

In [256]:
class_name = ['plane', 'car', 'bird', 'cat', 'dog', 'frog', 'horse', 'ship', 'truck']


class NeuralNet(nn.Module):
    def __init__(self):
        super().__init__()

        # Input shape: (3, 32, 32) - assuming CIFAR-10
        self.conv1 = nn.Conv2d(3, 12, 5)       # Output: (12, 28, 28)
        self.pool = nn.MaxPool2d(2, 2)         # Output: (12, 14, 14)
        self.conv2 = nn.Conv2d(12, 16, 5)      # Output: (16, 10, 10) -> then pooled to (16, 5, 5)

        # Flattened shape = 16 * 5 * 5 = 400
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)          # 10 classes for CIFAR-10

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))   # (12, 14, 14)
        x = self.pool(F.relu(self.conv2(x)))   # (16, 5, 5)
        x = torch.flatten(x, 1)                # Flatten except batch dim
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)                        # Raw output logits
        return x


In [218]:
net = NeuralNet()
loss_fuction = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001,momentum = 0.9)

In [219]:
for epoch in range(25):
    print(f"Training epoch {epoch}....")

    running_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data


        optimizer.zero_grad()

        outputs = net(inputs)
        loss = loss_fuction(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    print(f"Loss :  {running_loss/ len(train_loader)}..:4f ")

Training epoch 0....
Loss :  1.7520251557111741..:4f 
Training epoch 1....
Loss :  1.3869882946550847..:4f 
Training epoch 2....
Loss :  1.2432738650500774..:4f 
Training epoch 3....
Loss :  1.1636890930354595..:4f 
Training epoch 4....
Loss :  1.1025043841332198..:4f 
Training epoch 5....
Loss :  1.0542764886647462..:4f 
Training epoch 6....
Loss :  1.011657535135746..:4f 
Training epoch 7....
Loss :  0.9823353225022554..:4f 
Training epoch 8....
Loss :  0.9525776308715344..:4f 
Training epoch 9....
Loss :  0.9296823762834072..:4f 
Training epoch 10....
Loss :  0.912336868211627..:4f 
Training epoch 11....
Loss :  0.8938086249530316..:4f 
Training epoch 12....
Loss :  0.8778356862187385..:4f 
Training epoch 13....
Loss :  0.8671634325325489..:4f 
Training epoch 14....
Loss :  0.8564256098523736..:4f 
Training epoch 15....
Loss :  0.8463052444770932..:4f 
Training epoch 16....
Loss :  0.8317365749612451..:4f 
Training epoch 17....
Loss :  0.8308298120498657..:4f 
Training epoch 18....


In [257]:
torch.save(net.state_dict(),"trained_net.pth")

In [258]:
net = NeuralNet()

net.load_state_dict(torch.load("trained_net.pth"))

<All keys matched successfully>

In [259]:
correct = 0
total = 0
net.eval()

with torch.no_grad():
  for data in test_loader:
    images, labels = data
    outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

accurcy = 100 * correct /total

print(f'Accurcy : {accurcy}%')

Accurcy : 68.3%


In [268]:

new_transform = transforms.Compose([
    transforms.Resize((32, 32)), # Resize to 32x32, the size used for training
    transforms.ToTensor(),
    # Keep the same normalization used during training
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
])

def load_image(image_path):
    image = Image.open(image_path)
    image = new_transform(image)
    image = image.unsqueeze(0)
    return image

# Assuming image_paths is defined elsewhere and contains the paths to your images
image_paths = ["/content/images/plan.jpg","/content/images/Dog01.jpg"]
images = [load_image(img) for img in image_paths]


In [269]:
net.eval()
with torch.no_grad():
  # Iterate through the list of loaded images
  for image in images:
    outputs = net(image)

    _, predicted = torch.max(outputs, 1)
    # Print the predicted class for each image
    print("Predicted class:", class_name[predicted.item()])

Predicted class: plane
Predicted class: dog
