# Loading the Dataset

In [1]:
import torch
import os
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset,DataLoader

In [2]:
class ImageDataset(Dataset):
    """Image-folder dataset: every sub-directory of ``image_dir`` is one class.

    Args:
        image_dir: Root directory; each immediate sub-directory is a class and
            every regular file inside it is one sample of that class.
        transform: Optional callable applied to the RGB PIL image before it is
            returned from ``__getitem__``.

    Attributes set in ``__init__``:
        image_paths: Path of every sample, grouped by class.
        labels: Integer label for the sample at the same index of ``image_paths``.
        class_name: Dict mapping integer label -> class directory name.
    """

    def __init__(self,image_dir,transform=None):
        self.image_dir=image_dir
        self.image_paths=[]
        self.labels=[]
        self.class_name={}
        self.transform=transform
        # os.listdir() returns entries in arbitrary order. Sorting makes the
        # label indices deterministic, so two datasets built from directories
        # with the same class folders (e.g. train and test) agree on labels.
        # Non-directory entries in the root are skipped instead of crashing.
        class_dirs=sorted(
            entry for entry in os.listdir(image_dir)
            if os.path.isdir(os.path.join(image_dir,entry))
        )
        for label,class_dir in enumerate(class_dirs):
            self.class_name[label]=class_dir
            class_path=os.path.join(image_dir,class_dir)
            for image_name in sorted(os.listdir(class_path)):
                image_path=os.path.join(class_path,image_name)
                # Nested directories cannot be opened as images; ignore them.
                if not os.path.isfile(image_path):
                    continue
                self.image_paths.append(image_path)
                self.labels.append(label)

    def __len__(self):
        """Return the number of samples found under ``image_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``.

        The image is loaded as RGB and passed through ``self.transform`` when
        one was supplied; ``label`` is the integer class index.
        """
        img_path=self.image_paths[idx]
        # convert() produces an independent in-memory copy, so the underlying
        # file handle can be closed right away instead of waiting for GC.
        with Image.open(img_path) as source:
            img=source.convert("RGB")
        label=self.labels[idx]
        if self.transform:
            img=self.transform(img)
        return img,label

In [3]:
# Preprocessing shared by the train and test datasets: resize to 128x128,
# convert to a tensor, then normalise each of the three channels with
# mean 0.5 / std 0.5.
transform=transforms.Compose(
    [
        transforms.Resize((128,128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5]*3,std=[0.5]*3),
    ]
)

In [None]:
# Root folders of the two splits (paths are relative to the working directory).
train_image_dir='Classification_dataset_v3/images/train'
test_image_dir='Classification_dataset_v3/images/test'
# Build both datasets the same way so they share one preprocessing pipeline.
train_image_dataset,test_image_dataset=(
    ImageDataset(image_dir=split_dir,transform=transform)
    for split_dir in (train_image_dir,test_image_dir)
)


In [5]:
# Mini-batch loaders (32 samples per batch). Only the training data is
# shuffled: reshuffling every epoch helps SGD-style optimisation, whereas
# evaluation gains nothing from it and a fixed order keeps test batches
# reproducible between runs.
train_image_loader=DataLoader(dataset=train_image_dataset,batch_size=32,shuffle=True)
test_image_loader=DataLoader(dataset=test_image_dataset,batch_size=32,shuffle=False)

# Custom CNN Architecture

In [6]:
import torch.nn as nn 
import torch.optim as optim

In [11]:
class CustomCnnModel(nn.Module):
    """Four-block CNN classifier for 3-channel square images.

    Each convolutional block is Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d
    -> ReLU -> MaxPool2d(2x2, stride 2), so every block keeps the spatial size
    through the convolution and halves it in the pooling step. Channel widths
    are 32, 64, 128 and 256. The flattened features feed a 512 -> 128 ->
    ``num_classes`` fully connected head that returns raw class scores.

    Args:
        input_dim: Height/width of the square input images; used to size the
            first linear layer.
        num_classes: Number of output scores.
    """

    def __init__(self,input_dim,num_classes):
        super().__init__()
        self.input_dim=input_dim
        self.num_classes=num_classes
        self.conv_layers=nn.Sequential(
            #C1
            nn.Conv2d(3,32,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2),
            #C2
            nn.Conv2d(32,64,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2),
            #C3
            nn.Conv2d(64,128,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2),
            #C4
            nn.Conv2d(128,256,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2)
        )
        # Number of features after flattening the conv output; filled in by
        # the shape probe below and used to size the first linear layer.
        self._to_linear=None
        self._get_conv_output(self.input_dim)

        self.fc_layer=nn.Sequential(
            nn.Linear(self._to_linear,512),
            nn.ReLU(),
            nn.Linear(512,128),
            nn.ReLU(),
            nn.Linear(128,self.num_classes)
        )

    def _get_conv_output(self,input_dim=128):
        """Set ``self._to_linear`` to the flattened size of the conv output.

        A single all-zero ``(1, 3, input_dim, input_dim)`` image is pushed
        through ``self.conv_layers`` to measure the output size.
        """
        # Probe in eval mode: in training mode the BatchNorm layers would fold
        # the statistics of this dummy batch into their running mean/variance
        # (and bump num_batches_tracked) before any real data has been seen.
        was_training=self.conv_layers.training
        self.conv_layers.eval()
        try:
            with torch.no_grad():
                dummy_input=torch.zeros(1,3,input_dim,input_dim)
                output=self.conv_layers(dummy_input)
        finally:
            # Always restore the caller's mode, even if the probe fails.
            self.conv_layers.train(was_training)
        self._to_linear=output.view(1,-1).size(1)

    def forward(self,x):
        """Return raw class scores of shape ``(batch, num_classes)``.

        ``x`` is a batch of 3-channel images whose spatial size matches the
        ``input_dim`` the model was built with.
        """
        x=self.conv_layers(x)
        # Collapse channels and spatial dims into one feature vector per sample.
        x=x.view(x.size(0),-1)
        x=self.fc_layer(x)
        return x


In [12]:
# Build the network and place it on the GPU when CUDA is available,
# falling back to the CPU otherwise.
device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model=CustomCnnModel(input_dim=128,num_classes=3)
model=model.to(device)

In [13]:
# Cross-entropy loss on the raw class scores, optimised with Adam.
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(params=model.parameters(),lr=1e-3)

In [14]:
# Training loop: one full pass over train_image_loader per epoch, reporting
# the mean per-batch loss at the end of each epoch.
epochs=10
for epoch in range(epochs):
    model.train()  # BatchNorm uses batch statistics while training
    running_loss=0.0
    for images,labels in train_image_loader:
        images=images.to(device)
        labels=labels.to(device)

        optimizer.zero_grad()  # gradients accumulate unless cleared each step
        loss=criterion(model(images),labels)
        loss.backward()
        optimizer.step()

        running_loss+=loss.item()
    print(f'epoch {epoch+1}/{epochs}, current loss: {running_loss/len(train_image_loader)}')

    

epoch 1/10, current loss: 1.1506975130030983
epoch 2/10, current loss: 0.6446014881134033
epoch 3/10, current loss: 0.5558558727565565
epoch 4/10, current loss: 0.5100406712607334
epoch 5/10, current loss: 0.4605522761219426
epoch 6/10, current loss: 0.41340242012550954
epoch 7/10, current loss: 0.3731941525873385
epoch 8/10, current loss: 0.3252238731635244
epoch 9/10, current loss: 0.3270696421987132
epoch 10/10, current loss: 0.2917503323994185


In [15]:
# Persist only the learned parameters (state dict), not the whole module object.
torch.save(obj=model.state_dict(),f="cnn_model.pth")

In [16]:
# Evaluate: top-1 accuracy over every batch of the test loader.
model.eval()  # BatchNorm switches to its running statistics
correct=0
total=0
with torch.no_grad():  # no gradients needed for inference
    for images,labels in test_image_loader:
        images=images.to(device)
        labels=labels.to(device)
        # Index of the highest score along the class dimension.
        predicted=torch.max(model(images),1).indices
        total+=labels.shape[0]
        correct+=int((predicted==labels).sum())
print(f"test accuracy is: {100* correct/total:.2f}%")


test accuracy is: 68.00%


In [17]:
import cv2

In [18]:
class ImageClassifier:
    """Single-image inference wrapper around a saved ``CustomCnnModel``.

    Args:
        model_path: Path to a state dict saved with ``torch.save``.
        class_names: Mapping (or sequence) from predicted class index to a
            human-readable label. Its length determines the number of model
            outputs, so it must match the checkpoint being loaded.
    """

    def __init__(self,model_path,class_names):
        self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.class_names=class_names
        # Size the head from class_names and move the model to this
        # instance's own device rather than relying on a notebook-global
        # `device` variable happening to exist.
        self.model=CustomCnnModel(input_dim=128,num_classes=len(class_names)).to(self.device)
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source (consider weights_only=True where the
        # installed torch version supports it).
        self.model.load_state_dict(torch.load(model_path,map_location=self.device))
        self.model.eval()
        # Same preprocessing as the training pipeline: 128x128 resize, tensor
        # conversion, per-channel normalisation with mean 0.5 / std 0.5.
        self.transform=transforms.Compose([
            transforms.Resize((128,128)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5,0.5,0.5],std=[0.5,0.5,0.5])])

    def predict(self,image_path,output_path="output_image.jpg"):
        """Classify one image and save an annotated copy.

        Args:
            image_path: Path of the image to classify.
            output_path: Where the copy with the predicted label drawn on it
                is written. Defaults to ``"output_image.jpg"``.

        Returns:
            The entry of ``class_names`` for the highest-scoring class.

        Raises:
            ValueError: If OpenCV cannot read ``image_path`` for annotation.
        """
        # convert() returns an independent copy, so the file handle can be
        # released immediately.
        with Image.open(image_path) as source:
            img=source.convert("RGB")
        input_tensor=self.transform(img).unsqueeze(0).to(self.device)
        with torch.no_grad():
            output=self.model(input_tensor)
            _,predicted=torch.max(output,1)
        label=self.class_names[predicted.item()]
        # Re-read with OpenCV for drawing; imread signals failure by returning
        # None instead of raising, so check before drawing on it.
        annotated=cv2.imread(image_path)
        if annotated is None:
            raise ValueError(f"OpenCV could not read image: {image_path}")
        # OpenCV colours are BGR, so (255,0,0) draws the label in blue.
        cv2.putText(annotated,label,(10,30),cv2.FONT_HERSHEY_SIMPLEX,1,(255,0,0),2)
        cv2.imwrite(output_path,annotated)
        return label
        



In [19]:
import os
# Show the working directory that the relative dataset and checkpoint paths
# in this notebook resolve against.
current_dir=os.getcwd()
print(current_dir)

c:\Users\ragha\OneDrive\Desktop\CV COURSE\pytorch


In [20]:
# Load the saved weights and classify one image from the test split; the
# label names come from the training dataset's index -> class mapping.
classifier=ImageClassifier(
    model_path="cnn_model.pth",
    class_names=train_image_dataset.class_name,
)
sample_image_path="Classification_dataset_v3/images/test/Cat/cat_1005.jpg"
label=classifier.predict(sample_image_path)
print(label)

Cat
