In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader,Dataset
import torchvision.transforms as transforms
from PIL import Image
import os

In [7]:
class ImageDataSet(Dataset):
    def __init__(self,image_dir,transform=None):
        self.image_dir=image_dir
        self.transform=transform
        self.class_names={}
        self.image_paths=[]
        self.labels=[]
        for label,class_name in enumerate(os.listdir(self.image_dir)):
            self.class_names[label]=class_name
            for img_path in os.listdir(os.path.join(self.image_dir,class_name)):
                self.image_paths.append(os.path.join(self.image_dir,class_name,img_path))
                self.labels.append(label)
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, index):
        img_path=self.image_paths[index]
        label=self.labels[index]
        img=Image.open(img_path).convert('RGB')
        if self.transform:
            img=self.transform(img)
        return img,label

In [8]:
transform=transforms.Compose([transforms.Resize((227,227)),transforms.ToTensor(),transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225])])

In [9]:
train_dir='flowersDataset/train'
test_dir='flowersDataset/test'

In [10]:
## turns out we could use torchvision.dataset.ImageFolder

In [11]:
train_dataset=ImageDataSet(train_dir,transform)
test_dataset=ImageDataSet(test_dir,transform)

In [12]:
len(test_dataset)


182

In [13]:
train_loader=DataLoader(train_dataset,32,True)
test_loader=DataLoader(test_dataset,32,False)

In [14]:
len(test_loader)

6

## AlexNet Architecture

In [15]:
class AlexNet(nn.Module):
    def __init__(self,num_classes):
        super(AlexNet,self).__init__()
        self.convLayers=nn.Sequential(
            nn.Conv2d(3,96,kernel_size=11,stride=4),
            nn.BatchNorm2d(96),
            nn.MaxPool2d(3,2),
            nn.ReLU(),
            nn.Conv2d(96,256,kernel_size=5,stride=1,padding=2),
            nn.BatchNorm2d(256),
            nn.MaxPool2d(3,2),
            nn.ReLU(),
            nn.Conv2d(256,384,kernel_size=3,stride=1,padding=1),
            nn.ReLU(),
            nn.Conv2d(384,384,kernel_size=3,stride=1,padding=1),
            nn.ReLU(),
            nn.Conv2d(384,256,kernel_size=3,stride=1,padding=1),
            nn.MaxPool2d(3,2),
            nn.ReLU()
        )
        self.fcLayers=nn.Sequential(
            nn.Linear(256*6*6,4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096,4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096,num_classes)
        )

    def forward(self,x):
        x=self.convLayers(x)
        x=x.view(-1,256*6*6)
        x=self.fcLayers(x)
        return x
        



In [16]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model=AlexNet(len(train_dataset.class_names)).to(device)
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(),lr=0.01)

In [17]:
#training loop
epochs=3
train_losses,test_accuracies=[],[]
for epoch in range(epochs):
    model.train()
    running_loss=0.0
    for images,labels in train_loader:
        images,labels=images.to(device),labels.to(device)
        optimizer.zero_grad()
        outputs=model(images)
        loss=criterion(outputs,labels)
        running_loss+=loss.item()
        loss.backward()
        optimizer.step()
    avg_loss=running_loss/len(train_loader)
    train_losses.append(avg_loss)
    print(f'epoch {epoch+1}/{epochs}, loss: {avg_loss:.2f}')

    model.eval()
    correct,total=0,0
    with torch.no_grad():
        for images,labels in test_loader:
            images,labels=images.to(device),labels.to(device)
            outputs=model(images)
            _,predictions=torch.max(outputs,1)
            total+=labels.size(0)
            correct+=(predictions==labels).sum().item()
        accuracy=100*correct/total
        test_accuracies.append(accuracy)
        print(f'Accuracy on test set:{accuracy:.2f}')


epoch 1/3, loss: 8926.41
Accuracy on test set:57.69
epoch 2/3, loss: 1.00
Accuracy on test set:57.69
epoch 3/3, loss: 0.72
Accuracy on test set:42.31


In [28]:
def predict(image_path):
    img=Image.open(image_path)
    image=transform(img).unsqueeze(0)
    image=image.to(device)
    model.eval()
    with torch.no_grad():
        prediction=model(image)
        _,label=torch.max(prediction,1)
    print(f'the flower is a {train_dataset.class_names[label.item()]}')


In [22]:
import os
os.getcwd()

'c:\\Users\\ragha\\OneDrive\\Desktop\\CV COURSE\\image classification'

In [29]:
predict('flowersDataset/test/daisy/476857510_d2b30175de_n_jpg.rf.40ff83ae9c6f996b11149eaf1eafcc2e.jpg')

the flower is a daisy
