In [19]:
import torch
from torchvision import datasets, transforms
import pandas as pd

In [20]:
class ImageFolderWithCategories(datasets.ImageFolder):
    """Custom dataset that includes image file paths. Extends
    torchvision.datasets.ImageFolder
    """

    # override the __getitem__ method. this is the method that dataloader calls
    def __getitem__(self, index):
        # this is what ImageFolder normally returns 
        original_tuple = super(ImageFolderWithCategories, self).__getitem__(index)
        # the image file path
        path = self.imgs[index][0]
        # make a new tuple that includes original and the path
        tuple_with_path = (original_tuple + (int(path.split('\\')[1]),))
        return tuple_with_path

In [123]:
# Define a transform to normalize the data

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomCrop(25),
    #We are normalizing the input tensor with a mean of 0.5 and a standard deviation of 0.5. This scales the pixel values to be between -1 and 1, which can help with training stability.
    transforms.Normalize((0.5,), (0.5,))
])

trainset = ImageFolderWithCategories('Train', transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=48)

In [124]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
        
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(128 * 3 * 3, 512)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(512, 1)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
        
        x = self.conv3(x)
        x = self.relu3(x)
        x = self.maxpool3(x)
        
        x = x.view(x.size(0), -1)

        
        x = self.fc1(x)
        x = self.relu4(x)
        x = self.fc2(x)
        
        return x

net = Net(num_classes=42)

In [130]:
import torch.optim as optim
from tqdm import tqdm

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

num_epochs = 10

for epoch in range(10):  # loop over the dataset multiple times
    running_loss = 0.0
    i = 0
    for images,labels, category in trainloader:
        
        optimizer.zero_grad()
        outputs = net(images)
        category = category.float()
        loss = criterion(outputs, torch.tensor(category.tolist(), requires_grad=True))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        i = i + 1
        
    #print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(trainloader)}")

#print('Finished Training')


torch.Size([48, 1])
torch.Size([48])


RuntimeError: expected scalar type Long but found Float

In [None]:
correct = 0
total = 0

with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on the test set: {100 * correct / total:.2f}%")