In [1]:
import torch.nn as nn
import torch
from torch.utils.data import Dataset

In [2]:

# Report which hardware the rest of the notebook will run on.
if not torch.cuda.is_available():
    print("No GPU available. Training will run on CPU.")
else:
    # Index 0 is the first visible CUDA device.
    print(f"GPU: {torch.cuda.get_device_name(0)} is available.")

GPU: NVIDIA GeForce GTX 1650 is available.


In [3]:
# Pick the compute device once; later cells move the model, metrics and batches to it.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Folder with one sub-directory per class (the layout ImageFolder expects).
root_dir = 'raw-img/raw-img'

# Resize every image to 256x256, convert to a float tensor in [0, 1], then
# shift/scale each of the three channels to roughly [-1, 1].
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    # One mean/std per channel, spelled out instead of relying on broadcasting.
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
dataset = datasets.ImageFolder(root=root_dir, transform=transform)
print(dataset.class_to_idx)

# 80/20 train/test split. The generator is seeded so the same images land in
# the same subset on every "Restart & Run All"; otherwise results from
# different runs are measured on different test sets.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_set, test_set = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
# Evaluation does not depend on sample order, so there is no reason to shuffle.
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Sanity check: fetch one (images, labels) batch and show the image tensor shape.
img = next(iter(train_loader))
print(img[0].shape)

{'Butterfly': 0, 'Cat': 1, 'Cow': 2, 'Dog': 3, 'Elephant': 4, 'Hen': 5, 'Horse': 6, 'Sheep': 7, 'Spider': 8, 'Squirell': 9}
torch.Size([16, 3, 256, 256])


In [5]:
# `img` is the (images, labels) batch fetched above. squeeze(dim=0) only drops
# the leading axis when its size is 1, so for a larger batch the shape is unchanged.
img[0].squeeze(dim=0).shape

torch.Size([16, 3, 256, 256])

In [12]:
class Net(nn.Module):
    """Small CNN classifier for 3-channel 256x256 images.

    Three Conv(3x3, padding=1) -> ReLU -> MaxPool(2) stages halve the spatial
    size each time (256 -> 128 -> 64 -> 32) while widening the channels
    (3 -> 16 -> 32 -> 64). The flattened 64*32*32 feature map then goes through
    a 512-unit hidden layer with dropout and a final linear classifier.

    Args:
        num_classes: Number of output classes (size of the final layer).

    ``forward`` returns raw, unnormalised logits of shape (batch, num_classes).
    ``nn.CrossEntropyLoss`` applies log-softmax internally, so feeding it
    already-softmaxed values squashes the gradients and stalls training.
    Use ``self.softmax(logits)`` explicitly when probabilities are needed.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # 64 channels * 32 * 32 spatial positions after three 2x poolings of 256x256.
        self.fc1 = nn.Linear(64*32*32, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.relu = nn.ReLU()
        # Not applied in forward(); kept so callers can turn logits into probabilities.
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for an image batch ``x``."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))

        # Flatten everything except the batch axis. Unlike view(-1, 64*32*32),
        # this never merges samples together: a wrongly sized input makes fc1
        # raise a shape error instead of silently changing the batch size.
        x = x.flatten(start_dim=1)

        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        # Raw logits: no softmax here, the loss function handles normalisation.
        return self.fc2(x)

In [7]:
def init_weights(m):
    """Initialise a single module in place; intended for ``model.apply(init_weights)``.

    ``nn.Linear`` layers get Xavier-uniform weights and a constant bias of 0.01.
    Every other module type is left untouched.

    Args:
        m: The module visited by ``nn.Module.apply``.
    """
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        # Linear(..., bias=False) has bias=None; skip it instead of crashing.
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.01)

In [8]:
from torchmetrics import Accuracy, Precision, Recall
# Macro-averaged multiclass metrics for the 10 classes, placed on `device` so
# they can consume model outputs without a host transfer.
# NOTE(review): with average='macro' accuracy is the mean per-class recall, so
# `accuracy` and `recall` are expected to report the same value — confirm intent.
accuracy, precision, recall = (
    metric_cls(task='multiclass', num_classes=10, average='macro').to(device)
    for metric_cls in (Accuracy, Precision, Recall)
)

In [None]:
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
model = Net(num_classes=10)
# Initialise the linear layers before the optimizer is built and before moving
# to the device; the initialisation is in place, so the order is for readability.
model.apply(init_weights)
model.to(device)
optimizer = Adam(model.parameters(), lr=0.001)
# CrossEntropyLoss applies log-softmax itself, so it must be given raw logits.
criterion = CrossEntropyLoss()

# The torchmetrics objects accumulate state across calls. They are reset at the
# start of each phase so training and validation statistics never mix, and so
# one epoch's numbers do not leak into the next.
epoch_metrics = (accuracy, precision, recall)

for epoch in range(10):
    # ---- training phase ----
    model.train()
    for metric in epoch_metrics:
        metric.reset()
    train_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        # Accumulate only; detach so the metric state holds no autograd graph.
        for metric in epoch_metrics:
            metric.update(output.detach(), labels)
    # Aggregate over the whole epoch rather than reporting the last batch only.
    acc, prec, rec = (metric.compute() for metric in epoch_metrics)
    print(f'Epoch: {epoch}, Precision: {prec}, Recall: {rec}, Accuracy: {acc}')
    print(f'Epoch: {epoch}, train_loss: {train_loss/len(train_loader)}')

    # ---- validation phase ----
    model.eval()
    for metric in epoch_metrics:
        metric.reset()  # drop the training statistics before evaluating
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            output = model(images)
            for metric in epoch_metrics:
                metric.update(output, labels)
            val_loss += criterion(output, labels).item()
    acc_score = accuracy.compute()
    prec_score = precision.compute()
    rec_score = recall.compute()
    print(f'Epoch: {epoch}, val_acc: {acc_score}, val_prec: {prec_score}, val_rec: {rec_score}')
    print(f'Epoch: {epoch}, val_loss: {val_loss/len(test_loader)}')

KeyboardInterrupt: 

Accuracy: 0.10000000149011612, Precision: 0.007830405607819557, Recall: 0.10000000149011612
