In [1]:
import torch
import torchvision
from torchvision import datasets, transforms
from imutils import paths
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader

import os
import cv2
import numpy as np

In [2]:
data_path= './'
dataset =  datasets.Caltech101(data_path, download=True)

Downloading...
From (original): https://drive.google.com/uc?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp
From (redirected): https://drive.usercontent.google.com/download?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp&confirm=t&uuid=f1720424-9f93-46fd-87f9-b65544b0f254
To: /content/caltech101/101_ObjectCategories.tar.gz
100%|██████████| 132M/132M [00:00<00:00, 174MB/s]


Extracting ./caltech101/101_ObjectCategories.tar.gz to ./caltech101


Downloading...
From (original): https://drive.google.com/uc?id=175kQy3UsZ0wUEHZjqkUDdNVssr7bgh_m
From (redirected): https://drive.usercontent.google.com/download?id=175kQy3UsZ0wUEHZjqkUDdNVssr7bgh_m&confirm=t&uuid=4b621762-4135-406c-9c1c-bb9cbeb64c83
To: /content/caltech101/Annotations.tar
100%|██████████| 14.0M/14.0M [00:00<00:00, 220MB/s]


Extracting ./caltech101/Annotations.tar to ./caltech101


In [3]:
# prompt: List all image path whose label is not BACKGROUND_Google

image_paths = list(paths.list_images('/content/caltech101/101_ObjectCategories'))
print(image_paths[:5])
image_paths_filtered = [path for path in image_paths if 'BACKGROUND_Google' not in path]

['/content/caltech101/101_ObjectCategories/dollar_bill/image_0010.jpg', '/content/caltech101/101_ObjectCategories/dollar_bill/image_0037.jpg', '/content/caltech101/101_ObjectCategories/dollar_bill/image_0039.jpg', '/content/caltech101/101_ObjectCategories/dollar_bill/image_0020.jpg', '/content/caltech101/101_ObjectCategories/dollar_bill/image_0051.jpg']


In [4]:
def get_image(path, transform=None):
  img = cv2.imread(path)
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

  if transform is not None:
    img = transform(img)

  return img


transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [5]:
# prompt: read the label and apply one-hot encoder

from sklearn.preprocessing import LabelBinarizer

# Assuming you have a list of labels called 'labels'
labels = [path.split(os.path.sep)[-2] for path in image_paths_filtered]

# Initialize the label encoder
le = LabelEncoder()
labels_encoded = le.fit_transform(labels)

# Initialize the one-hot encoder
lb = LabelBinarizer()
labels_one_hot = lb.fit_transform(labels_encoded)

# Print some examples
print("Original labels:", labels[:5])
print("Encoded labels:", labels_encoded[:5])
print("One-hot encoded labels:\n", labels_one_hot[:5])

Original labels: ['dollar_bill', 'dollar_bill', 'dollar_bill', 'dollar_bill', 'dollar_bill']
Encoded labels: [32 32 32 32 32]
One-hot encoded labels:
 [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0

In [6]:
train_ratio = 0.6
validation_ratio = 0.2
test_ratio = 0.2

x_train, x_test, y_train, y_test = train_test_split(image_paths_filtered, labels_one_hot, test_size=1 - train_ratio)
x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=test_ratio/(test_ratio + validation_ratio))

print(len(x_train))
print(len(x_val))
print(len(x_test))

5206
1735
1736


In [7]:
torch.tensor(y_train[0])

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0])

In [8]:
class CustomDataset(Dataset):
    def __init__(self, x, y):
        if len(x) != len(y):
            print('Cannot create dataset: x and y have different length')
            return
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        x = get_image(self.x[idx], transform)
        y = torch.tensor(self.y[idx])
        return x, y

In [9]:
train_dataset = CustomDataset(x_train, y_train)
val_dataset = CustomDataset(x_val, y_val)
test_dataset = CustomDataset(x_test, y_test)

In [10]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim

class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1) # (32, 112, 112)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()

        self.dwconv1 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, groups=32) # (32, 112, 112)
        self.bn2 = nn.BatchNorm2d(32)
        self.relu2 = nn.ReLU()

        self.conv2 = nn.Conv2d(32, 64, kernel_size=1, stride=1, padding=0) # (64, 112, 112)
        self.bn3 = nn.BatchNorm2d(64)
        self.relu3 = nn.ReLU()

        self.dwconv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, groups=64) # (64, 56, 56)
        self.bn4 = nn.BatchNorm2d(64)
        self.relu4 = nn.ReLU()

        self.conv3 = nn.Conv2d(64, 128, kernel_size=1, stride=1, padding=0) # (128, 56, 56)
        self.bn5 = nn.BatchNorm2d(128)
        self.relu5 = nn.ReLU()

        self.dwconv3 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, groups=128) # (128, 56, 56)
        self.bn6 = nn.BatchNorm2d(128)
        self.relu6 = nn.ReLU()

        self.conv4 = nn.Conv2d(128, 128, kernel_size=1, stride=1, padding=0) # (128, 56, 56)
        self.bn7 = nn.BatchNorm2d(128)
        self.relu7 = nn.ReLU()

        self.dwconv4 = nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1, groups=128) # (128, 28, 28)
        self.bn8 = nn.BatchNorm2d(128)
        self.relu8 = nn.ReLU()

        self.conv5 = nn.Conv2d(128, 256, kernel_size=1, stride=1, padding=0) # (256, 28, 28)
        self.bn9 = nn.BatchNorm2d(256)
        self.relu9 = nn.ReLU()

        self.dwconv5 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, groups=256) # (256, 28, 28)
        self.bn10 = nn.BatchNorm2d(256)
        self.relu10 = nn.ReLU()

        self.conv6 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0) # (256, 28, 28)
        self.bn11 = nn.BatchNorm2d(256)
        self.relu11 = nn.ReLU()

        self.dwconv6 = nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1, groups=256) # (256, 14, 14)
        self.bn12 = nn.BatchNorm2d(256)
        self.relu12 = nn.ReLU()

        self.conv7 = nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0) # (512, 14, 14)
        self.bn13 = nn.BatchNorm2d(512)
        self.relu13 = nn.ReLU()

        # Repeat blocks for depthwise + pointwise convolutions 5 times
        self.dwconv_repeats = nn.Sequential(*[
            nn.Sequential(
                nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, groups=512),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.Conv2d(512, 512, kernel_size=1, stride=1, padding=0),
                nn.BatchNorm2d(512),
                nn.ReLU()
            )
            for _ in range(5)
        ])

        self.dwconv12 = nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1, groups=512) # (512, 7, 7)
        self.bn24 = nn.BatchNorm2d(512)
        self.relu24 = nn.ReLU()

        self.conv13 = nn.Conv2d(512, 1024, kernel_size=1, stride=1, padding=0) # (1024, 7, 7)
        self.bn25 = nn.BatchNorm2d(1024)
        self.relu25 = nn.ReLU()

        self.dwconv13 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, groups=1024) # (1024, 7, 7)
        self.bn26 = nn.BatchNorm2d(1024)
        self.relu26 = nn.ReLU()

        self.conv14 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) # (1024, 7, 7)
        self.bn27 = nn.BatchNorm2d(1024)
        self.relu27 = nn.ReLU()

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # (1024, 1, 1)
        self.fc = nn.Linear(1024, num_classes) # (num_classes)

    def forward(self, x):
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.dwconv1(x)))
        x = self.relu3(self.bn3(self.conv2(x)))
        x = self.relu4(self.bn4(self.dwconv2(x)))
        x = self.relu5(self.bn5(self.conv3(x)))
        x = self.relu6(self.bn6(self.dwconv3(x)))
        x = self.relu7(self.bn7(self.conv4(x)))
        x = self.relu8(self.bn8(self.dwconv4(x)))
        x = self.relu9(self.bn9(self.conv5(x)))
        x = self.relu10(self.bn10(self.dwconv5(x)))
        x = self.relu11(self.bn11(self.conv6(x)))
        x = self.relu12(self.bn12(self.dwconv6(x)))
        x = self.relu13(self.bn13(self.conv7(x)))

        x = self.dwconv_repeats(x)

        x = self.relu24(self.bn24(self.dwconv12(x)))
        x = self.relu25(self.bn25(self.conv13(x)))
        x = self.relu26(self.bn26(self.dwconv13(x)))
        x = self.relu27(self.bn27(self.conv14(x)))

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [12]:
from tqdm import tqdm

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Yêu cầu 1

In [14]:
num_classes = len(lb.classes_)
model = CNN(num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

num_epochs = 10

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    print('Epoch '+str(epoch+1)+'/'+str(num_epochs))
    model.train()
    running_loss = 0.0
    for inputs, labels in tqdm(train_loader, total=len(train_loader)):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, torch.max(labels, 1)[1])
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")


model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(train_loader, total=len(train_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels_index = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels_index).sum().item()

print(f"Train Accuracy: {100 * correct / total}%")

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(val_loader, total=len(val_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels_index = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels_index).sum().item()

print(f"Validation Accuracy: {100 * correct / total}%")

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(test_loader, total=len(test_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels_index = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels_index).sum().item()

print(f"Test Accuracy: {100 * correct / total}%")

Epoch 1/10


100%|██████████| 82/82 [00:34<00:00,  2.37it/s]


Epoch 1/10, Loss: 4.095532123635455
Epoch 2/10


100%|██████████| 82/82 [00:32<00:00,  2.51it/s]


Epoch 2/10, Loss: 3.7311997064730016
Epoch 3/10


100%|██████████| 82/82 [00:31<00:00,  2.63it/s]


Epoch 3/10, Loss: 3.4168789968257998
Epoch 4/10


100%|██████████| 82/82 [00:32<00:00,  2.49it/s]


Epoch 4/10, Loss: 3.1695559751696702
Epoch 5/10


100%|██████████| 82/82 [00:31<00:00,  2.61it/s]


Epoch 5/10, Loss: 2.991819152017919
Epoch 6/10


100%|██████████| 82/82 [00:31<00:00,  2.60it/s]


Epoch 6/10, Loss: 2.8198303274992034
Epoch 7/10


100%|██████████| 82/82 [00:32<00:00,  2.54it/s]


Epoch 7/10, Loss: 2.681969380960232
Epoch 8/10


100%|██████████| 82/82 [00:33<00:00,  2.48it/s]


Epoch 8/10, Loss: 2.543353333705809
Epoch 9/10


100%|██████████| 82/82 [00:31<00:00,  2.57it/s]


Epoch 9/10, Loss: 2.402984610417994
Epoch 10/10


100%|██████████| 82/82 [00:31<00:00,  2.58it/s]


Epoch 10/10, Loss: 2.273132894097305


100%|██████████| 82/82 [00:23<00:00,  3.55it/s]


Train Accuracy: 51.383019592777565%


100%|██████████| 28/28 [00:07<00:00,  3.96it/s]


Validation Accuracy: 39.711815561959654%


100%|██████████| 28/28 [00:07<00:00,  3.56it/s]

Test Accuracy: 40.89861751152074%





In [15]:
num_classes = len(lb.classes_)
model = CNN(num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001)

num_epochs = 10

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    print('Epoch '+str(epoch+1)+'/'+str(num_epochs))
    model.train()
    running_loss = 0.0
    for inputs, labels in tqdm(train_loader, total=len(train_loader)):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, torch.max(labels, 1)[1])
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")


model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(train_loader, total=len(train_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels_index = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels_index).sum().item()

print(f"Train Accuracy: {100 * correct / total}%")

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(val_loader, total=len(val_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels_index = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels_index).sum().item()

print(f"Validation Accuracy: {100 * correct / total}%")

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(test_loader, total=len(test_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels_index = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels_index).sum().item()

print(f"Test Accuracy: {100 * correct / total}%")

Epoch 1/10


100%|██████████| 82/82 [00:33<00:00,  2.46it/s]


Epoch 1/10, Loss: 4.660675915276132
Epoch 2/10


100%|██████████| 82/82 [00:31<00:00,  2.60it/s]


Epoch 2/10, Loss: 4.638066466261701
Epoch 3/10


100%|██████████| 82/82 [00:31<00:00,  2.57it/s]


Epoch 3/10, Loss: 4.614502278769889
Epoch 4/10


100%|██████████| 82/82 [00:32<00:00,  2.50it/s]


Epoch 4/10, Loss: 4.592762749369552
Epoch 5/10


100%|██████████| 82/82 [00:33<00:00,  2.45it/s]


Epoch 5/10, Loss: 4.569505394958869
Epoch 6/10


100%|██████████| 82/82 [00:31<00:00,  2.61it/s]


Epoch 6/10, Loss: 4.549060879684076
Epoch 7/10


100%|██████████| 82/82 [00:32<00:00,  2.54it/s]


Epoch 7/10, Loss: 4.528073857470257
Epoch 8/10


100%|██████████| 82/82 [00:33<00:00,  2.42it/s]


Epoch 8/10, Loss: 4.510868049249416
Epoch 9/10


100%|██████████| 82/82 [00:32<00:00,  2.56it/s]


Epoch 9/10, Loss: 4.491658065377212
Epoch 10/10


100%|██████████| 82/82 [00:31<00:00,  2.59it/s]


Epoch 10/10, Loss: 4.472105997364696


100%|██████████| 82/82 [00:21<00:00,  3.78it/s]


Train Accuracy: 9.335382251248559%


100%|██████████| 28/28 [00:07<00:00,  3.73it/s]


Validation Accuracy: 9.337175792507205%


100%|██████████| 28/28 [00:06<00:00,  4.09it/s]

Test Accuracy: 9.735023041474655%





In [16]:
import torch
import torch.nn as nn

class FocalLoss(nn.Module):
    def __init__(self, gamma=2, alpha=None, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction

    def forward(self, inputs, targets):
        ce_loss = nn.CrossEntropyLoss(reduction='none')(inputs, targets)
        pt = torch.exp(-ce_loss)
        focal_loss = ((1 - pt) ** self.gamma) * ce_loss

        if self.alpha is not None:
            if isinstance(self.alpha, float):
                focal_loss *= self.alpha
            elif isinstance(self.alpha, torch.Tensor):
                alpha_t = self.alpha[targets]
                focal_loss = alpha_t * focal_loss

        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss


In [17]:
num_classes = len(lb.classes_)
model = CNN(num_classes)
criterion = FocalLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

num_epochs = 10

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    print('Epoch '+str(epoch+1)+'/'+str(num_epochs))
    model.train()
    running_loss = 0.0
    for inputs, labels in tqdm(train_loader, total=len(train_loader)):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, torch.max(labels, 1)[1])
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")


model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(train_loader, total=len(train_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels_index = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels_index).sum().item()

print(f"Train Accuracy: {100 * correct / total}%")

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(val_loader, total=len(val_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels_index = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels_index).sum().item()

print(f"Validation Accuracy: {100 * correct / total}%")

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(test_loader, total=len(test_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels_index = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels_index).sum().item()

print(f"Test Accuracy: {100 * correct / total}%")

Epoch 1/10


100%|██████████| 82/82 [00:34<00:00,  2.39it/s]


Epoch 1/10, Loss: 3.9536373353585965
Epoch 2/10


100%|██████████| 82/82 [00:34<00:00,  2.40it/s]


Epoch 2/10, Loss: 3.5060522294626004
Epoch 3/10


100%|██████████| 82/82 [00:33<00:00,  2.48it/s]


Epoch 3/10, Loss: 3.2131332944079145
Epoch 4/10


100%|██████████| 82/82 [00:34<00:00,  2.40it/s]


Epoch 4/10, Loss: 2.956688485494474
Epoch 5/10


100%|██████████| 82/82 [00:34<00:00,  2.39it/s]


Epoch 5/10, Loss: 2.7610703445062406
Epoch 6/10


100%|██████████| 82/82 [00:33<00:00,  2.45it/s]


Epoch 6/10, Loss: 2.588824417532944
Epoch 7/10


100%|██████████| 82/82 [00:34<00:00,  2.41it/s]


Epoch 7/10, Loss: 2.4238397784349397
Epoch 8/10


100%|██████████| 82/82 [00:34<00:00,  2.40it/s]


Epoch 8/10, Loss: 2.2770482839607613
Epoch 9/10


100%|██████████| 82/82 [00:33<00:00,  2.45it/s]


Epoch 9/10, Loss: 2.129527608068978
Epoch 10/10


100%|██████████| 82/82 [00:34<00:00,  2.41it/s]


Epoch 10/10, Loss: 1.972097372136465


100%|██████████| 82/82 [00:21<00:00,  3.81it/s]


Train Accuracy: 51.575105647330005%


100%|██████████| 28/28 [00:06<00:00,  4.12it/s]


Validation Accuracy: 38.674351585014406%


100%|██████████| 28/28 [00:07<00:00,  3.71it/s]

Test Accuracy: 39.80414746543779%





In [18]:
num_classes = len(lb.classes_)
model = CNN(num_classes)
criterion = FocalLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001)

num_epochs = 10

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    print('Epoch '+str(epoch+1)+'/'+str(num_epochs))
    model.train()
    running_loss = 0.0
    for inputs, labels in tqdm(train_loader, total=len(train_loader)):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, torch.max(labels, 1)[1])
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")



model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(train_loader, total=len(train_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels_index = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels_index).sum().item()

print(f"Train Accuracy: {100 * correct / total}%")

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(val_loader, total=len(val_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels_index = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels_index).sum().item()

print(f"Validation Accuracy: {100 * correct / total}%")

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(test_loader, total=len(test_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels_index = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels_index).sum().item()

print(f"Test Accuracy: {100 * correct / total}%")

Epoch 1/10


100%|██████████| 82/82 [00:31<00:00,  2.61it/s]


Epoch 1/10, Loss: 4.511571802744052
Epoch 2/10


100%|██████████| 82/82 [00:31<00:00,  2.57it/s]


Epoch 2/10, Loss: 4.487471568875197
Epoch 3/10


100%|██████████| 82/82 [00:31<00:00,  2.61it/s]


Epoch 3/10, Loss: 4.46093296423191
Epoch 4/10


100%|██████████| 82/82 [00:31<00:00,  2.58it/s]


Epoch 4/10, Loss: 4.438488884669979
Epoch 5/10


100%|██████████| 82/82 [00:31<00:00,  2.61it/s]


Epoch 5/10, Loss: 4.414338204918838
Epoch 6/10


100%|██████████| 82/82 [00:31<00:00,  2.58it/s]


Epoch 6/10, Loss: 4.391597410527671
Epoch 7/10


100%|██████████| 82/82 [00:31<00:00,  2.62it/s]


Epoch 7/10, Loss: 4.368652931073817
Epoch 8/10


100%|██████████| 82/82 [00:31<00:00,  2.57it/s]


Epoch 8/10, Loss: 4.347016863706635
Epoch 9/10


100%|██████████| 82/82 [00:31<00:00,  2.61it/s]


Epoch 9/10, Loss: 4.326468421191704
Epoch 10/10


100%|██████████| 82/82 [00:31<00:00,  2.57it/s]


Epoch 10/10, Loss: 4.306427618352378


100%|██████████| 82/82 [00:20<00:00,  3.93it/s]


Train Accuracy: 13.887821744141375%


100%|██████████| 28/28 [00:07<00:00,  3.78it/s]


Validation Accuracy: 12.507204610951009%


100%|██████████| 28/28 [00:06<00:00,  4.00it/s]

Test Accuracy: 13.248847926267281%





## Yêu cầu 2

In [19]:
import torch
import torch.nn as nn
import torch.optim as optim

class CNN2(nn.Module):
    def __init__(self, num_classes):
        super(CNN2, self).__init__()
        # self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1) # (32, 112, 112)
        # self.bn1 = nn.BatchNorm2d(32)
        # self.relu1 = nn.ReLU()

        # self.dwconv1 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, groups=32) # (32, 112, 112)
        # self.bn2 = nn.BatchNorm2d(32)
        # self.relu2 = nn.ReLU()

        # self.conv2 = nn.Conv2d(32, 64, kernel_size=1, stride=1, padding=0) # (64, 112, 112)
        # self.bn3 = nn.BatchNorm2d(64)
        # self.relu3 = nn.ReLU()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=1) # (64, 112, 112)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU()

        self.dwconv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, groups=64) # (64, 56, 56)
        self.bn4 = nn.BatchNorm2d(64)
        self.relu4 = nn.ReLU()

        self.conv3 = nn.Conv2d(64, 128, kernel_size=1, stride=1, padding=0) # (128, 56, 56)
        self.bn5 = nn.BatchNorm2d(128)
        self.relu5 = nn.ReLU()

        self.dwconv3 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, groups=128) # (128, 56, 56)
        self.bn6 = nn.BatchNorm2d(128)
        self.relu6 = nn.ReLU()

        self.conv4 = nn.Conv2d(128, 128, kernel_size=1, stride=1, padding=0) # (128, 56, 56)
        self.bn7 = nn.BatchNorm2d(128)
        self.relu7 = nn.ReLU()

        self.dwconv4 = nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1, groups=128) # (128, 28, 28)
        self.bn8 = nn.BatchNorm2d(128)
        self.relu8 = nn.ReLU()

        self.conv5 = nn.Conv2d(128, 256, kernel_size=1, stride=1, padding=0) # (256, 28, 28)
        self.bn9 = nn.BatchNorm2d(256)
        self.relu9 = nn.ReLU()

        self.dwconv5 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, groups=256) # (256, 28, 28)
        self.bn10 = nn.BatchNorm2d(256)
        self.relu10 = nn.ReLU()

        self.conv6 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0) # (256, 28, 28)
        self.bn11 = nn.BatchNorm2d(256)
        self.relu11 = nn.ReLU()

        self.dwconv6 = nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1, groups=256) # (256, 14, 14)
        self.bn12 = nn.BatchNorm2d(256)
        self.relu12 = nn.ReLU()

        self.conv7 = nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0) # (512, 14, 14)
        self.bn13 = nn.BatchNorm2d(512)
        self.relu13 = nn.ReLU()

        # Repeat blocks for depthwise + pointwise convolutions 5 times
        self.dwconv_repeats = nn.Sequential(*[
            nn.Sequential(
                nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, groups=512),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.Conv2d(512, 512, kernel_size=1, stride=1, padding=0),
                nn.BatchNorm2d(512),
                nn.ReLU()
            )
            for _ in range(5)
        ])

        self.dwconv12 = nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1, groups=512) # (512, 7, 7)
        self.bn24 = nn.BatchNorm2d(512)
        self.relu24 = nn.ReLU()

        self.conv13 = nn.Conv2d(512, 1024, kernel_size=1, stride=1, padding=0) # (1024, 7, 7)
        self.bn25 = nn.BatchNorm2d(1024)
        self.relu25 = nn.ReLU()

        self.dwconv13 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1, groups=1024) # (1024, 7, 7)
        self.bn26 = nn.BatchNorm2d(1024)
        self.relu26 = nn.ReLU()

        self.conv14 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) # (1024, 7, 7)
        self.bn27 = nn.BatchNorm2d(1024)
        self.relu27 = nn.ReLU()

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # (1024, 1, 1)
        self.fc = nn.Linear(1024, num_classes) # (num_classes)

    def forward(self, x):
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu4(self.bn4(self.dwconv2(x)))
        x = self.relu5(self.bn5(self.conv3(x)))
        x = self.relu6(self.bn6(self.dwconv3(x)))
        x = self.relu7(self.bn7(self.conv4(x)))
        x = self.relu8(self.bn8(self.dwconv4(x)))
        x = self.relu9(self.bn9(self.conv5(x)))
        x = self.relu10(self.bn10(self.dwconv5(x)))
        x = self.relu11(self.bn11(self.conv6(x)))
        x = self.relu12(self.bn12(self.dwconv6(x)))
        x = self.relu13(self.bn13(self.conv7(x)))

        x = self.dwconv_repeats(x)

        x = self.relu24(self.bn24(self.dwconv12(x)))
        x = self.relu25(self.bn25(self.conv13(x)))
        x = self.relu26(self.bn26(self.dwconv13(x)))
        x = self.relu27(self.bn27(self.conv14(x)))

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [20]:
from collections import Counter

labels_indices = [np.argmax(label) for label in labels_one_hot]
label_counts = Counter(labels_indices)
class_weights = [1/label_counts[i] for i in range(len(lb.classes_))]

In [21]:
import torch
import torch.nn as nn

class WeightedCrossEntropyLoss(nn.Module):
    def __init__(self, class_weights):
        super(WeightedCrossEntropyLoss, self).__init__()
        self.class_weights = torch.tensor(class_weights).float().to(device)

    def forward(self, inputs, targets):
        ce_loss = nn.CrossEntropyLoss(reduction='none')(inputs, targets)
        weighted_ce_loss = ce_loss * self.class_weights[targets]
        return weighted_ce_loss.mean()

In [22]:
num_classes = len(lb.classes_)
model = CNN2(num_classes).to(device)
criterion = WeightedCrossEntropyLoss(class_weights)
optimizer = optim.Adam(model.parameters(), lr=0.0001)

num_epochs = 10
for epoch in range(num_epochs):
    print('Epoch '+str(epoch+1)+'/'+str(num_epochs))
    model.train()
    running_loss = 0.0
    for inputs, labels in tqdm(train_loader, total=len(train_loader)):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, torch.max(labels, 1)[1])
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(test_loader, total=len(test_loader)):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels_index = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels_index).sum().item()

print(f"Test Accuracy: {100 * correct / total}%")

Epoch 1/10


100%|██████████| 82/82 [00:32<00:00,  2.54it/s]


Epoch 1/10, Loss: 0.052841013206577886
Epoch 2/10


100%|██████████| 82/82 [00:31<00:00,  2.58it/s]


Epoch 2/10, Loss: 0.05121300991897176
Epoch 3/10


100%|██████████| 82/82 [00:31<00:00,  2.60it/s]


Epoch 3/10, Loss: 0.049841985197328936
Epoch 4/10


100%|██████████| 82/82 [00:31<00:00,  2.58it/s]


Epoch 4/10, Loss: 0.04859779525275638
Epoch 5/10


100%|██████████| 82/82 [00:31<00:00,  2.59it/s]


Epoch 5/10, Loss: 0.04699346509466811
Epoch 6/10


100%|██████████| 82/82 [00:31<00:00,  2.58it/s]


Epoch 6/10, Loss: 0.04512571252700759
Epoch 7/10


100%|██████████| 82/82 [00:31<00:00,  2.61it/s]


Epoch 7/10, Loss: 0.043196042363600036
Epoch 8/10


100%|██████████| 82/82 [00:31<00:00,  2.57it/s]


Epoch 8/10, Loss: 0.04079662908504649
Epoch 9/10


100%|██████████| 82/82 [00:31<00:00,  2.61it/s]


Epoch 9/10, Loss: 0.03849890675940892
Epoch 10/10


100%|██████████| 82/82 [00:31<00:00,  2.57it/s]


Epoch 10/10, Loss: 0.036045529061882964


100%|██████████| 28/28 [00:06<00:00,  4.25it/s]

Test Accuracy: 29.55069124423963%



