# Model RESNET18

In [1]:
import torch.nn as nn

class ResNet18(nn.Module):
    def __init__(self, num_classes):
        super(ResNet18, self).__init__()

        self.dropout_percentage = 0.5
        self.relu = nn.ReLU()

        # BLOCK-1 (starting block) input=(224x224) output=(112x112)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
        self.batchnorm1 = nn.BatchNorm2d(64)
        self.maxpool1 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))

        # BLOCK-2 (1) input=(112x112) output = (56x56)
        self.conv2_1_1 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.batchnorm2_1_1 = nn.BatchNorm2d(64)
        self.conv2_1_2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.batchnorm2_1_2 = nn.BatchNorm2d(64)
        self.dropout2_1 = nn.Dropout(p=self.dropout_percentage)

        # BLOCK-2 (2)
        self.conv2_2_1 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.batchnorm2_2_1 = nn.BatchNorm2d(64)
        self.conv2_2_2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.batchnorm2_2_2 = nn.BatchNorm2d(64)
        self.dropout2_2 = nn.Dropout(p=self.dropout_percentage)

        # BLOCK-3 (1) input=(56x56) output = (28x28)
        self.conv3_1_1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.batchnorm3_1_1 = nn.BatchNorm2d(128)
        self.conv3_1_2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.batchnorm3_1_2 = nn.BatchNorm2d(128)
        self.concat_adjust_3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(1, 1), stride=(2, 2),
                                         padding=(0, 0))
        self.dropout3_1 = nn.Dropout(p=self.dropout_percentage)
        # BLOCK-3 (2)
        self.conv3_2_1 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.batchnorm3_2_1 = nn.BatchNorm2d(128)
        self.conv3_2_2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.batchnorm3_2_2 = nn.BatchNorm2d(128)
        self.dropout3_2 = nn.Dropout(p=self.dropout_percentage)

        # BLOCK-4 (1) input=(28x28) output = (14x14)
        self.conv4_1_1 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.batchnorm4_1_1 = nn.BatchNorm2d(256)
        self.conv4_1_2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.batchnorm4_1_2 = nn.BatchNorm2d(256)
        self.concat_adjust_4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(1, 1), stride=(2, 2),
                                         padding=(0, 0))
        self.dropout4_1 = nn.Dropout(p=self.dropout_percentage)
        # BLOCK-4 (2)
        self.conv4_2_1 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.batchnorm4_2_1 = nn.BatchNorm2d(256)
        self.conv4_2_2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.batchnorm4_2_2 = nn.BatchNorm2d(256)
        self.dropout4_2 = nn.Dropout(p=self.dropout_percentage)

        # BLOCK-5 (1) input=(14x14) output = (7x7)
        self.conv5_1_1 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.batchnorm5_1_1 = nn.BatchNorm2d(512)
        self.conv5_1_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.batchnorm5_1_2 = nn.BatchNorm2d(512)
        self.concat_adjust_5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(1, 1), stride=(2, 2),
                                         padding=(0, 0))
        self.dropout5_1 = nn.Dropout(p=self.dropout_percentage)
        # BLOCK-5 (2)
        self.conv5_2_1 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.batchnorm5_2_1 = nn.BatchNorm2d(512)
        self.conv5_2_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.batchnorm5_2_2 = nn.BatchNorm2d(512)
        self.dropout5_2 = nn.Dropout(p=self.dropout_percentage)

        # Final Block input=(7x7)
        self.avgpool = nn.AvgPool2d(kernel_size=(7, 7), stride=(1, 1))
        self.fc = nn.Linear(in_features=1 * 1 * 512, out_features=1000)
        self.out = nn.Linear(in_features=1000, out_features=num_classes)
        # END

    def forward(self, x):
        # block 1 --> Starting block
        x = self.relu(self.batchnorm1(self.conv1(x)))
        op1 = self.maxpool1(x)

        # block2 - 1
        x = self.relu(self.batchnorm2_1_1(self.conv2_1_1(op1)))  # conv2_1
        x = self.batchnorm2_1_2(self.conv2_1_2(x))  # conv2_1
        x = self.dropout2_1(x)
        # block2 - Adjust - No adjust in this layer as dimensions are already same
        # block2 - Concatenate 1
        op2_1 = self.relu(x + op1)
        # block2 - 2
        x = self.relu(self.batchnorm2_2_1(self.conv2_2_1(op2_1)))  # conv2_2
        x = self.batchnorm2_2_2(self.conv2_2_2(x))  # conv2_2
        x = self.dropout2_2(x)
        # op - block2
        op2 = self.relu(x + op2_1)

        # block3 - 1[Convolution block]
        x = self.relu(self.batchnorm3_1_1(self.conv3_1_1(op2)))  # conv3_1
        x = self.batchnorm3_1_2(self.conv3_1_2(x))  # conv3_1
        x = self.dropout3_1(x)
        # block3 - Adjust
        op2 = self.concat_adjust_3(op2)  # SKIP CONNECTION
        # block3 - Concatenate 1
        op3_1 = self.relu(x + op2)
        # block3 - 2[Identity Block]
        x = self.relu(self.batchnorm3_2_1(self.conv3_2_1(op3_1)))  # conv3_2
        x = self.batchnorm3_2_2(self.conv3_2_2(x))  # conv3_2
        x = self.dropout3_2(x)
        # op - block3
        op3 = self.relu(x + op3_1)

        # block4 - 1[Convolition block]
        x = self.relu(self.batchnorm4_1_1(self.conv4_1_1(op3)))  # conv4_1
        x = self.batchnorm4_1_2(self.conv4_1_2(x))  # conv4_1
        x = self.dropout4_1(x)
        # block4 - Adjust
        op3 = self.concat_adjust_4(op3)  # SKIP CONNECTION
        # block4 - Concatenate 1
        op4_1 = self.relu(x + op3)
        # block4 - 2[Identity Block]
        x = self.relu(self.batchnorm4_2_1(self.conv4_2_1(op4_1)))  # conv4_2
        x = self.batchnorm4_2_2(self.conv4_2_2(x))  # conv4_2
        x = self.dropout4_2(x)
        # op - block4
        op4 = self.relu(x + op4_1)

        # block5 - 1[Convolution Block]
        x = self.relu(self.batchnorm5_1_1(self.conv5_1_1(op4)))  # conv5_1
        x = self.batchnorm5_1_2(self.conv5_1_2(x))  # conv5_1
        x = self.dropout5_1(x)
        # block5 - Adjust
        op4 = self.concat_adjust_5(op4)  # SKIP CONNECTION
        # block5 - Concatenate 1
        op5_1 = self.relu(x + op4)
        # block5 - 2[Identity Block]
        x = self.relu(self.batchnorm5_2_1(self.conv5_2_1(op5_1)))  # conv5_2
        x = self.batchnorm5_2_1(self.conv5_2_1(x))  # conv5_2
        x = self.dropout5_2(x)
        # op - block5
        op5 = self.relu(x + op5_1)

        # FINAL BLOCK - classifier
        x = self.avgpool(op5)
        x = x.reshape(x.shape[0], -1)
        x = self.relu(self.fc(x))
        x = self.out(x)

        return x

# Resnet18 trainning

In [2]:
# """
# Thanh Le  16 April 2024
# How to train/fine-tune a pre-trained model on a custom dataset (i.e., transfer learning)
# """
# import torch
# from torch import nn, save, load
# from tqdm import tqdm
# from torch.optim import Adam
# from torch.utils.data import DataLoader
# from torchvision.datasets import ImageFolder
# import torchvision.transforms as transforms
# from torchmetrics.functional import accuracy
# from torchvision.transforms import ToTensor, Resize
# import numpy as np
# import os
# import matplotlib.pyplot as plt

# # Setup CUDA
# def setup_cuda():
#     # Setting seeds for reproducibility
#     seed = 50
#     torch.backends.cudnn.enabled = True
#     torch.backends.cudnn.benchmark = True
#     np.random.seed(seed)
#     torch.manual_seed(seed)
#     if torch.cuda.is_available():
#         torch.cuda.manual_seed(seed)

#     return torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


# def train_model():
#     """
#     Train the model over a single epoch
#     :return: training loss and training accuracy
#     """
#     train_loss = 0.0
#     train_acc = 0.0
#     model.train()

#     for (img, label) in tqdm(train_loader, ncols=80, desc='Training'):
#         # Get a batch
#         img, label = img.to(device, dtype=torch.float), label.to(device, dtype=torch.long)

#         # Set the gradients to zero before starting backpropagation
#         optimizer.zero_grad()

#         # Perform a feed-forward pass
#         logits = model(img)

#         # Compute the batch loss
#         loss = loss_fn(logits, label)

#         # Compute gradient of the loss fn w.r.t the trainable weights
#         loss.backward()

#         # Update the trainable weights
#         optimizer.step()

#         # Accumulate the batch loss
#         train_loss += loss.item()

#         # Get the predictions to calculate the accuracy for every iteration. Remember to accumulate the accuracy
#         prediction = logits.argmax(axis=1)
#         train_acc += accuracy(prediction, label, task='multiclass', average='macro', num_classes=len(class_names)).item()

#     return train_loss / len(train_loader), train_acc / len(train_loader)


# def validate_model():
#     """
#     Validate the model over a single epoch
#     :return: validation loss and validation accuracy
#     """
#     model.eval()
#     valid_loss = 0.0
#     val_acc = 0.0

#     with torch.no_grad():
#         for (img, label) in tqdm(val_loader, ncols=80, desc='Valid'):
#             # Get a batch
#             img, label = img.to(device, dtype=torch.float), label.to(device, dtype=torch.long)

#             # Perform a feed-forward pass
#             logits = model(img)

#             # Compute the batch loss
#             loss = loss_fn(logits, label)

#             # Accumulate the batch loss
#             valid_loss += loss.item()

#             # Get the predictions to calculate the accuracy for every iteration. Remember to accumulate the accuracy
#             prediction = logits.argmax(axis=1)
#             val_acc += accuracy(prediction, label, task='multiclass', average='macro', num_classes=len(class_names)).item()

#     return valid_loss / len(val_loader), val_acc / len(val_loader)


# # Example plotting function

# def plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies):
#     epochs = range(1, len(train_losses) + 1)
#     # Losses
#     plt.figure(figsize=(15, 7))
#     plt.subplot(2, 1, 1)
#     plt.plot(epochs, train_losses, label='Training Loss', color='blue')
#     plt.plot(epochs, val_losses, label='Validation Loss', color='red')
#     plt.xlabel('Epochs')
#     plt.ylabel('Loss')
#     plt.title('Training and Validation Loss')
#     plt.legend()
#     plt.yscale('log')  # Log scale can help for loss curves with large values

#     # Accuracies
#     plt.subplot(2, 1, 2)
#     plt.plot(epochs, train_accuracies, label='Training Accuracy', color='green')
#     plt.plot(epochs, val_accuracies, label='Validation Accuracy', color='orange')
#     plt.xlabel('Epochs')
#     plt.ylabel('Accuracy')
#     plt.title('Training and Validation Accuracy')
#     plt.legend()

#     plt.tight_layout()
#     # Save the figure to a file
#     plt.savefig("trainplot.png")  # You can change the file name and format (e.g., .png, .jpg, .pdf)

#     plt.show()


# if __name__ == "__main__":
#     device = setup_cuda()

#     # 1. Load the dataset
#     transform = transforms.Compose([Resize((224, 224)), ToTensor()])
#     train_dataset = ImageFolder(root='/kaggle/input/dataset-split/dataset_split/train', transform=transform)
#     val_dataset = ImageFolder(root='/kaggle/input/dataset-split/dataset_split/val', transform=transform)
#     # Get class names
#     class_names = train_dataset.classes

#     # 2. Create data loaders
#     train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
#     val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)

#     # 3. Create a new deep model without pre-trained weights
#     # from utils.seresnet18 import ResNet18
#     model = ResNet18(
#         num_classes=len(class_names),
#     ).to(device)

#     # 4. Specify loss function and optimizer
#     optimizer = Adam(model.parameters(), lr=1e-4)
#     loss_fn = torch.nn.CrossEntropyLoss()

#     # 5. Train the model with 100 epochs
#     # store the metrics for plotting
#     train_losses, val_losses, train_accuracies, val_accuracies = [], [], [], []

#     max_acc = 0
#     for epoch in range(100):

#         # 5.1. Train the model over a single epoch
#         train_loss, train_acc = train_model()
#         train_losses.append(train_loss)     # save train loss values
#         train_accuracies.append(train_acc)  # save train acc values

#         # 5.2. Validate the model after training
#         val_loss, val_acc = validate_model()
#         val_losses.append(val_loss)         # save val loss values
#         val_accuracies.append(val_acc)      # save val acc values

#         print(f'Epoch {epoch}: Train loss = {train_loss}, Train accuracy: {train_acc}')
#         print(f'Epoch {epoch}: Validation loss = {val_loss}, Validation accuracy: {val_acc}')

#         # 4.3. Save the model if the validation accuracy is increasing
#         if val_acc > max_acc:
#             print(f'Validation accuracy increased ({max_acc} --> {val_acc}). Model saved')
#             folder_path = 'checkpoints_resnet18'  # Define the folder name
#             if not os.path.exists(folder_path):
#                 os.makedirs(folder_path)  # Create the folder if it does not exist
#             file_path = os.path.join(folder_path,
#                                      'resnet18_epoch_' + str(epoch) + '_acc_{0:.4f}'.format(val_acc) + '.pt')
#             with open(file_path, 'wb') as f:
#                 save(model.state_dict(), f)
#             max_acc = val_acc

# # After training is complete, plot the metrics
# plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies)

# Testing for Resnet18

In [3]:
import torch
from torch import nn, save, load
from tqdm import tqdm
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from torchmetrics.functional import accuracy
from torchvision.transforms import ToTensor, Resize
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt

# Thiết lập biến cần thiết
train_dir = '/kaggle/input/dataset-split/dataset_split/train'
test_dir = '/kaggle/input/dataset-split/dataset_split/test'
valid_dir = '/kaggle/input/dataset-split/dataset_split/val'
NUM_WORKERS = os.cpu_count()
BATCH_SIZE = 32
IMG_SIZE = 224
manual_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
])
patch_size = 16
CLASS = {} # KHONG CAN


# Thiết lập thiết bị (GPU hoặc CPU)
def setup_cuda():
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    return device


device = setup_cuda()

# Thiết lập các transform để xử lý ảnh
manual_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
])


# Hàm dự đoán kết quả cho một hình ảnh
def predict_image(image_path, model, transform, class_names, device):
    model.eval()
    img = Image.open(image_path).convert('RGB')
    img_tensor = transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(img_tensor)
        _, predicted_class = torch.max(output, 1)
    predicted_label = class_names[predicted_class.item()]
    return img, predicted_label


# Hàm chính để dự đoán các hình ảnh trong tập test
def test_model():
    # 1. Tải dữ liệu và lớp từ tập train
    transform = transforms.Compose([Resize((224, 224)), ToTensor()])
    train_dataset = ImageFolder(root='/kaggle/input/dataset-split/dataset_split/train', transform=transform)
    test_dataset = ImageFolder(root='/kaggle/input/dataset-split/dataset_split/test', transform=transform)
    # Get class names
    class_names = train_dataset.classes

    # 2. Tạo mô hình ViT và tải trạng thái từ checkpoint
    # from utils.resnet18 import ResNet18

    model = ResNet18(
        num_classes=len(class_names),
    ).to(device)

    folder_checkpoint = 'checkpoints_resnet18'  # Define the folder name
    file_name = '/kaggle/input/resnet-train-acc/ResNet/resnet18_epoch_10_acc_0.7310.pt' #best weight
    file_checkpoint = os.path.join(folder_checkpoint, file_name)  # lay best weight
    model.load_state_dict(torch.load(file_checkpoint, device))
    print('Model loaded from checkpoint.')
    # Ensure the output directory exists
    output_dir = "output_resnet18"
    os.makedirs(output_dir, exist_ok=True)

    # 3. Dự đoán kết quả cho mỗi hình ảnh trong tập test
    for image_path in tqdm(test_dataset.imgs, desc='Testing'):
        img, predicted_label = predict_image(image_path[0], model, manual_transforms, class_names, device)
        # plt.imshow(img)
        # plt.title(f'Predicted: {predicted_label}')
        # plt.show()

        # Convert the tensor image back to a PIL image if necessary
        if isinstance(img, torch.Tensor):
            img = transforms.ToPILImage()(img)

        # Create a plot
        fig, ax = plt.subplots()

        # Set white background
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Remove axis
        ax.axis('off')

        # Display the image
        ax.imshow(img)

        # Add the predicted label as the title
        ax.set_title(f'Predicted: {predicted_label}', fontsize=12, pad=10)

        # Save the figure
        image_basename = os.path.basename(image_path[0])
        image_name, image_ext = os.path.splitext(image_basename)
        output_image_path = os.path.join(output_dir, f"{image_name}_pred_{predicted_label}.png")

        plt.savefig(output_image_path, bbox_inches='tight', pad_inches=0.1)
        plt.close(fig)


if __name__ == '__main__':
    test_model()


  model.load_state_dict(torch.load(file_checkpoint, device))


Model loaded from checkpoint.


Testing: 100%|██████████| 8463/8463 [32:11<00:00,  4.38it/s]


# SEBLOCK

In [4]:
# import torch.nn as nn

# class SE_Block(nn.Module):
#     def __init__(self, c, r=16):
#         super(SE_Block, self).__init__()
#         self.squeeze = nn.AdaptiveAvgPool2d(1)
#         self.excitation = nn.Sequential(
#             nn.Linear(c, c // r, bias=False),
#             nn.ReLU(inplace=True),
#             nn.Linear(c // r, c, bias=False),
#             nn.Sigmoid()
#         )

#     def forward(self, x):
#         bs, c, _, _ = x.size()
#         y = self.squeeze(x).view(bs, c)
#         y = self.excitation(y).view(bs, c, 1, 1)
#         return x * y.expand_as(x)

# SEBLOCK + Resnet18

In [5]:
# import torch
# import torch.nn as nn
# import torch.nn.functional as F


# class SEBlock(nn.Module):
#     def __init__(self, input_channels, reduction_ratio=16):
#         super(SEBlock, self).__init__()
#         self.avg_pool = nn.AdaptiveAvgPool2d(1)
#         self.fc1 = nn.Linear(input_channels, input_channels // reduction_ratio, bias=False)
#         self.relu = nn.ReLU(inplace=True)
#         self.fc2 = nn.Linear(input_channels // reduction_ratio, input_channels, bias=False)
#         self.sigmoid = nn.Sigmoid()

#     def forward(self, x):
#         batch_size, channels, _, _ = x.size()
#         y = self.avg_pool(x).view(batch_size, channels)
#         y = self.fc1(y)
#         y = self.relu(y)
#         y = self.fc2(y)
#         y = self.sigmoid(y).view(batch_size, channels, 1, 1)
#         return x * y.expand_as(x)


# class SEResNet18(nn.Module):
#     def __init__(self, num_classes):
#         super(SEResNet18, self).__init__()

#         self.dropout_percentage = 0.5
#         self.relu = nn.ReLU()

#         # BLOCK-1 (starting block) input=(224x224) output=(56x56)
#         self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
#         self.batchnorm1 = nn.BatchNorm2d(64)
#         self.maxpool1 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))

#         # BLOCK-2 (1) input=(56x56) output = (56x56)
#         self.conv2_1_1 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#         self.batchnorm2_1_1 = nn.BatchNorm2d(64)
#         self.conv2_1_2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#         self.batchnorm2_1_2 = nn.BatchNorm2d(64)
#         self.dropout2_1 = nn.Dropout(p=self.dropout_percentage)
#         self.se2_1 = SEBlock(64)

#         # BLOCK-2 (2)
#         self.conv2_2_1 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#         self.batchnorm2_2_1 = nn.BatchNorm2d(64)
#         self.conv2_2_2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#         self.batchnorm2_2_2 = nn.BatchNorm2d(64)
#         self.dropout2_2 = nn.Dropout(p=self.dropout_percentage)
#         self.se2_2 = SEBlock(64)

#         # BLOCK-3 (1) input=(56x56) output = (28x28)
#         self.conv3_1_1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
#         self.batchnorm3_1_1 = nn.BatchNorm2d(128)
#         self.conv3_1_2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#         self.batchnorm3_1_2 = nn.BatchNorm2d(128)
#         self.concat_adjust_3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(1, 1), stride=(2, 2),
#                                          padding=(0, 0))
#         self.dropout3_1 = nn.Dropout(p=self.dropout_percentage)
#         self.se3_1 = SEBlock(128)

#         # BLOCK-3 (2)
#         self.conv3_2_1 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#         self.batchnorm3_2_1 = nn.BatchNorm2d(128)
#         self.conv3_2_2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#         self.batchnorm3_2_2 = nn.BatchNorm2d(128)
#         self.dropout3_2 = nn.Dropout(p=self.dropout_percentage)
#         self.se3_2 = SEBlock(128)

#         # BLOCK-4 (1) input=(28x28) output = (14x14)
#         self.conv4_1_1 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
#         self.batchnorm4_1_1 = nn.BatchNorm2d(256)
#         self.conv4_1_2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#         self.batchnorm4_1_2 = nn.BatchNorm2d(256)
#         self.concat_adjust_4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(1, 1), stride=(2, 2),
#                                          padding=(0, 0))
#         self.dropout4_1 = nn.Dropout(p=self.dropout_percentage)
#         self.se4_1 = SEBlock(256)

#         # BLOCK-4 (2)
#         self.conv4_2_1 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#         self.batchnorm4_2_1 = nn.BatchNorm2d(256)
#         self.conv4_2_2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#         self.batchnorm4_2_2 = nn.BatchNorm2d(256)
#         self.dropout4_2 = nn.Dropout(p=self.dropout_percentage)
#         self.se4_2 = SEBlock(256)

#         # BLOCK-5 (1) input=(14x14) output = (7x7)
#         self.conv5_1_1 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
#         self.batchnorm5_1_1 = nn.BatchNorm2d(512)
#         self.conv5_1_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#         self.batchnorm5_1_2 = nn.BatchNorm2d(512)
#         self.concat_adjust_5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(1, 1), stride=(2, 2),
#                                          padding=(0, 0))
#         self.dropout5_1 = nn.Dropout(p=self.dropout_percentage)
#         self.se5_1 = SEBlock(512)

#         # BLOCK-5 (2)
#         self.conv5_2_1 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#         self.batchnorm5_2_1 = nn.BatchNorm2d(512)
#         self.conv5_2_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#         self.batchnorm5_2_2 = nn.BatchNorm2d(512)
#         self.dropout5_2 = nn.Dropout(p=self.dropout_percentage)
#         self.se5_2 = SEBlock(512)

#         # Final Block input=(7x7)
#         self.avgpool = nn.AvgPool2d(kernel_size=(7, 7), stride=(1, 1))
#         self.fc = nn.Linear(in_features=1 * 1 * 512, out_features=1000)
#         self.out = nn.Linear(in_features=1000, out_features=num_classes)
#         # END

#     def forward(self, x):
#         # block 1 --> Starting block
#         x = self.relu(self.batchnorm1(self.conv1(x)))
#         op1 = self.maxpool1(x)

#         # block2 - 1
#         x = self.relu(self.batchnorm2_1_1(self.conv2_1_1(op1)))  # conv2_1
#         x = self.batchnorm2_1_2(self.conv2_1_2(x))  # conv2_1
#         x = self.dropout2_1(x)
#         x = self.se2_1(x)
#         # block2 - Adjust - No adjust in this layer as dimensions are already same
#         # block2 - Concatenate 1
#         op2_1 = self.relu(x + op1)
#         # block2 - 2
#         x = self.relu(self.batchnorm2_2_1(self.conv2_2_1(op2_1)))  # conv2_2
#         x = self.batchnorm2_2_2(self.conv2_2_2(x))  # conv2_2
#         x = self.dropout2_2(x)
#         x = self.se2_2(x)
#         # op - block2
#         op2 = self.relu(x + op2_1)

#         # block3 - 1[Convolution block]
#         x = self.relu(self.batchnorm3_1_1(self.conv3_1_1(op2)))  # conv3_1
#         x = self.batchnorm3_1_2(self.conv3_1_2(x))  # conv3_1
#         x = self.dropout3_1(x)
#         x = self.se3_1(x)
#         # block3 - Adjust
#         op2 = self.concat_adjust_3(op2)  # SKIP CONNECTION
#         # block3 - Concatenate 1
#         op3_1 = self.relu(x + op2)
#         # block3 - 2[Identity Block]
#         x = self.relu(self.batchnorm3_2_1(self.conv3_2_1(op3_1)))  # conv3_2
#         x = self.batchnorm3_2_2(self.conv3_2_2(x))  # conv3_2
#         x = self.dropout3_2(x)
#         x = self.se3_2(x)
#         # op - block3
#         op3 = self.relu(x + op3_1)

#         # block4 - 1[Convolition block]
#         x = self.relu(self.batchnorm4_1_1(self.conv4_1_1(op3)))  # conv4_1
#         x = self.batchnorm4_1_2(self.conv4_1_2(x))  # conv4_1
#         x = self.dropout4_1(x)
#         x = self.se4_1(x)
#         # block4 - Adjust
#         op3 = self.concat_adjust_4(op3)  # SKIP CONNECTION
#         # block4 - Concatenate 1
#         op4_1 = self.relu(x + op3)
#         # block4 - 2[Identity Block]
#         x = self.relu(self.batchnorm4_2_1(self.conv4_2_1(op4_1)))  # conv4_2
#         x = self.batchnorm4_2_2(self.conv4_2_2(x))  # conv4_2
#         x = self.dropout4_2(x)
#         x = self.se4_2(x)
#         # op - block4
#         op4 = self.relu(x + op4_1)

#         # block5 - 1[Convolution Block]
#         x = self.relu(self.batchnorm5_1_1(self.conv5_1_1(op4)))  # conv5_1
#         x = self.batchnorm5_1_2(self.conv5_1_2(x))  # conv5_1
#         x = self.dropout5_1(x)
#         x = self.se5_1(x)
#         # block5 - Adjust
#         op4 = self.concat_adjust_5(op4)  # SKIP CONNECTION
#         # block5 - Concatenate 1
#         op5_1 = self.relu(x + op4)
#         # block5 - 2[Identity Block]
#         x = self.relu(self.batchnorm5_2_1(self.conv5_2_1(op5_1)))  # conv5_2
#         x = self.batchnorm5_2_2(self.conv5_2_1(x))  # conv5_2
#         x = self.dropout5_2(x)
#         x = self.se5_2(x)
#         # op - block5
#         op5 = self.relu(x + op5_1)

#         # FINAL BLOCK - classifier
#         x = self.avgpool(op5)
#         x = x.reshape(x.shape[0], -1)
#         x = self.relu(self.fc(x))
#         x = self.out(x)

#         return x


# Train SEBLock

In [6]:
# """
# Thanh Le  16 April 2024
# How to train/fine-tune a pre-trained model on a custom dataset (i.e., transfer learning)
# """
# import torch
# from torch import nn, save, load
# from tqdm import tqdm
# from torch.optim import Adam
# from torch.utils.data import DataLoader
# from torchvision.datasets import ImageFolder
# import torchvision.transforms as transforms
# from torchmetrics.functional import accuracy
# from torchvision.transforms import ToTensor, Resize
# import numpy as np
# import os
# import matplotlib.pyplot as plt

# # Setup CUDA
# def setup_cuda():
#     # Setting seeds for reproducibility
#     seed = 50
#     torch.backends.cudnn.enabled = True
#     torch.backends.cudnn.benchmark = True
#     np.random.seed(seed)
#     torch.manual_seed(seed)
#     if torch.cuda.is_available():
#         torch.cuda.manual_seed(seed)

#     return torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


# def train_model():
#     """
#     Train the model over a single epoch
#     :return: training loss and training accuracy
#     """
#     train_loss = 0.0
#     train_acc = 0.0
#     model.train()

#     for (img, label) in tqdm(train_loader, ncols=80, desc='Training'):
#         # Get a batch
#         img, label = img.to(device, dtype=torch.float), label.to(device, dtype=torch.long)

#         # Set the gradients to zero before starting backpropagation
#         optimizer.zero_grad()

#         # Perform a feed-forward pass
#         logits = model(img)

#         # Compute the batch loss
#         loss = loss_fn(logits, label)

#         # Compute gradient of the loss fn w.r.t the trainable weights
#         loss.backward()

#         # Update the trainable weights
#         optimizer.step()

#         # Accumulate the batch loss
#         train_loss += loss.item()

#         # Get the predictions to calculate the accuracy for every iteration. Remember to accumulate the accuracy
#         prediction = logits.argmax(axis=1)
#         train_acc += accuracy(prediction, label, task='multiclass', average='macro', num_classes=len(class_names)).item()

#     return train_loss / len(train_loader), train_acc / len(train_loader)


# def validate_model():
#     """
#     Validate the model over a single epoch
#     :return: validation loss and validation accuracy
#     """
#     model.eval()
#     valid_loss = 0.0
#     val_acc = 0.0

#     with torch.no_grad():
#         for (img, label) in tqdm(val_loader, ncols=80, desc='Valid'):
#             # Get a batch
#             img, label = img.to(device, dtype=torch.float), label.to(device, dtype=torch.long)

#             # Perform a feed-forward pass
#             logits = model(img)

#             # Compute the batch loss
#             loss = loss_fn(logits, label)

#             # Accumulate the batch loss
#             valid_loss += loss.item()

#             # Get the predictions to calculate the accuracy for every iteration. Remember to accumulate the accuracy
#             prediction = logits.argmax(axis=1)
#             val_acc += accuracy(prediction, label, task='multiclass', average='macro', num_classes=len(class_names)).item()

#     return valid_loss / len(val_loader), val_acc / len(val_loader)

# # Example plotting function

# def plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies):
#     epochs = range(1, len(train_losses) + 1)
#     # Losses
#     plt.figure(figsize=(15, 7))
#     plt.subplot(2, 1, 1)
#     plt.plot(epochs, train_losses, label='Training Loss', color='blue')
#     plt.plot(epochs, val_losses, label='Validation Loss', color='red')
#     plt.xlabel('Epochs')
#     plt.ylabel('Loss')
#     plt.title('Training and Validation Loss')
#     plt.legend()
#     plt.yscale('log')  # Log scale can help for loss curves with large values

#     # Accuracies
#     plt.subplot(2, 1, 2)
#     plt.plot(epochs, train_accuracies, label='Training Accuracy', color='green')
#     plt.plot(epochs, val_accuracies, label='Validation Accuracy', color='orange')
#     plt.xlabel('Epochs')
#     plt.ylabel('Accuracy')
#     plt.title('Training and Validation Accuracy')
#     plt.legend()

#     plt.tight_layout()
#     # Save the figure to a file
#     plt.savefig("trainplot.png")  # You can change the file name and format (e.g., .png, .jpg, .pdf)

#     plt.show()

# if __name__ == "__main__":
#     device = setup_cuda()

#     # 1. Load the dataset
#     transform = transforms.Compose([Resize((224, 224)), ToTensor()])
#     train_dataset = ImageFolder(root='/kaggle/input/dataset-split/dataset_split/train', transform=transform)
#     val_dataset = ImageFolder(root='/kaggle/input/dataset-split/dataset_split/val', transform=transform)
#     # Get class names
#     class_names = train_dataset.classes

#     # 2. Create data loaders
#     train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
#     val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)

#     # 3. Create a new deep model without pre-trained weights
#     # from utils.seresnet18 import SEResNet18

#     model = SEResNet18(
#         num_classes=len(class_names),
#     ).to(device)

#     # 4. Specify loss function and optimizer
#     optimizer = Adam(model.parameters(), lr=1e-4)
#     loss_fn = torch.nn.CrossEntropyLoss()

#     # 5. Train the model with 100 epochs
#     # store the metrics for plotting
#     train_losses, val_losses, train_accuracies, val_accuracies = [], [], [], []

#     max_acc = 0
#     for epoch in range(100):

#         # 5.1. Train the model over a single epoch
#         train_loss, train_acc = train_model()
#         train_losses.append(train_loss)  # save train loss values
#         train_accuracies.append(train_acc)  # save train acc values

#         # 5.2. Validate the model after training
#         val_loss, val_acc = validate_model()
#         val_losses.append(val_loss)  # save val loss values
#         val_accuracies.append(val_acc)  # save val acc values

#         print(f'Epoch {epoch}: Train loss = {train_loss}, Train accuracy: {train_acc}')
#         print(f'Epoch {epoch}: Validation loss = {val_loss}, Validation accuracy: {val_acc}')

#         # 4.3. Save the model if the validation accuracy is increasing
#         if val_acc > max_acc:
#             print(f'Validation accuracy increased ({max_acc} --> {val_acc}). Model saved')
#             folder_path = 'checkpoints_se_resnet18'  # Define the folder name
#             if not os.path.exists(folder_path):
#                 os.makedirs(folder_path)  # Create the folder if it does not exist
#             file_path = os.path.join(folder_path,
#                                      'se_resnet18_epoch_' + str(epoch) + '_acc_{0:.4f}'.format(val_acc) + '.pt')
#             with open(file_path, 'wb') as f:
#                 save(model.state_dict(), f)
#             max_acc = val_acc
# # After training is complete, plot the metrics
# plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies)

# Testing SEResnet18

In [7]:
# import torch
# from torch import nn, save, load
# from tqdm import tqdm
# from torch.optim import Adam
# from torch.utils.data import DataLoader
# from torchvision.datasets import ImageFolder
# import torchvision.transforms as transforms
# from torchmetrics.functional import accuracy
# from torchvision.transforms import ToTensor, Resize
# import numpy as np
# import os
# from PIL import Image
# import matplotlib.pyplot as plt

# # Thiết lập biến cần thiết
# train_dir = '/kaggle/input/playcards/train'
# test_dir = '/kaggle/input/playcards/test'
# valid_dir = '/kaggle/input/playcards/valid'
# NUM_WORKERS = os.cpu_count()
# BATCH_SIZE = 32
# IMG_SIZE = 224
# manual_transforms = transforms.Compose([
#     transforms.Resize((IMG_SIZE, IMG_SIZE)),
#     transforms.ToTensor(),
# ])
# patch_size = 16
# CLASS = {} # KHONG CAN DIEN CHI TIET


# # Thiết lập thiết bị (GPU hoặc CPU)
# def setup_cuda():
#     torch.backends.cudnn.enabled = True
#     torch.backends.cudnn.benchmark = True
#     device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#     return device


# device = setup_cuda()

# # Thiết lập các transform để xử lý ảnh
# manual_transforms = transforms.Compose([
#     transforms.Resize((IMG_SIZE, IMG_SIZE)),
#     transforms.ToTensor(),
# ])


# # Hàm dự đoán kết quả cho một hình ảnh
# def predict_image(image_path, model, transform, class_names, device):
#     model.eval()
#     img = Image.open(image_path).convert('RGB')
#     img_tensor = transform(img).unsqueeze(0).to(device)
#     with torch.no_grad():
#         output = model(img_tensor)
#         _, predicted_class = torch.max(output, 1)
#     predicted_label = class_names[predicted_class.item()]
#     return img, predicted_label


# # Hàm chính để dự đoán các hình ảnh trong tập test
# def test_model():
#     # 1. Tải dữ liệu và lớp từ tập train
#     transform = transforms.Compose([Resize((224, 224)), ToTensor()])
#     train_dataset = ImageFolder(root='/kaggle/input/dataset-split/dataset_split/train', transform=transform)
#     test_dataset = ImageFolder(root='/kaggle/input/dataset-split/dataset_split/test', transform=transform)
#     # Get class names
#     class_names = train_dataset.classes

#     # 2. Tạo mô hình ViT và tải trạng thái từ checkpoint
#     from utils.resnet18 import ResNet18

#     model = ResNet18(
#         num_classes=len(class_names),
#     ).to(device)

#     folder_checkpoint = 'checkpoints_se_resnet18'  # Define the folder name
#     file_name = 'se_resnet18_epoch_7_acc_0.8208.pt' #best weight
#     file_checkpoint = os.path.join(folder_checkpoint, file_name)  # lay best weight
#     model.load_state_dict(torch.load(file_checkpoint, device))
#     print('Model loaded from checkpoint.')
#     # Ensure the output directory exists
#     output_dir = "output_resnet18"
#     os.makedirs(output_dir, exist_ok=True)

#     # 3. Dự đoán kết quả cho mỗi hình ảnh trong tập test
#     for image_path in tqdm(test_dataset.imgs, desc='Testing'):
#         img, predicted_label = predict_image(image_path[0], model, manual_transforms, class_names, device)
#         # plt.imshow(img)
#         # plt.title(f'Predicted: {predicted_label}')
#         # plt.show()

#         # Convert the tensor image back to a PIL image if necessary
#         if isinstance(img, torch.Tensor):
#             img = transforms.ToPILImage()(img)

#         # Create a plot
#         fig, ax = plt.subplots()

#         # Set white background
#         fig.patch.set_facecolor('white')
#         ax.set_facecolor('white')

#         # Remove axis
#         ax.axis('off')

#         # Display the image
#         ax.imshow(img)

#         # Add the predicted label as the title
#         ax.set_title(f'Predicted: {predicted_label}', fontsize=12, pad=10)

#         # Save the figure
#         image_basename = os.path.basename(image_path[0])
#         image_name, image_ext = os.path.splitext(image_basename)
#         output_image_path = os.path.join(output_dir, f"{image_name}_pred_{predicted_label}.png")

#         plt.savefig(output_image_path, bbox_inches='tight', pad_inches=0.1)
#         plt.close(fig)


# if __name__ == '__main__':
#     test_model()
