<a href="https://colab.research.google.com/github/Shan-Lee2k/Machine-Learning/blob/main/ML_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Libraries

In [1]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader, Subset,ConcatDataset
from torchvision import datasets, transforms
import matplotlib.pylab as plt
import numpy as np
# NOTE(review): a bare expression that is not the last line of a cell is not
# displayed, so this line has no visible effect as written.
torch.__version__
# Report whether PyTorch can see a CUDA device.
print(torch.cuda.is_available())
# assert torch.cuda.get_device_name() == ""
# Seed PyTorch's global random number generator; being the last expression of
# the cell, the returned Generator object is echoed as the cell output.
torch.manual_seed(0)

True


<torch._C.Generator at 0x7c33205361b0>

# Datasets

In [2]:
# Helper function
def show_image(img):
    """Display a channel-first image tensor with matplotlib.

    The tensor is un-normalized with ``img / 2 + 0.5``, i.e. the inverse of a
    ``Normalize(mean=0.5, std=0.5)`` step, and then transposed from
    (channels, height, width) to (height, width, channels) for ``plt.imshow``.

    Args:
        img: image tensor laid out as (C, H, W). It may live on any device and
            may be attached to an autograd graph.
    """
    # `.numpy()` only works on CPU tensors that do not require grad, so detach
    # and move to host memory first; this is a no-op for plain CPU tensors.
    img = img.detach().cpu()
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# Preprocessing pipeline applied to every image:
#   1. resize to 128x128,
#   2. convert to a tensor,
#   3. normalize each of the three channels with mean 0.5 and std 0.5.
pre_processing = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)


In [None]:
#If cannot download CelebA dataset, get from GG Drive
import zipfile
import os
from google.colab import drive
drive.mount('/content/drive')

zip_file_path = '/content/drive/MyDrive/Colab Notebooks/img_align_celeba.zip'
extract_path = '/dataset/celeba/'
# Marker file written only after the archive has been fully extracted, so that
# re-running this cell does not unpack the whole archive again and an
# interrupted extraction is not mistaken for a complete one.
extraction_marker = os.path.join(extract_path, '.extracted')

if os.path.exists(extraction_marker):
    print("CelebA dataset already extracted, skipping extraction.")
else:
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    # Create the (empty) marker last: its presence means extraction finished.
    with open(extraction_marker, 'w'):
        pass
    print("Download CelebA dataset successfully!")

In [None]:
# Load CelebA
celeba_dataset = datasets.ImageFolder(root='/dataset/celeba/', transform=pre_processing)
# NOTE(review): ImageFolder builds its returned label from `samples`, so
# overriding `targets` here does not change what indexing the dataset returns.
# The assignment is kept for compatibility; the label actually used downstream
# is applied by the CelebAWithLabel wrapper.
celeba_dataset.targets = torch.tensor([1] * len(celeba_dataset))
train_size = 50000
test_size = 10000

# Slicing past the end of a list silently truncates, which would produce a
# short (or empty) test split; fail loudly instead.
if len(celeba_dataset) < train_size + test_size:
    raise ValueError(
        f"CelebA folder holds {len(celeba_dataset)} images, but "
        f"{train_size + test_size} are required for the train/test split"
    )

# Random, non-overlapping train/test index sets drawn from one permutation.
indices = torch.randperm(len(celeba_dataset)).tolist()
train_indices = indices[:train_size]
test_indices = indices[train_size:train_size + test_size]


# Wrapper dataset that replaces every sample's label with one fixed value
class CelebAWithLabel(Dataset):
    """Dataset view that keeps each image but returns a constant label.

    Attributes:
        dataset: the wrapped dataset; indexing it must yield an
            ``(image, label)`` pair.
        label: tensor built once from the ``label`` argument via
            ``torch.tensor``; the same tensor object is returned for every
            sample.
    """

    def __init__(self, dataset, label):
        self.label = torch.tensor(label)
        self.dataset = dataset

    def __len__(self):
        # Same number of samples as the wrapped dataset.
        return len(self.dataset)

    def __getitem__(self, index):
        # The wrapped dataset's own label is dropped in favour of the fixed one.
        image, _discarded_label = self.dataset[index]
        return image, self.label

# Build the train and test subsets; each one gets its own label-overriding
# wrapper with label 1 (CIFAR-10 is labelled 0 = "not Face" in this notebook).
celeba_train_subset, celeba_test_subset = (
    Subset(CelebAWithLabel(celeba_dataset, label=1), subset_indices)
    for subset_indices in (train_indices, test_indices)
)

# Sanity check of the overridden label
print(type(celeba_train_subset[0][1]))  # prints the label's type (a torch tensor class), not 1
print(celeba_test_subset[0][1])   # prints the label tensor, whose value is 1




In [None]:
# Download CIFAR-10 (train and test splits) using the shared preprocessing.
CIFAR_10_train = datasets.CIFAR10(
    root='/content/dataset',
    train=True,
    download=True,
    transform=pre_processing,
)
CIFAR_10_test = datasets.CIFAR10(
    root='/content/dataset',
    train=False,
    download=True,
    transform=pre_processing,
)
# Label 0 (not Face) for CIFAR_10: overwrite all ten original classes with an
# int64 tensor of zeros of the same length.
CIFAR_10_train.targets = torch.zeros(len(CIFAR_10_train.targets), dtype=torch.long)
CIFAR_10_test.targets = torch.zeros(len(CIFAR_10_test.targets), dtype=torch.long)


In [None]:
# Show the image shape and label of the first sample of each training source.
for probe_dataset in (CIFAR_10_train, celeba_train_subset):
    probe_image, probe_label = probe_dataset[0]
    print(probe_image.shape, probe_label)

**DATASET INFORMATION**

In [None]:
# Report the size and type of each source dataset. The CIFAR lines previously
# said "CIFAR_100" although the data loaded in this notebook is CIFAR-10.
print(f"CelebA dataset training: {len(celeba_train_subset)}")
print(f"CelebA dataset testing: {len(celeba_test_subset)}")
print(f"Type CelebA: {type(celeba_train_subset)}")
print(f"CIFAR_10 dataset training: {len(CIFAR_10_train)}")
print(f"CIFAR_10 dataset testing: {len(CIFAR_10_test)}")
print(f"Type CIFAR_10: {type(CIFAR_10_train)}")

In [None]:
# Combine the two sources into a single dataset per split
train_dataset = ConcatDataset(
    [celeba_train_subset, CIFAR_10_train]
)
test_dataset = ConcatDataset(
    [CIFAR_10_test, celeba_test_subset]
)
# Create DataLoaders for the combined datasets (only training is shuffled)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
)


# **Visualize Data**

# **BASIC CNN**



In [None]:
class Basic_CNN(nn.Module):
    """Four conv -> ReLU -> max-pool stages followed by a two-layer classifier.

    Every convolution uses a 3x3 kernel with stride 2 and every pooling layer
    a 2x2 window with stride 1. ``fc1`` expects ``out_4 * 5 * 5`` features,
    i.e. a 5x5 final feature map, which is what 128x128 inputs produce.

    Args:
        out_1, out_2, out_3, out_4: output channel counts of the four
            convolution layers, in order.
    """

    # Constructor
    def __init__(self, out_1=12, out_2=24, out_3 = 48, out_4 = 72):
        super().__init__()
        # Stage 1
        self.cnn1 = nn.Conv2d(3, out_1, kernel_size=3, stride=2)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=1)
        # Stage 2
        self.cnn2 = nn.Conv2d(out_1, out_2, kernel_size=3, stride=2)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=1)
        # Stage 3
        self.cnn3 = nn.Conv2d(out_2, out_3, kernel_size=3, stride=2)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=1)
        # Stage 4
        self.cnn4 = nn.Conv2d(out_3, out_4, kernel_size=3, stride=2)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=1)
        # Classifier head: flattened 5x5 feature map -> 512 -> 2 logits
        self.fc1 = nn.Linear(out_4 * 5 * 5, 512)
        self.fc2 = nn.Linear(512, 2)

    # Prediction
    def forward(self, x):
        """Return the two-class logits for a batch of images ``x``."""
        stages = (
            (self.cnn1, self.maxpool1),
            (self.cnn2, self.maxpool2),
            (self.cnn3, self.maxpool3),
            (self.cnn4, self.maxpool4),
        )
        for conv, pool in stages:
            x = pool(torch.relu(conv(x)))

        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)


In [None]:
def train_model(model, train_loader, validation_loader, optimizer, n_epochs=4,
                loss_fn=None, device=None):
    """Train ``model`` and evaluate it on ``validation_loader`` after each epoch.

    Args:
        model: the ``nn.Module`` to train; it must already be on the target
            device.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        validation_loader: iterable of ``(inputs, labels)`` evaluation batches.
        optimizer: optimizer built over ``model``'s parameters.
        n_epochs: number of passes over ``train_loader``.
        loss_fn: loss callable ``loss_fn(logits, labels)``. When omitted, the
            notebook-level ``criterion`` is used.
        device: device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if it has none).

    Returns:
        ``(accuracy_list, loss_list)``: one validation accuracy (float) per
        epoch, and one detached loss tensor per training iteration.
    """
    if loss_fn is None:
        # Backward-compatible fallback to the global defined in the notebook.
        loss_fn = criterion
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    accuracy_list = []
    loss_list = []
    for epoch in range(n_epochs):
        model.train()
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            z = model(x)
            loss = loss_fn(z, y)
            loss.backward()
            optimizer.step()
            # Detach so the stored value does not carry autograd history; it is
            # still a tensor, so callers may keep using `.cpu().item()` on it.
            loss_list.append(loss.detach())

        # Perform a prediction on the validation data.
        correct = 0
        seen = 0
        model.eval()
        with torch.no_grad():  # no gradients are needed for evaluation
            for x_test, y_test in validation_loader:
                x_test, y_test = x_test.to(device), y_test.to(device)
                z = model(x_test)
                _, yhat = torch.max(z, 1)
                correct += (yhat == y_test).sum().item()
                seen += y_test.size(0)
        # Divide by the samples actually evaluated rather than by the size of
        # an unrelated global dataset.
        accuracy = correct / seen if seen else 0.0
        accuracy_list.append(accuracy)

    return accuracy_list, loss_list

In [None]:
# Using GPU to train (falls back to CPU when CUDA is unavailable)
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Plain CNN, moved to the selected device
model = Basic_CNN(12, 24, 48, 60)
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# Loaders used for training and for per-epoch validation
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
validation_loader = DataLoader(test_dataset, batch_size=64)

In [None]:
# Train the plain CNN for 5 epochs, keeping per-epoch accuracy and per-iteration loss.
accuracy_list_normal, loss_list_normal = train_model(
    model=model,
    train_loader=train_loader,
    validation_loader=validation_loader,
    optimizer=optimizer,
    n_epochs=5,
)

# **CNN with Batch Normalization**

In [None]:
class CNN_with_batch_norm(nn.Module):
    """Four conv -> batch-norm -> ReLU -> max-pool stages plus a classifier.

    Every convolution uses a 3x3 kernel with stride 2 and every pooling layer
    a 2x2 window with stride 1. Batch normalization follows each convolution
    and each fully connected layer, including the final 2-unit output.
    ``fc1`` expects ``out_4 * 5 * 5`` features, i.e. a 5x5 final feature map,
    which is what 128x128 inputs produce.

    Args:
        out_1, out_2, out_3, out_4: output channel counts of the four
            convolution layers, in order.
    """

    # Constructor
    def __init__(self, out_1=12, out_2=24, out_3 = 48, out_4 = 72):
        super().__init__()
        # Stage 1
        self.cnn1 = nn.Conv2d(3, out_1, kernel_size=3, stride=2)
        self.conv1_bn = nn.BatchNorm2d(out_1)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=1)
        # Stage 2
        self.cnn2 = nn.Conv2d(out_1, out_2, kernel_size=3, stride=2)
        self.conv2_bn = nn.BatchNorm2d(out_2)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=1)
        # Stage 3
        self.cnn3 = nn.Conv2d(out_2, out_3, kernel_size=3, stride=2)
        self.conv3_bn = nn.BatchNorm2d(out_3)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=1)
        # Stage 4
        self.cnn4 = nn.Conv2d(out_3, out_4, kernel_size=3, stride=2)
        self.conv4_bn = nn.BatchNorm2d(out_4)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=1)
        # Classifier head, each linear layer followed by batch normalization
        self.fc1 = nn.Linear(out_4 * 5 * 5, 512)
        self.bn_fc1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 2)
        self.bn_fc2 = nn.BatchNorm1d(2)

    # Prediction
    def forward(self, x):
        """Return the batch-normalized two-class logits for a batch ``x``."""
        stages = (
            (self.cnn1, self.conv1_bn, self.maxpool1),
            (self.cnn2, self.conv2_bn, self.maxpool2),
            (self.cnn3, self.conv3_bn, self.maxpool3),
            (self.cnn4, self.conv4_bn, self.maxpool4),
        )
        for conv, norm, pool in stages:
            x = pool(torch.relu(norm(conv(x))))

        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        hidden = torch.relu(self.bn_fc1(self.fc1(x)))
        return self.bn_fc2(self.fc2(hidden))

In [None]:
# CNN-batch: same channel widths as the plain CNN, with batch normalization
model_batch = CNN_with_batch_norm(12, 24, 48, 60)
model_batch = model_batch.to(device)

# Fresh loss and optimizer for this model (smaller learning rate than the plain CNN run)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.001
optimizer = torch.optim.SGD(params=model_batch.parameters(), lr=learning_rate)

# Train for 5 epochs, keeping per-epoch accuracy and per-iteration loss
accuracy_list_batch, loss_list_batch = train_model(
    model=model_batch,
    train_loader=train_loader,
    validation_loader=validation_loader,
    optimizer=optimizer,
    n_epochs=5,
)

# Evaluation

In [None]:
# Convert the recorded per-iteration losses to plain Python floats for plotting.
# Entries that are already numbers are passed through unchanged, so running
# this cell a second time no longer fails on `float` objects lacking `.cpu()`.
loss_list_normal = [
    loss.item() if torch.is_tensor(loss) else float(loss)
    for loss in loss_list_normal
]
loss_list_batch = [
    loss.item() if torch.is_tensor(loss) else float(loss)
    for loss in loss_list_batch
]

In [None]:
# Plot the loss CNN and CNN_batch
plt.plot(loss_list_normal, 'b',label='loss normal cnn ')
plt.plot(loss_list_batch,'r',label='loss batch cnn')
plt.xlabel('iteration')
plt.ylabel('loss')  # label the y-axis so the figure stands on its own
plt.title("loss")
plt.legend()
# Render explicitly, as the accuracy plot does, so the Legend object's repr is
# not echoed as the cell output.
plt.show()

In [None]:
# Plot the accuracy CNN and CNN_batch (one point per epoch) on the current axes
accuracy_axes = plt.gca()
accuracy_axes.plot(accuracy_list_normal, 'b', label=' normal CNN')
accuracy_axes.plot(accuracy_list_batch, 'r', label=' CNN with Batch Norm')
accuracy_axes.set_xlabel('Epoch')
accuracy_axes.set_title("Accuracy ")
accuracy_axes.legend()
plt.show()