In [None]:
import os # untuk berinteraksi dengan sistem operasi, seperti untuk mengakses file dan direktori
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split # untuk membagi dataset menjadi subset training dan testing
import torch # Library utama PyTorch untuk komputasi tensor dan deep learning
from torch.utils.data import Dataset, DataLoader # Kelas untuk memanipulasi dataset dan membuat batch data untuk training
from torchvision import transforms # Modul untuk melakukan transformasi pada gambar seperti augmentasi data
import torch.nn as nn # Modul yang berisi berbagai komponen neural network seperti lapisan (layers), fungsi aktivasi, dll
import torch.optim as optim # Modul untuk algoritma optimasi, seperti SGD, Adam, dll
# from torchvision.models import resnet50 # Model pre-trained ResNet-50 yang bisa digunakan untuk tugas klasifikasi gambar
import torchvision.models as models
import time # Library untuk mengukur waktu eksekusi kode
from torch.autograd import Variable # Kelas yang membungkus tensor untuk melacak sejarah operasi dan menghitung gradient
from PIL import Image # Library untuk membuka, memanipulasi, dan menyimpan gambar
from sklearn.metrics import classification_report, confusion_matrix # Fungsi untuk menghitung dan menampilkan laporan klasifikasi dan confusion matrix
import cv2 # Library OpenCV untuk manipulasi gambar dan video
from torchvision.io import read_image

In [None]:
data_path = '/kaggle/input/celeb-a/Dataset' # Path to the CelebA dataset directory

In [None]:
os.listdir(data_path) # List every file and folder inside the dataset directory

In [None]:
# Collect the file names found in the Images subdirectory of data_path, sorted.
# Names of 11 or more characters are dropped because duplicate images are stored
# as 'XXXXXX(1).jpg' (presumably the regular form is the shorter 'XXXXXX.jpg').
images_list = sorted(
    file_name
    for file_name in os.listdir(data_path + '/Images')
    if len(file_name) < 11
)

In [None]:
# Read the attribute table: whitespace-separated, first row is the header.
# The separator is a raw string so that '\s' reaches the regex engine unchanged
# instead of being treated as an (invalid) string escape sequence.
data = pd.read_csv(data_path + '/list_attribute.txt', sep=r'\s+', header=0)
data = data[['file_name', 'Male']]
# Keep only the rows whose image file is actually present in images_list.
new_data = data[data['file_name'].isin(images_list)]
# Re-encode -1 as 0 so the label column can be used as a 0/1 class index.
new_data = new_data.replace(-1, 0)

In [None]:
# Preview the first rows of the filtered attribute table.
new_data.head()

In [None]:
# Print the column dtypes and non-null counts of the filtered table.
new_data.info()

In [None]:
# Bar chart of the class balance in the 'Male' column; the trailing ';' hides the Axes repr.
sns.countplot(data=new_data, x='Male');

In [None]:
# Summary statistics of the unfiltered table (labels still in their original encoding).
data.describe()

In [None]:
# Summary statistics of the filtered table after -1 was replaced by 0.
new_data.describe()

In [None]:
# Separate the two classes and shuffle each one with a fixed seed;
# sample(frac=1) returns every row of the class in random order.
new_data_male = new_data.loc[new_data['Male'] == 1].sample(frac=1, random_state=42)
new_data_female = new_data.loc[new_data['Male'] == 0].sample(frac=1, random_state=42)

In [None]:
# The first 1500 shuffled rows of each class form the training split and the
# remaining rows form the test split; both are put back into index order.
train_data = pd.concat(
    [new_data_male[:1500], new_data_female[:1500]], axis=0
).sort_index()
test_data = pd.concat(
    [new_data_male[1500:], new_data_female[1500:]], axis=0
).sort_index()
# The per-class frames are not needed after the split.
del new_data_male, new_data_female

In [None]:
class GenderDataset(Dataset):
    """Dataset that pairs each image file with its label.

    Parameters
    ----------
    data
        Table accessed positionally through ``.iloc``: column 0 holds the image
        file name and column 1 holds the label.
    image_folder_path
        Directory that contains the image files.
    transform
        Optional callable applied to the loaded PIL image.
    """

    def __init__(self, data, image_folder_path, transform=None):
        self.data, self.image_folder_path, self.transform = (
            data,
            image_folder_path,
            transform,
        )

    def __len__(self):
        """Return the number of rows in ``data``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is opened from ``image_folder_path`` and converted to RGB, then
        passed through ``transform`` when one is set. The label is returned as a
        ``torch.long`` tensor.
        """
        file_name = self.data.iloc[idx, 0]
        label = self.data.iloc[idx, 1]

        full_path = os.path.join(self.image_folder_path, file_name)
        picture = Image.open(full_path).convert('RGB')

        if self.transform:
            picture = self.transform(picture)

        return picture, torch.tensor(label, dtype=torch.long)

In [None]:
# Define the Transformations:
# This cell rebinds the name `transforms` (originally the torchvision module) to
# the dict below, which later cells index with 'train' / 'test'. The module is
# therefore imported here under the alias T: building the pipelines through T
# keeps the cell re-runnable after the rebinding, instead of failing with
# "'dict' object has no attribute 'Compose'" on a second execution.
from torchvision import transforms as T

transforms = {
    # Training pipeline: random flip and rotation as augmentation, then the same
    # resize / crop / tensor / normalize steps as the test pipeline.
    'train': T.Compose([
        T.RandomHorizontalFlip(),
        T.Resize(256),
        T.RandomRotation(45),
        T.CenterCrop(224),
        T.ToTensor(),
        # Per-channel mean / std; these are the standard ImageNet statistics.
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    # Test pipeline: deterministic preprocessing only.
    'test': T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [None]:
# Training split: augmented images, reshuffled every epoch, batches of 32.
train_set = GenderDataset(
    train_data,
    image_folder_path=os.path.join(data_path, "Images"),
    transform=transforms['train'],
)
train_loader = DataLoader(
    train_set,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)

In [None]:
# Test split: deterministic preprocessing, fixed order, batches of 32.
test_set = GenderDataset(
    test_data,
    os.path.join(data_path, "Images"),
    transform=transforms['test'],
)
test_loader = DataLoader(
    test_set,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)

In [None]:
# Group the loaders and the sample counts by split name for the training function.
dataloaders = dict(train=train_loader, test=test_loader)
dataset_sizes = dict(train=len(train_set), test=len(test_set))

# Architecture

In [None]:
# VGG-16 built without loading weights; the last classifier layer is replaced
# by a layer with a single output unit (one logit per image).
model = models.vgg16()
final_layer_inputs = model.classifier[6].in_features  # 4096 for VGG-16
model.classifier[6] = nn.Linear(final_layer_inputs, 1)

In [None]:
# Adam over all model parameters with a learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
# Define the Loss Function:
# The model head emits a single logit per image and the evaluation cell
# thresholds sigmoid(logit), so the matching loss is binary cross-entropy on
# logits. CrossEntropyLoss cannot be used with a one-unit head: class index 1
# would be out of range. BCEWithLogitsLoss fuses the sigmoid with the loss,
# which is more numerically stable than a separate sigmoid followed by BCELoss.
# It expects float targets with the same shape as the logits.
criterion = nn.BCEWithLogitsLoss()

In [None]:
# Use the GPU when one is available; otherwise the model stays where it is.
use_gpu = torch.cuda.is_available()
model = model.to('cuda') if use_gpu else model

In [None]:
# Train the model
def train_model_vgg(model, dataloaders, dataset_sizes, criterion, optimizer, use_gpu, epochs=25):
    """Train ``model`` and report its test accuracy after every epoch.

    Parameters
    ----------
    model
        Network to optimise; its weights are updated in place.
    dataloaders
        Mapping with 'train' and 'test' iterables that yield ``(inputs, labels)``
        batches, where ``labels`` holds integer class indices.
    dataset_sizes
        Mapping with the sample count per split. Accepted for interface
        compatibility; sample counts are taken from the batches themselves.
    criterion
        Loss called as ``criterion(logits, targets)``.
    optimizer
        Optimizer bound to the parameters of ``model``.
    use_gpu
        When true, batches are moved to 'cuda'; otherwise everything runs on CPU.
    epochs
        Number of passes over the training loader.

    Returns
    -------
    The same ``model`` object, after training.

    Raises
    ------
    ValueError
        If the model emits a single logit per sample while ``criterion`` is
        ``nn.CrossEntropyLoss`` (a one-unit head has no class index 1).
    """
    device = torch.device('cuda' if use_gpu else 'cpu')
    since = time.time()
    best_acc = 0.0

    for epoch in range(epochs):
        # Training phase: enable dropout / batch-norm updates.
        model.train()
        running_loss = 0.0
        epoch_loss = 0.0
        batches_seen = 0
        for i, (inputs, labels) in enumerate(dataloaders['train'], 0):
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward + backward + optimize
            outputs = model(inputs)
            logits, targets = _align_logits_and_targets(outputs, labels, criterion)
            loss = criterion(logits, targets)
            loss.backward()
            optimizer.step()

            # Print statistics every 100 batches
            batch_loss = loss.item()
            running_loss += batch_loss
            epoch_loss += batch_loss
            batches_seen += 1
            if i % 100 == 99:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 100))
                running_loss = 0.0

        # Loaders with fewer than 100 batches never reach the periodic print
        # above, so the mean loss of the epoch is always reported as well.
        if batches_seen:
            print('[%d] mean train loss: %.3f' % (epoch + 1, epoch_loss / batches_seen))

        # Test phase: disable dropout and gradient tracking.
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in dataloaders['test']:
                images, labels = images.to(device), labels.to(device)
                predicted = _predict_classes(model(images))
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = correct / total if total else 0.0
        best_acc = max(best_acc, accuracy)
        print('Accuracy of the network on the test images: %d %%' % (
            100 * accuracy))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best Test Acc: {:4f}'.format(best_acc))

    print('Finished Training')
    return model


def _is_single_logit(outputs):
    """True when ``outputs`` has shape (batch, 1), i.e. one logit per sample."""
    return outputs.dim() == 2 and outputs.size(1) == 1


def _align_logits_and_targets(outputs, labels, criterion):
    """Return ``(logits, targets)`` in the form the loss expects for this head."""
    if not _is_single_logit(outputs):
        # Multi-class head: class-index targets are used unchanged.
        return outputs, labels
    if isinstance(criterion, nn.CrossEntropyLoss):
        raise ValueError(
            'The model emits a single logit per sample, which CrossEntropyLoss '
            'cannot train on; use nn.BCEWithLogitsLoss or a two-unit output layer.'
        )
    # Single-logit head: the loss needs float targets shaped like the logits.
    return outputs.squeeze(1), labels.float()


def _predict_classes(outputs):
    """Convert raw model outputs into integer class predictions."""
    if _is_single_logit(outputs):
        # sigmoid(z) > 0.5 is equivalent to z > 0.
        return (outputs.squeeze(1) > 0).long()
    return outputs.argmax(dim=1)

In [None]:
# Re-check GPU availability and move the model before training starts.
use_gpu = torch.cuda.is_available()

if use_gpu:
    model = model.cuda()

In [None]:
# The training function defined in this notebook is train_model_vgg. It updates
# the weights of `model` in place, so `model` is not rebound to the return value;
# the trailing ';' keeps the return value from being echoed as cell output.
train_model_vgg(model, dataloaders, dataset_sizes, criterion, optimizer, use_gpu, 25);

## Model Evaluation

In [None]:
# Optionally restore previously saved weights. The load only happens when the
# checkpoint file exists, so a fresh run that has no saved checkpoint keeps the
# weights currently held by `model` instead of stopping with a missing-file error.
checkpoint_path = '/kaggle/working/model_last.pth'
if os.path.exists(checkpoint_path):
    # map_location lets a checkpoint written on a GPU machine load on CPU too.
    checkpoint = torch.load(
        checkpoint_path,
        map_location='cuda' if torch.cuda.is_available() else 'cpu',
    )
    # The checkpoint is expected to be a dict holding the state dict under 'model'.
    model.load_state_dict(checkpoint['model'])
else:
    print(f'No checkpoint found at {checkpoint_path}; keeping the current weights.')
if torch.cuda.is_available():
    model = model.to("cuda")

In [None]:
# Collect ground-truth labels and thresholded predictions over the test loader.
# Everything is placed on the device that is actually available, so the cell
# also runs on a CPU-only machine.
eval_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Evaluation mode disables dropout in the VGG classifier; no_grad avoids
# building an autograd graph for every batch.
model.eval()
true_batches = [torch.empty(0, device=eval_device)]
pred_batches = [torch.empty(0, device=eval_device)]
with torch.no_grad():
    for inputs, labels in dataloaders['test']:
        inputs, labels = inputs.to(eval_device), labels.to(eval_device)
        outputs = model(inputs)
        # One logit per image: squeeze only the logit axis so that a batch of
        # size 1 still yields a 1-D tensor, then threshold the probability.
        preds = (torch.sigmoid(outputs).squeeze(1) > 0.5).float()
        true_batches.append(labels.float())
        pred_batches.append(preds)

y_true = torch.cat(true_batches, -1)
y_pred = torch.cat(pred_batches, -1)


In [None]:
# Move both tensors to host memory as NumPy arrays and print the per-class
# precision / recall / F1 table with four decimal places.
y_true_np, y_pred_np = (
    tensor.detach().cpu().numpy() for tensor in (y_true, y_pred)
)
report = classification_report(y_true_np, y_pred_np, digits=4)
print(report)