## Import Libraries

In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet50
import time
from torch.autograd import Variable
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

## Data Preparation

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset stored there can be read with ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Root folder of the dataset on the mounted Google Drive.
data_path = '/content/drive/MyDrive/FaceReco/Dataset'

In [None]:
# Show what the dataset folder contains (displayed as the cell's output).
os.listdir(data_path)

['gender_classification.csv',
 'class_identity.txt',
 'list_attribute.txt',
 'gender_classification.xlsx',
 'Images']

In [None]:
# File names of the images that are present in the Images folder.
images_list = os.listdir(os.path.join(data_path, 'Images'))

In [None]:
# Read list_attribute.txt, which holds the per-image attributes. Columns are
# whitespace-separated and the first line of the file is skipped.
# The separator is a raw string: a plain '\s+' is an invalid escape sequence
# and makes Python emit a SyntaxWarning.
data = pd.read_csv(os.path.join(data_path, 'list_attribute.txt'), sep=r'\s+', skiprows=1)

# Build the label table in one chain:
#   1. keep only the rows whose index (the image file name) is in images_list,
#      i.e. images that actually exist in the Images folder;
#   2. keep only the 'Male' attribute;
#   3. reset_index() turns the file-name index into a regular column, which is
#      then renamed to 'image_id';
#   4. recode 'Male' so that 1 stays 1 and every other value becomes 0
#      (the raw file presumably uses -1 for "not male" - verify against the data).
filtered_data = (
    data.loc[data.index.isin(images_list), ['Male']]
    .reset_index()
    .rename(columns={'index': 'image_id'})
    .assign(Male=lambda frame: (frame['Male'] == 1).astype(int))
)

# Later cells read the filtered table through the name 'data'.
data = filtered_data

print(data.head())
print(data.shape)
print("Unique values in 'Male' column after conversion:", data['Male'].unique())

  data = pd.read_csv(data_path+'/list_attribute.txt', sep='\s+', skiprows=1)


     image_id  Male
0  000051.jpg     1
1  000052.jpg     1
2  000352.jpg     1
3  000409.jpg     1
4  000545.jpg     1
(1768, 2)
Unique values in 'Male' column after conversion: [1 0]


In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

## Preprocessing

In [None]:
class GenderDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs for gender classification.

    Args:
        data: DataFrame whose first column holds the image file name and whose
            second column holds the integer class label.
        image_folder_path: Directory that contains the image files.
        transform: Optional callable applied to the RGB PIL image before it is
            returned.
    """

    def __init__(self, data, image_folder_path, transform=None):
        self.data = data
        self.image_folder_path = image_folder_path
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of the DataFrame)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The label is returned as a 0-dim ``torch.long`` tensor.
        """
        # Positional access, so a shuffled index left over from the train/test
        # split does not matter.
        image_path = os.path.join(self.image_folder_path, self.data.iloc[idx, 0])

        # Open inside a context manager so the underlying file handle is closed
        # immediately; convert() returns an independent in-memory RGB copy.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        gender = int(self.data.iloc[idx, 1])
        if self.transform is not None:
            image = self.transform(image)
        return image, torch.tensor(gender, dtype=torch.long)

In [None]:
# Preprocessing pipeline shared by the train and test datasets.
transform = transforms.Compose(
    [
        # Bring every image to the same 299x299 size.
        transforms.Resize((299, 299)),
        # PIL image -> float tensor.
        transforms.ToTensor(),
        # Per-channel normalisation (these look like the usual ImageNet
        # statistics - confirm they match the pretrained weights in use).
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# Training split: reshuffled every epoch.
train_set = GenderDataset(
    train_data,
    image_folder_path=os.path.join(data_path, "Images"),
    transform=transform,
)
train_loader = DataLoader(
    train_set,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)

# Test split: fixed order.
test_set = GenderDataset(
    test_data,
    image_folder_path=os.path.join(data_path, "Images"),
    transform=transform,
)
test_loader = DataLoader(
    test_set,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)

## Architecture

In [None]:
# Model, optimizer and loss function.
from torchvision import models
from torchvision.models import inception_v3

# Start from the pretrained Inception v3 and swap its classifier for a
# 2-class (female / male) output layer.
model = inception_v3(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)

# Inception v3 also has an auxiliary classifier whose output feeds the
# training loss. It must predict the same 2 classes as the main head;
# otherwise the auxiliary loss is computed against a 1000-way output.
if model.AuxLogits is not None:
    model.AuxLogits.fc = nn.Linear(model.AuxLogits.fc.in_features, 2)

# Created after the heads are replaced so the new layers are optimised too.
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()



Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth


100%|██████████| 104M/104M [00:00<00:00, 180MB/s] 


## Modeling

In [None]:
import time
from torch.autograd import Variable

def train_model(model, dataloaders, dataset_sizes, criterion, optimizer, use_gpu=torch.cuda.is_available(), num_epochs=10):
    """Train ``model`` and return it loaded with the best test-accuracy weights.

    Every epoch runs a ``'train'`` phase (weights are updated) followed by a
    ``'test'`` phase (evaluation only). The weights of the epoch with the
    highest test accuracy are snapshotted and restored before returning.

    Args:
        model: Network to train. It is not moved to a device here; the caller
            is expected to have placed it on the GPU when ``use_gpu`` is true.
        dataloaders: Mapping with keys ``'train'`` and ``'test'`` whose values
            yield ``(inputs, labels)`` batches.
        dataset_sizes: Mapping with the same keys giving the number of samples
            in each split; used to average loss and accuracy.
        criterion: Loss function called as ``criterion(logits, labels)``. The
            reported loss assumes it returns the batch mean.
        optimizer: Optimizer already bound to ``model``'s parameters.
        use_gpu: Move each batch to CUDA when true. The default is evaluated
            once, when the function is defined.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, with the best weights loaded.
    """
    import copy  # local import keeps this cell self-contained

    since = time.time()
    device = torch.device("cuda" if use_gpu else "cpu")

    # Deep copy: state_dict() returns references to the live parameters, so
    # without a copy the "best" snapshot would silently follow later updates.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and an evaluation phase.
        for phase in ['train', 'test']:
            is_train = phase == 'train'
            model.train(is_train)

            running_loss = 0.0
            running_corrects = 0

            # Iterate over the loaders passed in as an argument, not a global.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Only build the autograd graph while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    if isinstance(outputs, tuple):
                        # Models with auxiliary classifiers (e.g. Inception v3
                        # in train mode) return (logits, aux_logits, ...).
                        logits, *aux_outputs = outputs
                        loss = criterion(logits, labels)
                        for aux_logits in aux_outputs:
                            if aux_logits is not None:
                                # Auxiliary losses are down-weighted by 0.4.
                                loss = loss + 0.4 * criterion(aux_logits, labels)
                    else:
                        logits = outputs
                        loss = criterion(logits, labels)
                    preds = logits.argmax(dim=1)

                    if is_train:
                        # Back-propagate and update the weights; without these
                        # two calls the model never learns.
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so that dividing by
                # the dataset size gives a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Snapshot the weights whenever test accuracy improves.
            if not is_train and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best test Acc: {:4f}'.format(best_acc))

    # Load the best model weights.
    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Per-phase loaders and sample counts, keyed by the phase names that
# train_model iterates over.
dataloders = dict(train=train_loader, test=test_loader)
dataset_sizes = dict(train=len(train_set), test=len(test_set))

In [None]:
# Train on the GPU when one is available; otherwise leave the model where it is.
use_gpu = torch.cuda.is_available()
model = model.to("cuda") if use_gpu else model

In [None]:
# Run training for 10 epochs and keep the model returned by train_model.
model = train_model(
    model,
    dataloders,
    dataset_sizes,
    criterion,
    optimizer,
    use_gpu=use_gpu,
    num_epochs=10,
)

Epoch 0/9
----------
train Loss: 0.1312 Acc: 0.4873
test Loss: 0.0240 Acc: 0.4774

Epoch 1/9
----------
train Loss: 0.1311 Acc: 0.4760
test Loss: 0.0241 Acc: 0.4548

Epoch 2/9
----------
train Loss: 0.1312 Acc: 0.4965
test Loss: 0.0241 Acc: 0.4576

Epoch 3/9
----------
train Loss: 0.1314 Acc: 0.4745
test Loss: 0.0240 Acc: 0.4520

Epoch 4/9
----------
train Loss: 0.1315 Acc: 0.4632
test Loss: 0.0240 Acc: 0.4605

Epoch 5/9
----------
train Loss: 0.1311 Acc: 0.4993
test Loss: 0.0242 Acc: 0.4350

Epoch 6/9
----------
train Loss: 0.1315 Acc: 0.4646
test Loss: 0.0241 Acc: 0.4661

Epoch 7/9
----------
train Loss: 0.1311 Acc: 0.5057
test Loss: 0.0241 Acc: 0.4661

Epoch 8/9
----------
train Loss: 0.1316 Acc: 0.4590
test Loss: 0.0241 Acc: 0.4576

Epoch 9/9
----------
train Loss: 0.1315 Acc: 0.4894
test Loss: 0.0241 Acc: 0.4774

Training complete in 2m 1s
Best test Acc: 0.477401


## Evaluation

In [None]:
def evaluate_model(model, test_loader, target_labels):
    """Print a classification report and confusion matrix for ``model``.

    Args:
        model: Trained classifier. It is run in evaluation mode and its
            previous train/eval mode is restored afterwards.
        test_loader: Iterable yielding ``(inputs, labels)`` batches, with
            labels as integer class ids.
        target_labels: Class names ordered by class id, e.g.
            ``["female", "male"]`` for ids 0 and 1.

    Returns:
        None. The results are printed.
    """
    # Run on whichever device the model's parameters live on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    y_true, y_pred = [], []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, labels in test_loader:
                logits = model(inputs.to(device))
                y_pred.extend(logits.argmax(dim=1).cpu().tolist())
                y_true.extend(labels.cpu().tolist())
    finally:
        model.train(was_training)

    if not y_true:
        print('No samples to evaluate.')
        return

    # Passing the class ids explicitly keeps the report and the matrix aligned
    # with target_labels even if a class is missing from this split.
    class_ids = list(range(len(target_labels)))
    print(classification_report(
        y_true, y_pred, labels=class_ids, target_names=target_labels, zero_division=0))
    print('Confusion matrix (rows = true, columns = predicted):')
    print(confusion_matrix(y_true, y_pred, labels=class_ids))

In [None]:
# Evaluate the trained model on the test split; class id 0 = female, 1 = male.
evaluate_model(
    model,
    dataloders['test'],
    target_labels=["female", "male"],
)