In [1]:
!git clone https://github.com/speckean/upar_dataset.git

fatal: destination path 'upar_dataset' already exists and is not an empty directory.


In [2]:
!pip install efficientnet_pytorch -q

# Import Tools

In [3]:
import os
import pickle
import glob
import random

import numpy as np
import torch.utils.data as data
from PIL import Image

import torchvision.transforms as T

from torch.utils.data import DataLoader

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

from efficientnet_pytorch import EfficientNet

In [4]:
HOME = os.getcwd()
HOME

'C:\\Users\\USER\\Downloads\\LabML\\FashionDetection(Week3)\\UPAR(Soft-Biometric-Classification)'

# Dataset

## Get a data

In [5]:
!git clone https://github.com/speckean/upar_challenge.git
%cd upar_challenge
!pip install tqdm gdown requests

C:\Users\USER\Downloads\LabML\FashionDetection(Week3)\UPAR(Soft-Biometric-Classification)\upar_challenge


fatal: destination path 'upar_challenge' already exists and is not an empty directory.




In [6]:
!python3 download_datasets.py
%cd {HOME}
!mv upar_challenge/data .

C:\Users\USER\Downloads\LabML\FashionDetection(Week3)\UPAR(Soft-Biometric-Classification)


Traceback (most recent call last):
  File "download_datasets.py", line 9, in <module>
    import gdown
ModuleNotFoundError: No module named 'gdown'
'mv' is not recognized as an internal or external command,
operable program or batch file.


In [7]:
current_dir_path = 'data/PA100k/release_data/release_data'
new_dir_path = 'data/PA100k/data'


if os.path.exists(current_dir_path):
    if not os.path.exists(new_dir_path):
        os.rename(current_dir_path, new_dir_path)
        print(f"Directory renamed from '{current_dir_path}' to '{new_dir_path}'")

    else:
        print(f"The target directory '{new_dir_path}' already exists.")
else:
    print(f"The directory '{current_dir_path}' does not exist.")

The directory 'data/PA100k/release_data/release_data' does not exist.


## Construct dataset/dataloader

In [8]:

class UPAR(data.Dataset):
  """
  Load UPAR dataset from pickle file

  split: whether to use train/val/trainval/test split
  partition: partition id 0-9
  root: path to datasets, original datasets must be in this directory
  data_path: path to UPAR pickle file
  transform: training data transforms
  target_transforms: evaluation data transforms
  """
  def __init__(self, split='train', partition=0, root=HOME, data_path='upar_dataset/UPAR/dataset_all.pkl', transform=None, target_transform=None):
    dataset_info = pickle.load(open(data_path, 'rb+'))
    self.dataset_info = dataset_info

    img_id = dataset_info.image_name
    attr_label = dataset_info.label

    assert split in dataset_info.partition.keys(), f'split {split} does not exist'

    self.dataset = 'UPAR'
    self.transform = transform  # data transforms during training
    self.target_transform = target_transform  # data transforms during testing

    self.root_path = root+"/data"  # path to datasets

    self.attr_id = dataset_info.attr_name  # attribute names
    self.attr_num = len(self.attr_id)  # number of attributes

		# load partition
    self.img_idx = dataset_info.partition[split]

    if isinstance(self.img_idx, list):
      self.img_idx = self.img_idx[partition]

    if isinstance(self.img_idx, list):
      self.img_idx = np.hstack(self.img_idx)

    self.img_idx = np.array([i for i in self.img_idx if not any(folder in img_id[i] for folder in ["RAP"])])


    self.img_num = self.img_idx.shape[0]
    self.img_id = [img_id[i] for i in self.img_idx]
    self.label = attr_label[self.img_idx]

    # set sub-dataset lengths to enable evaluation on sub-datasets
    self.sub_dataset_lengths = [len(d) for d in dataset_info.partition.test[partition]]


  def __getitem__(self, index):
      """
      get dataset item by index

      index: item index
      return: image data (img) with corresponding ground truth labels (gt_label), dataset id (did), and image path (imgname)
      """
      imgname, gt_label, imgidx = self.img_id[index], self.label[index], self.img_idx[index]
      did = self.dataset_info.dataset_ids[imgidx]
      imgpath = os.path.join(self.root_path, imgname)
      img = Image.open(imgpath)

      if self.transform is not None:
          img = self.transform(img)

      gt_label = gt_label.astype(np.float32)

      if self.target_transform is not None:
          gt_label = self.transform(gt_label)

      return img, gt_label, did, imgname


  def __len__(self):
    """
    get length of dataset
    """
    return len(self.img_id)


def get_transform(height, width):
    normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_transform = [
        T.Resize((height, width))
    ]

    train_transform += [
        T.Pad(10),
        T.RandomCrop((height, width)),
        T.RandomHorizontalFlip(),
    ]

    train_transform += [
        T.ToTensor(),
        normalize,
    ]
    train_transform = T.Compose(train_transform)

    valid_transform = T.Compose([
        T.Resize((height, width)),
        T.ToTensor(),
        normalize
    ])

    return train_transform, valid_transform

In [9]:
height = 224  # Example height
width = 224   # Example width
train_transform, valid_transform = get_transform(height, width)


# For training
train_dataset = UPAR(split='train', partition=0, transform=train_transform)

# For validation
val_dataset = UPAR(split='val', partition=0, transform=valid_transform)

# For testing
test_dataset = UPAR(split='test', partition=0, transform=valid_transform)


In [10]:
batch_size = 32  # Example batch size

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model Baseline

In [11]:
class UPAREfficientNetModel(nn.Module):
    def __init__(self, num_attributes, version='efficientnet-b0'):
        super(UPAREfficientNetModel, self).__init__()
        self.efficientnet = EfficientNet.from_pretrained(version)
        feature_size = self.efficientnet._fc.in_features
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(feature_size, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, num_attributes),
            nn.Sigmoid()
        )
        
        # Remove the original classifier
        self.efficientnet._fc = nn.Identity()

    def forward(self, x):
        x = self.efficientnet(x)
        x = self.classifier(x)
        return x

In [12]:
num_attributes = 40  # Number of attributes/classes
model = UPAREfficientNetModel(num_attributes=num_attributes, version='efficientnet-b0')

loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

Loaded pretrained weights for efficientnet-b0


In [13]:
def train_one_epoch(model, dataloader, loss_fn, optimizer, device):
    model.train()
    total_loss = 0
    for batch, (X, y, _, _) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        if batch % 100 == 0:
            print(f"Batch {batch}, Loss: {loss.item()}")

    avg_loss = total_loss / len(dataloader)
    print(f"Average Training Loss: {avg_loss}")
    return avg_loss

def validate(model, dataloader, loss_fn, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for X, y, _, _ in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            loss = loss_fn(pred, y)
            total_loss += loss.item()

    avg_loss = total_loss / len(dataloader)
    print(f"Average Validation Loss: {avg_loss}")
    return avg_loss

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

epochs = 10

for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    print("-" * 10)
    train_loss = train_one_epoch(model, train_loader, loss_fn, optimizer, device)
    val_loss = validate(model, val_loader, loss_fn, device)


Epoch 1/10
----------
Batch 0, Loss: 0.888163685798645
Batch 100, Loss: 0.6714040040969849
Batch 200, Loss: 0.670503556728363
Batch 300, Loss: 0.6674556136131287
Batch 400, Loss: 0.668674886226654
Batch 500, Loss: 0.6675630807876587
Batch 600, Loss: 0.6666461825370789
Batch 700, Loss: 0.6657683253288269
Batch 800, Loss: 0.6662879586219788
Batch 900, Loss: 0.6638235449790955
Batch 1000, Loss: 0.6654782891273499
Batch 1100, Loss: 0.663582444190979
Batch 1200, Loss: 0.6669347882270813
Batch 1300, Loss: 0.6680306792259216
Batch 1400, Loss: 0.6697019934654236
Batch 1500, Loss: 0.6734623908996582
Batch 1600, Loss: 0.6631010174751282
Batch 1700, Loss: 0.667633056640625
Batch 1800, Loss: 0.6623842120170593
Batch 1900, Loss: 0.6694939732551575
Batch 2000, Loss: 0.6625273823738098
Batch 2100, Loss: 0.6639817357063293
Batch 2200, Loss: 0.6697638630867004
Batch 2300, Loss: 0.6644483804702759
Batch 2400, Loss: 0.6661810874938965
Batch 2500, Loss: 0.6712441444396973
Batch 2600, Loss: 0.6692441701889

In [14]:
model_save_path = "eff_model.pth"
torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")

Model saved to eff_model.pth
