In [1]:
import random
from tqdm import tqdm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ast
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision
from torchvision import models, transforms

In [2]:
DATADIR = '/kaggle/input/ai-challenge/data/mnt/md0/projects/sami-hackathon/private/data/'
device = 'cuda' if torch.cuda.is_available() else 'cpu' 
image_size = 224
train_transform = transforms.Compose([transforms.Resize((image_size, image_size)),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                           std = [0.225, 0.225, 0.225])])
val_transform = transforms.Compose([transforms.Resize((image_size, image_size)),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                         std = [0.225, 0.225, 0.225])])

df = pd.read_csv('/kaggle/input/ai-challenge/labels.csv')
df[df.file_name == 'image_4503.jpg']

Unnamed: 0,file_name,height,width,bbox,age,race,masked,skintone,emotion,gender
14777,image_4503.jpg,1024,1024,"[261.2020202020202, 147.9292929292934, 618.696...",Baby,Mongoloid,unmasked,light,Neutral,Female


In [3]:
image_paths = [DATADIR + path for path in list(df.file_name)]
bbox = [[float(i.strip()) for i in box.replace("[", "").replace("]", "").split(',')] for box in df.bbox]
age = list(df.age)
race = list(df.race)
masked = [1 if i == 'masked' else 0 for i in list(df.masked)]
skin = list(df.skintone)
emotion = list(df.emotion)
gender = list(df.gender)

In [4]:
def convert2dict(li):
    values, counts = np.unique(li, return_counts=True)
    result_dict = {value: count for value, count in zip(values, counts)}
    return result_dict

def plot_valcount(di, name):
    keys = list(di.keys())
    values = list(di.values())

    # Plot the figure
    fig, ax = plt.subplots()
    ax.bar(keys, values)
    ax.set_title(name)

    plt.show()


data_dict = {
    'Age': convert2dict(age),
    'Race': convert2dict(race),
    'Masked': convert2dict(masked),
    'Skin': convert2dict(skin),
    'Emotion': convert2dict(emotion),
    'Gender': convert2dict(gender),
}

data_dict

{'Age': {'20-30s': 11236,
  '40-50s': 1602,
  'Baby': 345,
  'Kid': 954,
  'Senior': 637,
  'Teenager': 536},
 'Race': {'Caucasian': 7106, 'Mongoloid': 7487, 'Negroid': 717},
 'Masked': {0: 14806, 1: 504},
 'Skin': {'dark': 339, 'light': 10485, 'mid-dark': 798, 'mid-light': 3688},
 'Emotion': {'Anger': 319,
  'Disgust': 132,
  'Fear': 114,
  'Happiness': 9218,
  'Neutral': 4844,
  'Sadness': 380,
  'Surprise': 303},
 'Gender': {'Female': 10522, 'Male': 4788}}

## Dataframe for skin data

In [None]:
dark_df = pd.concat([df[df.skintone == 'dark']]*4)
middark_df = pd.concat([df[df.skintone == 'mid-dark']]*2)
midlight_df = df[df.skintone == 'mid-light']
light_df = df[df.skintone == 'light']
len(dark_df), len(light_df)

In [5]:
young_df = df[df.age == '20-30s']
old_df = df[df.age == '40-50s']
baby_df = pd.concat([df[df.age == 'Baby']]*5)
kid_df = pd.concat([df[df.age == 'Kid']]*2)
senior_df= pd.concat([df[df.age == 'Senior']]*3)
teen_df = pd.concat([df[df.age == 'Teenager']]*4)
young_df.shape[0], old_df.shape[0], baby_df.shape[0], kid_df.shape[0], senior_df.shape[0],teen_df.shape[0]

(11236, 1602, 1725, 1908, 1911, 2144)

# Get face with bounding box

In [6]:
from math import ceil
def get_coordinate(box):
    x1 = box[0]
    y1 = box[1]
    x2 = box[0] + box[2]
    y2 = box[1] + box[3]
    return (x1,y1,x2,y2)

def get_crop_face(img, box):
    x1,y1,x2,y2 = get_coordinate(box)
    x1,y1,x2,y2 = ceil(x1), ceil(y1), ceil(x2), ceil(y2)
    return img[y1:y2, x1:x2, :]

# Preprocess data

In [7]:
def shuffle_with_permutation(lst, permutation):
        combined = sorted(zip(lst, permutation), key=lambda x: x[1])
        shuffled_list = [item[0] for item in combined]
        return shuffled_list

## Masked data

In [None]:
class MaskedDataset(Dataset):
    def __init__(self, transform):
        super().__init__()
        unmasked_list, masked_list = [], []
        idx = []
    
        for i in range(len(masked)):
            if masked[i] == 1:
                masked_list.append(masked[i])
                idx.append(i)
        
        random_idx = np.random.choice(range(len(masked)), size=len(idx)*2, replace=False)
        idx.extend(list(random_idx))
        idx = random.sample(idx, len(idx))
        self.image_paths = [image_paths[i] for i in idx]
        self.bbox = [bbox[i] for i in idx]
        self.label = [masked[i] for i in idx]
        self.transform = transform
    
    def __len__(self):
        return len(self.label)

    def __getitem__(self, index):
        path = self.image_paths[index]
        image = np.array(Image.open(path))
        box = self.bbox[index]
        crop_img = get_crop_face(image, box)
        face = Image.fromarray(crop_img)
        print(path)
        label = self.label[index]
        if label == 1:
            face = self.transform(face)
        else:
            face = val_transform(face)
        return face, label
    
masked_data = MaskedDataset(train_transform)

## RaceSkin data

In [None]:
def process_race_label(label):
    if (label=='Caucasian'):
        return 0
    elif (label=='Mongoloid'):
        return 1
    else:
        return 2
def process_skin_label(label):
    if (label=='dark'):
        return 0
    elif (label=='mid-dark'):
        return 1
    elif (label=='light'):
        return 2
    else:
        return 3

class RaceSkinDataset(Dataset):
    def __init__(self, transform):
        super().__init__()
        image_paths, bbox = [], []
        skin, race = [], []
        
        for i in range(dark_df.shape[0]):
            image_paths.append(DATADIR + dark_df.iloc[i]['file_name'])
            bbox.append(dark_df.iloc[i]['bbox'])
            skin.append(process_skin_label(dark_df.iloc[i]['skintone']))
            race.append(process_race_label(dark_df.iloc[i]['race']))

        for i in range(middark_df.shape[0]):
            image_paths.append(DATADIR + middark_df.iloc[i]['file_name'])
            bbox.append(middark_df.iloc[i]['bbox'])
            skin.append(process_skin_label(middark_df.iloc[i]['skintone']))
            race.append(process_race_label(middark_df.iloc[i]['race']))

        exclude = []
        for i in range(middark_df.shape[0]):
            id = random.randint(0, midlight_df.shape[0] - 1)
            while id in exclude:
                id = random.randint(0, midlight_df.shape[0] - 1)
            exclude.append(id)
            image_paths.append(DATADIR + midlight_df.iloc[id]['file_name'])
            bbox.append(midlight_df.iloc[id]['bbox'])
            skin.append(process_skin_label(midlight_df.iloc[id]['skintone']))
            race.append(process_race_label(midlight_df.iloc[id]['race']))

        exclude = []
        for i in range(middark_df.shape[0]):
            id = random.randint(0, light_df.shape[0] - 1)
            while id in exclude:
                id = random.randint(0, light_df.shape[0] - 1)
            exclude.append(id)
            image_paths.append(DATADIR + light_df.iloc[id]['file_name'])
            bbox.append(light_df.iloc[id]['bbox'])
            skin.append(process_skin_label(light_df.iloc[id]['skintone']))
            race.append(process_race_label(light_df.iloc[id]['race']))
        
        idx = [i for i in range(len(image_paths))]
        random.shuffle(idx)
        self.image_paths = shuffle_with_permutation(image_paths, idx)
        self.bbox = shuffle_with_permutation(bbox, idx)
        self.skin = shuffle_with_permutation(skin, idx)
        self.race = shuffle_with_permutation(race, idx)
        self.transform = transform
    
    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index):
        path = self.image_paths[index]
        image = np.array(Image.open(path))
        box = self.bbox[index]
        box = [float(i.strip()) for i in box.replace("[", "").replace("]", "").split(',')]
        crop_img = get_crop_face(image, box)
        face = Image.fromarray(crop_img)
        race = self.race[index]
        skin = self.skin[index]
        face = self.transform(face)
        return face, race, skin
    
raceskin_data = RaceSkinDataset(train_transform)

In [None]:
face, mask = masked_data[156]
mask

## Age data

In [8]:
def process_age_label(label):
    if (label=='20-30s'):
        return 0
    elif (label=='40-50s'):
        return 1
    elif (label=='Baby'):
        return 2
    elif (label=='Kid'):
        return 3
    elif (label=='Senior'):
        return 4
    else:
        return 5

class AgeDataset(Dataset):
    def __init__(self, transform):
        super().__init__()
        image_paths, bbox, age = [], [], []
        
        for i in range(teen_df.shape[0]):
            image_paths.append(DATADIR + teen_df.iloc[i]['file_name'])
            bbox.append(teen_df.iloc[i]['bbox'])
            age.append(process_age_label(teen_df.iloc[i]['age']))

        for i in range(old_df.shape[0]):
            image_paths.append(DATADIR + old_df.iloc[i]['file_name'])
            bbox.append(old_df.iloc[i]['bbox'])
            age.append(process_age_label(old_df.iloc[i]['age']))
            
        for i in range(baby_df.shape[0]):
            image_paths.append(DATADIR + baby_df.iloc[i]['file_name'])
            bbox.append(baby_df.iloc[i]['bbox'])
            age.append(process_age_label(baby_df.iloc[i]['age']))
            
        for i in range(kid_df.shape[0]):
            image_paths.append(DATADIR + kid_df.iloc[i]['file_name'])
            bbox.append(kid_df.iloc[i]['bbox'])
            age.append(process_age_label(kid_df.iloc[i]['age']))
            
        for i in range(senior_df.shape[0]):
            image_paths.append(DATADIR + senior_df.iloc[i]['file_name'])
            bbox.append(senior_df.iloc[i]['bbox'])
            age.append(process_age_label(senior_df.iloc[i]['age']))
            
        exclude = []
        for i in range(teen_df.shape[0]):
            id = random.randint(0, teen_df.shape[0] - 1)
            while id in exclude:
                id = random.randint(0, teen_df.shape[0] - 1)
            exclude.append(id)
            image_paths.append(DATADIR + teen_df.iloc[id]['file_name'])
            bbox.append(teen_df.iloc[id]['bbox'])
            age.append(process_age_label(teen_df.iloc[id]['age']))
        
        idx = [i for i in range(len(image_paths))]
        random.shuffle(idx)
        self.image_paths = shuffle_with_permutation(image_paths, idx)
        self.bbox = shuffle_with_permutation(bbox, idx)
        self.age = shuffle_with_permutation(age, idx)
        self.transform = transform
    
    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index):
        path = self.image_paths[index]
        image = np.array(Image.open(path))
        box = self.bbox[index]
        box = [float(i.strip()) for i in box.replace("[", "").replace("]", "").split(',')]
        crop_img = get_crop_face(image, box)
        face = Image.fromarray(crop_img)
        age = self.age[index]
        face = self.transform(face)
        return face, age

age_data = AgeDataset(train_transform)

In [None]:
face1, age = age_data[59]
face1, age

- (classifier): Sequential(
    - (0): Linear(in_features=25088, out_features=4096, bias=True)
    - (1): ReLU(inplace=True)
    - (2): Dropout(p=0.5, inplace=False)
    - (3): Linear(in_features=4096, out_features=4096, bias=True)
    - (4): ReLU(inplace=True)
    - (5): Dropout(p=0.5, inplace=False)
    - (6): Linear(in_features=4096, out_features=1000, bias=True)
- )

In [10]:
class AgeModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = torch.load('/kaggle/working/backbone.pt')
        self.pool = nn.Sequential(nn.AdaptiveAvgPool2d(output_size=(7, 7)),
                                  nn.Flatten())
        self.fc = nn.Sequential(nn.Linear(in_features=25088, out_features=4096, bias=True),
                                nn.ReLU(inplace=True),
                                nn.Dropout())
        self.fc.load_state_dict(torch.load('/kaggle/working/fc.pth'))
        self.agecls = nn.Linear(in_features=4096, out_features=6, bias=False)
        self.agecls.load_state_dict(torch.load('/kaggle/working/age.pth'))
        
    def forward(self, x):
        out = self.pool(self.backbone(x))
        return self.agecls(self.fc(out))

amodel = AgeModel().to(device)
for p in amodel.backbone.parameters():
    p.requires_grad = False
for p in amodel.fc.parameters():
    p.requires_grad = False

# Masked model

In [None]:
class TestModel(nn.Module):
    def __init__(self, nrace=3, nskin=4):
        super().__init__()
        self.backbone = torch.load('/kaggle/working/backbone.pt', map_location=torch.device('cpu'))
        self.pool = nn.Sequential(nn.AdaptiveAvgPool2d(output_size=(7, 7)),
                                  nn.Flatten())
        self.fc = nn.Sequential(nn.Linear(in_features=25088, out_features=4096, bias=True),
                                nn.ReLU(inplace=True),
                                nn.Dropout())
        self.fc.load_state_dict(torch.load('/kaggle/working/fc.pth', map_location=torch.device('cpu')))
        
        self.racecls = nn.Linear(in_features=4096, out_features=nrace, bias=False)
        self.racecls.load_state_dict(torch.load('/kaggle/working/race.pth', map_location=torch.device('cpu')))
        self.skincls = nn.Linear(in_features=4096, out_features=nskin, bias=False) 
        self.skincls.load_state_dict(torch.load('/kaggle/working/skin.pth', map_location=torch.device('cpu')))
        
#         self.fc = torch.load('/kaggle/working/fc.pt')
#         self.pool = nn.Sequential(nn.AdaptiveAvgPool2d(output_size=(7, 7)),
#                                   nn.Flatten())
#         self.racecls = torch.load('/kaggle/working/race.pt')
#         self.skincls = torch.load('/kaggle/working/skin.pt')
    
    def forward(self, x):
        out = self.pool(self.backbone(x))
        return self.racecls(self.fc(out)), self.skincls(self.fc(out))

    
# tmodel = TestModel().to(device)
tmodel2 = TestModel().to(device)
# for param in tmodel.backbone.parameters():
#     param.requires_grad = False

# Main model

In [None]:
class MainModel(nn.Module):
    def __init__(self, nrace=3, nskin=4):
        super().__init__()
        self.backbone = torch.load('/kaggle/working/backbone.pt')
        self.pool = nn.Sequential(nn.AdaptiveAvgPool2d(output_size=(7, 7)),
                                  nn.Flatten())
        self.fc = nn.Sequential(
                                nn.Linear(in_features=25088, out_features=4096, bias=True),
                                nn.ReLU(inplace=True),
                                nn.Dropout())
        self.fc.load_state_dict(torch.load('/kaggle/working/fc.pth'))
        self.racecls = nn.Linear(in_features=4096, out_features=nrace, bias=False)
        self.racecls.load_state_dict(torch.load('/kaggle/working/race.pth'))
        self.skincls = nn.Linear(in_features=4096, out_features=nskin, bias=False) 
        self.skincls.load_state_dict(torch.load('/kaggle/working/skin.pth'))
        self.mask = torch.load('/kaggle/working/masked.pt')
        self.gender = torch.load('/kaggle/input/gender-pt/gender.pt')
        self.age = nn.Linear(in_features=4096, out_features=6, bias=False)
        self.age.load_state_dict(torch.load('/kaggle/working/age.pth'))
    
    def forward(self, x):
        out = self.pool(self.backbone(x))
        return self.mask(out), self.gender(out), self.racecls(self.fc(out)), \
                self.skincls(self.fc(out)), self.age(self.fc(out))

main_model = MainModel().to(device)

In [None]:
import torchinfo
torchinfo.summary(main_model, input_size=(1,3,image_size,image_size),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

In [None]:
out = main_model(face1.unsqueeze(0).to(device))
def printlabel(out):
    mask = 'Unmasked' if torch.sigmoid(out[0]) < 0.5 else 'Masked'
    gender = 'Female' if torch.sigmoid(out[1]) < 0.5 else 'Male'
    race = torch.argmax(out[2])
    skin = torch.argmax(out[3])
    age = torch.argmax(out[4])
    return mask, gender, race, skin, age
print(printlabel(out))
plt.imshow(face1.permute(1,2,0).numpy())

In [11]:
## dataloader
def get_dataloaders(ds, lengths=[0.9, 0.1], batch_size=32, seed=42):
    train_set, val_set = random_split(ds, lengths=lengths, generator=torch.Generator().manual_seed(seed))

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
    return train_loader, val_loader

train_loader, val_loader = get_dataloaders(age_data)

In [None]:
torch.sum(torch.argmax(y_hat1, dim=1) == y1).item()

In [12]:
def model_step(batch, model, loss_func, device):
    x, y = batch
    x, y = x.to(device), y.to(device)
    y_hat = model(x).squeeze()
    loss = loss_func(y_hat, y)
    return loss, y_hat, y

def optimizer_step(optimizer, scaler, loss):
    optimizer.zero_grad()
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()

def evaluation(model, loader, loss_func, scheduler, epoch, device, stage='Val'):
    model.eval()
    total, total_loss, total_acc = 0, 0, 0
    pbar = tqdm(loader)
    with torch.no_grad():
        for batch in pbar:
            loss, y_hat, y = model_step(batch, model, loss_func, device)
            total += len(y)
            total_loss += loss.item()
            total_acc += torch.sum(torch.argmax(y_hat, dim=1) == y).item()
            pbar.set_description(f"Epoch {epoch} {stage} | Loss = {total_loss/total:.4f} | Acc = {total_acc*100/total:.2f}")
        scheduler.step(total_loss/total)
    return total_loss/total

def train_epoch(model, train_loader, loss_func, optimizer, scaler, epoch, device):
    model.train()
    pbar = tqdm(train_loader)
    total, total_loss, total_acc = 0, 0, 0
    for batch in pbar:
        loss, y_hat, y = model_step(batch, model, loss_func, device)
        optimizer_step(optimizer, scaler, loss)
        total += len(y)
        total_loss += loss.item()
        total_acc += torch.sum(torch.argmax(y_hat, dim=1) == y).item()
        pbar.set_description(f"Epoch {epoch} Train | Loss = {total_loss/total:.4f} | Acc = {total_acc*100/total:.2f}")
    return total_loss/total

def train(model, train_loader, val_loader, loss_func, optimizer, scaler,
          scheduler, max_epochs=10, device=device, early_stop=False):
    for epoch in range(max_epochs):
        ## train loop
        train_epoch(model, train_loader, loss_func, optimizer, scaler, epoch, device)
        ## val loop
        val_loss = evaluation(model, val_loader, loss_func, scheduler, epoch, device)

In [13]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=amodel.parameters(), lr=1e-3)
scaler = torch.cuda.amp.GradScaler()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)

In [None]:
train(amodel, train_loader, val_loader, loss_fn, optimizer, scaler,
      scheduler, max_epochs=5, device=device)

Epoch 0 Train | Loss = 0.0451 | Acc = 46.40: 100%|██████████| 322/322 [07:47<00:00,  1.45s/it]
Epoch 0 Val | Loss = 0.0367 | Acc = 54.16: 100%|██████████| 36/36 [00:51<00:00,  1.42s/it]
Epoch 1 Train | Loss = 0.0409 | Acc = 49.52: 100%|██████████| 322/322 [07:42<00:00,  1.44s/it]
Epoch 1 Val | Loss = 0.0360 | Acc = 55.29: 100%|██████████| 36/36 [00:51<00:00,  1.44s/it]
Epoch 2 Train | Loss = 0.0388 | Acc = 52.63:  29%|██▉       | 94/322 [02:15<05:28,  1.44s/it]

In [None]:
torch.save(agemodel.agecls.state_dict(), '/kaggle/working/age.pth')
print('Save success')

In [None]:
def masked_inference(
    model: torch.nn.Module,
    image_path: str,
    box, 
    image_size = (224, 224),
    transform: torchvision.transforms = None,
    device: torch.device = device):

    img = plt.imread(image_path)
    face = Image.fromarray(get_crop_face(img, box))
    
    if transform is not None:
        image_transform = transform
    else:
        image_transform = transforms.Compose([transforms.Resize(image_size),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                                         std=[0.229, 0.224, 0.225])])

    model.eval()
    with torch.inference_mode():
        transformed_image = image_transform(face).unsqueeze(dim=0)
        logit = model(transformed_image.to(device))

    pred = torch.sigmoid(logit).item()
    label = 'unmasked' if pred < 0.5 else 'masked'
    plt.figure()
    plt.imshow(face)
    plt.title(
        f"Pred: {label}"
    )
    plt.axis(False)
    plt.show()
    return label

# Predict

In [9]:
import cv2
import os
import pandas as pd
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image

!pip install ultralytics
from ultralytics import YOLO

In [29]:
image_size = 224
device = 'cuda' if torch.cuda.is_available() else 'cpu'
transform = transforms.Compose([transforms.Resize((image_size, image_size)),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                        std = [0.225, 0.225, 0.225])])
race_dict = {0:'Caucasian', 1:'Mongoloid', 2:'Negroid'}
skin_dict = {0:'dark', 1:'mid-dark', 2:'light', 3:'mid-light'}
age_dict = {0:'20-30s', 1:'40-50s', 2:'Baby', 3:'Kid', 4:'Senior', 5:'Teenager'}
emotion_dict = {
        0: 'Happiness',
        1: 'Neutral',
        2: 'Surprise',
        3: 'Sadness',
        4: 'Anger',
        5: 'Disgust',
        6: 'Fear'
    }

class MainModel(nn.Module):
    def __init__(self, nrace=3, nskin=4):
        super().__init__()
        
        self.backbone = torch.load('./backbone.pt')
        self.pool = nn.Sequential(nn.AdaptiveAvgPool2d(output_size=(7, 7)),
                                  nn.Flatten())
        self.fc = nn.Sequential(
                                nn.Linear(in_features=25088, out_features=4096, bias=True),
                                nn.ReLU(inplace=True),
                                nn.Dropout())
        self.fc.load_state_dict(torch.load('./fc.pth'))
        self.race = nn.Linear(in_features=4096, out_features=nrace, bias=False)
        self.race.load_state_dict(torch.load('./race.pth'))
        self.skin = nn.Linear(in_features=4096, out_features=nskin, bias=False) 
        self.skin.load_state_dict(torch.load('./skin.pth'))
        self.mask = torch.load('./masked.pt')
        self.gender = torch.load('/kaggle/input/gender-pt/gender.pt')
        self.age = nn.Linear(in_features=4096, out_features=6, bias=False)
        self.age.load_state_dict(torch.load('./age.pth'))
        self.emotion = nn.Linear(in_features=4096, out_features=7, bias=False)
        self.emotion.load_state_dict(torch.load('/kaggle/input/emotion/emotion.pth'))
    
    def forward(self, x):
        out = self.pool(self.backbone(x))
        return self.race(self.fc(out)), self.age(self.fc(out)), self.emotion(self.fc(out)), self.gender(out),  \
                self.skin(self.fc(out)), self.mask(out)

def predict(model, yolo, image_path):
    res = yolo(image_path)
    box = res[0].boxes[0].xyxy.cpu().numpy()[0]
    img = cv2.imread(image_path)
    x1,y1,x2,y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
    crop_img = img[y1:y2, x1:x2, :]
    input = transform(Image.fromarray(crop_img))
    out = model(input.unsqueeze(0).to(device))
    race = race_dict[torch.argmax(out[0]).item()]
    age = age_dict[torch.argmax(out[1]).item()]
    emotion = emotion_dict[torch.argmax(out[2]).item()] 
    gender = 'Female' if torch.sigmoid(out[3]) < 0.5 else 'Male'
    skintone = skin_dict[torch.argmax(out[4]).item()]
    masked = 'Unmasked' if torch.sigmoid(out[5]) < 0.5 else 'Masked'
    return box, race, age, emotion, gender, skintone, masked
    
model = MainModel().to(device)
yolo = YOLO('/kaggle/input/yolov8/best.pt').to(device)

In [24]:
import json
id_dict = pd.read_json('/kaggle/input/image-id/file_name_to_image_id.json', lines=True)
id_dict.loc[0]['100429351.jpg']

2

In [None]:
test_dir = '/kaggle/input/test-data/public_test'
test_paths = os.listdir(test_dir)
file_name, bbox, image_id, races, ages, emotions, genders, skintones, maskeds =[],[],[],[],[],[],[],[],[]
for path in test_paths:
    file_name.append(path)
    image_id.append(id_dict.loc[0][path])
    img_path = test_dir + '/' + path
    box, race, age, emotion, gender, skintone, masked = predict(model, yolo, img_path)
    bbox.append(box)
    races.append(race)
    ages.append(age)
    emotions.append(emotion)
    genders.append(gender)
    skintones.append(skintone)
    maskeds.append(masked)
answer_df = pd.DataFrame({'file_name':file_name, 
                          'bbox':bbox, 
                          'image_id':image_id, 
                          'race':races, 
                          'age':ages, 
                          'emotion':emotions, 
                          'gender':genders, 
                          'skintone':skintones, 
                          'masked':maskeds})
answer_csv = answer_df.to_csv('./answer.csv')