In [1]:
import sys
import os
sys.path
sys.path.append('..')
import pandas as pd
import numpy as np
import torch
import pytorch_lightning as pl

from utils import MultiLabelImageDataset, compute_class_weights

from utils import EfficientNetMultiOutput, UnfreezeCallback_EffNet

from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint


print(f"Is CUDA available: {torch.cuda.is_available()}")
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

Is CUDA available: True


In [2]:
path_data = '../data/merged'
path_labels = '../data/merged/dataset_labels.csv'
df = pd.read_csv(path_labels)

In [3]:
df

Unnamed: 0,filename,gender,race,age_group
0,face0_0_0_8.jpg,0,0,8
1,face100000_0_2_1.jpg,0,2,1
2,face100002_1_0_3.jpg,1,0,3
3,face100003_1_4_3.jpg,1,4,3
4,face100004_1_4_1.jpg,1,4,1
...,...,...,...,...
100980,face99998_0_4_1.jpg,0,4,1
100981,face99999_0_4_2.jpg,0,4,2
100982,face999_1_0_2.jpg,1,0,2
100983,face99_0_3_2.jpg,0,3,2


In [4]:
torch.set_float32_matmul_precision('medium')

In [5]:
train_df, remaining_df = train_test_split(df, test_size=0.30, random_state=42, stratify=df[['gender', 'race', 'age_group']])

val_df, test_df = train_test_split(remaining_df, test_size=0.5, random_state=42, stratify=remaining_df[['gender', 'race', 'age_group']])

print(f"Training set size: {len(train_df)}")
print(f"Validation set size: {len(val_df)}")
print(f"Test set size: {len(test_df)}")

Training set size: 70689
Validation set size: 15148
Test set size: 15148


In [6]:
from sklearn.utils import resample
def downsample_dataframe(df, target_column):
    """
    Downsamples the majority classes in the dataframe to balance the classes in the target column.
    
    Args:
    - df: The pandas DataFrame that contains the data.
    - target_column: The column name of the target variable (e.g., 'gender', 'race', 'age_group').
    
    Returns:
    - A DataFrame with balanced classes.
    """
    min_class_size = df[target_column].value_counts().min()
    
    dfs = [resample(group, replace=False, n_samples=min_class_size, random_state=42) 
           for _, group in df.groupby(target_column)]
    
    balanced_df = pd.concat(dfs)
    
    return balanced_df


In [7]:
df_balanced_gender = downsample_dataframe(train_df, 'gender')
df_balanced_race = downsample_dataframe(train_df, 'race')
df_balanced_age = downsample_dataframe(train_df, 'age_group')

In [8]:
df_balanced = df_balanced_gender.merge(df_balanced_race, on='filename').merge(df_balanced_age, on='filename')

In [9]:
df_balanced

Unnamed: 0,filename,gender_x,race_x,age_group_x,gender_y,race_y,age_group_y,gender,race,age_group
0,face32656_0_1_6.jpg,0,1,6,0,1,6,0,1,6
1,face18308_0_0_6.jpg,0,0,6,0,0,6,0,0,6
2,face71590_0_4_2.jpg,0,4,2,0,4,2,0,4,2
3,face115805_0_0_4.jpg,0,0,4,0,0,4,0,0,4
4,face93031_0_2_0.jpg,0,2,0,0,2,0,0,2,0
...,...,...,...,...,...,...,...,...,...,...
9541,face10216_1_0_0.jpg,1,0,0,1,0,0,1,0,0
9542,face120220_1_2_4.jpg,1,2,4,1,2,4,1,2,4
9543,face116060_1_1_2.jpg,1,1,2,1,1,2,1,1,2
9544,face42544_1_3_8.jpg,1,3,8,1,3,8,1,3,8


In [11]:
class_weights = compute_class_weights(train_df)
print(class_weights)

{'gender': tensor([0.9678, 1.0345]), 'race': tensor([0.8337, 1.3167, 0.8135, 1.3530, 0.9321]), 'age': tensor([4.2410, 1.0074, 1.1154, 0.3686, 0.5241, 0.9648, 1.4404, 3.0197, 5.2119])}


In [12]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

batch_size = 128
accumulate_grad_batches = 32

train_dataset = MultiLabelImageDataset(train_df, path_data, transform=transform)
val_dataset = MultiLabelImageDataset(val_df, path_data, transform=transform)
test_dataset = MultiLabelImageDataset(test_df, path_data, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=1, persistent_workers=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=1, persistent_workers=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=1, persistent_workers=True)


In [13]:
name = "CNN_EffNet_sigmoid_unbalanced"
logger = TensorBoardLogger("../tb_logs", name=name)
model = EfficientNetMultiOutput(class_weights=class_weights)

checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',
    filename = f'{name}_b{batch_size}_gacc{accumulate_grad_batches}_' + '{epoch:02d}-{val_loss:.2f}',
    save_top_k=1,
    mode='min',
    dirpath='../models/',
    verbose=False
)

early_stop_callback = pl.callbacks.EarlyStopping(monitor="val_loss", patience=9, mode="min")

trainer = pl.Trainer(
    max_epochs=99,
    accelerator='gpu',
    devices=[0],
    accumulate_grad_batches=accumulate_grad_batches,
    callbacks=[UnfreezeCallback_EffNet(patience=4),checkpoint_callback, early_stop_callback],
    logger=logger
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [15]:
new_lr = 5e-4

In [16]:
model.hparams.lr = new_lr
hyperparams = {
    'learning_rate': new_lr,
    'batch_size': batch_size,
    'accumulate_grad_batches': accumulate_grad_batches,
    'dropout': 0.5,
    'weight_decay': 1e-5
}
logger.log_hyperparams(hyperparams)

In [None]:
trainer.fit(model, train_loader, val_loader)

C:\Users\MiestoMeska\anaconda3\envs\TC_4\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:654: Checkpoint directory C:\Users\MiestoMeska\Desktop\Mokslai\TC\Module4\Sprint_3\Project\vruzga-DL.3.5\models exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                     | Type                | Params | Mode 
--------------------------------------------------------------------------
0  | efficientnet             | EfficientNet        | 4.0 M  | train
1  | gender_classifier        | Sequential          | 787 K  | train
2  | race_classifier          | Sequential          | 788 K  | train
3  | age_regression           | Sequential          | 2.0 M  | train
4  | criterion_gender         | CrossEntropyLoss    | 0      | train
5  | criterion_race           | CrossEntropyLoss    | 0      | train
6  | criterion_age_regression | MSELoss             | 0      | train
7  | gender_accuracy          | MulticlassAccuracy  | 0      | train
8  | gender_preci

Sanity Checking: |                                                                               | 0/? [00:00<…

C:\Users\MiestoMeska\anaconda3\envs\TC_4\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
C:\Users\MiestoMeska\anaconda3\envs\TC_4\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Training: |                                                                                      | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Unfreezing blocks 7, 6, and 5 due to plateau in validation loss.
Learning rate changed from 0.0005 to 5e-05
Layers unfrozen: 3, optimizer reinitialized, learning rate decreased.


Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…