In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from PIL import Image
import os
import copy
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from tqdm import tqdm
from utils.loss_function import SaliencyLoss
from utils.data_process_uni import TrainDataset,ValDataset
from net.models.SUM import SUM
from net.configs.config_setting import setting_config
import sys
import copy
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

  from .autonotebook import tqdm as notebook_tqdm
2024-07-29 12:47:31.570935: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-29 12:47:31.591952: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-29 12:47:31.591975: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-29 12:47:31.592522: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-29 12:47:31.5

In [2]:
# Load and preprocess the dataframe
df = pd.read_csv('HandInfo.csv')

# Left-closed age bins (right=False): [0, 21), [21, 22), [22, 23), [23, 24), [24, 31), [31, 76).
# Ages outside [0, 76) fall in no bin and get NaN as their category.
age_bins = [0, 21, 22, 23, 24, 31, 76]
labels = np.arange(6)
df['age_category'] = pd.cut(df['age'], bins=age_bins, labels=labels, right=False, include_lowest=True)

# Keep only rows with accessories == 0.  The explicit .copy() makes the filtered
# frame independent of the original, so the column assignments below are not
# writes into a slice (avoids SettingWithCopyWarning / chained-assignment issues).
df = df[df.accessories == 0].copy()

# Binary flags derived from the aspectOfHand text:
#   p == 1 when the value starts with 'p', r == 1 when it ends with 'right'.
# na=False maps missing values to 0, matching the previous `== True` comparison.
df['p'] = np.where(df.aspectOfHand.str.startswith('p', na=False), 1, 0)
df['r'] = np.where(df.aspectOfHand.str.endswith('right', na=False), 1, 0)

# One sub-frame per (p, r) combination.
df_p_r = df[(df.p == 1) & (df.r == 1)]
df_p_l = df[(df.p == 1) & (df.r == 0)]
df_d_r = df[(df.p == 0) & (df.r == 1)]
df_d_l = df[(df.p == 0) & (df.r == 0)]

In [3]:
# Parameters

# -- Data / input pipeline --
image_directory = 'Hands'
img_height, img_width = 224, 224
split_size = 0.2
batch_size = 20

# -- Optimisation --
lr = 8e-6
num_epochs = 70
best_loss = float('inf')

# -- Early stopping bookkeeping --
early_stop_counter, early_stop_threshold = 0, 4

# -- Runtime and model configuration --
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
config = setting_config
model_cfg = config.model_config



In [4]:

# Define the data augmentation and normalization transforms
def _resize_to_normalized_tensor():
    """Build the resize -> tensor -> normalize steps shared by both pipelines.

    A fresh list of transform objects is returned on every call so the train
    and val pipelines do not share instances.  The mean/std values match the
    widely used ImageNet channel statistics.
    """
    return [
        transforms.Resize((img_height, img_width)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]


# Random augmentations applied to training images only.
_train_only_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomApply(
        [transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2)],
        p=0.5,
    ),
]

data_transforms = dict(
    train=transforms.Compose(_train_only_steps + _resize_to_normalized_tensor()),
    val=transforms.Compose(_resize_to_normalized_tensor()),
)


In [5]:
# Split data into training and validation sets
def split_data(df, test_size=split_size):
    """Split ``df`` into a (train, validation) pair, stratified on the 'id' column.

    Args:
        df: DataFrame to split; must contain an 'id' column.
        test_size: Validation share, forwarded to ``train_test_split``.
            Defaults to the value ``split_size`` had when this function was defined.

    Returns:
        Tuple ``(train_df, val_df)``.  The split uses a fixed ``random_state`` of 42.
    """
    parts = train_test_split(df, stratify=df['id'], test_size=test_size, random_state=42)
    return parts[0], parts[1]

def encode(col, encoder, val_df, train_df):
    """Fit ``encoder`` on the training column and encode that column in both frames.

    Both DataFrames are modified in place; nothing is returned.

    Args:
        col: Name of the column to encode.
        encoder: Object with ``fit`` / ``transform`` methods (e.g. a LabelEncoder).
        val_df: Validation frame (note: comes *before* ``train_df`` in the signature).
        train_df: Training frame; the only data the encoder is fitted on.
    """
    encoder.fit(train_df[col])
    # Training frame first, then validation, each overwritten with its encoded values.
    for frame in (train_df, val_df):
        frame[col] = encoder.transform(frame[col])

# Split every (p, r) sub-frame on its own so each one contributes to both train and val.
train_df_p_r, val_df_p_r = split_data(df_p_r)
train_df_p_l, val_df_p_l = split_data(df_p_l)
train_df_d_r, val_df_d_r = split_data(df_d_r)
train_df_d_l, val_df_d_l = split_data(df_d_l)

train_df = pd.concat([train_df_p_r, train_df_d_r, train_df_p_l, train_df_d_l])
val_df = pd.concat([val_df_p_r, val_df_d_r, val_df_p_l, val_df_d_l])
# NOTE(review): encode() below rewrites val_df in place, so the 'Total' entry ends up
# label-encoded while the four per-view frames keep their raw labels -- confirm that
# this is intended before feeding the per-view frames to the model.
val_dic = {'Total': val_df, 'Palmer Right': val_df_p_r, 'Palmer Left': val_df_p_l, 'Dorsal Right': val_df_d_r, 'Dorsal Left': val_df_d_l}
data_len = train_df.shape[0]

# Label-encode the three targets; every encoder is fitted on the training frame only.
pairs = [('id', LabelEncoder()), ('age_category', LabelEncoder()), ('gender', LabelEncoder())]
for pair in pairs:
    encode(pair[0], pair[1], val_df, train_df)

# Number of classes per head, taken from the fitted encoders.
num_classes1 = len(pairs[0][1].classes_)
num_classes2 = len(pairs[1][1].classes_)
num_classes3 = len(pairs[2][1].classes_)


def _one_hot(codes, n_classes):
    """One-hot encode integer class codes with exactly ``n_classes`` columns (0..n_classes-1).

    ``pd.get_dummies`` only creates columns for values that occur in ``codes``.
    Without the reindex, a class that is missing from the validation frame would
    shift the remaining columns, and ``argmax`` over a row would no longer equal
    the encoder's class index.
    """
    return pd.get_dummies(codes).reindex(columns=range(n_classes), fill_value=0)


train_id_one_hot = _one_hot(train_df['id'], num_classes1)
val_id_one_hot = _one_hot(val_df['id'], num_classes1)
train_age_one_hot = _one_hot(train_df['age_category'], num_classes2)
val_age_one_hot = _one_hot(val_df['age_category'], num_classes2)
train_gender_one_hot = _one_hot(train_df['gender'], num_classes3)
val_gender_one_hot = _one_hot(val_df['gender'], num_classes3)


In [6]:
class HandDataset(Dataset):
    """Dataset that yields an RGB image together with three one-hot label vectors.

    Rows are addressed by position (``iloc``), so ``dataframe`` and the three
    label frames must have the same row order and length.

    Args:
        dataframe: Frame with an 'imageName' column holding file names.
        labels: Sequence of exactly three one-hot DataFrames, one per target.
        image_dir: Directory that contains the image files.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, dataframe, labels, image_dir, transform=None):
        self.dataframe = dataframe
        self.labels1, self.labels2, self.labels3 = labels
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. number of rows in ``dataframe``."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, [label1, label2, label3])`` for the row at position ``idx``.

        Each label is a float tensor built from the corresponding one-hot row.
        """
        img_name = os.path.join(self.image_dir, self.dataframe['imageName'].iloc[idx])
        # Image.open is lazy and keeps the file handle open; convert() reads the
        # pixel data into a new image, so the handle can be closed right away.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image = self.transform(image)

        targets = [
            torch.tensor(one_hot.iloc[idx].values.astype(float))
            for one_hot in (self.labels1, self.labels2, self.labels3)
        ]
        return image, targets

# Create datasets
train_dataset = HandDataset(
    dataframe=train_df,
    labels=[train_id_one_hot, train_age_one_hot, train_gender_one_hot],
    image_dir=image_directory,
    transform=data_transforms['train'],
)
val_dataset = HandDataset(
    dataframe=val_df,
    labels=[val_id_one_hot, val_age_one_hot, val_gender_one_hot],
    image_dir=image_directory,
    transform=data_transforms['val'],
)

# Create data loaders
# Both loaders share batch size and run in the main process (num_workers=0);
# only the training loader shuffles.
_loader_options = dict(batch_size=batch_size, num_workers=0)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)


In [7]:
# Ask PyTorch's caching allocator to release GPU memory it holds but is not
# currently using.  Memory occupied by live tensors is not freed by this call.
torch.cuda.empty_cache()

In [8]:
# Define the FullyConnectedNetwork
class FullyConnectedNetwork(nn.Module):
    """Backbone followed by three independent fully connected classification heads.

    The output of ``sum_model`` is flattened and fed to each head.  Head ``k``
    (k = 1, 2, 3) is made of the layers ``fc1_k -> bn1_k -> fc2_k -> bn2_k -> fc3_k``
    with ReLU and dropout after each of the two batch-norm layers.

    Args:
        sum_model: Module applied to the input before flattening.
        num_classes1, num_classes2, num_classes3: Output sizes of heads 1, 2 and 3.
        input_features: Size of the flattened ``sum_model`` output.
        dropout_rate: Probability used by the single shared ``nn.Dropout``.
    """

    def __init__(self, sum_model, num_classes1, num_classes2, num_classes3, input_features, dropout_rate=0.5):
        super().__init__()
        self.sum_model = sum_model
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(dropout_rate)
        # Heads are registered in the order 1, 2, 3 under the attribute names
        # fc1_k, bn1_k, fc2_k, bn2_k, fc3_k so parameter names stay the same.
        for head, n_out in enumerate((num_classes1, num_classes2, num_classes3), start=1):
            setattr(self, f'fc1_{head}', nn.Linear(input_features, 512))
            setattr(self, f'bn1_{head}', nn.BatchNorm1d(512))
            setattr(self, f'fc2_{head}', nn.Linear(512, 256))
            setattr(self, f'bn2_{head}', nn.BatchNorm1d(256))
            setattr(self, f'fc3_{head}', nn.Linear(256, n_out))

    def _run_head(self, features, head):
        """Push the flattened features through head number ``head`` and return its logits."""
        hidden = features
        for stage in (1, 2):
            linear = getattr(self, f'fc{stage}_{head}')
            norm = getattr(self, f'bn{stage}_{head}')
            hidden = self.dropout(torch.relu(norm(linear(hidden))))
        return getattr(self, f'fc3_{head}')(hidden)

    def forward(self, x):
        """Return the tuple ``(out1, out2, out3)`` of raw outputs, one per head."""
        features = self.flatten(self.sum_model(x))
        return tuple(self._run_head(features, head) for head in (1, 2, 3))
    # Assuming the SUM class is already defined as per your model
if config.network == 'sum':
    sum_model = SUM(
        num_classes=model_cfg['num_classes'],
        input_channels=model_cfg['input_channels'],
        depths=model_cfg['depths'],
        depths_decoder=model_cfg['depths_decoder'],
        drop_path_rate=model_cfg['drop_path_rate'],
        load_ckpt_path=model_cfg['load_ckpt_path'],
    )
    # NOTE(review): presumably loads pretrained weights from load_ckpt_path -- confirm in net.models.SUM.
    sum_model.load_from()
    # Move to the device chosen in the parameters cell instead of hard-coding CUDA,
    # so the notebook also runs on CPU-only machines.
    sum_model.to(device)
    # Uncomment to exclude the backbone's parameters from gradient updates:
    # for param in sum_model.parameters():
    #     param.requires_grad = False
else:
    # Without this branch the code below would fail with a NameError on sum_model.
    raise ValueError(f"Unsupported config.network {config.network!r}; only 'sum' is handled here.")

# Define the FullyConnectedNetwork with the SUM model as part of it
# Adjust input_features based on the output shape of the SUM model
input_features = 37632  # Example value, adjust based on your model's output shape
model = FullyConnectedNetwork(sum_model, num_classes1, num_classes2, num_classes3, input_features)
model.to(device)

# Define a custom learning rate scheduler
class CustomLRScheduler:
    """Piecewise learning-rate schedule that is advanced by calling :meth:`step`.

    With ``n`` the number of earlier ``step()`` calls, the learning rate written
    to every parameter group is:

    * ``n < increase_epochs``: linear ramp from ``initial_lr`` towards ``final_lr``;
    * ``increase_epochs <= n < decay_epoch``: ``final_lr``;
    * ``n >= decay_epoch``: ``decay_lr``.

    Args:
        optimizer: Object exposing ``param_groups`` (a list of dicts with an 'lr' key).
        initial_lr: Learning rate at the start of the ramp.
        final_lr: Learning rate reached after the ramp.
        increase_epochs: Number of steps the ramp lasts.
        decay_epoch: Step index from which ``decay_lr`` is used.
        decay_lr: Learning rate used from ``decay_epoch`` onwards.
    """

    def __init__(self, optimizer, initial_lr, final_lr, increase_epochs, decay_epoch, decay_lr):
        self.optimizer = optimizer
        self.initial_lr = initial_lr
        self.final_lr = final_lr
        self.increase_epochs = increase_epochs
        self.decay_epoch = decay_epoch
        self.decay_lr = decay_lr
        self.current_epoch = 0

    def step(self, metrics=None):
        """Set the learning rate for the current step and advance the step counter.

        Args:
            metrics: Ignored.  Accepted so that callers written for metric-driven
                schedulers (``scheduler.step(val_loss)``) work with this class
                instead of raising a TypeError.
        """
        if self.current_epoch < self.increase_epochs:
            # Linearly increase the learning rate
            progress = self.current_epoch / self.increase_epochs
            lr = self.initial_lr + (self.final_lr - self.initial_lr) * progress
        elif self.current_epoch < self.decay_epoch:
            # Keep the learning rate constant
            lr = self.final_lr
        else:
            # Decay the learning rate
            lr = self.decay_lr

        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr

        self.current_epoch += 1

# Set up criterion, optimizer, and learning rate scheduler
# Cross-entropy with label smoothing of 0.1.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)
scheduler = CustomLRScheduler(
    optimizer,
    initial_lr=lr,
    final_lr=8e-4,
    increase_epochs=10,
    decay_epoch=30,
    decay_lr=4e-4,
)
# Initialize TensorBoard SummaryWriter
writer = SummaryWriter(log_dir='runs/experiment_2')

# Training function
def _snapshot_state_dict(model):
    """Return an independent copy of ``model.state_dict()`` with all tensors on the CPU.

    Keeping the "best so far" weights on the CPU avoids holding a second full
    copy of the model in GPU memory for the whole training run.
    """
    state = model.state_dict()
    snapshot = type(state)(
        (name, value.detach().to('cpu', copy=True) if torch.is_tensor(value) else copy.deepcopy(value))
        for name, value in state.items()
    )
    # state_dict() attaches versioning metadata to the mapping; carry it over.
    metadata = getattr(state, '_metadata', None)
    if metadata is not None:
        snapshot._metadata = copy.deepcopy(metadata)
    return snapshot


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=25, patience=10, base_model_path="3_head_best.pth"):
    """Train a three-head classifier with per-epoch validation and early stopping.

    Every epoch runs a training pass and a validation pass.  Loss and the three
    head accuracies are logged to the module-level TensorBoard ``writer``.  The
    weights with the best mean validation accuracy are saved to
    ``base_model_path`` and loaded back into ``model`` before returning.

    Args:
        model: Module returning three outputs ``(out1, out2, out3)`` per batch.
        train_loader, val_loader: Loaders yielding ``(inputs, [l1, l2, l3])`` where
            each ``l`` is a batch of one-hot rows (class index taken via argmax).
        criterion: Loss applied to each head with class-index targets; the three
            losses are summed.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Stepped once per epoch after validation.  A
            ``ReduceLROnPlateau`` receives the validation loss; any other
            scheduler is stepped without arguments.
        num_epochs: Maximum number of epochs.
        patience: Stop after this many consecutive epochs without improvement
            of the mean validation accuracy.
        base_model_path: File the best ``state_dict`` is written to.

    Returns:
        ``model`` with the best weights loaded.  The ``writer`` is closed on
        every exit path, including exceptions.
    """
    # Run on whatever device the model lives on instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    loaders = {'train': train_loader, 'val': val_loader}
    best_model_wts = _snapshot_state_dict(model)
    best_acc = 0.0
    epochs_no_improve = 0

    try:
        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                is_train = phase == 'train'
                model.train(is_train)  # train(False) is equivalent to eval()
                data_loader = loaders[phase]

                running_loss = 0.0
                running_corrects = [0, 0, 0]  # plain ints, one counter per head

                # Iterate over data
                for batch_idx, (inputs, labels) in enumerate(data_loader):
                    inputs = inputs.to(device)
                    # One-hot rows -> class indices, computed once per batch.
                    targets = [one_hot.to(device).argmax(dim=1) for one_hot in labels]

                    if is_train:
                        optimizer.zero_grad()

                    # Gradients are tracked only during the training phase.
                    with torch.set_grad_enabled(is_train):
                        outputs = model(inputs)
                        loss = sum(criterion(out, tgt) for out, tgt in zip(outputs, targets))

                        if is_train:
                            loss.backward()
                            optimizer.step()

                    # Statistics
                    batch_loss = loss.item()
                    running_loss += batch_loss * inputs.size(0)
                    for head, (out, tgt) in enumerate(zip(outputs, targets)):
                        running_corrects[head] += (out.argmax(dim=1) == tgt).sum().item()

                    # Print loss status after each batch
                    if is_train:
                        sys.stdout.write(f'\rBatch {batch_idx}/{len(data_loader) - 1} Loss: {batch_loss:.4f}')
                        sys.stdout.flush()

                dataset_size = len(data_loader.dataset)
                epoch_loss = running_loss / dataset_size
                epoch_acc1, epoch_acc2, epoch_acc3 = (hits / dataset_size for hits in running_corrects)

                # Log to TensorBoard
                writer.add_scalar(f'{phase}/Loss', epoch_loss, epoch)
                writer.add_scalar(f'{phase}/Accuracy1', epoch_acc1, epoch)
                writer.add_scalar(f'{phase}/Accuracy2', epoch_acc2, epoch)
                writer.add_scalar(f'{phase}/Accuracy3', epoch_acc3, epoch)

                print(f'{phase} Loss: {epoch_loss:.4f} Acc1: {epoch_acc1:.4f} Acc2: {epoch_acc2:.4f} Acc3: {epoch_acc3:.4f}')

                if phase == 'val':
                    # Only metric-driven schedulers take the validation loss; epoch-based
                    # ones (including step() methods without parameters) are stepped bare.
                    if isinstance(scheduler, lr_scheduler.ReduceLROnPlateau):
                        scheduler.step(epoch_loss)
                    else:
                        scheduler.step()

                    avg_acc = (epoch_acc1 + epoch_acc2 + epoch_acc3) / 3
                    if avg_acc > best_acc:
                        print(f'Current best is in epoch {epoch}.')
                        best_acc = avg_acc
                        best_model_wts = _snapshot_state_dict(model)
                        epochs_no_improve = 0
                        torch.save(model.state_dict(), base_model_path)  # Save the base model
                    else:
                        epochs_no_improve += 1

                    if epochs_no_improve >= patience:
                        print(f'Early stopping at epoch {epoch}')
                        model.load_state_dict(best_model_wts)
                        return model

            print()

        print(f'Best val Acc: {best_acc:.4f}')

        # Load best model weights
        model.load_state_dict(best_model_wts)
        return model
    finally:
        # Flush and close the TensorBoard writer even if training raised.
        writer.close()

In [9]:
# Set device
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Train the model
# The returned model (best validation weights loaded) replaces `model`.
model = train_model(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    scheduler,
    num_epochs=num_epochs,
    patience=10,
)

Epoch 0/9
----------


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacty of 5.67 GiB of which 3.56 MiB is free. Including non-PyTorch memory, this process has 5.65 GiB memory in use. Of the allocated memory 5.40 GiB is allocated by PyTorch, and 140.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF