In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from PIL import Image
import os
import copy
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from tqdm import tqdm
from utils.loss_function import SaliencyLoss
from utils.data_process_uni import TrainDataset,ValDataset
from net.models.SUM import SUM
from net.configs.config_setting import setting_config
import sys
import copy
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load and preprocess the dataframe
df = pd.read_csv('HandInfo.csv')

# Keep only rows whose `accessories` flag is 0. The explicit .copy() makes the
# result an independent frame, so the column assignments below never write
# into a slice of the frame returned by read_csv (SettingWithCopy hazard).
df = df[df.accessories == 0].copy()

# Binary helper columns derived from the `aspectOfHand` text:
#   p == 1 when the value starts with 'p'     (presumably "palmar" - confirm against the CSV)
#   r == 1 when the value ends with 'right'
# na=False maps missing values to 0, which is what the former `== True`
# comparison did implicitly.
df['p'] = df.aspectOfHand.str.startswith('p', na=False).astype(int)
df['r'] = df.aspectOfHand.str.endswith('right', na=False).astype(int)

# Four disjoint subsets, one per (p, r) combination.
is_p = df.p == 1
is_r = df.r == 1
df_p_r = df[is_p & is_r]
df_p_l = df[is_p & ~is_r]
df_d_r = df[~is_p & is_r]
df_d_l = df[~is_p & ~is_r]

In [3]:
# Parameters

# --- Data ---
image_directory = 'Hands'   # directory the image file names are resolved against
img_height = 224
img_width = 224
split_size = 0.2            # used as the default `test_size` of split_data

# --- Optimisation ---
batch_size = 32
lr = 8e-4

# --- Device ---
# The notebook prefers GPU index 1. Only select it when that ordinal actually
# exists; otherwise fall back to GPU 0, and to the CPU when CUDA is unavailable.
# (Previously "cuda:1" was chosen whenever *any* GPU was present, which is an
# invalid device on single-GPU machines.)
preferred_gpu_index = 1
if torch.cuda.is_available():
    gpu_index = preferred_gpu_index if torch.cuda.device_count() > preferred_gpu_index else 0
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")

# --- Model configuration (project-provided settings object) ---
config = setting_config
model_cfg = config.model_config

# Training and Validation Loop
best_loss = float('inf')
num_epochs = 10
# Early stopping setup
# NOTE(review): best_loss / early_stop_counter / early_stop_threshold are not
# referenced by the cells visible in this notebook; train_model keeps its own
# early-stopping state. Kept in case other code reads them.
early_stop_counter = 0
early_stop_threshold = 4


In [4]:

# Define the data augmentation and normalization transforms
def _resize_to_normalized_tensor():
    """Return fresh instances of the steps shared by both pipelines.

    Resize to (img_height, img_width), convert to a tensor, then normalize
    each channel with the fixed mean/std constants below.
    """
    return [
        transforms.Resize((img_height, img_width)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]


# Random augmentations applied to training images only, ahead of the shared steps.
_train_only_augmentations = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomApply(
        [transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2)],
        p=0.5,
    ),
]

data_transforms = {
    'train': transforms.Compose(_train_only_augmentations + _resize_to_normalized_tensor()),
    'val': transforms.Compose(_resize_to_normalized_tensor()),
}


In [5]:
# Split data into training and validation sets
def split_data(df, test_size=split_size):
    """Split ``df`` into a (train, validation) pair, stratified on its ``id`` column.

    Args:
        df: DataFrame containing an ``id`` column.
        test_size: value forwarded to ``train_test_split`` as ``test_size``;
            defaults to the module-level ``split_size``.

    Returns:
        Tuple ``(train_df, val_df)``. The split uses a fixed ``random_state``
        of 42.
    """
    parts = train_test_split(
        df,
        test_size=test_size,
        random_state=42,
        stratify=df['id'],
    )
    return parts[0], parts[1]

# Split each (p, r) subset separately, then stitch the pieces back together.
train_df_p_r, val_df_p_r = split_data(df_p_r)
train_df_p_l, val_df_p_l = split_data(df_p_l)
train_df_d_r, val_df_d_r = split_data(df_d_r)
train_df_d_l, val_df_d_l = split_data(df_d_l)

train_df = pd.concat([train_df_p_r, train_df_d_r, train_df_p_l, train_df_d_l])
val_df = pd.concat([val_df_p_r, val_df_d_r, val_df_p_l, val_df_d_l])
# NOTE: measured before the common-id filter below, so it can exceed len(train_df).
data_len = train_df.shape[0]

# Keep only ids that occur in BOTH splits. The .copy() calls make the filtered
# frames independent, so rewriting the `id` column below does not assign into
# a slice of the concatenated frames (SettingWithCopy hazard).
common_ids = set(train_df['id']).intersection(val_df['id'])
train_df = train_df[train_df['id'].isin(common_ids)].copy()
val_df = val_df[val_df['id'].isin(common_ids)].copy()

# Encode labels
label_encoder = LabelEncoder()
label_encoder.fit(train_df['id'])
num_classes = len(label_encoder.classes_)

train_df['id'] = label_encoder.transform(train_df['id'])
val_df['id'] = label_encoder.transform(val_df['id'])

# One-hot encode the integer labels. Reindexing pins the column layout to
# 0..num_classes-1 for both frames, so column k always means class k even if a
# class were ever absent from one of the splits (absent classes become
# all-zero columns).
class_columns = list(range(num_classes))
train_labels_one_hot = pd.get_dummies(train_df['id']).reindex(columns=class_columns, fill_value=0)
val_labels_one_hot = pd.get_dummies(val_df['id']).reindex(columns=class_columns, fill_value=0)

In [6]:
class HandDataset(Dataset):
    """Dataset pairing image files with per-row label vectors.

    Args:
        dataframe: DataFrame with an ``imageName`` column; row ``i`` names the
            file of sample ``i``.
        labels: DataFrame whose row ``i`` is the label vector of sample ``i``
            (position-aligned with ``dataframe``).
        image_dir: directory that ``imageName`` values are joined onto.
        transform: optional callable applied to the RGB PIL image.
    """

    def __init__(self, dataframe, labels, image_dir, transform=None):
        self.dataframe = dataframe
        self.labels = labels
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. number of rows in ``dataframe``."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for positional index ``idx``.

        ``image`` is the RGB image (after ``transform`` when one was given);
        ``label`` is the label row as a float64 tensor.
        """
        # Select the column first: this reads a single cell instead of
        # materialising the whole row as a Series on every access.
        img_name = os.path.join(self.image_dir, self.dataframe['imageName'].iloc[idx])

        # convert() returns an independent image, so the underlying file can be
        # closed deterministically as soon as the pixels have been read.
        with Image.open(img_name) as source_image:
            image = source_image.convert('RGB')

        label = self.labels.iloc[idx].to_numpy(dtype=float)

        # `is not None` instead of truthiness: a transform object that happens
        # to evaluate as falsy must still be applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, torch.tensor(label)

# Create datasets
train_dataset = HandDataset(
    dataframe=train_df,
    labels=train_labels_one_hot,
    image_dir=image_directory,
    transform=data_transforms['train'],
)
val_dataset = HandDataset(
    dataframe=val_df,
    labels=val_labels_one_hot,
    image_dir=image_directory,
    transform=data_transforms['val'],
)

# Create data loaders
# Both loaders share batch size and worker count; only the training loader shuffles.
_shared_loader_options = {'batch_size': batch_size, 'num_workers': 0}
train_loader = DataLoader(train_dataset, shuffle=True, **_shared_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_shared_loader_options)

In [7]:
# Ask PyTorch to release GPU memory it has cached but is not currently using,
# before the model is built in the next cell.
torch.cuda.empty_cache()

In [8]:
# Define the FullyConnectedNetwork
class FullyConnectedNetwork(nn.Module):
    """Classification head applied to the flattened output of ``sum_model``.

    Pipeline: ``sum_model`` -> flatten -> two (Linear -> BatchNorm1d -> ReLU ->
    Dropout) stages of width 512 -> final Linear producing ``num_classes``
    raw scores (no softmax is applied here).

    Args:
        sum_model: module run first on the input batch.
        num_classes: size of the output layer.
        input_features: per-sample size of the flattened ``sum_model`` output.
        dropout_rate: probability used by the single shared Dropout module.
    """

    def __init__(self, sum_model, num_classes, input_features, dropout_rate=0.5):
        super().__init__()
        hidden_units = 512
        self.sum_model = sum_model
        self.flatten = nn.Flatten()
        # Attribute names and creation order are kept stable so state_dict keys
        # and parameter initialisation order stay the same.
        self.fc1 = nn.Linear(input_features, hidden_units)
        self.bn1 = nn.BatchNorm1d(hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.bn2 = nn.BatchNorm1d(hidden_units)
        self.fc3 = nn.Linear(hidden_units, num_classes)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return the raw class scores for input batch ``x``."""
        hidden = self.flatten(self.sum_model(x))
        hidden_stages = ((self.fc1, self.bn1), (self.fc2, self.bn2))
        for linear, norm in hidden_stages:
            hidden = self.dropout(torch.relu(norm(linear(hidden))))
        return self.fc3(hidden)

# Assuming the SUM class is already defined as per your model
if config.network == 'sum':
    sum_model = SUM(
        num_classes=model_cfg['num_classes'],
        input_channels=model_cfg['input_channels'],
        depths=model_cfg['depths'],
        depths_decoder=model_cfg['depths_decoder'],
        drop_path_rate=model_cfg['drop_path_rate'],
        load_ckpt_path=model_cfg['load_ckpt_path'],
    )
    # presumably restores pretrained weights from `load_ckpt_path` - confirm in net.models.SUM
    sum_model.load_from()
    # Use the configured `device` rather than a bare .cuda(): that call targets
    # the current default GPU and fails outright on CPU-only machines.
    sum_model.to(device)
    # Freeze the backbone: only the classification head receives gradients.
    for param in sum_model.parameters():
        param.requires_grad = False
else:
    # Fail here with a clear message instead of a NameError on `sum_model` below.
    raise ValueError(
        f"Unsupported config.network {config.network!r}: this notebook only builds the 'sum' backbone"
    )

# Define the FullyConnectedNetwork with the SUM model as part of it
# Adjust input_features based on the output shape of the SUM model
# (it must equal the per-sample size of the flattened sum_model output,
# otherwise fc1 raises a shape mismatch on the first forward pass).
input_features = 37632  # Example value, adjust based on your model's output shape
num_classes = len(label_encoder.classes_)  # Ensure num_classes is defined
model = FullyConnectedNetwork(sum_model, num_classes, input_features)
# Same device as the batches sent by train_model (was a hard-coded .cuda(1)).
model.to(device)

# Set up criterion, optimizer, and learning rate scheduler
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)  # Use CrossEntropyLoss with label smoothing
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True)

# Initialize TensorBoard SummaryWriter
writer = SummaryWriter('runs/experiment_2')

# Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=25, patience=10, base_model_path="model.pth"):
    """Train ``model`` with a validation pass after every epoch; return it with the best weights.

    Two module-level names are read: ``device`` (every batch is moved there)
    and ``writer`` (the TensorBoard writer; per-phase loss/accuracy are logged
    to it and it is closed before returning).

    Args:
        model: network to optimise; modified in place.
        train_loader: loader for the training phase; must expose ``.dataset``.
        val_loader: loader for the validation phase; must expose ``.dataset``.
        criterion: loss called as ``criterion(outputs, class_indices)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch after validation.
            ``ReduceLROnPlateau`` receives the validation loss; any other
            scheduler is stepped without arguments.
        num_epochs: maximum number of epochs.
        patience: stop once validation accuracy has failed to improve for this
            many consecutive epochs.
        base_model_path: file the ``state_dict`` is saved to each time the
            validation accuracy improves.

    Returns:
        ``model`` with the weights of the best validation accuracy loaded.

    Labels are expected as 2-D per-class rows (e.g. one-hot); they are reduced
    to class indices with ``argmax(dim=1)``.
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    epochs_no_improve = 0
    stopped_early = False

    # Only ReduceLROnPlateau takes the monitored metric in step(). Epoch-based
    # schedulers (e.g. LinearLR) would interpret a positional argument as the
    # epoch index, so handing them the loss corrupts the LR schedule.
    metric_driven_scheduler = isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau)

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
                data_loader = train_loader
            else:
                model.eval()   # Set model to evaluate mode
                data_loader = val_loader

            running_loss = 0.0
            running_corrects = 0  # plain int, so an empty loader cannot break the division below

            # Iterate over data
            for batch_idx, (inputs, labels) in enumerate(data_loader):
                inputs = inputs.to(device)
                # Per-class label rows -> class indices, computed once per batch.
                targets = labels.to(device).argmax(dim=1)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward (gradients are tracked only while training)
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    preds = outputs.argmax(dim=1)
                    loss = criterion(outputs, targets)

                    # Backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Statistics (loss is a batch mean, so weight it by batch size)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += int((preds == targets).sum().item())

                # Print loss status after each batch
                if is_train:
                    sys.stdout.write(f'\rBatch {batch_idx}/{len(data_loader) - 1} Loss: {loss.item():.4f}')
                    sys.stdout.flush()

            if is_train:
                # Terminate the '\r' progress line so the epoch summary starts
                # on its own line instead of being glued to the last batch.
                print()

            num_samples = len(data_loader.dataset)
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples

            # Log to TensorBoard
            writer.add_scalar(f'{phase}/Loss', epoch_loss, epoch)
            writer.add_scalar(f'{phase}/Accuracy', epoch_acc, epoch)

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val':
                if metric_driven_scheduler:
                    scheduler.step(epoch_loss)
                else:
                    scheduler.step()

                # Deep copy the model whenever validation accuracy improves
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    epochs_no_improve = 0
                    torch.save(model.state_dict(), base_model_path)  # Save the base model
                else:
                    epochs_no_improve += 1

                if epochs_no_improve >= patience:
                    print(f'Early stopping at epoch {epoch}')
                    stopped_early = True

        if stopped_early:
            break

        print()

    print(f'Best val Acc: {best_acc:.4f}')

    # Load best model weights
    model.load_state_dict(best_model_wts)
    writer.close()
    return model


In [9]:
# Set device
# Send batches to wherever the model's parameters already live, instead of
# repeating a hard-coded device string that could drift from the model's.
device = next(model.parameters()).device
# Train the model
# The checkpoint is written to "best_model.pth" because that is the file the
# fine-tuning cell loads; train_model's default path is "model.pth".
model = train_model(
    model, train_loader, val_loader, criterion, optimizer, scheduler,
    num_epochs=num_epochs, patience=10, base_model_path="best_model.pth",
)

Epoch 0/9
----------
Batch 49/49 Loss: 4.5327train Loss: 4.8117 Acc: 0.0672
val Loss: 3.9527 Acc: 0.2078

Epoch 1/9
----------
Batch 49/49 Loss: 3.8745train Loss: 4.0586 Acc: 0.1839
val Loss: 3.2025 Acc: 0.4123

Epoch 2/9
----------
Batch 49/49 Loss: 3.3416train Loss: 3.4335 Acc: 0.3147
val Loss: 2.5879 Acc: 0.5832

Epoch 3/9
----------
Batch 49/49 Loss: 2.7555train Loss: 2.9335 Acc: 0.4464
val Loss: 2.1131 Acc: 0.7141

Epoch 4/9
----------
Batch 49/49 Loss: 2.5297train Loss: 2.5896 Acc: 0.5300
val Loss: 1.8322 Acc: 0.7891

Epoch 5/9
----------
Batch 49/49 Loss: 2.7494train Loss: 2.3023 Acc: 0.6222
val Loss: 1.6152 Acc: 0.8647

Epoch 6/9
----------
Batch 49/49 Loss: 2.0175train Loss: 2.1247 Acc: 0.6822
val Loss: 1.4886 Acc: 0.8952

Epoch 7/9
----------
Batch 49/49 Loss: 2.2674train Loss: 1.9810 Acc: 0.7279
val Loss: 1.4000 Acc: 0.9288

Epoch 8/9
----------
Batch 49/49 Loss: 1.8302train Loss: 1.8821 Acc: 0.7550
val Loss: 1.3386 Acc: 0.9377

Epoch 9/9
----------
Batch 49/49 Loss: 2.4566t

In [None]:
# Start from the checkpoint stored in "best_model.pth". map_location remaps the
# stored tensors onto the active device, so the file also loads on a machine
# whose GPU layout differs from the one that saved it.
model.load_state_dict(torch.load("best_model.pth", map_location=device))

# Unfreeze the backbone so the whole network is fine-tuned.
for param in sum_model.parameters():
    param.requires_grad = True

num_epochs = 15
# Fresh optimizer and scheduler for this phase.
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=1, end_factor=0.95, total_iters=num_epochs)
# Fine-tune the entire model
# Saved as 'new_best.pth' (was 'besti.pth'): that is the file the next cell loads.
model = train_model(
    model, train_loader, val_loader, criterion, optimizer, scheduler,
    num_epochs=num_epochs, patience=10, base_model_path='new_best.pth',
)

Epoch 0/14
----------
Batch 196/196 Loss: 1.5065train Loss: 1.9949 Acc: 0.7058
val Loss: 1.2943 Acc: 0.9454





Epoch 1/14
----------
Batch 196/196 Loss: 1.6103train Loss: 1.4719 Acc: 0.8856
val Loss: 1.1393 Acc: 0.9835

Epoch 2/14
----------
Batch 196/196 Loss: 1.2699train Loss: 1.3123 Acc: 0.9429
val Loss: 1.0931 Acc: 0.9917

Epoch 3/14
----------
Batch 196/196 Loss: 1.3738train Loss: 1.2305 Acc: 0.9673
val Loss: 1.0656 Acc: 0.9911

Epoch 4/14
----------
Batch 196/196 Loss: 1.1671train Loss: 1.1881 Acc: 0.9722
val Loss: 1.0386 Acc: 0.9956

Epoch 5/14
----------
Batch 150/196 Loss: 1.1899

In [None]:
# Restore the weights stored in "new_best.pth". map_location remaps the stored
# tensors onto the active device, so loading does not depend on the GPU index
# the checkpoint was saved from.
model.load_state_dict(torch.load("new_best.pth", map_location=device))