In [12]:
import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, ConcatDataset
from tqdm import tqdm

# Local imports keep their original relative order so that the two star
# imports shadow (and are shadowed by) exactly the same names as before.
from models.cnn import CNN
from data.dataset import SkinDataset, NonSkinDataset
from settings import *
from utils.data_augmentation import *
from utils.evaluation import evaluate_model, update_csv
from utils.evaluation import calculate_metrics

In [13]:
# Checkpoint file name for this run; it is the default `save_path` of
# train_cnn, which joins it onto its `save_dir` argument.
model_name = 'modelb32e1_more_data_arch.pth'

In [14]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [15]:
# Gather the file paths of both classes: "SKIN" holds the positive samples,
# "NS" the non-skin ones. Both folders live directly under DATA_DIR.
skin_file_paths, not_skin_file_paths = (
    collect_file_paths(os.path.join(DATA_DIR, class_folder))
    for class_folder in ("SKIN", "NS")
)

In [16]:
# Seed torch's global RNG so the two permutations below are reproducible.
torch.manual_seed(42)

# Sort before permuting. This cell overwrites the lists it reads, so without a
# canonical starting order a second run would permute the already permuted
# lists and silently change which files land in train/test/val. Sorting also
# makes the split independent of the order in which collect_file_paths returns
# the files (presumably directory-listing order — TODO confirm).
skin_file_paths = sorted(skin_file_paths)
skin_indices = torch.randperm(len(skin_file_paths))
skin_file_paths = [skin_file_paths[i] for i in skin_indices.tolist()]

not_skin_file_paths = sorted(not_skin_file_paths)
not_skin_indices = torch.randperm(len(not_skin_file_paths))
not_skin_file_paths = [not_skin_file_paths[i] for i in not_skin_indices.tolist()]

In [17]:
# Each path list is cut into three consecutive slices:
#   [0, NUM_TRAIN_*)                          -> train
#   [NUM_TRAIN_*, NUM_TRAIN_* + NUM_TEST_*)   -> test
#   everything from NUM_TRAIN_* + NUM_TEST_*  -> validation
skin_test_stop = NUM_TRAIN_SKIN + NUM_TEST_SKIN
not_skin_test_stop = NUM_TRAIN_NOT_SKIN + NUM_TEST_NOT_SKIN

# Skin samples
train_skin_file_paths = skin_file_paths[:NUM_TRAIN_SKIN]
test_skin_file_paths = skin_file_paths[NUM_TRAIN_SKIN:skin_test_stop]
val_skin_file_paths = skin_file_paths[skin_test_stop:]

# Non-skin samples
train_not_skin_file_paths = not_skin_file_paths[:NUM_TRAIN_NOT_SKIN]
test_not_skin_file_paths = not_skin_file_paths[NUM_TRAIN_NOT_SKIN:not_skin_test_stop]
val_not_skin_file_paths = not_skin_file_paths[not_skin_test_stop:]

In [18]:
# One (skin, non-skin) dataset pair per split, built from the matching path lists.
train_skin_dataset, train_not_skin_dataset = (
    SkinDataset(train_skin_file_paths),
    NonSkinDataset(train_not_skin_file_paths),
)

test_skin_dataset, test_not_skin_dataset = (
    SkinDataset(test_skin_file_paths),
    NonSkinDataset(test_not_skin_file_paths),
)

val_skin_dataset, val_not_skin_dataset = (
    SkinDataset(val_skin_file_paths),
    NonSkinDataset(val_not_skin_file_paths),
)

In [19]:
# Merge the two classes of every split into a single dataset; within each
# merged dataset the skin samples are indexed before the non-skin samples.
train_dataset = ConcatDataset([
    train_skin_dataset,
    train_not_skin_dataset,
])
test_dataset = ConcatDataset([
    test_skin_dataset,
    test_not_skin_dataset,
])
val_dataset = ConcatDataset([
    val_skin_dataset,
    val_not_skin_dataset,
])

In [20]:
# Only the training loader reshuffles; test and validation batches keep the
# dataset order so evaluation passes see the samples in a stable sequence.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [21]:
def _print_metrics(split, accuracy, precision, recall, f1, mcc, iou):
    """Print one line per classification metric, each prefixed with `split`."""
    print(f"{split} Accuracy: {accuracy:.4f}")
    print(f"{split} Precision: {precision:.4f}")
    print(f"{split} Recall: {recall:.4f}")
    print(f"{split} F1 Score: {f1:.4f}")
    print(f"{split} MCC: {mcc:.4f}")
    print(f"{split} IoU: {iou:.4f}")


def train_cnn(train_loader, val_loader, num_epochs=NUM_EPOCHS, save_dir="models", save_path=model_name):
    """Train a fresh CNN and checkpoint the weights with the lowest validation loss.

    A new ``CNN`` is optimised with RMSprop (lr=0.001) against ``nn.BCELoss``.
    After every epoch the training metrics are printed, the model is scored on
    ``val_loader`` through ``evaluate_model``, and the state dict is written to
    ``save_dir/save_path`` whenever the validation loss improves.

    Args:
        train_loader: Iterable of ``(inputs, targets)`` batches that supports
            ``len()``; used for the optimisation steps.
        val_loader: Loader handed to ``evaluate_model`` after each epoch.
        num_epochs: Number of passes over ``train_loader``.
        save_dir: Directory for the checkpoint; created if missing.
        save_path: Checkpoint file name inside ``save_dir``. It is also the
            model name used in the evaluation label and the metrics CSV row.

    Returns:
        None. Results are the printed metrics, the checkpoint file and the
        rows written by ``update_csv``.

    Raises:
        ValueError: If ``train_loader`` contains no batches.
    """
    # Fail early with a clear message instead of a ZeroDivisionError once the
    # average loss is computed.
    if len(train_loader) == 0:
        raise ValueError("train_loader has no batches; nothing to train on")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = CNN().to(device)
    # BCELoss expects probabilities, so the CNN presumably ends in a sigmoid
    # — TODO confirm against models.cnn.
    criterion = nn.BCELoss()
    optimizer = optim.RMSprop(model.parameters(), lr=0.001)

    # Remember the bare file name before it is joined onto save_dir: the
    # evaluation label and CSV row must name the checkpoint actually written,
    # not whatever the global `model_name` holds at call time.
    run_name = save_path
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, save_path)
    best_val_loss = float('inf')

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        all_outputs = []
        all_targets = []

        # The progress bar wraps the loader, so it advances once per batch.
        with tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit='batch') as pbar:
            for inputs, targets in pbar:
                inputs, targets = inputs.to(device), targets.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                # Labels become a float column, presumably to match a
                # (batch, 1) model output — TODO confirm.
                targets = targets.float().view(-1, 1)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                batch_loss = loss.item()
                total_loss += batch_loss

                # Keep detached CPU copies for the epoch-level metrics.
                all_outputs.append(outputs.detach().cpu().numpy())
                all_targets.append(targets.cpu().numpy())

                pbar.set_postfix(loss=batch_loss)

        avg_loss = total_loss / len(train_loader)
        all_outputs = np.concatenate(all_outputs, axis=0)
        all_targets = np.concatenate(all_targets, axis=0)
        train_metrics = calculate_metrics(all_outputs, all_targets)
        print(f"Epoch [{epoch+1}/{num_epochs}] - Average Training Loss: {avg_loss:.4f}")
        _print_metrics("Training", *train_metrics)

        # Evaluate on validation/test set
        model.eval()
        avg_val_loss, *val_metrics = evaluate_model(
            model, f"model:{run_name}, Validation/Test:", val_loader, criterion, device
        )
        print(f"Epoch [{epoch+1}/{num_epochs}] - Validation Loss: {avg_val_loss:.4f}")
        _print_metrics("Validation", *val_metrics)

        # Save the model if it has the best validation loss so far
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), save_path)
            # NOTE(review): an improvement in the first epoch (index 0) is
            # checkpointed but not written to the CSV, so a single-epoch run
            # never records a row — confirm this is intended.
            if epoch >= 1:
                update_csv(run_name, avg_val_loss, *val_metrics)
            print(f"Model saved to {save_path}, epoch no {epoch + 1}")

    print("Training complete!")

In [None]:
# Start training; train_cnn itself writes the checkpoint with the lowest
# validation loss to disk.
train_cnn(train_loader, val_loader, num_epochs=NUM_EPOCHS)

Epoch 1/1:  40%|███▉      | 1620/4094 [34:41<29:10,  1.41batch/s, loss=0.232]   

In [None]:
# model = CNN()
# model.load_state_dict(torch.load(f'models/{model_name}'))
# model.eval()
# 
# evaluate_model(model, f'models/{model_name} - Test', test_loader, nn.BCELoss(), device)