In [None]:
import numpy as np
import torch
from torch import nn, optim
from tqdm.autonotebook import tqdm
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, RandomSampler
from torch.utils.tensorboard import SummaryWriter
import argparse

from model_classification import *
from data_classification import create_dataset

In [None]:
import os
import shutil
import random
from tqdm import tqdm

# Locations of the unsplit image folders and of the two split outputs.
source_dir = "/path/to/images"
train_dir = "/path/to/train"
val_dir = "/path/to/val"

# Make sure both split roots exist before anything is copied into them;
# exist_ok=True keeps this cell safe to re-run.
for split_root in (train_dir, val_dir):
    os.makedirs(split_root, exist_ok=True)

def copy_files(source, destination, file_list):
    """Copy the named files from ``source`` into ``destination``.

    Args:
        source: Directory that currently holds the files.
        destination: Existing directory that receives the copies.
        file_list: File names (relative to ``source``) to copy; each copy
            keeps its name inside ``destination``.

    A tqdm progress bar labelled "Copying files" tracks the copy.
    """
    progress = tqdm(file_list, desc="Copying files")
    for name in progress:
        shutil.copy(os.path.join(source, name), os.path.join(destination, name))

# Split every class subdirectory of source_dir 80/20 into train_dir / val_dir.
#
# The split is made reproducible on purpose: the file listing is sorted and the
# shuffle uses a generator seeded from a fixed seed plus the subdirectory name.
# Re-running this cell therefore assigns every file to the same split again; an
# unseeded shuffle would copy some files into the *other* split on each re-run
# and silently leak training images into the validation set.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8

for subdirectory in sorted(os.listdir(source_dir)):
    source_subdir = os.path.join(source_dir, subdirectory)

    # Only directories are class folders; skip stray files (hidden metadata
    # files, READMEs, ...) instead of crashing on os.listdir() below.
    if not os.path.isdir(source_subdir):
        continue

    train_subdir = os.path.join(train_dir, subdirectory)
    val_subdir = os.path.join(val_dir, subdirectory)

    # Create train and val directories for each subdirectory
    os.makedirs(train_subdir, exist_ok=True)
    os.makedirs(val_subdir, exist_ok=True)

    # Regular files only (shutil.copy cannot copy nested directories), in a
    # stable order so the seeded shuffle always starts from the same sequence.
    file_list = sorted(
        name for name in os.listdir(source_subdir)
        if os.path.isfile(os.path.join(source_subdir, name))
    )
    num_files = len(file_list)
    num_train = int(num_files * TRAIN_FRACTION)

    # Per-class generator: string seeds are hashed deterministically, and the
    # split of one class does not change when other classes are added/removed.
    # Using a private generator also leaves the global `random` state untouched.
    split_rng = random.Random("{}:{}".format(SPLIT_SEED, subdirectory))
    split_rng.shuffle(file_list)

    # Copy files to train directory
    copy_files(source_subdir, train_subdir, file_list[:num_train])

    # Copy files to val directory
    copy_files(source_subdir, val_subdir, file_list[num_train:])


In [None]:
import os
import glob
# Per-epoch metric histories; train_model() appends one value per epoch to each.
# Four independent lists are created (no shared aliasing).
train_acc, train_loss, val_acc, val_loss = [], [], [], []

def _binary_accuracy(logits, targets):
    """Return the accuracy of thresholded logits against the labels.

    A logit >= 0 is mapped to class 1, everything else to class 0 (a logit of
    0 corresponds to a sigmoid probability of 0.5).
    """
    predictions = torch.zeros_like(logits)
    predictions[logits >= 0] = 1
    return accuracy_score(targets.cpu().detach().numpy(),
                          predictions.cpu().detach().numpy())


def train_model(model, epochs, opt, loss, batch_size, checkpoint_dir, checkpoint_interval):
    """Train ``model`` and evaluate it on a validation subsample every epoch.

    Args:
        model: Network to train; called as ``model(x)`` on image batches.
        epochs: Number of epochs to run (epochs are numbered 1..``epochs``).
        opt: Optimizer stepping the model parameters.
        loss: Callable ``loss(output, target)`` returning a scalar tensor;
            targets are passed reshaped to a single column.
        batch_size: Batch size for both data loaders.
        checkpoint_dir: Directory that receives checkpoint files. Falsy
            disables checkpointing.
        checkpoint_interval: Save a checkpoint every this many epochs. Falsy
            or non-positive disables checkpointing.

    Returns:
        The same ``model`` object after training.

    Notes:
        * The train/val dataset locations are hard-coded below.
        * Relies on the module-level names ``device``, ``writer`` and
          ``scheduler``. NOTE(review): ``device`` is not defined in the visible
          notebook cells -- presumably it comes from the
          ``model_classification`` star import; confirm.
        * Appends one value per epoch to the module-level ``train_acc``,
          ``train_loss``, ``val_acc`` and ``val_loss`` lists.
        * Batches are expected to be mappings with ``'img'`` and ``'lbl'``
          entries.
    """
    # create dataset
    data_train = create_dataset(
        datadir='/path/to/train',
        balance='upsample', mult=1)

    data_val = create_dataset(
        datadir='/path/to/val',
        balance='upsample', mult=1)

    # every epoch draws a random subsample (with replacement) of two thirds
    # of the dataset size
    train_sampler = RandomSampler(data_train, replacement=True,
                                  num_samples=int(2*len(data_train)/3))
    val_sampler = RandomSampler(data_val, replacement=True,
                                num_samples=int(2*len(data_val)/3))

    # initialize data loaders
    train_dl = DataLoader(data_train, batch_size=batch_size, num_workers=2,
                          pin_memory=True, sampler=train_sampler)
    val_dl = DataLoader(data_val, batch_size=batch_size, num_workers=2,
                        pin_memory=True, sampler=val_sampler)

    save_checkpoints = bool(checkpoint_dir) and bool(checkpoint_interval) \
        and checkpoint_interval > 0
    if save_checkpoints:
        os.makedirs(checkpoint_dir, exist_ok=True)

    start_epoch = 1
    # start training process; the upper bound is inclusive so that exactly
    # `epochs` epochs are run when counting from 1
    for epoch in range(start_epoch, epochs + 1):

        model.train()

        train_loss_total, train_acc_total = 0, 0
        progress = tqdm(enumerate(train_dl), desc="Train Loss: ",
                        total=len(train_dl))
        for i, batch in progress:
            x = batch['img'].float().to(device)
            y = batch['lbl'].float().to(device)

            output = model(x)

            # derive accuracy score from the thresholded output
            train_acc_total += _binary_accuracy(output, y)

            # calculate loss
            batch_loss = loss(output, y.reshape(-1, 1))
            train_loss_total += batch_loss.item()
            progress.set_description("Train Loss: {:.4f}".format(
                train_loss_total/(i+1)))

            # learning
            opt.zero_grad()
            batch_loss.backward()
            opt.step()

        train_loss_mean = train_loss_total/(i+1)
        train_acc_mean = train_acc_total/(i+1)

        # logging
        writer.add_scalar("training loss", train_loss_mean, global_step=epoch)
        writer.add_scalar("training acc", train_acc_mean, global_step=epoch)
        train_acc.append(train_acc_mean)
        train_loss.append(train_loss_mean)
        writer.add_scalar('learning_rate', opt.param_groups[0]['lr'], global_step=epoch)

        torch.cuda.empty_cache()

        # evaluation based on validation sample
        model.eval()
        val_loss_total, val_acc_total = 0, 0
        progress = tqdm(enumerate(val_dl), desc="val Loss: ",
                        total=len(val_dl))
        # no gradients are needed for evaluation; skipping graph construction
        # saves memory and time
        with torch.no_grad():
            for j, batch in progress:
                x = batch['img'].float().to(device)
                y = batch['lbl'].float().to(device)

                output = model(x)

                # calculate loss
                batch_loss = loss(output, y.reshape(-1, 1))
                val_loss_total += batch_loss.item()
                progress.set_description("val Loss: {:.4f}".format(
                    val_loss_total/(j+1)))

                # derive accuracy score from the thresholded output
                val_acc_total += _binary_accuracy(output, y)

        val_loss_mean = val_loss_total/(j+1)
        val_acc_mean = val_acc_total/(j+1)

        # logging
        writer.add_scalar("val loss", val_loss_mean, global_step=epoch)
        writer.add_scalar("val accuracy", val_acc_mean, global_step=epoch)
        val_acc.append(val_acc_mean)
        val_loss.append(val_loss_mean)

        # screen output (same epoch number as the tensorboard global_step)
        print(("Epoch {:d}: train loss={:.3f}, val loss={:.3f}, "
               "train acc={:.3f}, val acc={:.3f}").format(
                   epoch, train_loss_mean, val_loss_mean,
                   train_acc_mean, val_acc_mean))

        writer.flush()
        # the plateau scheduler is driven by the mean validation loss
        scheduler.step(val_loss_mean)

        # periodic checkpoint so a long run can be inspected or resumed
        if save_checkpoints and epoch % checkpoint_interval == 0:
            torch.save(
                {'epoch': epoch,
                 'model_state_dict': model.state_dict(),
                 'optimizer_state_dict': opt.state_dict(),
                 'val_loss': val_loss_mean},
                os.path.join(checkpoint_dir,
                             'checkpoint_epoch_{:03d}.pt'.format(epoch)))

        torch.cuda.empty_cache()

    return model


# setup argument parser
parser = argparse.ArgumentParser(allow_abbrev=True)
parser.add_argument('-ep', type=int, default=10, help='Number of epochs')
parser.add_argument('-bs', type=int, nargs='?', default=128, help='Batch size')
parser.add_argument('-lr', type=float, nargs='?', default=0.3, help='Learning rate')
parser.add_argument('-mo', type=float, nargs='?', default=0.7, help='Momentum')
# args=[] makes the parser ignore sys.argv (which belongs to the notebook
# kernel), so the defaults above are always used; edit them to tune a run
args = parser.parse_args(args=[])

# initialize tensorboard writer; the run directory name encodes the hyperparameters
writer = SummaryWriter('runs/'+"ep{}_lr{:.0e}_bs{:03d}_mo{:.1f}/".format(
    args.ep, args.lr, args.bs, args.mo))

# initialize loss, optimizer, and scheduler
# NOTE(review): `model` is not defined in the visible cells -- presumably it is
# provided by the `model_classification` star import; confirm.
loss = nn.BCEWithLogitsLoss()
opt = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mo)
# halve the learning rate when the monitored value stops decreasing, never
# going below 1e-6
scheduler = optim.lr_scheduler.ReduceLROnPlateau(opt, 'min',
                                                 factor=0.5, threshold=1e-4,
                                                 min_lr=1e-6)

# Create the checkpoint directory if it doesn't exist
checkpoint_dir = '/path/to/checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

# run model training; close the writer even if training fails or is
# interrupted so already-logged scalars are flushed to disk
checkpoint_interval = 5
try:
    train_model(model, args.ep, opt, loss, args.bs, checkpoint_dir, checkpoint_interval)
finally:
    writer.close()