In [1]:
from clearml import Task, Logger

import os

# Credentials are read from the environment so that no secret is stored in
# (or leaked through) the notebook file. Export CLEARML_API_ACCESS_KEY and
# CLEARML_API_SECRET_KEY before starting the kernel; a missing variable
# raises KeyError here instead of failing later with an opaque auth error.
# NOTE(review): any key pair that was previously committed in this cell
# should be revoked in the ClearML web UI and replaced.
Task.set_credentials(
    api_host=os.environ.get("CLEARML_API_HOST", "https://api.clear.ml"),
    web_host=os.environ.get("CLEARML_WEB_HOST", "https://app.clear.ml"),
    files_host=os.environ.get("CLEARML_FILES_HOST", "https://files.clear.ml"),
    key=os.environ["CLEARML_API_ACCESS_KEY"],
    secret=os.environ["CLEARML_API_SECRET_KEY"],
)

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
from time import time
import os
from PIL import Image
from tempfile import TemporaryDirectory

In [3]:
# Root folder holding one ImageFolder-style sub-directory per phase.
data_dir = '../data/HAR_2'

# Per-phase preprocessing: both phases resize to a fixed size and convert to
# tensors; only the training phase adds a random horizontal flip.
data_transforms = {
    'train': transforms.Compose(
        [
            transforms.Resize((244, 244)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ]
    ),
    'val': transforms.Compose(
        [
            transforms.Resize((244, 244)),
            transforms.ToTensor(),
        ]
    ),
}

# One dataset per phase, read from <data_dir>/<phase>.
image_datasets = {
    phase: datasets.ImageFolder(os.path.join(data_dir, phase), data_transforms[phase])
    for phase in ['train', 'val']
}

# One shuffled loader per phase (batches of 8, 4 worker processes).
dataloaders = {
    phase: torch.utils.data.DataLoader(
        image_datasets[phase],
        batch_size=8,
        shuffle=True,
        num_workers=4,
    )
    for phase in ['train', 'val']
}

# Number of samples per phase, used to turn running sums into averages.
dataset_sizes = {phase: len(image_datasets[phase]) for phase in ['train', 'val']}

# Class labels as discovered by ImageFolder on the training split.
class_names = image_datasets['train'].classes

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [4]:
import multiprocessing as mp
from time import time

# Time two full passes over the training set for every even worker count
# below the CPU count, to pick a sensible num_workers for the DataLoader.
for num_workers in range(0, mp.cpu_count(), 2):
    train_loader = torch.utils.data.DataLoader(
        image_datasets['train'],
        batch_size=8,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
    )

    start = time()
    for _pass in (1, 2):
        # Only loading is measured; the batches themselves are discarded.
        for _batch in train_loader:
            pass
    end = time()

    print("Finish with:{} second, num_workers={}".format(end - start, num_workers))

Finish with:47.84133434295654 second, num_workers=0


KeyboardInterrupt: 

In [4]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, history=0):
    """Train ``model`` and return it with its best validation weights loaded.

    Every epoch runs a 'train' phase followed by a 'val' phase over the
    module-level ``dataloaders``. Per-phase loss and accuracy are printed and
    reported to the current ClearML logger. Whenever validation accuracy
    improves, the weights are checkpointed to a temporary file; that best
    checkpoint is loaded back into ``model`` before returning.

    Relies on the module-level names ``dataloaders``, ``dataset_sizes`` and
    ``device``. Inputs and labels are moved to ``device``; the model itself is
    not moved here (presumably the caller has already placed it on ``device``).

    Args:
        model: Network to train, called as ``model(inputs)``.
        criterion: Loss callable, invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch, after the train phase.
        num_epochs: Number of epochs to run.
        history: Offset added to the epoch index when reporting scalars, so a
            resumed run continues the iteration axis of earlier reports.

    Returns:
        The same ``model`` object, with the best validation weights loaded.
    """
    since = time()

    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')

        # Seed the checkpoint so there is always a file to restore, even if
        # validation accuracy never rises above 0.
        torch.save(model.state_dict(), best_model_params_path)
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)

            for phase in ['train', 'val']:
                is_train = phase == 'train'
                if is_train:
                    model.train()
                else:
                    model.eval()

                running_loss = 0.0
                # Plain Python int: stays valid even if the loader is empty.
                running_corrects = 0

                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    optimizer.zero_grad()

                    # Gradients are tracked only while training.
                    with torch.set_grad_enabled(is_train):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        if is_train:
                            loss.backward()
                            optimizer.step()

                    # criterion returns a batch mean; weight it by batch size
                    # so the epoch figure is a true per-sample average.
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += (preds == labels).sum().item()

                # Both metrics are Python floats, so they can be formatted and
                # handed to the logger without carrying tensors around.
                epoch_acc = running_corrects / dataset_sizes[phase]
                epoch_loss = running_loss / dataset_sizes[phase]

                iteration = history + epoch
                logger = Logger.current_logger()

                if is_train:
                    scheduler.step()

                    logger.report_scalar(
                        "train", "loss", iteration=iteration, value=epoch_loss
                    )
                    logger.report_scalar(
                        "train", "accuracy", iteration=iteration, value=epoch_acc
                    )
                else:
                    # Validation metrics are reported under the "test" title.
                    logger.report_scalar(
                        "test", "loss", iteration=iteration, value=epoch_loss
                    )
                    logger.report_scalar(
                        "test", "accuracy", iteration=iteration, value=epoch_acc
                    )

                    if epoch_acc > best_acc:
                        best_acc = epoch_acc
                        logger.report_single_value("best accuracy", best_acc)

                        torch.save(model.state_dict(), best_model_params_path)

                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            print()

        time_elapsed = time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best val Acc: {best_acc:.4f}')

        # Restore the best checkpoint while the temporary directory still exists.
        model.load_state_dict(torch.load(best_model_params_path))
    return model

In [5]:
# Register this run with ClearML under the "AI-DJ" project.
task = Task.init(
    project_name="AI-DJ",
    task_name="Vgg11 Training",
    tags=["HAR"],
)

# Descriptive hyperparameters attached to the task.
# NOTE(review): these values are metadata only; nothing in this cell feeds
# them into the optimizer, scheduler or epoch count. Confirm they match what
# the training cells actually use before relying on them in the ClearML UI.
args = {
    "model": "Vgg11",
    "loss": "Cross Entropy",
    "num_epochs": 10,
    "optimizer": "SGD",
    "scheduler": "exp_lr_scheduler_5_0.9",
    "dataset": "resize v2",
    "batch": 8,
    "lr": 0.005,
}

task.connect(args)

ClearML Task: created new task id=cb31a6afd19a43269fa6ec92cf34b41a
2023-11-03 17:28:33,438 - clearml.Task - INFO - Storing jupyter notebook directly as code
ClearML results page: https://app.clear.ml/projects/b3648b77d87c4a8e95218e2aa2d8fe86/experiments/cb31a6afd19a43269fa6ec92cf34b41a/output/log


{'model': 'Vgg11',
 'loss': 'Cross Entropy',
 'num_epochs': 10,
 'optimizer': 'SGD',
 'scheduler': 'exp_lr_scheduler_5_0.9',
 'dataset': 'resize v2',
 'batch': 8,
 'lr': 0.005}

In [10]:
# Create the network and the loss only when the kernel does not already hold
# them. On a fresh kernel ("Restart & Run All") this avoids the NameError the
# commented-out definitions used to cause; in a live session, re-running the
# cell keeps the weights trained so far.
if "model" not in globals():
    # NOTE(review): num_classes=5 is taken from the previously commented-out
    # line; confirm it equals len(class_names) for this dataset.
    model = models.vgg11(num_classes=5)
    model = model.to(device)

if "criterion" not in globals():
    criterion = nn.CrossEntropyLoss()

# A new optimizer/scheduler pair is built on every run of this cell.
optimizer_ft = optim.SGD(model.parameters(), lr=0.003)

# Multiply the learning rate by 0.9 every 15 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=15, gamma=0.9)


In [11]:
# Run 30 epochs; history=20 shifts the iteration index of the reported
# scalars by 20.
model = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=30,
    history=20,
)

Epoch 0/29
----------
train Loss: 0.9879 Acc: 0.6062
val Loss: 1.0921 Acc: 0.5635

Epoch 1/29
----------
train Loss: 0.9734 Acc: 0.6144
val Loss: 1.0098 Acc: 0.5940

Epoch 2/29
----------
train Loss: 0.9504 Acc: 0.6215
val Loss: 1.0629 Acc: 0.5745

Epoch 3/29
----------
train Loss: 0.9302 Acc: 0.6357
val Loss: 1.0321 Acc: 0.5963

Epoch 4/29
----------
train Loss: 0.9043 Acc: 0.6456
val Loss: 1.1005 Acc: 0.5687

Epoch 5/29
----------
train Loss: 0.8775 Acc: 0.6535
val Loss: 1.0415 Acc: 0.6009

Epoch 6/29
----------
train Loss: 0.8611 Acc: 0.6640
val Loss: 1.0032 Acc: 0.6032

Epoch 7/29
----------
train Loss: 0.8400 Acc: 0.6730
val Loss: 0.9861 Acc: 0.6101

Epoch 8/29
----------
train Loss: 0.8065 Acc: 0.6911
val Loss: 0.9793 Acc: 0.6176

Epoch 9/29
----------
train Loss: 0.7791 Acc: 0.7039
val Loss: 0.9595 Acc: 0.6245

Epoch 10/29
----------
train Loss: 0.7462 Acc: 0.7191
val Loss: 0.9885 Acc: 0.6314

Epoch 11/29
----------
train Loss: 0.7258 Acc: 0.7258
val Loss: 0.9824 Acc: 0.6279

Ep

In [12]:
# Close the ClearML task created by Task.init above.
# NOTE(review): the checkpoint is written in a later cell, after the task is
# closed -- confirm it is not meant to be tracked as part of this task.
task.close()

In [13]:
# Persist the trained weights. torch.save does not create missing parent
# directories, so make sure the target folder exists first.
checkpoint_path = "vgg11/model_50.pt"
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
torch.save(model.state_dict(), checkpoint_path)
