# Tuning Self-supervised Contrastive Learning

### Table of Content
1. [Dependencies](#import-and-set-up-dependencies)
    - [Tune Config](#tune-settings)
    - [Base Model Config](#base-model-configuration)
2. [Data](#prepare-datasets)
3. [Tuning Loop](#set-up-tuning-loop)
4. [Tune](#tune)
5. [Logging](#log-results)
6. [Test](#testing-using-tuned-model)

## Import and Set Up Dependencies

In [1]:
#   Setup
##  Standard packages
import os
import sys
import time
import math
import logging
import numpy as np
from sklearn.linear_model import LogisticRegression

##  Torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as vtransforms
import torch.backends.cudnn as cudnn
from torch.utils.data import Dataset, random_split
from torch.utils.data.sampler import WeightedRandomSampler
from torch import optim

##  SRC dataset, loader, and metrics
import src.data.dataset as ds
import src.data.dataloader as dl
import src.models as mdl
import src.utils.metric as customMetric
from src.utils.metric import calc_score

##  Self-supervised Contrastive Learning
from src.train import train_supcon, valid_supcon
from src.utils.supcontrast import TwoCropTransform, AverageMeter, SupConLoss
from src.utils.supcontrast import adjust_learning_rate, warmup_learning_rate
from src.utils.supcontrast import set_optimizer, save_model
from supCon import set_model
from src.test import test_supcon

##  Tuning Packages
import ray
from ray import tune
from ray.air import session
from ray.air.checkpoint import Checkpoint
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.bayesopt import BayesOptSearch
from ray.tune.search.hyperopt import HyperOptSearch

  from .autonotebook import tqdm as notebook_tqdm


Using cuda on NVIDIA GeForce GTX 1650 with Max-Q Design :D 


In [2]:
# -------------------- Globals --------------------#
# Device Config
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {DEVICE} on {torch.cuda.get_device_name(0)} :D ")

# Dataset Config
TASK_IN = "Task_11"
MAX_LENGTH = 3.0
SR = 8000
HOP_LENGTH = 128
MAX_LENGTH_SAMPLES = int(MAX_LENGTH * SR / HOP_LENGTH)
INPUT_X_DIM = int(MAX_LENGTH * SR / HOP_LENGTH)
N_F_BIN = 64
N_FFT = 512
FEATURE = "mfcc"

# DataLoader Config
VAL_PERCENT = 0.2
BATCH_SIZE = 32

# Log Config
formatter = logging.Formatter("%(asctime)s:%(levelname)s:%(name)s:%(message)s")

Using cuda on NVIDIA GeForce GTX 1650 with Max-Q Design :D 


#### Tune Settings

In [3]:
TUNE_STRAT = ["score", "max"]
BASE_CONFIG = {}
TUNE_CONFIG = {
    "temperature": tune.uniform(0.1, 0.9),
    "lr": tune.loguniform(1e-4, 1e-2),
    
}
TUNE_MAX_EPOCH = 20
TUNE_GPU_PER_TRIAL = 1
TUNE_CPU_PER_TRIAL = 8
TUNE_SAMPLE_NUM = 10

### Base Model Configuration

In [4]:
# -------------------- Define customized Argparse --------------------#
class modelSetting:
    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    def print_args(self):
        argparse_dict = vars(self)
        for key, value in argparse_dict.items():
            print(f"{key}: {value}")

opt = modelSetting(
    # Dataset Config
    task_in = "Task_11", data_path = "SPRSound/", 
    batch_size = BATCH_SIZE, val_percent = 0.2,
    
    # Model Config
    model = "resnet18", embedding_size = 128, 
    head = "linear", ckpt = "best.pth", 

    # Train Config
    print_freq = 50, save_freq = 50, epochs = 50,

    # Optim Config
    optimizer = "SGD",
    learning_rate = 0.001, momentum = 0.9,
    lr_decay_rate = 0.1, lr_decay_epochs = "70,80,90",
    weight_decay = 1e-4, dropout = 0.25,

    # SupCon Config
    temperature = 0.5, method = "SupCon",

    # Other Config
    cosine = True, warm = False, verbose = False,
)

iterations = opt.lr_decay_epochs.split(",")
opt.lr_decay_epochs = list([])
for it in iterations:
    opt.lr_decay_epochs.append(int(it))

# warm-up for large-batch training,
if opt.batch_size > 256:
    opt.warm = True
if opt.warm:
    opt.warmup_from = 0.01
    opt.warm_epochs = 10
    if opt.cosine:
        eta_min = opt.learning_rate * (opt.lr_decay_rate ** 3)
        opt.warmup_to = eta_min + (opt.learning_rate - eta_min) * (
                1 + math.cos(math.pi * opt.warm_epochs / opt.epochs)) / 2
    else:
        opt.warmup_to = opt.learning_rate

# set the path according to the environment
opt.model_path = "./temp/SupCon-Notes/{}_models".format(opt.task_in)
opt.model_name = "{}_{}{}_{}{}_hop{}_{}_lr{}_temp{}_drop{}_val{}".format(
    opt.model, 
    FEATURE, 
    N_F_BIN, 
    opt.head,
    opt.embedding_size,
    HOP_LENGTH, 
    opt.optimizer,
    opt.learning_rate,
    opt.temperature,
    opt.dropout,
    opt.val_percent,
)
opt.save_folder = os.path.join(opt.model_path, opt.model_name)
if not os.path.isdir(opt.save_folder):
    os.makedirs(opt.save_folder)

opt.print_args()

task_in: Task_11
data_path: SPRSound/
batch_size: 32
val_percent: 0.2
model: resnet18
embedding_size: 128
head: linear
ckpt: best.pth
print_freq: 50
save_freq: 50
epochs: 50
optimizer: SGD
learning_rate: 0.001
momentum: 0.9
lr_decay_rate: 0.1
lr_decay_epochs: [70, 80, 90]
weight_decay: 0.0001
dropout: 0.25
temperature: 0.5
method: SupCon
cosine: True
warm: False
verbose: False
model_path: ./temp/SupCon-Notes/Task_11_models
model_name: resnet18_mfcc64_linear128_hop128_SGD_lr0.001_temp0.5_drop0.25_val0.2
save_folder: ./temp/SupCon-Notes/Task_11_models\resnet18_mfcc64_linear128_hop128_SGD_lr0.001_temp0.5_drop0.25_val0.2


In [5]:
# -------------------- User-defined functions --------------------#
def setupLogger(name, logPath, level=logging.INFO):
    handler = logging.FileHandler(logPath)
    handler.setFormatter(formatter)
    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.addHandler(handler)
    return logger

## Set Globals
log_path = "logs/hyperTune.logs"
if not os.path.exists(log_path):
    open(log_path, "a").close()
logger = setupLogger("TuneSupConLogger", log_path)
main_task = int(TASK_IN[-2])
sub_task = int(TASK_IN[-1])
data_path = "SPRSound"
num_classes = len(dl.classes[TASK_IN])

In [6]:
data_dict={
    "train":[
        os.path.join(data_path, "train_wav"), 
        os.path.join(data_path, "train_json")
    ],
    "intra_test":[
        os.path.join(data_path, "test_wav"), 
        os.path.join(data_path, "test_json/intra_test_json")
    ],
    "inter_test":[
        os.path.join(data_path, "test_wav"),
        os.path.join(data_path, "test_json/inter_test_json")
    ],
}

[Home](#table-of-content)

## Prepare Datasets

In [7]:
## Preapare transformation and generate dataset
train_transform = vtransforms.Compose([
    vtransforms.RandomHorizontalFlip(),
    vtransforms.RandomCrop(size=(N_F_BIN, MAX_LENGTH_SAMPLES), padding=0, pad_if_needed=True),
])

trainDataset, intra_testDataset, inter_testDataset = ds.genDatasets(
    task=main_task, 
    data_dict=data_dict,
    resample=None,
    feature=FEATURE,
    pre_emph=False,
    pos_norm="zscore",
    n_mfcc=N_F_BIN,
    hop_length=HOP_LENGTH,
    n_fft=N_FFT,
)

In [8]:
supcon_loader = dl.trainValLoader(
    trainDataset,
    sub_task,
    valid_size=VAL_PERCENT,
    batch_size=BATCH_SIZE,
    collate_fn=lambda batch: dl.supcon_collate(
        batch, TASK_IN, sub_task, transform=TwoCropTransform(train_transform)
    ),
    train_sampler="balanced",
    val_sampler="balanced",
)

print("\n\nGenerating Dataloader for Train Dataset...")
dataloader = dl.trainValLoader(
    trainDataset,
    sub_task,
    valid_size=VAL_PERCENT,
    batch_size=BATCH_SIZE,
    collate_fn=lambda batch: dl.custom_collate(
        batch, MAX_LENGTH_SAMPLES, TASK_IN, sub_task
    ),
    train_sampler="balanced",
    val_sampler="balanced",
)

print("\n\nGenerating Dataloader for Intra Dataset...")
intra_testloader = dl.testLoader(
    intra_testDataset,
    batch_size=BATCH_SIZE,
    collate_fn=lambda batch: dl.custom_collate(
        batch, MAX_LENGTH_SAMPLES, TASK_IN, sub_task
    ),
    shuffle_in=False,
)

print("\nGenerating Dataloader for Inter Dataset...")
inter_testloader = dl.testLoader(
    inter_testDataset,
    batch_size=BATCH_SIZE,
    collate_fn=lambda batch: dl.custom_collate(
        batch, MAX_LENGTH_SAMPLES, TASK_IN, sub_task
    ),
    shuffle_in=False,
)

Getting Data... 20% Validation Set

Batch Size: 32

Train Len = 5325 , Validation Len = 1331

Balanced sampler is used for train.
Number of samples in each class:
 Counter({'Normal': 4119, 'Adventitious': 1206})

Balanced sampler is used for val.
Number of samples in each class:
 Counter({'Normal': 1040, 'Adventitious': 291})

Train Size Batched = 166 , Validation Size Batched = 41


Generating Dataloader for Train Dataset...
Getting Data... 20% Validation Set

Batch Size: 32

Train Len = 5325 , Validation Len = 1331

Balanced sampler is used for train.
Number of samples in each class:
 Counter({'Normal': 4122, 'Adventitious': 1203})

Balanced sampler is used for val.
Number of samples in each class:
 Counter({'Normal': 1037, 'Adventitious': 294})

Train Size Batched = 166 , Validation Size Batched = 41


Generating Dataloader for Intra Dataset...
Batch Size: 32
Test Len = 1004, Test Size Batched = 31

Generating Dataloader for Inter Dataset...
Batch Size: 32
Test Len = 1429, Test Size

[Home](#table-of-content)

## Set up Tuning-Loop

In [9]:
def trainingLoop(config, opt, supcon_loader, dataloader):
    opt.learning_rate = config["lr"]
    opt.temperature = config["temperature"]
    model, criterion = set_model(opt)
    optimizer = set_optimizer(opt, model)
    print("\n\nTraining...")
    print("Running for {} epochs...".format(opt.epochs))
    best_loss = 0
    best_epoch = 1
    # training routine
    for epoch in range(1, opt.epochs + 1):
        adjust_learning_rate(opt, optimizer, epoch)

        # train for one epoch
        time1 = time.time()
        train_loss = train_supcon(supcon_loader["train"], model, criterion, optimizer, epoch, opt)
        valid_loss = valid_supcon(supcon_loader["val"], model, criterion, opt)
        time2 = time.time()
        print("epoch {}, total time {:.2f}, train loss: {:.2f}, valid loss: {:.2f}".format(epoch, time2 - time1, train_loss, 1/valid_loss))

        if valid_loss > best_loss:
            best_loss = valid_loss
            best_model = model
            best_optimizer = optimizer
            best_epoch = epoch
    print("\n\nTesting..")
    best_model.eval()
    targets = []
    embeddings = torch.zeros((0, opt.embedding_size), dtype=torch.float32)
    for data, label, _ in dataloader["train"]:
        data = data.to(DEVICE)
        embedding = best_model(data)
        targets.extend(label.detach().cpu().tolist())
        embeddings = torch.cat((embeddings, embedding.detach().cpu()), dim=0)
    x_embed = np.array(embeddings)
    y = np.array(targets)

    # Create a logistic regression classifier
    classifier = LogisticRegression()
    classifier.fit(x_embed, y)
    predictions = classifier.predict(x_embed)

    print("\nResult for Train:")
    train_score, *_ = calc_score(y, predictions, verbose=True, task=int(opt.task_in[-2]))
    print("\nResult for Valid:")
    val_score = test_supcon(best_model, classifier, dataloader["val"], opt)
    # Here we save a checkpoint. It is automatically registered with
    # Ray Tune and can be accessed through `session.get_checkpoint()`
    # API in future iterations.
    os.makedirs("tuning_models", exist_ok=True)
    torch.save(
        (best_model.state_dict(), optimizer.state_dict()),
        "tuning_models/checkpoint.pt",
    )
    checkpoint = Checkpoint.from_directory("tuning_models")
    session.report(
        {
            "score": val_score,
        },
        checkpoint=checkpoint,
    )



In [10]:
strat_target, strat_mode = TUNE_STRAT
scheduler = ASHAScheduler(max_t=TUNE_MAX_EPOCH, grace_period=1, reduction_factor=2)
algo = HyperOptSearch(metric=strat_target, mode=strat_mode)
tuner = tune.Tuner(
    tune.with_resources(
        tune.with_parameters(
            trainingLoop, opt=opt, supcon_loader=supcon_loader, dataloader=dataloader
        ),
        resources={"cpu": TUNE_CPU_PER_TRIAL, "gpu": TUNE_GPU_PER_TRIAL},
    ),
    tune_config=tune.TuneConfig(
        metric=strat_target,
        mode=strat_mode,
        scheduler=scheduler,
        num_samples=TUNE_SAMPLE_NUM,
        search_alg=algo,
    ),
    param_space=TUNE_CONFIG,
)

[Home](#table-of-content)

## Tune

In [11]:
results = tuner.fit()

2023-05-19 10:54:35,824	INFO worker.py:1544 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Current time:,2023-05-19 16:05:27
Running for:,05:10:47.51
Memory:,11.2/15.7 GiB

Trial name,status,loc,lr,temperature,iter,total time (s),score
trainingLoop_c17e11e1,TERMINATED,127.0.0.1:22992,0.000629898,0.407588,1,2037.44,0.955615
trainingLoop_12dd535b,TERMINATED,127.0.0.1:22992,0.00155857,0.180024,1,1983.23,0.957936
trainingLoop_72ca5cc1,TERMINATED,127.0.0.1:22992,0.00296015,0.771114,1,1746.91,0.969826
trainingLoop_53eada76,TERMINATED,127.0.0.1:22992,0.000214106,0.869231,1,1774.47,0.954172
trainingLoop_60175507,TERMINATED,127.0.0.1:22992,0.00667781,0.332267,1,1691.4,0.972214
trainingLoop_17504815,TERMINATED,127.0.0.1:22992,0.000867774,0.859936,1,1820.43,0.967623
trainingLoop_3a4aaebb,TERMINATED,127.0.0.1:22992,0.000449841,0.778233,1,1983.04,0.966508
trainingLoop_3ed1f54f,TERMINATED,127.0.0.1:22992,0.00525245,0.758034,1,1995.78,0.958055
trainingLoop_de036944,TERMINATED,127.0.0.1:22992,0.00077561,0.259986,1,1754.55,0.970542
trainingLoop_507c5726,TERMINATED,127.0.0.1:22992,0.000210114,0.346956,1,1850.33,0.951079


[2m[36m(pid=22992)[0m Windows fatal exception: code 0xc0000139
[2m[36m(pid=22992)[0m 
[2m[36m(pid=22992)[0m Stack (most recent call first):
[2m[36m(pid=22992)[0m   File "c:\Users\leowc\anaconda3\envs\ECGJH\lib\ctypes\__init__.py", line 374 in __init__
[2m[36m(pid=22992)[0m   File "c:\Users\leowc\anaconda3\envs\ECGJH\lib\site-packages\torch\_ops.py", line 220 in load_library
[2m[36m(pid=22992)[0m   File "c:\Users\leowc\anaconda3\envs\ECGJH\lib\site-packages\torchvision\extension.py", line 20 in <module>
[2m[36m(pid=22992)[0m   File "<frozen importlib._bootstrap>", line 241 in _call_with_frames_removed
[2m[36m(pid=22992)[0m   File "<frozen importlib._bootstrap_external>", line 883 in exec_module
[2m[36m(pid=22992)[0m   File "<frozen importlib._bootstrap>", line 688 in _load_unlocked
[2m[36m(pid=22992)[0m   File "<frozen importlib._bootstrap>", line 1006 in _find_and_load_unlocked
[2m[36m(pid=22992)[0m   File "<frozen importlib._bootstrap>", line 1027 in _

[2m[36m(pid=22992)[0m Using cuda on NVIDIA GeForce GTX 1650 with Max-Q Design :D 
[2m[36m(trainingLoop pid=22992)[0m 
[2m[36m(trainingLoop pid=22992)[0m 
[2m[36m(trainingLoop pid=22992)[0m Training...
[2m[36m(trainingLoop pid=22992)[0m Running for 50 epochs...
[2m[36m(trainingLoop pid=22992)[0m epoch 1, total time 58.14, train loss: 23.74, valid loss: 23.83
[2m[36m(trainingLoop pid=22992)[0m epoch 2, total time 37.82, train loss: 23.41, valid loss: 23.46
[2m[36m(trainingLoop pid=22992)[0m epoch 3, total time 35.91, train loss: 23.25, valid loss: 24.01
[2m[36m(trainingLoop pid=22992)[0m epoch 4, total time 36.76, train loss: 23.08, valid loss: 23.76
[2m[36m(trainingLoop pid=22992)[0m epoch 6, total time 37.12, train loss: 22.77, valid loss: 23.12
[2m[36m(trainingLoop pid=22992)[0m epoch 7, total time 36.95, train loss: 22.80, valid loss: 23.23
[2m[36m(trainingLoop pid=22992)[0m epoch 8, total time 35.61, train loss: 22.58, valid loss: 22.56
[2m[36m(

Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,node_ip,pid,score,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
trainingLoop_12dd535b,2023-05-19_12-01-50,True,,64ba4ed4d2234032b3c268c15c829506,"2_lr=0.0016,temperature=0.1800",LAPTOP-C4D49LCR,1,127.0.0.1,22992,0.957936,True,1983.23,1983.23,1983.23,1684468910,0,,1,12dd535b,0.009974
trainingLoop_c17e11e1,2023-05-19_11-28-47,True,,64ba4ed4d2234032b3c268c15c829506,"1_lr=0.0006,temperature=0.4076",LAPTOP-C4D49LCR,1,127.0.0.1,22992,0.955615,True,2037.44,2037.44,2037.44,1684466927,0,,1,c17e11e1,0.009974


[2m[36m(trainingLoop pid=22992)[0m 
[2m[36m(trainingLoop pid=22992)[0m 
[2m[36m(trainingLoop pid=22992)[0m Training...
[2m[36m(trainingLoop pid=22992)[0m Running for 50 epochs...
[2m[36m(trainingLoop pid=22992)[0m epoch 1, total time 37.60, train loss: 10.50, valid loss: 10.52
[2m[36m(trainingLoop pid=22992)[0m epoch 2, total time 37.05, train loss: 10.33, valid loss: 10.36
[2m[36m(trainingLoop pid=22992)[0m epoch 3, total time 38.79, train loss: 10.28, valid loss: 10.63
[2m[36m(trainingLoop pid=22992)[0m epoch 5, total time 37.43, train loss: 10.16, valid loss: 10.63
[2m[36m(trainingLoop pid=22992)[0m epoch 6, total time 42.19, train loss: 10.13, valid loss: 10.60
[2m[36m(trainingLoop pid=22992)[0m epoch 7, total time 40.94, train loss: 10.04, valid loss: 10.61
[2m[36m(trainingLoop pid=22992)[0m epoch 8, total time 38.57, train loss: 10.04, valid loss: 10.48
[2m[36m(trainingLoop pid=22992)[0m epoch 9, total time 40.92, train loss: 9.97, valid loss: 

2023-05-19 16:05:27,898	INFO tune.py:798 -- Total run time: 18647.60 seconds (18647.48 seconds for the tuning loop).


[Home](#table-of-content)

## Log Results

In [12]:
best_result = results.get_best_result(strat_target, strat_mode)
best_val_score = best_result.metrics["score"]
# best_val_loss = best_result.metrics["loss"]
# best_val_accu = best_result.metrics["accuracy"]
print("Best trial config: {}".format(best_result.config))
# print("Best trial final validation loss: {}".format(best_val_loss))
# print("Best trial final validation accuracy: {}".format(best_val_accu))
print("Best trial final validation score: {}".format(best_val_score))

Best trial config: {'temperature': 0.33226734021311427, 'lr': 0.006677813948545732}
Best trial final validation score: 0.9722136368277405


[Home](#table-of-content)

## Testing using Tuned Model

In [13]:
## Test the best Network
### Set the opt based on best result ----- Edit this part for different variables
opt.temperature = best_result.config["temperature"]
opt.learning_rate = best_result.config["lr"]
### --------------------------------------------------
test_network = mdl.SupConResNet(
    name=opt.model, 
    head=opt.head,
    feat_dim=opt.embedding_size,
    dropout=opt.dropout,
).to(DEVICE)
best_chkpt = os.path.join(best_result.checkpoint.to_directory(), "checkpoint.pt")

model_state, _ = torch.load(best_chkpt)
test_network.load_state_dict(model_state)
test_network.eval()

targets = []
embeddings = torch.zeros((0, opt.embedding_size), dtype=torch.float32)
for data, label, _ in dataloader["train"]:
    data = data.to(DEVICE)
    embedding = test_network(data)
    targets.extend(label.detach().cpu().tolist())
    embeddings = torch.cat((embeddings, embedding.detach().cpu()), dim=0)

x_embed = np.array(embeddings)
y = np.array(targets)

# Create a logistic regression classifier
classifier = LogisticRegression()
classifier.fit(x_embed, y)
predictions = classifier.predict(x_embed)

with torch.no_grad():
    print("\nResult for Intra:")
    intra_score = test_supcon(test_network, classifier, intra_testloader, opt)
    print("\nResult for Inter:")
    inter_score = test_supcon(test_network, classifier, inter_testloader, opt)


Result for Intra:
Sensitivity (SE): 0.8639
Specificity (SP): 0.8968
Average Score (AS): 0.8804
Harmonic Score (HS): 0.8801
Score: 0.8802

Result for Inter:
Sensitivity (SE): 0.9152
Specificity (SP): 0.8702
Average Score (AS): 0.8927
Harmonic Score (HS): 0.8921
Score: 0.8924


In [15]:
logMessage = (
    f"SupContrast with: {opt.model}, Task: {TASK_IN}, inter score: {inter_score:>0.3}, "
    f"intra score: {intra_score:>0.3}, val_score: {best_val_score:>0.3}, best trial config: {best_result.config}")
logger.info(logMessage)