In [1]:
from functools import partial
import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split, TensorDataset   # DataLoader
import torchvision
import torchvision.transforms as transforms
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

In [2]:
#local_dir = "~/ray_results/local_dir"
cwd = os.getcwd()
print(cwd)
#assert cwd.startswith(os.path.expanduser(local_dir)), cwd
#assert not cwd.startswith("~"), cwd

/home/studio-lab-user


In [3]:
PATH = cwd + "/data/IEMOCAP/wav2vec/"

In [4]:
num_classes = 7

In [5]:
#model_option = 'xvec'
#model_option = 'base'
model_option = 'xlsr'

In [6]:
def load_data(data_dir=PATH):
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    trainset = torchvision.datasets.CIFAR10(
        root=data_dir, train=True, download=True, transform=transform)

    testset = torchvision.datasets.CIFAR10(
        root=data_dir, train=False, download=True, transform=transform)

    return trainset, testset
    """
    x_train = np.load(PATH + model_option + "_icp_hs_train.npy")
    x_val = np.load(PATH + model_option + "_icp_hs_val.npy")
    x_test = np.load(PATH + model_option + "_icp_hs_test.npy")

    y_train = np.load(PATH + model_option + "_icp_lb_train.npy", allow_pickle=True)
    y_val = np.load(PATH + model_option + "_icp_lb_val.npy", allow_pickle=True)
    y_test = np.load(PATH + model_option + "_icp_lb_test.npy", allow_pickle=True)
    """
    x_train = np.load(PATH + "icp_ft_train.npy")
    x_val = np.load(PATH + "icp_ft_val.npy")
    x_test = np.load(PATH + "icp_ft_test.npy")

    y_train = np.load(PATH + "icp_lb_train.npy", allow_pickle=True)
    y_val = np.load(PATH + "icp_lb_val.npy", allow_pickle=True)
    y_test = np.load(PATH + "icp_lb_test.npy", allow_pickle=True)
    """
    print(y_train.shape, y_val.shape, y_test.shape)       # (4696,) (935,) (1407,)
    print(x_train.shape, x_val.shape, x_test.shape)       # (4696, 3072) (935, 3072) (1407, 3072)

    train_dataset = TensorDataset(torch.tensor(x_train), torch.tensor(y_train))
    # train_dataset = TensorDataset(torch.from_numpy(x_train_eval).float(), torch.from_numpy(y_train_eval).float())
    val_dataset = TensorDataset(torch.tensor(x_val), torch.tensor(y_val))
    test_dataset = TensorDataset(torch.tensor(x_test), torch.tensor(y_test))

    #train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True) # num_workers
    #val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    #test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    return train_dataset, val_dataset, test_dataset

In [7]:
"""
class Net(nn.Module):
    def __init__(self, l1=120, l2=84):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, l1)
        self.fc2 = nn.Linear(l1, l2)
        self.fc3 = nn.Linear(l2, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
"""
# my simple MLP    
class Net(nn.Module):
    def __init__(self, num_classes=7, l1=1200, l2=840):
        super(Net, self).__init__()
        self.layers = nn.Sequential(
            #nn.Linear(3072, 1000),     # icp_ft.npy
            nn.Linear(6144, l1),
            nn.ReLU(),
            nn.Linear(l1, l2),
            nn.Linear(l2, num_classes)
        )

    def forward(self, x):
        # convert tensor (128, 1, 28, 28) --> (128, 1*28*28)
        x = x.view(x.size(0), -1)
        #print(x.shape)
        x = self.layers(x)
        return x

In [8]:
def train(config, checkpoint_dir=None):
    net = Net(num_classes, config["l1"], config["l2"])

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    #optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9)
    optimizer = Adam(net.parameters(), lr=config["lr"], weight_decay=config["w_decay"])

    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    train_subset, val_subset, testset = load_data()

    #test_abs = int(len(trainset) * 0.8)
    #train_subset, val_subset = random_split(
     #   trainset, [test_abs, len(trainset) - test_abs])

    trainloader = torch.utils.data.DataLoader(
        train_subset,
        batch_size=int(config["batch_size"]),
        shuffle=True,
        num_workers=4)
    valloader = torch.utils.data.DataLoader(
        val_subset,
        batch_size=int(config["batch_size"]),
        shuffle=True,
        num_workers=4)

    for epoch in range(10):  # loop over the dataset multiple times
        running_loss = 0.0
        epoch_steps = 0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            epoch_steps += 1
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                                running_loss / epoch_steps))
                running_loss = 0.0

        # Validation loss
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        for i, data in enumerate(valloader, 0):
            with torch.no_grad():
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loss = criterion(outputs, labels)
                val_loss += loss.cpu().numpy()
                val_steps += 1

        with tune.checkpoint_dir(epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((net.state_dict(), optimizer.state_dict()), path)

        tune.report(loss=(val_loss / val_steps), accuracy=correct / total)
    print("Finished Training")

In [9]:
def test_accuracy(net, device="cpu"):
    train_subset, val_subset, testset = load_data()

    testloader = torch.utils.data.DataLoader(
        testset, batch_size=4, shuffle=False, num_workers=2)

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

In [None]:
def main(num_samples=7, max_num_epochs=10, gpus_per_trial=2):
    #data_dir = os.path.abspath("./data")
    #load_data(data_dir)
    config = {
        "l1": tune.sample_from(lambda _: 10*2**np.random.randint(2, 9)),
        "l2": tune.sample_from(lambda _: 10*2**np.random.randint(2, 9)),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([2, 16, 32, 64])
    }
    #print(config)
    
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    #print(scheduler)
    
    reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "accuracy", "training_iteration"])
    #print(reporter)

    #reporter(timesteps_total=1)
     
    result = tune.run(
        #partial(train, data_dir=PATH),
        train,
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))

    best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, optimizer_state = torch.load(os.path.join(
        best_checkpoint_dir, "checkpoint"))
    best_trained_model.load_state_dict(model_state)

    test_acc = test_accuracy(best_trained_model, device)
    print("Best trial test set accuracy: {}".format(test_acc))


if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    main(num_samples=num_classes, max_num_epochs=100, gpus_per_trial=1)



== Status ==
Current time: 2022-03-02 16:57:00 (running for 00:00:00.16)
Memory usage on this node: 1.5/15.4 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 64.000: None | Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 2.0/4 CPUs, 1.0/1 GPUs, 0.0/8.68 GiB heap, 0.0/4.34 GiB objects (0.0/1.0 accelerator_type:T4)
Result logdir: /home/studio-lab-user/ray_results/train_2022-03-02_16-56-59
Number of trials: 7/7 (6 PENDING, 1 RUNNING)
+-------------------+----------+--------------------+--------------+------+------+-------------+
| Trial name        | status   | loc                |   batch_size |   l1 |   l2 |          lr |
|-------------------+----------+--------------------+--------------+------+------+-------------|
| train_c6a31_00000 | RUNNING  | 169.254.255.2:2569 |            2 |   40 |  320 | 0.00840225  |
| train_c6a31_00001 | PENDING  |                    |           32 |  160 | 1280 | 0.

  r, k = function_base._ureduce(


== Status ==
Current time: 2022-03-02 16:57:16 (running for 00:00:16.23)
Memory usage on this node: 4.2/15.4 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 64.000: None | Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: nan
Resources requested: 2.0/4 CPUs, 1.0/1 GPUs, 0.0/8.68 GiB heap, 0.0/4.34 GiB objects (0.0/1.0 accelerator_type:T4)
Result logdir: /home/studio-lab-user/ray_results/train_2022-03-02_16-56-59
Number of trials: 7/7 (6 PENDING, 1 RUNNING)
+-------------------+----------+--------------------+--------------+------+------+-------------+--------+------------+----------------------+
| Trial name        | status   | loc                |   batch_size |   l1 |   l2 |          lr |   loss |   accuracy |   training_iteration |
|-------------------+----------+--------------------+--------------+------+------+-------------+--------+------------+----------------------|
| train_c6a31_00000 | RUNNING  | 169.254.255.2:2