In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from tqdm import tqdm
from torchvision import datasets, transforms

In [None]:
import torch
class GlobalManager:
    """Holds the device selection and a logging switch used by the experiments.

    Args:
        use_gpu: When true, ``current()`` returns the CUDA device; otherwise
            it returns the CPU device.
        log_state: When true, ``log()`` prints its message; otherwise ``log()``
            does nothing.
    """

    def __init__(self, use_gpu=False, log_state=False):
        self.use_gpu = use_gpu
        self.log_state = log_state
        self.cpu_d = torch.device("cpu")
        # Fall back to the CPU *device object*. Storing the bound method
        # ``self.cpu`` here (as before) made ``current()`` return a method,
        # which breaks ``tensor.to(manager.current())`` whenever use_gpu is
        # False.
        self.device = torch.device("cuda") if use_gpu else self.cpu_d

    def is_gpu(self):
        """Return the ``use_gpu`` flag given at construction."""
        return self.use_gpu

    def current(self):
        """Return the device selected at construction."""
        return self.device

    def cpu(self):
        """Return the CPU device."""
        return self.cpu_d

    def log(self, msg):
        """Print ``msg`` when logging was enabled at construction."""
        if self.log_state:
            print(msg)

In [None]:
class ExecutionEvents:
    """Optional callback hub for the training loop.

    Handlers are stored on the instance under the attribute names ``es``
    (epoch start), ``ee`` (epoch end), ``bp`` (batch processed) and ``v``
    (validated). An event whose attribute is absent is silently ignored.
    """

    def __init__(self):
        pass

    def set_model(self, model, optimizer):
        """Remember the model and optimizer so handlers can reach them."""
        self.model, self.optimizer = model, optimizer

    def _dispatch(self, slot, *args, **kwargs):
        """Invoke the handler stored under ``slot`` if that attribute exists."""
        if hasattr(self, slot):
            getattr(self, slot)(*args, **kwargs)

    # --- event entry points -------------------------------------------------

    def on_epoch_start(self, epoch, **kwargs):
        """Forward ``epoch`` and extra keyword data to the ``es`` handler."""
        self._dispatch('es', epoch, **kwargs)

    def on_epoch_end(self, epoch, losses, avg_loss, **kwargs):
        """Forward the epoch's losses and their average to the ``ee`` handler."""
        self._dispatch('ee', epoch, losses, avg_loss, **kwargs)

    def on_batch_processed(self, epoch, batch, loss, **kwargs):
        """Forward a single batch's loss to the ``bp`` handler."""
        self._dispatch('bp', epoch, batch, loss, **kwargs)

    def on_validated(self, epoch, loss, **kwargs):
        """Forward the validation loss to the ``v`` handler."""
        self._dispatch('v', epoch, loss, **kwargs)

    # --- handler registration -----------------------------------------------

    def attach_epoch_start(self, func):
        """Register ``func(epoch, **kwargs)`` for epoch starts."""
        self.es = func

    def attach_epoch_end(self, func):
        """Register ``func(epoch, losses, avg_loss, **kwargs)`` for epoch ends."""
        self.ee = func

    def attach_batch_processed(self, func):
        """Register ``func(epoch, batch, loss, **kwargs)`` for processed batches."""
        self.bp = func

    def attach_validated(self, func):
        """Register ``func(epoch, loss, **kwargs)`` for finished validations."""
        self.v = func
    

In [None]:
class PreprocessingDataLoader:
    """Wraps an iterable of batches and passes every batch through ``func``.

    Each batch produced by ``dl`` is unpacked into positional arguments for
    ``func``; whatever ``func`` returns is yielded in its place.
    """

    def __init__(self, dl, func):
        self.dl, self.func = dl, func

    def __len__(self):
        """Number of batches, as reported by the wrapped loader."""
        return self.dl.__len__()

    def __iter__(self):
        """Lazily yield ``func(*batch)`` for every batch of the wrapped loader."""
        yield from map(lambda batch: self.func(*batch), self.dl)

In [None]:
class EarlyStopping(object):
    """Signals when a monitored metric has stopped improving.

    Args:
        mode: ``'min'`` when lower metric values are better, ``'max'`` when
            higher values are better.
        min_delta: Minimum change that counts as an improvement. An absolute
            amount, or a percentage of the best value when ``percentage`` is
            true.
        patience: Number of consecutive non-improving ``step`` calls after
            which ``step`` returns True. ``0`` disables stopping: ``step``
            then always returns False.
        percentage: Interpret ``min_delta`` as a percentage of the best value.

    Raises:
        ValueError: If ``mode`` is neither ``'min'`` nor ``'max'``.
    """

    def __init__(self, mode='min', min_delta=0, patience=10, percentage=False):
        self.mode = mode
        self.min_delta = min_delta
        self.patience = patience
        self.best = None
        self.num_bad_epochs = 0
        self.is_better = None
        self._init_is_better(mode, min_delta, percentage)

        if patience == 0:
            # Instance-level overrides: every value "improves" and step never
            # asks to stop.
            self.is_better = lambda a, b: True
            self.step = lambda a: False

    @staticmethod
    def _is_nan(metrics):
        """Return True when ``metrics`` (tensor, float or numpy scalar) holds a NaN."""
        return bool(torch.isnan(torch.as_tensor(metrics)).any())

    def step(self, metrics):
        """Record one metric value and report whether training should stop.

        Args:
            metrics: Latest value of the monitored metric; a 0-dim tensor or a
                plain number.

        Returns:
            True when the value is NaN or when ``patience`` consecutive calls
            brought no improvement; False otherwise.
        """
        # NaN is checked first so that a NaN can never be stored as ``best``
        # (every later comparison against a NaN best would be False).
        if self._is_nan(metrics):
            return True

        if self.best is None:
            self.best = metrics
            return False

        if self.is_better(metrics, self.best):
            self.num_bad_epochs = 0
            self.best = metrics
        else:
            self.num_bad_epochs += 1

        return self.num_bad_epochs >= self.patience

    def _init_is_better(self, mode, min_delta, percentage):
        """Install the comparison ``is_better(value, best)`` for the given mode."""
        if mode not in {'min', 'max'}:
            raise ValueError('mode ' + mode + ' is unknown!')
        if not percentage:
            if mode == 'min':
                self.is_better = lambda a, best: a < best - min_delta
            if mode == 'max':
                self.is_better = lambda a, best: a > best + min_delta
        else:
            if mode == 'min':
                self.is_better = lambda a, best: a < best - (
                            best * min_delta / 100)
            if mode == 'max':
                self.is_better = lambda a, best: a > best + (
                            best * min_delta / 100)

In [None]:
from tqdm import tqdm
import torch.nn
import torch
from torch.utils.data import DataLoader, random_split

class Executor:
    def __init__(self, model, manager):
        self.model = model.to(manager.current())
        self.manager = manager
    
    def configure(self, optimizer, loss="cross_entropy", preprocessor=None):
        self.optimizer = optimizer
        if isinstance(loss, str):
            if loss == "mse":
                self.criterion = F.mse_loss
            elif loss == "cross_entropy":
                self.criterion = F.cross_entropy
            else:
                raise Exception("Undefined loss func, use function or nn.Module instead!")
        elif isinstance(loss, nn.Module) or type(loss).__name__ == "function":
            self.criterion = loss
        else:
            raise Exception("loss parameter must be string, function or nn.Module!")
        
        if preprocessor is None:
            self.preproc = False
            pass
        elif type(preprocessor).__name__ == "function":
            self.preproc = True
            self.preprocessor = preprocessor
        else:
            raise Exception("Preprocessor type not recognized!")

    def train(self, train_dataset, epochs, val_split=0.15, train_batch=32, val_batch=32,
              event_listener=None, progress=True, metrics={},
              early_stopping=False, patience=5, disturb=False):
        total = len(train_dataset)
        val_count = int(val_split * total)
        self.train_set, self.val_set = random_split(train_dataset, (total - val_count, val_count))

        train_loader = DataLoader(self.train_set, batch_size=train_batch, shuffle=True)
        val_loader = DataLoader(self.val_set, batch_size=val_batch, shuffle=True)

        if self.preproc:
            train_loader = PreprocessingDataLoader(train_loader, self.preprocessor)
            val_loader = PreprocessingDataLoader(val_loader, self.preprocessor)
        event_listener = ExecutionEvents() if event_listener is None else event_listener
        event_listener.set_model(self.model, self.optimizer)
        if early_stopping:
            self.es = EarlyStopping(patience=patience)
        if disturb:
            self.disturb = DisturbLabel(alpha=20, C=10)
        for i in range(epochs):
            #Training an epoch
            model.train()
            losses = {}
            metrics_v = {}
            for metric in metrics:
                metrics_v[metric] = {}
            it = enumerate(train_loader)
            it = tqdm(it, total=len(train_loader), position=0) if progress else it
            if progress:
                it.set_description('Epoch %d' % i)
            event_listener.on_epoch_start(i)
            for batch_idx, (data, target) in it:
                data, target = data.to(self.manager.current()), target.to(self.manager.current())
                if disturb:
                    target = self.disturb(target).to(self.manager.current())
                def fwd():
                    self.optimizer.zero_grad()
                    output = self.model(data)
                    loss = self.criterion(output, target)
                    losses[batch_idx] = loss.item()
                    for metric in metrics:
                        metrics_v[metric][batch_idx] = metrics[metric](output, target).item()
                    loss.backward(create_graph=True)
                    return loss, output
                self.optimizer.step(closure=fwd)
                event_listener.on_batch_processed(i, batch_idx, losses[batch_idx])
                if progress:
                    m_mean = {k: np.mean(list(metrics_v[k].values())) for k in metrics}
                    it.set_postfix(batch=batch_idx, loss=losses[batch_idx], mean_loss=np.mean(list(losses.values())),
                                  **m_mean)
            if progress:
                it.close()
            ls = list(losses.values())
            m_mean = {k: np.mean(list(metrics_v[k].values())) for k in metrics}
            m_all = {k+"_all": list(metrics_v[k].values()) for k in metrics}
            event_listener.on_epoch_end(i, ls, np.mean(ls), **m_mean, **m_all)
            #Validating an epoch
            model.eval()

            it = val_loader
            it = tqdm(it, total=len(val_loader), position=0) if progress else it
            if progress:
                it.set_description('Validating Epoch %d' % i)

            loss_val = []
            metrics_val = {}
            for metric in metrics:
                metrics_val[metric] = []
            with torch.no_grad():
                for data, target in it:
                    data, target = data.to(self.manager.current()), target.to(self.manager.current())
                    output = self.model(data)
                    loss = self.criterion(output, target)
                    loss = loss.item()
                    loss_val.append(loss)
                    for metric in metrics:
                        metrics_val[metric].append(metrics[metric](output, target).item())
                    if progress:
                        m_mean = {k: np.mean(metrics_val[k]) for k in metrics}
                        it.set_postfix(loss=loss, mean_loss=np.mean(loss_val),
                                    **m_mean)
            m_mean = {k: np.mean(metrics_val[k]) for k in metrics}
            m_all = {k+"_all": metrics_val[k] for k in metrics}
            event_listener.on_validated(i, np.mean(loss_val), **m_mean, **m_all)
            if early_stopping:
                if self.es.step(torch.tensor(np.mean(loss_val))):
                    break
            if progress:
                it.close()
    def eval(self, test_dataset, batch_size=32, progress=True, metrics={}):
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

        if self.preproc:
            test_loader = PreprocessingDataLoader(test_loader, self.preprocessor)

        model.eval()

        it = test_loader
        it = tqdm(it, total=len(test_loader), position=0) if progress else it
        if progress:
            it.set_description("Evaluating")

        loss_val = []
        metrics_val = {}
        for metric in metrics:
            metrics_val[metric] = []
        outs = []
        with torch.no_grad():
            for data, target in it:
                data, target = data.to(self.manager.current()), target.to(self.manager.current())
                output = self.model(data)
                outs.append(output)
                loss = self.criterion(output, target).item()
                loss_val.append(loss)
                for metric in metrics:
                    metrics_val[metric].append(metrics[metric](output, target).item())
                if progress:
                    m_mean = {k: np.mean(metrics_val[k]) for k in metrics}
                    it.set_postfix(loss=loss, mean_loss=np.mean(loss_val),
                                **m_mean)
        m_mean = {k: np.mean(metrics_val[k]) for k in metrics}
        m_mean["loss"] = np.mean(loss_val)
        if progress:
            it.close()
        return torch.cat(outs), m_mean

In [None]:
# Pick the CUDA device when torch reports one as available, else the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

def accuracy(pred, gt):
    """Fraction of rows of ``pred`` whose arg-max along dim 1 equals ``gt``.

    Args:
        pred: Tensor of per-class scores with classes along dim 1.
        gt: Tensor of class indices with one entry per row of ``pred``.

    Returns:
        0-dim float tensor: correct predictions divided by ``pred.shape[0]``.
    """
    predicted = torch.argmax(pred, dim=1, keepdim=True)
    hits = (predicted == gt.view_as(predicted)).sum()
    return hits.float() / predicted.size(0)

In [None]:
def loss(predicted, ground_truth):
    """Negative log-likelihood of ``ground_truth`` under the log-probabilities ``predicted``."""
    return F.nll_loss(input=predicted, target=ground_truth)

def loss_n(model, lambda_):
    """Build the unregularised loss.

    ``model`` and ``lambda_`` are accepted so the factory has the same
    signature as the regularised variants; neither is used.

    Returns:
        A function ``loss(predicted, ground_truth)`` computing the plain
        negative log-likelihood.
    """
    def loss(predicted, ground_truth):
        return F.nll_loss(input=predicted, target=ground_truth)

    return loss

def loss_l1(model, lambda_):
    """Build a negative log-likelihood loss with an L1 penalty on the weights.

    Args:
        model: Module whose parameters with ``'weight'`` in their name are
            penalised.
        lambda_: Multiplier applied to the penalty.

    Returns:
        A function ``loss(predicted, ground_truth)`` returning
        ``nll_loss + lambda_ * sum of the 1-norms of the weight parameters``.
    """
    def loss(predicted, ground_truth):
        # Start the accumulator on the predictions' device and dtype instead
        # of forcing CUDA, so the loss also works on CPU-only machines.
        l1_reg = predicted.new_zeros(())
        for name, param in model.named_parameters():
            if 'weight' in name:
                l1_reg = l1_reg + torch.norm(param, 1)
        return F.nll_loss(predicted, ground_truth) + lambda_ * l1_reg
    return loss

def loss_l2(model, lambda_):
    """Build a negative log-likelihood loss with a 2-norm penalty on all parameters.

    Args:
        model: Module whose parameters (weights and biases alike) are
            penalised.
        lambda_: Multiplier applied to the penalty.

    Returns:
        A function ``loss(predicted, ground_truth)`` returning
        ``nll_loss + lambda_ * sum of the (unsquared) 2-norms of every parameter``.
    """
    def loss(predicted, ground_truth):
        # Accumulate on the predictions' device and dtype instead of forcing
        # CUDA, so the loss also works on CPU-only machines.
        l2_reg = predicted.new_zeros(())
        for param in model.parameters():
            l2_reg = l2_reg + torch.norm(param)
        return F.nll_loss(predicted, ground_truth) + lambda_ * l2_reg
    return loss

def loss_group_lasso(model, lambda_):
    """Build a negative log-likelihood loss with a group-lasso penalty.

    Args:
        model: Module whose parameters are penalised. Parameters with
            ``'weight'`` in their name contribute the sum of their 2-norms
            taken along dim 1; parameters with ``'bias'`` in their name
            contribute their 2-norm.
        lambda_: Multiplier applied to the penalty.

    Returns:
        A function ``loss(predicted, ground_truth)`` returning
        ``nll_loss + lambda_ * penalty``.
    """
    def loss(predicted, ground_truth):
        # Accumulate on the predictions' device and dtype instead of forcing
        # CUDA, so the loss also works on CPU-only machines.
        gl_reg = predicted.new_zeros(())
        for name, param in model.named_parameters():
            if 'weight' in name:
                if param.dim() > 1:
                    gl_reg = gl_reg + param.norm(2, dim=1).sum()
                else:
                    # A 1-D weight has no dim 1 to group over; treat it as a
                    # single group rather than raising.
                    gl_reg = gl_reg + param.norm(2)
            if 'bias' in name:
                gl_reg = gl_reg + param.norm(2)
        return F.nll_loss(predicted, ground_truth) + lambda_ * gl_reg
    return loss

def loss_sparse_group_lasso(model, lambda_):
    """Build a negative log-likelihood loss with group-lasso plus L1 penalties.

    Args:
        model: Module whose parameters are penalised. Parameters with
            ``'weight'`` in their name contribute the sum of their 2-norms
            along dim 1 and their 1-norm; parameters with ``'bias'`` in their
            name contribute their 2-norm.
        lambda_: Multiplier applied to the combined penalty.

    Returns:
        A function ``loss(predicted, ground_truth)`` returning
        ``nll_loss + lambda_ * (group penalty + L1 penalty)``.
    """
    def loss(predicted, ground_truth):
        # Accumulate on the predictions' device and dtype instead of forcing
        # CUDA, so the loss also works on CPU-only machines.
        gl_reg = predicted.new_zeros(())
        l1_reg = predicted.new_zeros(())

        for name, param in model.named_parameters():
            if 'weight' in name:
                if param.dim() > 1:
                    gl_reg = gl_reg + param.norm(2, dim=1).sum()
                else:
                    # A 1-D weight has no dim 1 to group over; treat it as a
                    # single group rather than raising.
                    gl_reg = gl_reg + param.norm(2)
                l1_reg = l1_reg + torch.norm(param, 1)
            if 'bias' in name:
                gl_reg = gl_reg + param.norm(2)
        return F.nll_loss(predicted, ground_truth) + lambda_ * (gl_reg + l1_reg)
    return loss

In [None]:
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import CIFAR10

# Device selection: CUDA when torch reports it as available, otherwise CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Plain pipeline: convert to tensor, then normalise each of the three channels.
# NOTE(review): the constants are presumably the CIFAR-10 per-channel mean/std — confirm.
transformer = transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
                    ])
# Augmenting pipeline: random crop/grayscale/perspective and two consecutive
# random rotations, followed by the same tensor conversion and normalisation.
aug_transformer = transforms.Compose([
                        transforms.RandomCrop(32, padding=4),
                        transforms.RandomGrayscale(0.3),
                        transforms.RandomPerspective(),
                        transforms.RandomRotation(degrees=90),
                        transforms.RandomRotation(degrees=45),
                        transforms.ToTensor(),
                        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
                    ])

# Training split with the plain pipeline (downloaded to ../data if missing).
cifar_train = CIFAR10('../data', train=True, download=True, transform=transformer)
# Same training split with the augmenting pipeline; only a random subset of
# 15000 samples is kept, the remaining 35000 are discarded. The split is drawn
# from the global torch RNG, so it differs between runs unless a seed is set.
cifar_aug_train = CIFAR10('../data', train=True, download=True, transform=aug_transformer)
cifar_aug_train, _ = random_split(cifar_aug_train, (15000, 35000))
# Test split with the plain pipeline; no download is requested here.
cifar_test = CIFAR10('../data', train=False, transform=transformer)

In [None]:
class ConcatDataset(torch.utils.data.Dataset):
    """Reads several datasets in lockstep.

    Item ``i`` is the tuple made of item ``i`` of every wrapped dataset; the
    length is that of the shortest wrapped dataset.
    """

    def __init__(self, *datasets):
        self.datasets = datasets

    def __getitem__(self, i):
        """Return ``(datasets[0][i], datasets[1][i], ...)``."""
        return tuple(map(lambda source: source[i], self.datasets))

    def __len__(self):
        """Length of the shortest wrapped dataset."""
        return min(map(len, self.datasets))

In [None]:
class DisturbLabel(nn.Module):
    """Randomly replaces class labels by sampling from a per-sample distribution.

    For every label a categorical distribution over ``C`` classes is built in
    which the original class has probability ``p_c`` and every other class
    has probability ``p_i``; the returned label is one draw from it.

    Args:
        alpha: Noise level in percent.
        C: Number of classes.
    """

    def __init__(self, alpha, C):
        super(DisturbLabel, self).__init__()
        self.alpha = alpha
        self.C = C
        # Multinoulli distribution: probability of keeping the true class
        # (p_c) and of picking any single other class (p_i).
        self.p_c = (1 - ((C - 1)/C) * (alpha/100))
        self.p_i = (1 / C) * (alpha / 100)

    def forward(self, y):
        """Return labels of the same shape as ``y``, each sampled as described above."""
        # Class indices as a CPU int64 column vector.
        labels = y.type(torch.LongTensor).view(-1, 1)

        # One row of class probabilities per label: p_i everywhere, p_c at the
        # position of the original class.
        base = torch.ones(labels.size(0), self.C) * self.p_i
        probs = base.scatter(1, labels, self.p_c)
        probs = probs.view(*y.shape, -1)

        # Draw a one-hot sample per row and convert it back to a class index.
        sampler = torch.distributions.OneHotCategorical(probs)
        drawn = sampler.sample()
        return drawn.argmax(dim=1)

In [None]:
class CifarNet(nn.Module):
    """Three-convolution, three-linear classifier producing 10 log-probabilities.

    Args:
        dropout: When true, the dropout layers are applied in ``forward``;
            when false they are skipped entirely.
    """

    def __init__(self, dropout=False):
        super(CifarNet, self).__init__()

        self.conv1 = nn.Conv2d(3, 64, 3, 1, padding=1)
        self.conv2 = nn.Conv2d(64, 128, 3, 1, padding=1)
        self.conv3 = nn.Conv2d(128, 128, 3, 1)
        # 6*6*128 matches 32x32 inputs: two 2x2 poolings give 8x8, and the
        # unpadded 3x3 conv3 reduces that to 6x6.
        self.fc1 = nn.Linear(6*6*128, 720)
        self.fc2 = nn.Linear(720, 256)
        self.fc3 = nn.Linear(256, 10)

        self.dropout = dropout
        self.dropout1 = nn.Dropout2d(0.2)
        self.dropout2 = nn.Dropout2d(0.1)
        self.dropout3 = nn.Dropout2d(0.2)
        self.dropout4 = nn.Dropout(0.15)
        # Normalise over the class dimension explicitly; leaving ``dim`` out
        # relies on the deprecated implicit choice and warns on every call.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Map a batch of 3-channel images to per-class log-probabilities.

        Args:
            x: Image batch. The ``fc1`` input size assumes 32x32 images.

        Returns:
            Tensor with 10 log-probabilities per sample (normalised along dim 1).
        """
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)

        if self.dropout:
            x = self.dropout1(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)

        if self.dropout:
            x = self.dropout2(x)

        x = self.conv3(x)
        x = F.relu(x)

        if self.dropout:
            x = self.dropout3(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)

        if self.dropout:
            x = self.dropout4(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = self.softmax(x)
        return x

In [None]:
import torch.optim as optim
!wget -q https://github.com/LiyuanLucasLiu/RAdam/raw/master/radam/radam.py
from radam import RAdam

# Optimizer factories keyed by "<optimizer>-<learning rate>". Each value takes
# the model parameters and returns a configured optimizer. The key format is
# parsed later with split("-"), and the insertion order is the order in which
# the experiments run.
# NOTE(review): only the RAdam entries set weight_decay; the other optimizers
# use their defaults — confirm this asymmetry is intended for the comparison.
configList = {
    "radam-0.01": lambda p: RAdam(p, lr=0.01, weight_decay=5e-4),
    "radam-0.001": lambda p: RAdam(p, lr=0.001, weight_decay=5e-4),
    "adagrad-0.01": lambda p: optim.Adagrad(p, lr=0.01),
    "adagrad-0.001": lambda p: optim.Adagrad(p, lr=0.001),
    "sgd-0.01": lambda p: optim.SGD(p, lr=0.01, momentum=0.9),
    "sgd-0.001": lambda p: optim.SGD(p, lr=0.001, momentum=0.9),
    "adam-0.01": lambda p: optim.Adam(p, lr=0.01),
    "adam-0.001": lambda p: optim.Adam(p, lr=0.001),
    "adamax-0.01": lambda p: optim.Adamax(p, lr=0.01),
    "adamax-0.001": lambda p: optim.Adamax(p, lr=0.001),
}

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the results file is saved and loaded
# under /content/drive/MyDrive further down.
drive.mount('/content/drive')

In [None]:
# Train one CifarNet per (optimizer config, loss) pair, collect the per-epoch
# training/validation histories and the test-set summary, and persist the
# results collected so far after every run.
recording = []
losses = [("normal", loss_n)]
for optK in configList:
    for ln, l in losses:
        # History of this run; filled by the listener callbacks below.
        data = {
            "loss_reg": ln,
            "opt": optK,
            "dropout": True,
            "losses": [],
            "val_losses":[],
            "acc":[],
            "val_acc":[],
        }

        model = CifarNet(dropout=True)
        train_set = cifar_train
        # Use the GPU only when one is actually available (``use_cuda`` is
        # computed in an earlier cell) instead of hard-coding use_gpu=True,
        # which makes the cell unusable on CPU-only machines.
        executor = Executor(model, GlobalManager(use_gpu=use_cuda, log_state=True))
        executor.configure(configList[optK](model.parameters()), l(model, 0), preprocessor=None)
        listener = ExecutionEvents()

        def vd(ep, loss, **kw):
            # Validation finished: mean validation loss and per-batch accuracies.
            data["val_losses"].append(loss)
            data["val_acc"].append(kw["acc_all"])

        def ee(ep, loss, avg, **kw):
            # Epoch finished: per-batch training losses and accuracies.
            data["losses"].append(loss)
            data["acc"].append(kw["acc_all"])

        listener.attach_validated(vd)
        listener.attach_epoch_end(ee)
        executor.train(train_set, 75, train_batch=400, val_batch=600,
                        early_stopping=True, patience=6, disturb=False,
                        event_listener=listener, metrics={"acc":accuracy})
        res, data["info"] = executor.eval(cifar_test, batch_size=600, metrics={"acc":accuracy})
        recording.append(data)
        # Saved inside the loop so finished runs survive an interrupted session.
        torch.save(recording, "/content/drive/MyDrive/results-cifar-new.data")

In [None]:
import torch
# Reload the run histories from the path the training cell saves to.
# NOTE(review): torch.load unpickles the file — only load files you trust.
new_d = torch.load('/content/drive/MyDrive/results-cifar-new.data')

In [None]:
# List which loss / optimizer combination every loaded run belongs to.
for run in new_d:
    print("loss: %s | opt: %s" % (run["loss_reg"], run["opt"]))

In [None]:
import pandas as pd
import numpy as np

# Summarise every recorded run as one DataFrame row.
#
# The rows are collected as a list of records rather than in module-level
# lists named ``loss``, ``loss_n``, ``acc`` and so on: those names used to
# overwrite the ``loss`` and ``loss_n`` functions defined earlier in the
# notebook, so re-running the training cell after this one failed.
#
# Columns ending in "_l" keep the raw per-epoch histories; the columns without
# the suffix hold the mean over the whole history of a run.
summary_columns = [
    "loss_name", "optimizer", "learning_rate",
    "loss", "acc", "val_loss", "val_acc",
    "loss_l", "acc_l", "val_loss_l", "val_acc_l",
]
records = []
for da in new_d:
    # Keys look like "<optimizer>-<learning rate>".
    od = da["opt"].split("-")
    records.append({
        "loss_name": da["loss_reg"],
        "optimizer": od[0],
        "learning_rate": float(od[1]),
        "loss": np.mean(da["losses"]),
        "acc": np.mean(da["acc"]),
        "val_loss": np.mean(da["val_losses"]),
        "val_acc": np.mean(da["val_acc"]),
        "loss_l": da["losses"],
        "acc_l": da["acc"],
        "val_loss_l": da["val_losses"],
        "val_acc_l": da["val_acc"],
    })
# ``columns`` fixes the column order and keeps the columns present even when
# no runs were loaded.
df = pd.DataFrame(records, columns=summary_columns)

In [None]:
# Show the scalar summary columns only (the "_l" columns hold raw histories),
# best validation accuracy first.
view_cols = [col for col in df.columns if not col.endswith("_l")]
df.sort_values(by="val_acc", ascending=False).loc[:, view_cols]

In [None]:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Five-colour palette; plot_loss_feat reads this module-level name as its line palette.
colorPal = sns.color_palette(palette=["#F75590", "#3DB1F5", "#9EE493", "#FFF689", "#FF3D3D"])

In [None]:
def plot_loss_feat(target, loss, feats, means, titles, pal=None):
    """Plot per-epoch curves of several history columns, one subplot per column.

    Args:
        target: DataFrame with a ``loss_name`` column, an ``optimizer`` column
            and the history columns named in ``feats``.
        loss: Only rows whose ``loss_name`` equals this value are plotted.
        feats: Names of the history columns to plot, one subplot each.
        means: One flag per entry of ``feats``; when true the history is
            averaged along axis 1 before plotting (use this for columns that
            store several values per epoch).
        titles: One subplot title per entry of ``feats``.
        pal: Palette for the lines. Defaults to the module-level ``colorPal``,
            looked up at call time.
    """
    # squeeze=False keeps ``axs`` indexable even when a single feature is
    # requested; the single column is then flattened to a 1-D array of axes.
    fig, axs = plt.subplots(nrows=len(feats), squeeze=False)
    axs = axs[:, 0]
    fig.set_size_inches(10, 20)

    # Honour the ``pal`` argument (it used to be ignored).
    palette = colorPal if pal is None else pal
    # The row filter does not depend on the feature, so apply it once.
    runs = target[target["loss_name"] == loss]

    for k, feat in enumerate(feats):
        x = []
        y = []
        style = []
        for i in range(len(runs)):
            row = runs.iloc[i]
            v = np.array(row[feat])
            if means[k]:
                v = np.mean(v, axis=1)
            x += list(range(len(v)))
            y += list(v)
            # One hue label per plotted point: the run's optimizer name.
            style += [row["optimizer"]] * len(v)
        fg = sns.lineplot(x=x, y=y, hue=style, ax=axs[k], palette=palette)
        tt = fg.set_title(titles[k])
        plt.setp(tt, color='white')
        legend = fg.get_legend()
        if legend is not None:  # no legend exists when nothing was plotted
            frame = legend.get_frame()
            frame.set_facecolor('#202446')
            frame.set_edgecolor('#2D3262')
            for text in legend.get_texts():
                text.set_color("white")

    # Dark theme; applied once to every axis instead of once per feature.
    for ax in axs:
        for side in ('bottom', 'top', 'right', 'left'):
            ax.spines[side].set_color('white')
        ax.xaxis.label.set_color('white')
        ax.yaxis.label.set_color('white')
        ax.tick_params(colors='white', which='both')
        ax.set_facecolor("#0e101f")
    fig.set_facecolor("#0e101f")

In [None]:
# Curves of all runs trained with learning rate 0.001.
plot_loss_feat(
    df.loc[df["learning_rate"] == 0.001],
    "normal",
    feats=["loss_l", "val_loss_l", "acc_l", "val_acc_l"],
    means=[True, False, True, True],
    titles=["Loss", "Validation Loss", "Accuracy", "Validation Accuracy"],
)

In [None]:
# Curves of all runs trained with learning rate 0.01.
plot_loss_feat(
    df.loc[df["learning_rate"] == 0.01],
    "normal",
    feats=["loss_l", "val_loss_l", "acc_l", "val_acc_l"],
    means=[True, False, True, True],
    titles=["Loss", "Validation Loss", "Accuracy", "Validation Accuracy"],
)

In [None]:
# Plot every run in one figure: the hue label becomes "<optimizer>-<learning rate>".
ndf = df.assign(optimizer=df["optimizer"] + "-" + df["learning_rate"].map(str))

# Ten colours, rebinding the module-level palette that plot_loss_feat reads.
colorPal = sns.color_palette(palette=["#F75590", "#AF0846", "#3DB1F5", "#0972AE", "#9EE493", "#339325", "#FFF689", "#FFEB0A", "#FF3D3D", "#A30000"])
plot_loss_feat(
    ndf,
    "normal",
    feats=["loss_l", "val_loss_l", "acc_l", "val_acc_l"],
    means=[True, False, True, True],
    titles=["Loss", "Validation Loss", "Accuracy", "Validation Accuracy"],
    pal=colorPal,
)