In [None]:
import torch
import torch.nn as nn
import torchvision
import torch.distributed as dist
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.utils.data as udata
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
from tqdm import tqdm
import argparse
import os


# Module-level default device string. When this file runs as a script, the
# __main__ block below rebinds `device` from --gpu (falling back to 'cpu').
device = 'cuda'

# Write a function that returns the train and test loaders.
# Note that the dataset argument can also be MNIST, so
# you might want to use if/else statements to handle
# the different cases when building the dataloaders.
def get_dataloaders(dataset = 'CIFAR10', bs = 128, model = 'resnet'):
    '''Build the (train, test) DataLoaders for CIFAR10 or MNIST.

    Args:
        dataset: 'CIFAR10' selects CIFAR10; any other value falls through
            to MNIST.
        bs: batch size of the training loader.
        model: for CIFAR10, any value other than 'resnet' appends a
            Grayscale transform after normalisation (the MLP in this file
            flattens inputs to 32*32 values). Ignored for MNIST.

    Returns:
        (trainloader, testloader). The train loader shuffles; the test
        loader uses a fixed batch size of 1000 and does not shuffle.

    Data is stored under './data' and downloaded on first use.
    '''
    if dataset == 'CIFAR10':
        dataset_cls = torchvision.datasets.CIFAR10
        steps = [
            transforms.ToTensor(),
            # Per-channel mean / std used for CIFAR10 normalisation.
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
        ]
        if model != 'resnet':
            steps.append(transforms.Grayscale())
    else:
        dataset_cls = torchvision.datasets.MNIST
        steps = [
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,)),
            # 2 px of padding per side (28x28 -> 32x32 for standard MNIST).
            transforms.Pad(2),
        ]

    # Train and test use the same, augmentation-free pipeline.
    pipeline = transforms.Compose(steps)
    trainset = dataset_cls('./data', download=True, train=True, transform=pipeline)
    testset = dataset_cls('./data', download=True, train=False, transform=pipeline)

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=bs, shuffle=True)
    testloader = torch.utils.data.DataLoader(testset, batch_size=1000, shuffle=False)
    return trainloader, testloader

class FlexibleMLP(nn.Module):
    '''Fully connected ReLU network over flattened 32*32 inputs, 10 outputs.

    Args:
        hidden_size: width of every hidden layer.
        hidden_layers: number of hidden Linear+ReLU pairs; values below 1
            still produce a single hidden layer.
    '''

    def __init__(self, hidden_size, hidden_layers):
        super().__init__()
        stack = []
        fan_in = 32 * 32
        # At least one hidden layer is always built.
        for _ in range(max(hidden_layers, 1)):
            stack.append(nn.Linear(fan_in, hidden_size))
            stack.append(nn.ReLU())
            fan_in = hidden_size
        stack.append(nn.Linear(hidden_size, 10))
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        '''Flatten ``x`` into rows of 32*32 values and return the logits.'''
        flat = x.view(-1, 32 * 32)
        return self.layers(flat)

class BasicBlock(nn.Module):
    '''Two 3x3 conv + batch-norm layers with an optional residual shortcut.

    Args:
        args: object exposing a boolean ``skip`` attribute; when false no
            shortcut module is created and no residual is added.
        in_planes: number of input channels.
        planes: number of output channels (times ``expansion``, which is 1).
        stride: stride of the first convolution (and of the shortcut
            projection, when one is needed).
    '''

    expansion = 1

    def __init__(self, args, in_planes, planes, stride=1):
        super().__init__()
        self.skip = args.skip

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(planes)

        if not self.skip:
            return

        out_planes = self.expansion * planes
        if stride == 1 and in_planes == out_planes:
            # Shapes already agree: an empty Sequential acts as the identity.
            self.shortcut = nn.Sequential()
        else:
            # Project the input with a 1x1 conv so it can be added to the output.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_planes))

    def forward(self, x):
        '''Apply conv-bn-relu-conv-bn, add the shortcut if enabled, then ReLU.'''
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if self.skip:
            out += self.shortcut(x)
        return F.relu(out)


class ResNet(nn.Module):
    '''Four-stage residual network whose channel counts scale with ``args.width``.

    Args:
        block: block class/factory called as ``block(args, in_planes, planes,
            stride)`` and exposing an ``expansion`` attribute.
        num_blocks: sequence of four ints, the number of blocks per stage.
        args: namespace providing ``width`` and ``skip``.
        num_classes: size of the final linear layer's output.
        dataset: "CIFAR10" selects 3 input channels; anything else selects 1.
    '''

    def __init__(self, block, num_blocks, args, num_classes=10, dataset = "CIFAR10"):
        super().__init__()
        self.args = args
        print(self.args.skip)
        base = self.args.width * 8
        self.in_planes = base

        in_channels = 3 if dataset == "CIFAR10" else 1
        self.conv1 = nn.Conv2d(in_channels, base, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(base)

        # Channels double at every stage: width*8, *16, *32, *64.
        self.layer1 = self._make_layer(block, base, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, base * 2, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, base * 4, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, base * 8, num_blocks[3], stride=2)
        self.linear = nn.Linear(base * 8 * block.expansion, num_classes)
        self.normed = False

        # Re-initialise every conv with Kaiming-normal weights and every
        # batch norm with weight 1 / bias 0.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    def _make_layer(self, block, planes, num_blocks, stride):
        '''Build one stage; only its first block uses ``stride``, the rest use 1.'''
        stage = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage.append(block(self.args, self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        '''Return class logits for a batch of images.'''
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        # NOTE(review): the 4x4 pooling window matches the 4x4 feature map that
        # 32x32 inputs produce after three stride-2 stages; other input sizes
        # would not match the linear layer's expected width.
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        return self.linear(out)

def resnet18_narrow(args, **kwargs):
    '''Build a ResNet of BasicBlocks with two blocks in each of the four stages.

    ``args.dataset`` is forwarded as the ``dataset`` argument; remaining
    keyword arguments are passed through to ``ResNet``.
    '''
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(BasicBlock, blocks_per_stage, args, dataset=args.dataset, **kwargs)

def train_epoch(model, train_loader, optimizer, criterion, device):
    '''Run one optimisation pass over ``train_loader``.

    Args:
        model: network to train; switched to train mode.
        train_loader: iterable of ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        criterion: accepted for interface compatibility but not used; the
            loss is always ``F.cross_entropy`` with its default reduction.
        device: device the batches are moved to.

    Returns:
        ``(model, mean_loss, error_percent)`` where the mean loss is weighted
        by batch size and the error is ``100 * (1 - accuracy)``.

    Raises:
        ZeroDivisionError: if the loader yields no samples.
    '''
    model.train()

    n_seen = 0
    n_correct = 0
    loss_sum = 0

    for data, target in train_loader:
        batch_size = len(target)
        n_seen += batch_size

        data = data.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        # Weight the batch-mean loss by batch size so the epoch average is
        # exact even when the last batch is smaller.
        loss_sum += loss.item() * batch_size
        loss.backward()
        optimizer.step()

        predicted = torch.max(output.data, 1)[1]
        n_correct += (predicted.to(device) == target).sum().item()

    accuracy = n_correct / n_seen
    mean_loss = loss_sum / n_seen
    error_percent = (1 - accuracy) * 100

    return model, mean_loss, error_percent


def test_epoch(model, test_loader, device):
    '''Evaluate ``model`` over ``test_loader`` without tracking gradients.

    Args:
        model: network to evaluate; switched to eval mode.
        test_loader: iterable of ``(data, target)`` batches.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, error_percent)``: the sample-weighted mean
        cross-entropy and ``100 * (1 - accuracy)``.

    Raises:
        ZeroDivisionError: if the loader yields no samples.
    '''
    n_seen = 0
    n_correct = 0
    loss_sum = 0

    with torch.no_grad():
        model.eval()
        for data, target in test_loader:
            batch_size = len(target)
            n_seen += batch_size

            data = data.to(device)
            target = target.to(device)
            output = model(data)

            # Batch-mean loss scaled back up to a per-batch total.
            batch_loss = F.cross_entropy(output, target) * batch_size
            loss_sum += batch_loss.item()

            predicted = torch.max(output.data, 1)[1]
            n_correct += (predicted.to(device) == target).sum().item()

    accuracy = n_correct / n_seen
    mean_loss = loss_sum / n_seen
    error_percent = (1 - accuracy) * 100

    return mean_loss, error_percent


if __name__ == '__main__':
    # Training entry point: parse hyperparameters, train for --epochs epochs,
    # log per-epoch metrics to a CSV and save the final weights.
    parser = argparse.ArgumentParser()
    # Jupyter passes the kernel connection file as `-f <path>`; accept and ignore it.
    parser.add_argument('-f')

    parser.add_argument('--epochs', default=1, type=int)
    parser.add_argument('--momentum', default=0.9, type=float)
    parser.add_argument('--width', default=8, type=int)
    parser.add_argument('--wd', default=1e-4, type=float)
    parser.add_argument('--lr', default=0.1, type=float)
    parser.add_argument('--batch_size', default=128, type=int)
    # NOTE: default=True with store_false means that passing --skip turns the
    # skip connections OFF.
    parser.add_argument('--skip', default=True, action='store_false')
    parser.add_argument('--gpu', default=0, type=int)
    parser.add_argument('--seed', default=232, type=int)
    parser.add_argument('--model', default="mlp", type=str)
    parser.add_argument('--hidden_nodes', default=256, type=int)
    parser.add_argument('--dataset', default="CIFAR10", type=str)
    args = parser.parse_args()

    # Apply --seed: it was parsed and recorded in the folder name but never
    # used. Seeding here covers weight initialisation and loader shuffling.
    torch.manual_seed(args.seed)

    print("================  ARGUMENTS  ================")
    print("Epochs:", args.epochs)
    print("Momentum:", args.momentum)
    print("Learning rate:", args.lr)
    print("Width:", args.width)
    print("Weight Decay:", args.wd)
    print("Batch size:", args.batch_size)
    print("Skip Connections:", args.skip)
    print("GPU:", args.gpu)
    print("model:", args.model)
    print("hidden_nodes:", args.hidden_nodes)
    print("dataset:", args.dataset)
    print("=============================================")

    # The experiment directory depends only on dataset and model type. It is
    # joined with "_" so it matches the "<dataset>_<model>" directory that the
    # analysis script in this file reads.
    EXP_DIR = args.dataset + "_" + args.model
    os.makedirs(EXP_DIR, exist_ok=True)

    # The run folder encodes every hyperparameter. The analysis script parses
    # these fields by position after splitting on "_", so the order and the
    # trailing "_" must not change.
    folder_name = '_'.join(str(value) for value in (
        args.epochs,
        args.momentum,
        args.width,
        args.wd,
        args.lr,
        args.batch_size,
        args.skip,
        args.gpu,
        args.seed,
        args.model,
        args.hidden_nodes,
        args.dataset,
    )) + '_'
    folder_dir = os.path.join(EXP_DIR, folder_name)

    print(os.listdir(EXP_DIR))
    os.makedirs(folder_dir, exist_ok=True)
    doc_name = folder_name + '.csv'
    print(doc_name)
    csv_dir = os.path.join(folder_dir, doc_name)
    params_dir = os.path.join(folder_dir, "modelparams.pt")

    train_loader, test_loader = get_dataloaders(dataset = args.dataset, bs = args.batch_size, model = args.model)
    gpu_name = 'cuda:%d'%args.gpu
    device = gpu_name if torch.cuda.is_available() else 'cpu'

    if args.model == 'resnet':
        model = resnet18_narrow(args).to(device)
    else:
        # For the MLP, --width is used as the number of hidden layers.
        model = FlexibleMLP(args.hidden_nodes, args.width).to(device)

    # Passed to train_epoch for interface compatibility; train_epoch computes
    # its loss with F.cross_entropy and does not use this object.
    criterion = torch.nn.CrossEntropyLoss(reduction = 'sum').to(device)
    optimizer = optim.SGD(model.parameters(), lr = args.lr,
                            weight_decay = args.wd, momentum = args.momentum)

    column_names = ['Epoch', 'Train Loss', 'Train Error', 'Test Loss', 'Test Error']
    log_ls = []
    for e in tqdm(range(1,args.epochs+1)):

        # Step schedule: divide the learning rate by 10 at the start of
        # epoch 100 and again at the start of epoch 150.
        if e in (100, 150):
            for g in optimizer.param_groups:
                g['lr'] = g['lr']*0.1

        # training
        model, train_loss, train_error = train_epoch(model, train_loader, optimizer, criterion, device)

        # testing
        test_loss, test_error = test_epoch(model, test_loader, device)

        # keeping track of the errors and losses
        row_ls = [e, train_loss, train_error, test_loss, test_error]
        print(row_ls)
        log_ls.append(row_ls)

        # Rewrite the CSV after every epoch so partial results survive an
        # interrupted run.
        data_pd = pd.DataFrame(data = log_ls, columns = column_names)
        data_pd.to_csv(csv_dir)

    torch.save(model.state_dict(), params_dir) # saving model parameters

Epochs: 1
Momentum: 0.9
Learning rate: 0.1
Width: 8
Weight Decay: 0.0001
Batch size: 128
Skip Connections: True
GPU: 0
model: mlp
hidden_nodes: 256
dataset: CIFAR10
['1_0.9_8_0.0001_0.1_128_True_0_232_mlp_256_CIFAR10_']
1_0.9_8_0.0001_0.1_128_True_0_232_mlp_256_CIFAR10_.csv
Files already downloaded and verified
Files already downloaded and verified


100%|██████████| 1/1 [00:17<00:00, 17.01s/it]

[1, 2.236670703048706, 87.256, 2.087304931640625, 81.77]





In [None]:
!pkill -9 python

Analysis

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
import pandas as pd
import os
import argparse
import logging

import torchvision
import torch.distributed as dist
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.utils.data as udata
import torch.nn.functional as F
import torch.optim as optim


def get_dataloaders(dataset = 'CIFAR10', bs = 128, model = 'resnet'):
    '''Build the (train, test) DataLoaders for CIFAR10 or MNIST.

    Args:
        dataset: 'CIFAR10' selects CIFAR10; any other value falls through
            to MNIST.
        bs: batch size of the training loader.
        model: for CIFAR10, any value other than 'resnet' appends a
            Grayscale transform after normalisation (the MLP in this file
            flattens inputs to 32*32 values). Ignored for MNIST.

    Returns:
        (trainloader, testloader). The train loader shuffles; the test
        loader uses a fixed batch size of 1000 and does not shuffle.

    Data is stored under './data' and downloaded on first use.
    '''
    if dataset == 'CIFAR10':
        dataset_cls = torchvision.datasets.CIFAR10
        steps = [
            transforms.ToTensor(),
            # Per-channel mean / std used for CIFAR10 normalisation.
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
        ]
        if model != 'resnet':
            steps.append(transforms.Grayscale())
    else:
        dataset_cls = torchvision.datasets.MNIST
        steps = [
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,)),
            # 2 px of padding per side (28x28 -> 32x32 for standard MNIST).
            transforms.Pad(2),
        ]

    # Train and test use the same, augmentation-free pipeline.
    pipeline = transforms.Compose(steps)
    trainset = dataset_cls('./data', download=True, train=True, transform=pipeline)
    testset = dataset_cls('./data', download=True, train=False, transform=pipeline)

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=bs, shuffle=True)
    testloader = torch.utils.data.DataLoader(testset, batch_size=1000, shuffle=False)
    return trainloader, testloader

class FlexibleMLP(nn.Module):
    '''Fully connected ReLU network over flattened 32*32 inputs, 10 outputs.

    Args:
        hidden_size: width of every hidden layer.
        hidden_layers: number of hidden Linear+ReLU pairs; values below 1
            still produce a single hidden layer.
    '''

    def __init__(self, hidden_size, hidden_layers):
        super().__init__()
        stack = []
        fan_in = 32 * 32
        # At least one hidden layer is always built.
        for _ in range(max(hidden_layers, 1)):
            stack.append(nn.Linear(fan_in, hidden_size))
            stack.append(nn.ReLU())
            fan_in = hidden_size
        stack.append(nn.Linear(hidden_size, 10))
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        '''Flatten ``x`` into rows of 32*32 values and return the logits.'''
        flat = x.view(-1, 32 * 32)
        return self.layers(flat)

class BasicBlock(nn.Module):
    '''Two 3x3 conv + batch-norm layers with an optional residual shortcut.

    Args:
        args: object exposing a boolean ``skip`` attribute; when false no
            shortcut module is created and no residual is added.
        in_planes: number of input channels.
        planes: number of output channels (times ``expansion``, which is 1).
        stride: stride of the first convolution (and of the shortcut
            projection, when one is needed).
    '''

    expansion = 1

    def __init__(self, args, in_planes, planes, stride=1):
        super().__init__()
        self.skip = args.skip

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(planes)

        if not self.skip:
            return

        out_planes = self.expansion * planes
        if stride == 1 and in_planes == out_planes:
            # Shapes already agree: an empty Sequential acts as the identity.
            self.shortcut = nn.Sequential()
        else:
            # Project the input with a 1x1 conv so it can be added to the output.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_planes))

    def forward(self, x):
        '''Apply conv-bn-relu-conv-bn, add the shortcut if enabled, then ReLU.'''
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if self.skip:
            out += self.shortcut(x)
        return F.relu(out)


class ResNet(nn.Module):
    '''Four-stage residual network whose channel counts scale with ``args.width``.

    Args:
        block: block class/factory called as ``block(args, in_planes, planes,
            stride)`` and exposing an ``expansion`` attribute.
        num_blocks: sequence of four ints, the number of blocks per stage.
        args: namespace providing ``width`` and ``skip``.
        num_classes: size of the final linear layer's output.
        dataset: "CIFAR10" selects 3 input channels; anything else selects 1.
    '''

    def __init__(self, block, num_blocks, args, num_classes=10, dataset = "CIFAR10"):
        super().__init__()
        self.args = args
        print(self.args.skip)
        base = self.args.width * 8
        self.in_planes = base

        in_channels = 3 if dataset == "CIFAR10" else 1
        self.conv1 = nn.Conv2d(in_channels, base, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(base)

        # Channels double at every stage: width*8, *16, *32, *64.
        self.layer1 = self._make_layer(block, base, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, base * 2, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, base * 4, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, base * 8, num_blocks[3], stride=2)
        self.linear = nn.Linear(base * 8 * block.expansion, num_classes)
        self.normed = False

        # Re-initialise every conv with Kaiming-normal weights and every
        # batch norm with weight 1 / bias 0.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    def _make_layer(self, block, planes, num_blocks, stride):
        '''Build one stage; only its first block uses ``stride``, the rest use 1.'''
        stage = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage.append(block(self.args, self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        '''Return class logits for a batch of images.'''
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        # NOTE(review): the 4x4 pooling window matches the 4x4 feature map that
        # 32x32 inputs produce after three stride-2 stages; other input sizes
        # would not match the linear layer's expected width.
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        return self.linear(out)

def resnet18_narrow(args, **kwargs):
    '''Build a ResNet of BasicBlocks with two blocks in each of the four stages.

    ``args.dataset`` is forwarded as the ``dataset`` argument; remaining
    keyword arguments are passed through to ``ResNet``.
    '''
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(BasicBlock, blocks_per_stage, args, dataset=args.dataset, **kwargs)


def calc_shannon_entropy(model, dataloader, device):
    '''
    Average Shannon entropy, in bits, of the model's softmax predictions.

    For every sample the softmax over the class dimension is taken and
    ``-sum(p * log2(p))`` is computed; the result is averaged over all
    samples yielded by ``dataloader`` (ideally the train loader).

    Args:
        model: callable mapping a data batch to logits; assumed to return a
            (batch, classes) tensor, as the models in this file do.
        dataloader: iterable of ``(data, target)`` batches; targets are ignored.
        device: device the data batches are moved to.

    Returns:
        A 0-dim tensor holding the mean entropy. It carries no autograd
        history.

    Raises:
        ZeroDivisionError: if the dataloader yields no samples.

    The model's train/eval mode is left untouched; callers should put the
    model in eval mode first if that is what they want to measure.
    '''
    import math  # local import keeps this block self-contained

    entropy_sum = 0
    total = 0
    # This is a pure measurement: without no_grad the accumulated sum would
    # keep the autograd graph of every batch alive until the function returns.
    with torch.no_grad():
        for (data, target) in dataloader:
            data = data.to(device)
            output = model(data)
            # log_softmax stays finite when a probability underflows to 0,
            # where softmax followed by log2 would give 0 * -inf = NaN.
            log_probs = torch.log_softmax(output, dim=1)
            probs = log_probs.exp()
            # Natural-log entropy converted to bits by dividing by ln(2).
            entropy_sum -= torch.sum(probs * log_probs) / math.log(2)
            total += len(output)

    return entropy_sum / total

def _eval_loss_and_error(model, loader, device):
    '''Return (mean cross-entropy, error percent) of ``model`` over ``loader``.'''
    loss_sum, n_correct, n_seen = 0, 0, 0
    for data, target in loader:
        n_seen += len(target)
        data = data.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)
        output = model(data)
        # Summed (not averaged) per batch so the final division gives the
        # exact per-sample mean even with a smaller last batch.
        loss_sum += F.cross_entropy(output, target, reduction='sum').item()
        predicted = torch.max(output, 1)[1]
        n_correct += (predicted == target).sum().item()
    return loss_sum / n_seen, (1 - n_correct / n_seen) * 100


def get_model_eval_comprehensive(line_worker, all_train_loader, all_test_loader, device):
    '''Evaluate a model on both the train and the test loader.

    Args:
        line_worker: model to evaluate; it is switched to eval mode.
        all_train_loader: iterable of ``(data, target)`` training batches.
        all_test_loader: iterable of ``(data, target)`` test batches.
        device: device the batches are moved to.

    Returns:
        ``(train_loss, train_error, test_loss, test_error)`` where losses are
        per-sample mean cross-entropies and errors are ``100 * (1 - accuracy)``.

    Raises:
        ZeroDivisionError: if either loader yields no samples.
    '''
    line_worker.eval()
    # Evaluation only: disabling autograd avoids building a graph per batch.
    with torch.no_grad():
        train_loss, train_error = _eval_loss_and_error(line_worker, all_train_loader, device)
        test_loss, test_error = _eval_loss_and_error(line_worker, all_test_loader, device)

    return train_loss, train_error, test_loss, test_error


parser = argparse.ArgumentParser()
# Jupyter passes the kernel connection file as `-f <path>`; accept and ignore it.
parser.add_argument('-f')

# MAIN PROGRAM =================
# For every trained run found under ./<dataset>_<model>, reload the saved
# weights, recompute train/test loss and error plus the mean Shannon entropy
# of the predictions, and write the collected rows to "<model>_analysis.csv".

parser.add_argument('--model', default='mlp', type=str)
parser.add_argument('--dataset', default="CIFAR10", type=str)
parser.add_argument('--gpu', default=0, type=int)
args = parser.parse_args()
logging.basicConfig(filename = 'file_aug.log',
                    level = logging.DEBUG,
                    format = '%(asctime)s:%(levelname)s:%(name)s:%(message)s')

exp_dir = './' + args.dataset + "_" + args.model
# Honour --gpu and fall back to the CPU when CUDA is unavailable, instead of
# a hard-coded 'cuda:0'.
device = 'cuda:%d' % args.gpu if torch.cuda.is_available() else 'cpu'
# Captured before the loop, which overwrites args.model per run.
model_name = args.model

# Only entries that contain a saved checkpoint are runs; this skips stray
# files and folders (e.g. notebook checkpoints) that would break the name
# parsing below. Sorted so the CSV row order is deterministic.
dirs = sorted(
    name for name in os.listdir(exp_dir)
    if os.path.isfile(os.path.join(exp_dir, name, 'modelparams.pt'))
)
valley_csv = []

for ind, run_name in enumerate(tqdm(dirs)):

    pull_dir = os.path.join(exp_dir, run_name)
    # Run folders are "_"-joined hyperparameters, read here by position:
    # 1 momentum, 2 width, 3 wd, 4 lr, 5 batch_size, 6 skip, 8 seed,
    # 9 model, 10 hidden_nodes, 11 dataset.
    fields = run_name.split('_')
    print(fields)
    args.momentum = float(fields[1])
    args.width = int(fields[2])
    args.wd = float(fields[3])
    args.lr = float(fields[4])
    args.batch_size = int(fields[5])
    args.model = fields[9]
    args.hidden_nodes = int(fields[10])
    args.dataset = fields[11]
    args.skip = fields[6] == 'True'
    print(args)

    seed_number = int(fields[8])
    print("Current seed number:", seed_number)

    all_train_loader, all_test_loader = get_dataloaders(args.dataset, args.batch_size, args.model)

    cw0 = os.path.join( pull_dir, 'modelparams.pt' )
    if args.model == 'resnet':
        worker = resnet18_narrow(args)
        worker.load_state_dict(torch.load(cw0, map_location='cpu'), strict=args.skip)
    else:
        # For the MLP, the "width" field is used as the number of hidden layers.
        worker = FlexibleMLP(args.hidden_nodes, args.width)
        worker.load_state_dict(torch.load(cw0, map_location='cpu'))

    # Full-dataset evaluation of the reloaded model.
    worker = worker.to(device)
    avg_train_loss, avg_train_error, avg_test_loss, avg_test_error = get_model_eval_comprehensive(worker, all_train_loader, all_test_loader, device)

    print(avg_train_error)

    logging.info('Experiment index %d:'%ind)
    logging.info('Train error %f:'%avg_train_error)
    logging.info('Test error %f:'%avg_test_error)

    # float() turns the returned 0-dim tensor into a plain number; otherwise
    # the CSV would contain the tensor's repr instead of a value.
    shannon_ent = float(calc_shannon_entropy(worker, all_train_loader, device))

    v_csv = [ shannon_ent, avg_train_loss, avg_train_error, avg_test_loss, avg_test_error]
    valley_csv.append(v_csv)
    logging.info('Flatness measures are calculated with seed %d'%seed_number)
    logging.info('Saving the flatness measures to a csv.')

    # Rewritten after every run so partial results survive an interruption.
    print("Saving the experiment values to a csv.")
    pd_valley = pd.DataFrame(valley_csv, columns = [ "shannon_ent", "train_loss", "train_error", "test_loss", "test_error"])
    pd_valley.to_csv("%s_analysis.csv"%(model_name))

0it [00:00, ?it/s]

['1', '0.9', '8', '0.0001', '0.1', '128', 'True', '0', '232', 'mlp', '256', 'CIFAR10', '']
Namespace(batch_size=128, dataset='CIFAR10', f='/root/.local/share/jupyter/runtime/kernel-b5d3155e-f25c-4d4f-b42a-c1bc3d76833d.json', gpu=0, hidden_nodes=256, lr=0.1, model='mlp', momentum=0.9, skip=True, wd=0.0001, width=8)
Current seed number: 232
Files already downloaded and verified
Files already downloaded and verified
80.95


1it [00:31, 31.33s/it]

Saving the experiment values to a csv.





In [None]:
!pkill -9 python

In [None]:
# Scratch cell: draw a 5-element random tensor with torch.randn and display it.
a = torch.randn(5)
a

tensor([-0.7559,  0.0351, -0.5321, -0.4984,  0.2499])

In [None]:
# Scratch check: softmax over a 1-D tensor keeps its shape.
# dim is given explicitly because Softmax() without it relies on the
# deprecated implicit-dimension choice and emits a warning; for a 1-D tensor
# the only valid dimension is 0.
m = torch.nn.Softmax(dim=0)
# `a` is passed directly instead of via a global named `input`, which would
# shadow the builtin for the rest of the session.
output = m(a)
print(output.shape)

torch.Size([5])


  This is separate from the ipykernel package so we can avoid doing imports until


In [None]:
# Scratch cell: bind `a` to a small float constant.
a = 1e-10

In [None]:
a

1e-10