In [1]:
import torch
from torch import autograd
from torch import nn
import torchvision
from torch import optim
import torchvision.transforms as transforms
from tqdm.notebook import tqdm
import numpy as np
from models import SReLU, SLinear, SModel, SCrossEntropyLoss
from Functions import SCrossEntropyLossFunction

In [2]:
def eval(loader=None):
    """Return the classification accuracy of the global ``model`` on a loader.

    The original version iterated ``trainloader`` even though its only caller
    labels the result "test acc" and the sibling evaluators use
    ``testloader``. The default is therefore the test split; pass
    ``loader=trainloader`` explicitly to measure training accuracy.

    NOTE(review): this function shadows the builtin ``eval``; the name is kept
    because callers reference it.

    Args:
        loader: iterable yielding ``(images, labels)`` batches. Defaults to the
            module-level ``testloader``.

    Returns:
        ``correct / total``. ``correct`` is accumulated from tensor sums, so
        the result is a tensor whenever at least one batch was processed.
    """
    if loader is None:
        loader = testloader
    total = 0
    correct = 0
    with torch.no_grad():  # inference only, no autograd bookkeeping
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            # Flatten each image into a 784-element row before the forward pass.
            images = images.view(-1, 784)
            outputs = model(images)
            predictions = outputs.argmax(dim=1)
            correction = predictions == labels
            correct += correction.sum()
            total += len(correction)
    return correct / total

def Seval():
    """Measure test accuracy of the global ``model`` after clearing its noise.

    Calls ``model.clear_noise()`` and then walks ``testloader`` under
    ``torch.no_grad()``. The first element of the model's output is used for
    the class prediction (argmax over dim 1).

    Returns:
        Number of correct predictions divided by the number of samples seen.
        The hit count is accumulated from tensor sums, so the result is a
        tensor whenever at least one batch was processed.
    """
    n_seen = 0
    n_hits = 0
    with torch.no_grad():
        model.clear_noise()
        for batch_x, batch_y in testloader:
            # Move to the compute device and flatten to rows of 784 values.
            batch_x = batch_x.to(device).view(-1, 784)
            batch_y = batch_y.to(device)
            first_out = model(batch_x)[0]
            hits = torch.argmax(first_out, dim=1) == batch_y
            n_hits += hits.sum()
            n_seen += len(hits)
    return n_hits / n_seen

def Seval_noise(var):
    """Measure test accuracy of the global ``model`` with noise enabled.

    ``var`` is forwarded unchanged to ``model.set_noise`` before evaluation
    (presumably a noise variance or scale; confirm against ``SModel``). The
    whole of ``testloader`` is then evaluated under ``torch.no_grad()`` using
    the first element of the model's output for the argmax prediction.

    Args:
        var: value passed to ``model.set_noise``.

    Returns:
        Correct predictions divided by samples seen; a tensor whenever at
        least one batch was processed (callers use ``.item()`` on it).
    """
    matched = 0
    seen = 0
    with torch.no_grad():
        model.set_noise(var)
        for images, labels in testloader:
            flat = images.to(device).view(-1, 784)  # one 784-value row per image
            targets = labels.to(device)
            predicted = torch.argmax(model(flat)[0], dim=1)
            is_right = predicted == targets
            matched += is_right.sum()
            seen += len(is_right)
    return matched / seen

In [3]:
# Prefer the first CUDA GPU; otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Mini-batch size used by both data loaders.
BS = 128

# MNIST is read from an existing local copy (download=False); images are
# converted to tensors with ToTensor().
trainset = torchvision.datasets.MNIST(
    root='~/testCode/data',
    train=True,
    download=False,
    transform=transforms.ToTensor(),
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BS,
    shuffle=True,
    num_workers=2,
)

testset = torchvision.datasets.MNIST(
    root='~/testCode/data',
    train=False,
    download=False,
    transform=transforms.ToTensor(),
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BS,
    shuffle=False,
    num_workers=2,
)

# Disabled baseline: plain model trained with Adam and cross-entropy.
# model = Model()
# model.to(device)
# criteria = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=0.001)

# for _ in range(10):
#     for images, labels in trainloader:
#         optimizer.zero_grad()
#         images, labels = images.to(device), labels.to(device)
#         images = images.view(-1, 784)
#         outputs = model(images)
#         loss = criteria(outputs, labels)
#         loss.backward()
#         optimizer.step()
#     print(f"test acc: {eval():.4f}")

In [5]:
# Train SModel with SGD (lr=0.005) for 10 epochs, reporting the noise-free test
# accuracy, the mean loss, and the mean value of model.fetch_S_grad() per epoch.

# Fall back to the CPU when CUDA is unavailable instead of hard-coding
# "cuda:0", which fails on CPU-only machines.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = SModel()
model.to(device)
model.push_S_device()
model.clear_noise()
# criteria = SCrossEntropyLossFunction.apply
criteria = SCrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.005)


for epoch in range(10):
    running_loss = 0.
    running_loss_plus_s = 0.
    for images, labels in trainloader:
        # Reset both the optimizer gradients and the model's S gradients
        # before each batch.
        optimizer.zero_grad()
        model.clear_S_grad()
        images, labels = images.to(device), labels.to(device)
        images = images.view(-1, 784)  # flatten to rows of 784 values
        outputs, outputsS = model(images)
        loss = criteria(outputs, outputsS, labels)
        loss.backward()
        # Loss plus the value reported by fetch_S_grad(); only used for the
        # "s" statistic printed below.
        loss_plus_s = loss + model.fetch_S_grad()
        model.do_second()
        optimizer.step()
        running_loss += loss.item()
        running_loss_plus_s += loss_plus_s.item()
        # print(running_loss)
    print(f"epoch: {epoch:-3d}, test acc: {Seval():.4f}, loss: {running_loss / len(trainloader):.4f}, s: {(running_loss_plus_s - running_loss) / len(trainloader):.4f}")
    # if (running_loss_plus_s - running_loss) / len(trainloader) > 35000:
    #     break

epoch:   0, test acc: 0.7398, loss: 1.7260, s: 11849.4468
epoch:   1, test acc: 0.8204, loss: 0.9835, s: 20598.4098
epoch:   2, test acc: 0.8543, loss: 0.7123, s: 24961.9215
epoch:   3, test acc: 0.8639, loss: 0.5881, s: 27730.9353
epoch:   4, test acc: 0.8752, loss: 0.5262, s: 31303.2763
tensor(True, device='cuda:0') tensor(True, device='cuda:0')


Exception: 

In [29]:
from tqdm.notebook import tqdm
import numpy as np

# Repeat the noisy evaluation 100 times with var=0.05 and show the running
# mean accuracy as the progress-bar description.
acc_list = []
with tqdm(range(100)) as loader:
    for _ in loader:
        noisy_acc = Seval_noise(0.05)
        acc_list.append(noisy_acc.item())
        running_mean = np.mean(acc_list)
        loader.set_description(f"{running_mean}")

HBox(children=(HTML(value=''), FloatProgress(value=0.0), HTML(value='')))




In [26]:
# Train SModel with SGD (lr=0.01) for up to 50 epochs using
# SCrossEntropyLossFunction.apply as the loss, stopping early once the
# noise-free test accuracy reaches the threshold below.

# Fall back to the CPU when CUDA is unavailable instead of hard-coding
# "cuda:0", which fails on CPU-only machines.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = SModel()
model.to(device)
model.push_S_device()
model.clear_noise()
criteria = SCrossEntropyLossFunction.apply
optimizer = optim.SGD(model.parameters(), lr=0.01)

for epoch in range(50):
    running_loss = 0.
    running_loss_plus_s = 0.
    for images, labels in trainloader:
        optimizer.zero_grad()
        images, labels = images.to(device), labels.to(device)
        images = images.view(-1, 784)  # flatten to rows of 784 values
        outputs, outputsS = model(images)
        loss = criteria(outputs, outputsS, labels)
        loss.backward()
        # Loss plus the value reported by fetch_S_grad(); only used for the
        # "s" statistic printed below.
        loss_plus_s = loss + model.fetch_S_grad()
        optimizer.step()
        # Clear optimizer and S gradients after the update, in the same order
        # as before.
        optimizer.zero_grad()
        model.clear_S_grad()
        running_loss += loss.item()
        running_loss_plus_s += loss_plus_s.item()
    test_acc = Seval()
    print(f"epoch: {epoch:-3d}, test acc: {test_acc:.4f}, loss: {running_loss / len(trainloader):.4f}, s: {(running_loss_plus_s - running_loss) / len(trainloader):-5.4f}")
    # Early stop at a fixed accuracy target.
    # NOTE(review): 0.8404 is an unexplained magic number; confirm its origin.
    if test_acc >= 0.8404:
        break

epoch:   0, test acc: 0.4812, loss: 2.2020, s: 1351.5894
epoch:   1, test acc: 0.7715, loss: 1.4606, s: 6371.6053
epoch:   2, test acc: 0.8442, loss: 0.7443, s: 13479.2123
epoch:   3, test acc: 0.8739, loss: 0.5353, s: 14481.9844


In [27]:
# Repeat the noisy evaluation 100 times with var=0.05 and show the running
# mean accuracy as the progress-bar description.
acc_list = []
with tqdm(range(100)) as loader:
    for _ in loader:
        noisy_acc = Seval_noise(0.05)
        acc_list.append(noisy_acc.item())
        running_mean = np.mean(acc_list)
        loader.set_description(f"{running_mean}")

HBox(children=(HTML(value=''), FloatProgress(value=0.0), HTML(value='')))




In [73]:
# Scratch tensor for the torch.max check. It was never defined in a visible
# cell, so it is defined here (value taken from the recorded output) to let
# the notebook run on a fresh kernel.
a = torch.tensor([[1., 2.],
                  [1., 2.]])
a

tensor([[1., 2.],
        [1., 2.]])

In [76]:
# Row-wise maximum of `a`: returns both the max values and their column indices.
a.max(dim=1)

torch.return_types.max(
values=tensor([2., 2.]),
indices=tensor([1, 1]))