In [1]:
from eve import Eve
from eve_plus import EvePlus
from wideresnet import WideResNet


import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.backends.cudnn as cudnn

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.autograd import Variable
from utils import lr_down_linearly

%reload_ext autoreload
%autoreload 2


In [None]:
### eve-pytorch

In [2]:
# variables
batch_size = 128
epochs = 100
cuda = torch.cuda.is_available()

# load data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                ])
train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('data/cifar10', train=True, download=True,
                     transform=transform),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('data/cifar10', train=False, transform=transform),
    batch_size=batch_size, shuffle=True)





Files already downloaded and verified


In [None]:
### pytorch cifar

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, stride=1)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3)

        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3)
        self.dense1 = nn.Linear(in_features=64 * 25, out_features=512)
        self.dense1_bn = nn.BatchNorm1d(512)
        self.dense2 = nn.Linear(512, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(F.dropout(F.max_pool2d(self.conv2(x), 2), 0.25))
        x = F.relu(self.conv3(x))
        x = F.relu(F.dropout(F.max_pool2d(self.conv4(x), 2), 0.25))
        x = x.view(-1, 64 * 25)  # reshape
        x = F.relu(self.dense1_bn(self.dense1(x)))
        return F.log_softmax(self.dense2(x))

In [4]:
def train(epoch, model, optimizer):
    model.train()
    total_loss = 0
    total_loss_list = []
    total_d_t = []
    train_correct = 0
    
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer = lr_down_linearly(optimizer, epoch, batch_idx)
        if cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        def closure():
            optimizer.zero_grad()  # reset reset optimizer
            output = model(data)
            loss = F.cross_entropy(output, target)  # negative log likelihood loss
            loss.backward()  # backprop
            return loss, output
        loss, d_t, output = optimizer.step(closure)
        loss_value = loss.data[0]
        total_loss += loss_value / len(train_loader)
        total_loss_list.append(loss_value)
        total_d_t.append(d_t)
        pred = output.data.max(1)[1]
        train_correct += pred.eq(target.data).cpu().sum()
        if batch_idx % 20 == 0:
            print('\rTrain Epoch: {} [{}/{} ({:>4.2%})] Loss: {:>5.3} Accuracy: {} lr: {}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                batch_idx / len(train_loader), total_loss, train_correct / len(train_loader.dataset), optimizer.param_groups[0]['lr'], ),
                end="")
    return total_loss, total_d_t, total_loss_list


def test(epoch, model):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        output = model(data)
        test_loss += F.cross_entropy(output, target).data[0]
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()

    test_loss /= len(test_loader)  # loss function already averages over batch size
    test_accuracy = correct / len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2%})'.format(
        test_loss, correct, len(test_loader.dataset),
        correct / len(test_loader.dataset)))
    return test_loss, test_accuracy


def plot(loss_a, loss_b, filename, ylabel):
    import matplotlib
    matplotlib.use("AGG")
    import matplotlib.pyplot as plt
    plt.plot(loss_a)
    plt.plot(loss_b)
    plt.legend(["Eve", "Adam"])
    plt.xlabel("epochs")
    plt.ylabel(ylabel)
    plt.savefig(filename)
    plt.clf()

### model-32-64-512-10

In [None]:
print("EvePlus")
eve_loss = []
eve_loss_list = []
eve_test_loss = []
eve_test_acc = []
eve_dt = []
torch.manual_seed(233)
model = Net()
if cuda:
    model.cuda()
    model.cuda()
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True
optimizer = EvePlus(model.parameters(), lr=0.001)
epochs = 50
for i in range(1, epochs + 1):
    train_loss, dt, loss_list = train(i, model, optimizer)
    eve_loss.append(train_loss)
    eve_loss_list += loss_list
    eve_dt += dt
    test_loss, test_accuracy = test(i, model)
    eve_test_loss.append(test_loss)
    eve_test_acc.append(test_accuracy)

EvePlus
Test set: Average loss: 1.0533, Accuracy: 6197/10000 (61.97%)
Test set: Average loss: 0.8961, Accuracy: 6962/10000 (69.62%)
Test set: Average loss: 1.2724, Accuracy: 6023/10000 (60.23%)
Test set: Average loss: 0.7850, Accuracy: 7304/10000 (73.04%)
Test set: Average loss: 0.7370, Accuracy: 7491/10000 (74.91%)
Test set: Average loss: 0.8286, Accuracy: 7352/10000 (73.52%)
Test set: Average loss: 0.7200, Accuracy: 7781/10000 (77.81%)
Test set: Average loss: 0.7681, Accuracy: 7663/10000 (76.63%)
Test set: Average loss: 0.9756, Accuracy: 7445/10000 (74.45%)
Test set: Average loss: 0.8391, Accuracy: 7803/10000 (78.03%)
Test set: Average loss: 0.8639, Accuracy: 7860/10000 (78.60%)
Test set: Average loss: 0.8990, Accuracy: 7848/10000 (78.48%)

### model-VGG

In [32]:
print("EvePlus")
eve_loss = []
eve_test_loss = []
eve_test_acc = []
eve_dt = []
torch.manual_seed(233)
model = WideResNet(depth=28, num_classes=10, widen_factor=10)
if cuda:
    model.cuda()
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True
optimizer = EvePlus(model.parameters(), lr=0.001)
epochs = 50
for i in range(1, epochs + 1):
    train_loss, dt = train(i, model, optimizer)
    eve_loss.append(train_loss)
    eve_dt += dt
    test_loss, test_accuracy = test(i, model)
    eve_test_loss.append(test_loss)
    eve_test_acc.append(test_accuracy)

EvePlus


TypeError: 'float' object cannot be interpreted as an integer

### adam

In [7]:
def train(epoch, model, optimizer):
    model.train()
    total_loss = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        if cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)

        def closure():
            optimizer.zero_grad()  # reset reset optimizer
            output = model(data)
            loss = F.cross_entropy(output, target)  # negative log likelihood loss
            loss.backward()  # backprop
            return loss

        loss = optimizer.step(closure)
        total_loss += loss.data[0] / len(train_loader)
        if batch_idx % 20 == 0:
            print('\rTrain Epoch: {} [{}/{} ({:>4.2%})] Loss: {:>5.3}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                       batch_idx / len(train_loader), total_loss),
                end="")
    return total_loss


def test(epoch, model):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        output = model(data)
        test_loss += F.cross_entropy(output, target).data[0]
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()

    test_loss /= len(test_loader)  # loss function already averages over batch size
    test_accuracy = correct / len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2%})'.format(
        test_loss, correct, len(test_loader.dataset),
        correct / len(test_loader.dataset)))
    return test_loss, test_accuracy

In [5]:
print("Adam")
adam_loss = []
adam_test_loss = []
adam_test_acc = []
torch.manual_seed(233)
model = Net()
if cuda:
    model.cuda()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epoch = 50
for i in range(1, epochs + 1):
    adam_loss.append(train(i, model, optimizer))
    test_loss, test_acc = test(i, model)
    adam_test_loss.append(test_loss)
    adam_test_acc.append(test_acc)


# plot(eve_loss, adam_loss, "eve_loss.png", "training loss")
# plot(eve_test_loss, adam_test_loss, "eve_test_loss.png", "testing loss")

Adam
Test set: Average loss: 1.5684, Accuracy: 4252/10000 (42.52%)
Test set: Average loss: 1.3379, Accuracy: 5200/10000 (52.00%)
Test set: Average loss: 1.1263, Accuracy: 5997/10000 (59.97%)
Test set: Average loss: 1.1885, Accuracy: 5847/10000 (58.47%)
Test set: Average loss: 1.0476, Accuracy: 6336/10000 (63.36%)
Test set: Average loss: 0.9902, Accuracy: 6526/10000 (65.26%)
Test set: Average loss: 0.9388, Accuracy: 6806/10000 (68.06%)
Test set: Average loss: 0.8945, Accuracy: 6927/10000 (69.27%)
Test set: Average loss: 0.9238, Accuracy: 6824/10000 (68.24%)
Test set: Average loss: 0.8308, Accuracy: 7153/10000 (71.53%)
Test set: Average loss: 0.8205, Accuracy: 7212/10000 (72.12%)
Test set: Average loss: 0.8438, Accuracy: 7225/10000 (72.25%)
Test set: Average loss: 0.8575, Accuracy: 7157/10000 (71.57%)
Test set: Average loss: 0.8143, Accuracy: 7280/10000 (72.80%)
Test set: Average loss: 0.8576, Accuracy: 7152/10000 (71.52%)
Test set: Average loss: 0.8996, Accuracy: 7155/10000 (71.55%)
Tes

KeyboardInterrupt: 

In [99]:
print("Rms")
rms_loss = []
rms_test_loss = []
rms_test_acc = []
torch.manual_seed(233)
model = Net()
if cuda:
    model.cuda()
optimizer = optim.RMSprop(model.parameters(), lr=0.01)
epoch = 50
for i in range(1, epochs + 1):
    rms_loss.append(train(i, model, optimizer))
    test_loss, test_acc = test(i, model)
    rms_test_loss.append(test_loss)
    rms_test_acc.append(test_acc)


# plot(eve_loss, adam_loss, "eve_loss.png", "training loss")
# plot(eve_test_loss, adam_test_loss, "eve_test_loss.png", "testing loss")

Rms
Test set: Average loss: 1.5112, Accuracy: 4702/10000 (47.02%)
Test set: Average loss: 1.2033, Accuracy: 5784/10000 (57.84%)
Test set: Average loss: 1.1069, Accuracy: 6249/10000 (62.49%)
Test set: Average loss: 1.0079, Accuracy: 6568/10000 (65.68%)
Test set: Average loss: 1.0665, Accuracy: 6466/10000 (64.66%)
Test set: Average loss: 0.9779, Accuracy: 6761/10000 (67.61%)
Test set: Average loss: 0.8872, Accuracy: 7186/10000 (71.86%)
Test set: Average loss: 0.9422, Accuracy: 7051/10000 (70.51%)
Test set: Average loss: 0.9898, Accuracy: 6964/10000 (69.64%)
Test set: Average loss: 0.8602, Accuracy: 7334/10000 (73.34%)
Test set: Average loss: 1.0849, Accuracy: 7011/10000 (70.11%)
Test set: Average loss: 1.0255, Accuracy: 7171/10000 (71.71%)
Test set: Average loss: 1.1412, Accuracy: 7159/10000 (71.59%)
Test set: Average loss: 1.1878, Accuracy: 7106/10000 (71.06%)
Test set: Average loss: 1.0956, Accuracy: 7270/10000 (72.70%)
Test set: Average loss: 1.2651, Accuracy: 7047/10000 (70.47%)
Test

In [9]:
print("ada")
ada_loss = []
ada_test_loss = []
ada_test_acc = []
torch.manual_seed(233)
model = Net()
if cuda:
    model.cuda()
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True
optimizer = optim.Adagrad(model.parameters(), lr=0.01)
epoch = 50
for i in range(1, epochs + 1):
    ada_loss.append(train(i, model, optimizer))
    test_loss, test_acc = test(i, model)
    ada_test_loss.append(test_loss)
    ada_test_acc.append(test_acc)


# plot(eve_loss, adam_loss, "eve_loss.png", "training loss")
# plot(eve_test_loss, adam_test_loss, "eve_test_loss.png", "testing loss")

ada
Test set: Average loss: 1.2504, Accuracy: 5515/10000 (55.15%)
Test set: Average loss: 0.9247, Accuracy: 6804/10000 (68.04%)
Test set: Average loss: 0.8582, Accuracy: 7029/10000 (70.29%)
Test set: Average loss: 0.7957, Accuracy: 7259/10000 (72.59%)
Test set: Average loss: 0.8805, Accuracy: 6964/10000 (69.64%)
Test set: Average loss: 0.6787, Accuracy: 7609/10000 (76.09%)
Test set: Average loss: 0.7594, Accuracy: 7430/10000 (74.30%)
Test set: Average loss: 0.7021, Accuracy: 7584/10000 (75.84%)
Test set: Average loss: 0.6889, Accuracy: 7688/10000 (76.88%)
Test set: Average loss: 0.7421, Accuracy: 7526/10000 (75.26%)
Test set: Average loss: 0.6806, Accuracy: 7694/10000 (76.94%)
Test set: Average loss: 0.6929, Accuracy: 7750/10000 (77.50%)
Test set: Average loss: 0.7330, Accuracy: 7684/10000 (76.84%)
Test set: Average loss: 0.7676, Accuracy: 7595/10000 (75.95%)
Test set: Average loss: 0.7388, Accuracy: 7680/10000 (76.80%)
Test set: Average loss: 0.7639, Accuracy: 7636/10000 (76.36%)
Test

KeyboardInterrupt: 

In [8]:
# print("Eve")
# eve_loss = []
# eve_test_loss = []

torch.manual_seed(23)


model_1 = Net()
model_1.cuda()
model_1.conv1.weight





# if cuda:
#     model.cuda()
# optimizer = Eve(model.parameters())
# for i in range(1, epochs + 1):
#     eve_loss.append(train(i, model, optimizer))
#     eve_test_loss.append(test(i, model))

# print("Adam")
# adam_loss = []
# adam_test_loss = []
# model = Net()
# if cuda:
#     model.cuda()
# optimizer = optim.Adam(model.parameters())
# for i in range(1, epochs + 1):
#     adam_loss.append(train(i, model, optimizer))
#     adam_test_loss.append(test(i, model))

# plot(eve_loss)
# plot(eve_test_loss, adam_test_loss, "eve_test_loss.png", "testing loss")

Parameter containing:
(0 ,0 ,.,.) = 
  0.0067  0.0651  0.1720
 -0.1212  0.1022  0.1084
 -0.0838  0.0284 -0.1074

(0 ,1 ,.,.) = 
 -0.0224  0.0717 -0.1216
 -0.1281 -0.1540 -0.0414
 -0.1079  0.0454  0.1283

(0 ,2 ,.,.) = 
 -0.0339 -0.0282 -0.1915
  0.0980  0.1478  0.1912
  0.1482 -0.1924 -0.0768

(1 ,0 ,.,.) = 
  0.1832  0.0345 -0.0311
  0.1841  0.0699  0.1328
  0.0972 -0.1674  0.0304

(1 ,1 ,.,.) = 
 -0.0790 -0.0404 -0.0816
 -0.0079  0.1241 -0.0420
  0.0486  0.0643 -0.1499

(1 ,2 ,.,.) = 
  0.0986 -0.1922  0.1284
  0.1702 -0.1262 -0.1380
 -0.1108 -0.0302  0.1442

(2 ,0 ,.,.) = 
 -0.0591 -0.0259  0.1423
  0.1448 -0.0275 -0.0113
  0.1265 -0.0760  0.0839

(2 ,1 ,.,.) = 
  0.1029 -0.1466 -0.1885
  0.0371  0.0678 -0.1425
 -0.0109 -0.1626  0.0516

(2 ,2 ,.,.) = 
  0.1275  0.1618 -0.0137
 -0.1418 -0.1301  0.1306
  0.0185  0.0789  0.0341

(3 ,0 ,.,.) = 
  0.1801  0.1053  0.1556
  0.0600 -0.0797  0.0222
 -0.1357 -0.1238 -0.1381

(3 ,1 ,.,.) = 
 -0.0998  0.1749  0.0023
 -0.1923 -0.0397 -0.0806
 -0

In [15]:
model_2 = Net()
model_2.cuda()
model_2.conv1.weight

Parameter containing:
(0 ,0 ,.,.) = 
  0.0067  0.0651  0.1720
 -0.1212  0.1022  0.1084
 -0.0838  0.0284 -0.1074

(0 ,1 ,.,.) = 
 -0.0224  0.0717 -0.1216
 -0.1281 -0.1540 -0.0414
 -0.1079  0.0454  0.1283

(0 ,2 ,.,.) = 
 -0.0339 -0.0282 -0.1915
  0.0980  0.1478  0.1912
  0.1482 -0.1924 -0.0768

(1 ,0 ,.,.) = 
  0.1832  0.0345 -0.0311
  0.1841  0.0699  0.1328
  0.0972 -0.1674  0.0304

(1 ,1 ,.,.) = 
 -0.0790 -0.0404 -0.0816
 -0.0079  0.1241 -0.0420
  0.0486  0.0643 -0.1499

(1 ,2 ,.,.) = 
  0.0986 -0.1922  0.1284
  0.1702 -0.1262 -0.1380
 -0.1108 -0.0302  0.1442

(2 ,0 ,.,.) = 
 -0.0591 -0.0259  0.1423
  0.1448 -0.0275 -0.0113
  0.1265 -0.0760  0.0839

(2 ,1 ,.,.) = 
  0.1029 -0.1466 -0.1885
  0.0371  0.0678 -0.1425
 -0.0109 -0.1626  0.0516

(2 ,2 ,.,.) = 
  0.1275  0.1618 -0.0137
 -0.1418 -0.1301  0.1306
  0.0185  0.0789  0.0341

(3 ,0 ,.,.) = 
  0.1801  0.1053  0.1556
  0.0600 -0.0797  0.0222
 -0.1357 -0.1238 -0.1381

(3 ,1 ,.,.) = 
 -0.0998  0.1749  0.0023
 -0.1923 -0.0397 -0.0806
 -0

## write 写

In [75]:
import pickle

with open("eve-loss-0.33-6.0.txt", 'wb') as fp:
    pickle.dump(eve_loss_6, fp)

In [13]:
import pickle

with open("eve-loss-list-0.14new-model-one-lr-0.001-0.0001.txt", 'wb') as fp:
    pickle.dump(eve_loss_list, fp)
with open("eve-loss-0.14new-model-one-lr-0.001-0.0001.txt", 'wb') as fp:
    pickle.dump(eve_loss, fp)
with open("eve-test-loss-0.14new-model-one-lr-0.001-0.0001.txt", 'wb') as fp:
    pickle.dump(eve_test_loss, fp)
with open("eve-test-acc-0.14new-model-one-lr-0.001-0.0001.txt", 'wb') as fp:
    pickle.dump(eve_test_acc, fp)
with open("eve-dt-0.14new-model-one-lr-0.001-0.0001.txt", 'wb') as fp:
    pickle.dump(eve_dt, fp)

In [11]:
import pickle


with open("ada-loss-model-one-lr-0.01.txt", 'wb') as fp:
    pickle.dump(ada_loss[0:50], fp)
with open("ada-test-loss-model-one-lr-0.01.txt", 'wb') as fp:
    pickle.dump(ada_test_loss[0:50], fp)
with open("ada-test-acc-model-one-lr-0.01.txt", 'wb') as fp:
    pickle.dump(ada_test_acc[0:50], fp)
