In [1]:
import os
import torch
import argparse
import numpy as np
import torch.utils.data

from torch import nn, optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torchvision.utils import save_image

In [19]:
20//2

10

In [116]:
class AutoEncoder(nn.Module):
    def __init__(self, inp_size, hid_size):
        super(AutoEncoder, self).__init__()
        """
        Here you should define layers of your autoencoder
        Please note, if a layer has trainable parameters, it should be nn.Linear. 
        ## !! CONVOLUTIONAL LAYERS CAN NOT BE HERE !! ##
        However, you can use any noise inducing layers, e.g. Dropout.

        Your network must not have more than six layers with trainable parameters.
        :param inp_size: integer, dimension of the input object
        :param hid_size: integer, dimension of the hidden representation
        """
        self.input_layer = nn.Linear(inp_size, hid_size)
        self.encode_layer = nn.Linear(hid_size, hid_size)
        self.encode2_layer = nn.Linear(hid_size, hid_size)
        self.decode_layer = nn.Linear(hid_size, hid_size)
        self.decode2_layer = nn.Linear(hid_size, hid_size)
        self.output_layer = nn.Linear(hid_size, inp_size)
        pass

    def encode(self, x):
        """
        Encodes objects to hidden representations (E: R^inp_size -> R^hid_size)

        :param x: inputs, Variable of shape (batch_size, inp_size)
        :return:  hidden represenation of the objects, Variable of shape (batch_size, hid_size)
        """
        hid_enc_layer = nn.functional.elu(self.input_layer(x))
        hid_enc_layer = nn.functional.dropout(hid_enc_layer)
        hid_enc_layer = nn.functional.elu(self.encode_layer(hid_enc_layer))
        hid_enc_layer = nn.functional.dropout(hid_enc_layer, p = 0.7)
        #hid_enc_layer = nn.functional.elu(self.encode2_layer(hid_enc_layer))
        #hid_enc_layer = nn.functional.dropout(hid_enc_layer)
        return hid_enc_layer

    def decode(self, h):
        """
        Decodes objects from hidden representations (D: R^hid_size -> R^inp_size)

        :param h: hidden represenatations, Variable of shape (batch_size, hid_size)
        :return:  reconstructed objects, Variable of shape (batch_size, inp_size)
        """
        hid_dec_layer = nn.functional.elu(self.decode_layer(h))
        hid_dec_layer = nn.functional.dropout(hid_dec_layer, p = 0.7)
        #hid_dec_layer = nn.functional.elu(self.decode2_layer(hid_dec_layer))
        #hid_dec_layer = nn.functional.dropout(hid_dec_layer, p = 0.8)
        output = nn.functional.self.output_layer(hid_dec_layer)
        return output

    def forward(self, x):
        """
        Encodes inputs to hidden representations and decodes back.

        x: inputs, Variable of shape (batch_size, inp_size)
        return: reconstructed objects, Variable of shape (batch_size, inp_size)
        """
        return self.decode(self.encode(x))

    def loss_function(self, recon_x, x):
        """
        Calculates the loss function.

        :params recon_x: reconstructed object, Variable of shape (batch_size, inp_size)
        :params x: original object, Variable of shape (batch_size, inp_size)
        :return: loss
        """
        lamb = 0.01
        mse_loss = nn.MSELoss(size_average=False)
        loss = mse_loss(recon_x, x)
        loss = loss + self.input_layer.weight.norm(1) * lamb
        loss = loss + self.encode_layer.weight.norm(1) * lamb
        #loss = loss + self.encode2_layer.weight.norm(1) * lamb
        loss = loss + self.decode_layer.weight.norm(1) * lamb
        #loss = loss + self.decode2_layer.weight.norm(1) * lamb
        loss = loss + self.output_layer.weight.norm(1) * lamb
        


        return loss

In [117]:
def train(model, optimizer, train_loader, test_loader):
    for epoch in range(10):
        model.train()
        train_loss, test_loss = 0, 0
        
        lamb = 1
        to_regularise = []
        for param in model.parameters():
            to_regularise.append(param.view(-1))
        weig = lamb * torch.abs(torch.cat(to_regularise)).sum()
    
        print('grad', weig)
        
        for data, _ in train_loader:
            data = Variable(data).view(-1, 784)
            x_rec = model(data)
            loss = model.loss_function(x_rec, data)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]
            
        lamb = 1
        to_regularise = []
        for param in model.parameters():
            to_regularise.append(param.view(-1))
        weig = lamb * torch.abs(torch.cat(to_regularise)).sum()
    
        print('grad', weig)
        print('=> Epoch: %s Average loss: %.10f' % (epoch, train_loss / len(train_loader.dataset)))

        model.eval()
        for data, _ in test_loader:
            data = Variable(data, volatile=True).view(-1, 784)
            x_rec = model(data)
            test_loss += model.loss_function(x_rec, data).data[0]

        test_loss /= len(test_loader.dataset)
        print('=> Test set loss: %.10f' % test_loss)

        n = min(data.size(0), 8)
        comparison = torch.cat([data.view(-1, 1, 28, 28)[:n], x_rec.view(-1, 1, 28, 28)[:n]])
        if not os.path.exists('./pics'): os.makedirs('./pics')
        save_image(comparison.data.cpu(), 'pics/reconstruction_n' + str(epoch) + '.png', nrow=n)
    return model

In [118]:
def test_work():
    print('Start test')
    get_loader = lambda train: torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=train, download=True, transform=transforms.ToTensor()),
        batch_size=50, shuffle=True)
    train_loader, test_loader = get_loader(True), get_loader(False)
    
    try:
        model = AutoEncoder(inp_size=784, hid_size=20)
        optimizer = optim.Adam(model.parameters(), lr=1e-3)
    except Exception:
        assert False, 'Error during model creation'
        return

    try:
        model = train(model, optimizer, train_loader, test_loader)
    except Exception:
        assert False, 'Error during training'
        return

    test_x = Variable(torch.randn(1, 784))    
    rec_x, hid_x = model(test_x), model.encode(test_x)
    submodules = dict(model.named_children())
    layers_with_params = np.unique(['.'.join(n.split('.')[:-1]) for n, _ in model.named_parameters()])
    
    assert (hid_x.dim() == 2) and (hid_x.size(1) == 20),  'Hidden representation size must be equal to 20'
    assert (rec_x.dim() == 2) and (rec_x.size(1) == 784), 'Reconstruction size must be equal to 784'
    assert len(layers_with_params) <= 6, 'The model must have no more than 6 layers '
    assert np.all(np.concatenate([list(p.shape) for p in model.parameters()]) <= 800), 'All hidden sizes must be less than 800'
    print('Success!🎉')

In [119]:
#4 with drop lamd =0.01  relu out
test_work()

Start test
grad Variable containing:
 2309.6309
[torch.FloatTensor of size 1]

grad Variable containing:
 1610.3176
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 33.0552580414
=> Test set loss: 22.1630529236
grad Variable containing:
 1610.3176
[torch.FloatTensor of size 1]

grad Variable containing:
 1462.3767
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 20.7240978689
=> Test set loss: 19.5884071594
grad Variable containing:
 1462.3767
[torch.FloatTensor of size 1]

grad Variable containing:
 1434.7466
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 19.6149927022
=> Test set loss: 19.0110141907
grad Variable containing:
 1434.7466
[torch.FloatTensor of size 1]

grad Variable containing:
 1421.0444
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 19.3256101176
=> Test set loss: 18.9123745239
grad Variable containing:
 1421.0444
[torch.FloatTensor of size 1]

grad Variable containing:
 1409.4445
[torch.FloatTensor of size 1]

=> Epoch: 4 Average l

In [99]:
#4 with drop lamd =0.01  elu all
test_work()

Start test
grad Variable containing:
 2295.6257
[torch.FloatTensor of size 1]

grad Variable containing:
 1642.6779
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 33.5669330037
=> Test set loss: 22.6939718811
grad Variable containing:
 1642.6779
[torch.FloatTensor of size 1]

grad Variable containing:
 1464.5306
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 21.3180523488
=> Test set loss: 19.8343023132
grad Variable containing:
 1464.5306
[torch.FloatTensor of size 1]

grad Variable containing:
 1427.3098
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 19.9735260712
=> Test set loss: 19.5669393066
grad Variable containing:
 1427.3098
[torch.FloatTensor of size 1]

grad Variable containing:
 1436.2330
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 19.7097327291
=> Test set loss: 19.0637307983
grad Variable containing:
 1436.2330
[torch.FloatTensor of size 1]

grad Variable containing:
 1421.9390
[torch.FloatTensor of size 1]

=> Epoch: 4 Average l

In [87]:
#2 with drop lamd =0.01  elu all
test_work()

Start test
grad Variable containing:
 2314.6416
[torch.FloatTensor of size 1]

grad Variable containing:
 1329.6965
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 31.6432058156
=> Test set loss: 20.4173750366
grad Variable containing:
 1329.6965
[torch.FloatTensor of size 1]

grad Variable containing:
 1269.1555
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 19.7074066274
=> Test set loss: 18.9114879761
grad Variable containing:
 1269.1555
[torch.FloatTensor of size 1]

grad Variable containing:
 1258.6685
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 19.2313594320
=> Test set loss: 18.8156986511
grad Variable containing:
 1258.6685
[torch.FloatTensor of size 1]

grad Variable containing:
 1251.4125
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 19.1696670288
=> Test set loss: 18.7627396484
grad Variable containing:
 1251.4125
[torch.FloatTensor of size 1]

grad Variable containing:
 1248.3119
[torch.FloatTensor of size 1]

=> Epoch: 4 Average l

In [75]:
#6 with drop lamd =0.01  elu all
test_work()

Start test
grad Variable containing:
 2310.5317
[torch.FloatTensor of size 1]

grad Variable containing:
 1782.8469
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 35.9894269165
=> Test set loss: 26.4441925049
grad Variable containing:
 1782.8469
[torch.FloatTensor of size 1]

grad Variable containing:
 1569.4493
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 25.2054465007
=> Test set loss: 23.5956553101
grad Variable containing:
 1569.4493
[torch.FloatTensor of size 1]

grad Variable containing:
 1508.8407
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 23.3927992126
=> Test set loss: 22.4288883911
grad Variable containing:
 1508.8407
[torch.FloatTensor of size 1]

grad Variable containing:
 1490.5808
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 22.3072722972
=> Test set loss: 21.5811816589
grad Variable containing:
 1490.5808
[torch.FloatTensor of size 1]

grad Variable containing:
 1464.1202
[torch.FloatTensor of size 1]

=> Epoch: 4 Average l

In [71]:
#6 with drop lamd =0.01  non
test_work()

Start test
grad Variable containing:
 2286.2600
[torch.FloatTensor of size 1]

grad Variable containing:
 1511.4890
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 40.5066887614
=> Test set loss: 29.7542896729
grad Variable containing:
 1511.4890
[torch.FloatTensor of size 1]

grad Variable containing:
 1594.2330
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 28.6358888326
=> Test set loss: 27.4226381714
grad Variable containing:
 1594.2330
[torch.FloatTensor of size 1]

grad Variable containing:
 1524.7728
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 26.9517177795
=> Test set loss: 25.8337398193
grad Variable containing:
 1524.7728
[torch.FloatTensor of size 1]

grad Variable containing:
 1470.5261
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 25.5995509399
=> Test set loss: 24.8847543701
grad Variable containing:
 1470.5261
[torch.FloatTensor of size 1]

grad Variable containing:
 1460.9696
[torch.FloatTensor of size 1]

=> Epoch: 4 Average l

In [50]:
#6 with drop lamd =0.01 
test_work()

Start test
grad Variable containing:
 2311.0378
[torch.FloatTensor of size 1]

grad Variable containing:
 1449.5646
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 42.9684445841
=> Test set loss: 34.1022025146
grad Variable containing:
 1449.5646
[torch.FloatTensor of size 1]

grad Variable containing:
 1399.3937
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 32.1157795919
=> Test set loss: 30.8355845581
grad Variable containing:
 1399.3937
[torch.FloatTensor of size 1]

grad Variable containing:
 1356.3654
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 29.9864371623
=> Test set loss: 28.3810823364
grad Variable containing:
 1356.3654
[torch.FloatTensor of size 1]

grad Variable containing:
 1300.7369
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 28.1422103068
=> Test set loss: 27.3700321533
grad Variable containing:
 1300.7369
[torch.FloatTensor of size 1]

grad Variable containing:
 1288.4093
[torch.FloatTensor of size 1]

=> Epoch: 4 Average l

In [59]:
#6 with drop lamd =0.01 elu
test_work()

Start test
grad Variable containing:
 2314.8250
[torch.FloatTensor of size 1]

grad Variable containing:
 1416.3394
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 41.5609312846
=> Test set loss: 31.7678521606
grad Variable containing:
 1416.3394
[torch.FloatTensor of size 1]

grad Variable containing:
 1356.2966
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 30.1691095500
=> Test set loss: 28.3201942383
grad Variable containing:
 1356.2966
[torch.FloatTensor of size 1]

grad Variable containing:
 1350.3772
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 27.8143228190
=> Test set loss: 26.8189135254
grad Variable containing:
 1350.3772
[torch.FloatTensor of size 1]

grad Variable containing:
 1331.1981
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 26.5865759603
=> Test set loss: 25.7204607422
grad Variable containing:
 1331.1981
[torch.FloatTensor of size 1]

grad Variable containing:
 1361.3058
[torch.FloatTensor of size 1]

=> Epoch: 4 Average l

In [55]:
#6 with drop lamd =0.01 relu
test_work()

Start test
grad Variable containing:
 2309.0820
[torch.FloatTensor of size 1]

grad Variable containing:
 1306.6409
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 51.5289968180
=> Test set loss: 45.7441240845
grad Variable containing:
 1306.6409
[torch.FloatTensor of size 1]

grad Variable containing:
 1353.1969
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 44.2440124349
=> Test set loss: 43.1443000977
grad Variable containing:
 1353.1969
[torch.FloatTensor of size 1]

grad Variable containing:
 1366.6925
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 42.3837350850
=> Test set loss: 42.0536548706
grad Variable containing:
 1366.6925
[torch.FloatTensor of size 1]

grad Variable containing:
 1396.2426
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 41.6112033325
=> Test set loss: 41.4329685425
grad Variable containing:
 1396.2426
[torch.FloatTensor of size 1]

grad Variable containing:
 1401.9677
[torch.FloatTensor of size 1]

=> Epoch: 4 Average l

In [46]:
#6 without drop lamd =0.01 
test_work()

Start test
grad Variable containing:
 2317.4285
[torch.FloatTensor of size 1]

grad Variable containing:
 1504.4011
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 45.4244158183
=> Test set loss: 35.8871722412
grad Variable containing:
 1504.4011
[torch.FloatTensor of size 1]

grad Variable containing:
 1368.9613
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 33.0433653605
=> Test set loss: 31.6789344116
grad Variable containing:
 1368.9613
[torch.FloatTensor of size 1]

grad Variable containing:
 1299.6431
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 31.7259760763
=> Test set loss: 31.2949506104
grad Variable containing:
 1299.6431
[torch.FloatTensor of size 1]

grad Variable containing:
 1308.0544
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 31.0012339193
=> Test set loss: 29.9137347412
grad Variable containing:
 1308.0544
[torch.FloatTensor of size 1]

grad Variable containing:
 1309.4149
[torch.FloatTensor of size 1]

=> Epoch: 4 Average l

In [27]:
#6 without drop lamd =0.01 
test_work()

Start test
grad Variable containing:
 2307.6943
[torch.FloatTensor of size 1]

grad Variable containing:
 1552.4580
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 42.3662590108
=> Test set loss: 30.4274570435
grad Variable containing:
 1552.4580
[torch.FloatTensor of size 1]

grad Variable containing:
 1476.2120
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 29.3135714640
=> Test set loss: 28.1761595581
grad Variable containing:
 1476.2120
[torch.FloatTensor of size 1]

grad Variable containing:
 1440.9937
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 27.9515191264
=> Test set loss: 27.1351969727
grad Variable containing:
 1440.9937
[torch.FloatTensor of size 1]

grad Variable containing:
 1465.4238
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 27.0727358602
=> Test set loss: 26.1822535400
grad Variable containing:
 1465.4238
[torch.FloatTensor of size 1]

grad Variable containing:
 1450.9554
[torch.FloatTensor of size 1]

=> Epoch: 4 Average l

In [42]:
#6 without drop lamd =0
test_work()

Start test
grad Variable containing:
 2310.7456
[torch.FloatTensor of size 1]

grad Variable containing:
 2589.1865
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 44.6964374736
=> Test set loss: 34.3524466309
grad Variable containing:
 2589.1865
[torch.FloatTensor of size 1]

grad Variable containing:
 2675.6182
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 32.3168525004
=> Test set loss: 30.5847254883
grad Variable containing:
 2675.6182
[torch.FloatTensor of size 1]

grad Variable containing:
 2719.4648
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 30.6328585002
=> Test set loss: 30.1271101929
grad Variable containing:
 2719.4648
[torch.FloatTensor of size 1]

grad Variable containing:
 2768.7129
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 29.5340945089
=> Test set loss: 28.9078280151
grad Variable containing:
 2768.7129
[torch.FloatTensor of size 1]

grad Variable containing:
 2830.9885
[torch.FloatTensor of size 1]

=> Epoch: 4 Average l

In [32]:
#6 without drop lamd =1 
test_work()

Start test
grad Variable containing:
 2315.9141
[torch.FloatTensor of size 1]

grad Variable containing:
 257.7109
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 56.3725871623
=> Test set loss: 49.2809444824
grad Variable containing:
 257.7109
[torch.FloatTensor of size 1]

grad Variable containing:
 277.9843
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 48.6047262614
=> Test set loss: 47.9669462646
grad Variable containing:
 277.9843
[torch.FloatTensor of size 1]

grad Variable containing:
 302.8383
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 47.7194173869
=> Test set loss: 47.5160375732
grad Variable containing:
 302.8383
[torch.FloatTensor of size 1]

grad Variable containing:
 344.2684
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 47.4270903788
=> Test set loss: 47.1348263428
grad Variable containing:
 344.2684
[torch.FloatTensor of size 1]

grad Variable containing:
 360.5707
[torch.FloatTensor of size 1]

=> Epoch: 4 Average loss: 46.9

In [23]:
#6 without drop lamd =0.01 
test_work()

Start test
grad Variable containing:
 2196.9106
[torch.FloatTensor of size 1]

grad Variable containing:
 1258.3378
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 45.4643422384
=> Test set loss: 36.7712294556
grad Variable containing:
 1258.3378
[torch.FloatTensor of size 1]

grad Variable containing:
 1287.1506
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 35.5661980123
=> Test set loss: 33.6818182983
grad Variable containing:
 1287.1506
[torch.FloatTensor of size 1]

grad Variable containing:
 1407.0947
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 32.9452833455
=> Test set loss: 31.7150201904
grad Variable containing:
 1407.0947
[torch.FloatTensor of size 1]

grad Variable containing:
 1459.5656
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 31.5732136597
=> Test set loss: 30.8362505493
grad Variable containing:
 1459.5656
[torch.FloatTensor of size 1]

grad Variable containing:
 1510.6660
[torch.FloatTensor of size 1]

=> Epoch: 4 Average l

AssertionError: Hidden representation size must be equal to 20

In [260]:
#4 without drop lamd =1 
test_work()

Start test
grad Variable containing:
 2206.4917
[torch.FloatTensor of size 1]

grad Variable containing:
 231.6028
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 50.4122389872
=> Test set loss: 39.7399684448
grad Variable containing:
 231.6028
[torch.FloatTensor of size 1]

grad Variable containing:
 243.0682
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 38.0091780741
=> Test set loss: 36.0479500488
grad Variable containing:
 243.0682
[torch.FloatTensor of size 1]

grad Variable containing:
 241.3143
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 35.7221777771
=> Test set loss: 35.1394902100
grad Variable containing:
 241.3143
[torch.FloatTensor of size 1]

grad Variable containing:
 241.4347
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 35.2495640971
=> Test set loss: 34.8913871094
grad Variable containing:
 241.4347
[torch.FloatTensor of size 1]

grad Variable containing:
 242.1694
[torch.FloatTensor of size 1]

=> Epoch: 4 Average loss: 34.9

In [264]:
#4 without drop lamd = 0.001
test_work()

Start test
grad Variable containing:
 2217.4434
[torch.FloatTensor of size 1]

grad Variable containing:
 1427.7234
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 38.3192189596
=> Test set loss: 28.8169107788
grad Variable containing:
 1427.7234
[torch.FloatTensor of size 1]

grad Variable containing:
 1317.1708
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 27.3541346252
=> Test set loss: 25.4113202637
grad Variable containing:
 1317.1708
[torch.FloatTensor of size 1]

grad Variable containing:
 1279.4215
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 25.1807821635
=> Test set loss: 24.1128293335
grad Variable containing:
 1279.4215
[torch.FloatTensor of size 1]

grad Variable containing:
 1268.9510
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 23.8691389140
=> Test set loss: 23.0228090149
grad Variable containing:
 1268.9510
[torch.FloatTensor of size 1]

grad Variable containing:
 1245.6631
[torch.FloatTensor of size 1]

=> Epoch: 4 Average l

In [256]:
#4 without drop lamb = 0.1
test_work()

Start test
grad Variable containing:
 2210.5491
[torch.FloatTensor of size 1]

grad Variable containing:
 838.8389
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 40.2824123759
=> Test set loss: 29.7709755615
grad Variable containing:
 838.8389
[torch.FloatTensor of size 1]

grad Variable containing:
 743.3848
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 28.6478214172
=> Test set loss: 27.3785946655
grad Variable containing:
 743.3848
[torch.FloatTensor of size 1]

grad Variable containing:
 719.9584
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 27.2106614604
=> Test set loss: 26.2273687134
grad Variable containing:
 719.9584
[torch.FloatTensor of size 1]

grad Variable containing:
 682.5870
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 26.3505672668
=> Test set loss: 25.9053831421
grad Variable containing:
 682.5870
[torch.FloatTensor of size 1]

grad Variable containing:
 665.9402
[torch.FloatTensor of size 1]

=> Epoch: 4 Average loss: 26.1

In [252]:
#4 with drop
test_work()

Start test
grad Variable containing:
 2227.6106
[torch.FloatTensor of size 1]

grad Variable containing:
 731.2108
[torch.FloatTensor of size 1]

=> Epoch: 0 Average loss: 40.8876158488
=> Test set loss: 30.2893124023
grad Variable containing:
 731.2108
[torch.FloatTensor of size 1]

grad Variable containing:
 667.5630
[torch.FloatTensor of size 1]

=> Epoch: 1 Average loss: 28.9914867554
=> Test set loss: 27.8952964355
grad Variable containing:
 667.5630
[torch.FloatTensor of size 1]

grad Variable containing:
 638.4318
[torch.FloatTensor of size 1]

=> Epoch: 2 Average loss: 27.6915644714
=> Test set loss: 27.1802191650
grad Variable containing:
 638.4318
[torch.FloatTensor of size 1]

grad Variable containing:
 612.5133
[torch.FloatTensor of size 1]

=> Epoch: 3 Average loss: 27.3394573690
=> Test set loss: 26.8885466675
grad Variable containing:
 612.5133
[torch.FloatTensor of size 1]

grad Variable containing:
 589.3036
[torch.FloatTensor of size 1]

=> Epoch: 4 Average loss: 27.1

In [70]:
#x = torch.rand(3,2,requires_grad=True)
x = torch.Tensor([[1., -1.], [1., -1.]] )
loss = torch.sum(torch.abs(x))
loss.backward()
## gradient should be all one
x.grad

AttributeError: 'float' object has no attribute 'backward'

In [102]:
x = torch.FloatTensor([-2] )
print(x)
x = Variable(x, requires_grad=True)
loss = torch.sum(torch.abs(x))
loss.backward()
## gradient should be all one
x.grad.data


-2
[torch.FloatTensor of size 1]




-1
[torch.FloatTensor of size 1]

In [65]:
torch.tensor()

TypeError: 'module' object is not callable