In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

In [73]:
import torchvision
from torchvision import datasets, transforms

batch_size = 128
lr = 0.0002

# data_loader
img_size = 64
transform = transforms.Compose([
#         transforms.Scale(img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True, transform=transform),
    batch_size=batch_size, shuffle=True)

In [38]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 10, 3)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(10 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [69]:
net = Net().float()
net.cuda()

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 10, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=250, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [40]:
# import numpy as np
x = np.random.randn(1,1,28,28)
x = Variable(torch.from_numpy(x))
net(x)

Variable containing:
-0.0740  0.0971 -0.0175  0.0679 -0.0049  0.1140 -0.0614 -0.0920  0.0104  0.0421
[torch.DoubleTensor of size (1,10)]

In [28]:
# net.double()

In [77]:
# create your optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
# criterion = nn.BCEWithLogitsLoss()
criterion = F.nll_loss


# # in your training loop:
# output = net(input)
# loss = criterion(output, target)
# optimizer.zero_grad()   # zero the gradient buffers
# loss.backward()
# optimizer.step()    # Does the update

In [78]:
# import os, time
train_epoch = 20
print('training start!')
start_time = time.time()
for epoch in range(train_epoch):
    D_losses = []
    G_losses = []
    epoch_start_time = time.time()
    for x, y in train_loader:
        x,y = Variable(x.cuda()), Variable(y.cuda())
        output = net(x)
        loss = criterion(output,y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    epoch_end_time = time.time()
    per_epoch_ptime = epoch_end_time - epoch_start_time
    print('[%d/%d] - ptime: %.2f, loss: %.3f' % ((epoch + 1), train_epoch, per_epoch_ptime,loss))

training start!
[1/20] - ptime: 3.37, loss: nan
[2/20] - ptime: 3.25, loss: nan
[3/20] - ptime: 3.26, loss: nan


KeyboardInterrupt: 

In [67]:
list(iter(net.parameters()))

[Parameter containing:
 (0 ,0 ,.,.) = 
   0.1142 -0.1127  0.1549  0.0980 -0.0949
  -0.0564  0.1106 -0.0915  0.1552 -0.0260
  -0.1157  0.1807  0.0523 -0.1858 -0.1594
   0.1828 -0.1434  0.0024 -0.1579 -0.0143
  -0.0889  0.1193  0.1695 -0.1254  0.0612
 
 (1 ,0 ,.,.) = 
   0.1500  0.0311 -0.0691  0.0558  0.0506
   0.0522 -0.0571  0.0384 -0.1067  0.1101
  -0.1783  0.0162 -0.0759 -0.1376 -0.1218
   0.0526 -0.1895 -0.0430 -0.1406 -0.1449
  -0.1483 -0.1761  0.1565  0.1516 -0.0942
 
 (2 ,0 ,.,.) = 
  -0.1552  0.1308 -0.1287  0.1107  0.0552
   0.1536 -0.0190 -0.1319  0.1729 -0.1083
  -0.0777 -0.1950  0.1829 -0.1572  0.0569
   0.0093 -0.0777 -0.1810  0.0573  0.1508
  -0.0238 -0.1650  0.1413 -0.0412 -0.0452
 
 (3 ,0 ,.,.) = 
  -0.1334  0.1595 -0.1003 -0.0029  0.0489
  -0.1970 -0.1474 -0.0739  0.1113 -0.0142
  -0.1154 -0.0462  0.0592 -0.0069  0.0551
  -0.0086 -0.0472  0.0565 -0.1754 -0.0094
  -0.1247  0.0253  0.0119  0.0068 -0.1632
 
 (4 ,0 ,.,.) = 
  -0.1007 -0.0019  0.0184 -0.0335 -0.1650
   0.02

In [79]:
labels_one_hot = torch.FloatTensor(128, k, 10).zero_()
labels_one_hot.scatter_(2, x, 1)
labels_one_hot

NameError: name 'k' is not defined