# Mastering PyTorch

## Supervised learning

### Powerful PyTorch

#### Accompanying notebook to Video 1.1

In [1]:
# Import libs
from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import random
import time

In [2]:
# Setup globals: sizes shared by the models, dummy inputs and data loaders below
batch_size = 1  # samples per batch (dummy inputs and every DataLoader)
in_features = 10  # width of the input vectors fed to the linear models
hidden = 20  # width of the hidden linear layers
out_features = 1  # width of the model output and of the random targets

In [3]:
# Sequential API example
# Create model: Linear(in_features -> hidden) -> ReLU -> Linear(hidden -> out_features).
# This `model` is also the network optimized by the training loop further down.
model = nn.Sequential(
    nn.Linear(in_features, hidden),
    nn.ReLU(),
    nn.Linear(hidden, out_features)
)
# Show the registered layers
print(model)

Sequential(
  (0): Linear(in_features=10, out_features=20, bias=True)
  (1): ReLU()
  (2): Linear(in_features=20, out_features=1, bias=True)
)


In [4]:
# Create dummy input: random values of shape (batch_size, in_features)
x = Variable(torch.randn(batch_size, in_features))
# Run forward pass through the Sequential model and show the result
output = model(x)
print(output)

Variable containing:
-0.1792
[torch.FloatTensor of size (1,1)]



In [5]:
# Functional API example
# Create model
class CustomNet(nn.Module):
    def __init__(self, in_features, hidden, out_features):
        """
        Build the three fully connected layers:
        input -> hidden, hidden -> hidden and hidden -> output.
        """
        super(CustomNet, self).__init__()
        self.linear1 = nn.Linear(in_features, hidden)
        self.linear2 = nn.Linear(hidden, hidden)
        self.linear3 = nn.Linear(hidden, out_features)

    def forward(self, x):
        """
        Apply the first layer, then re-apply the second layer a random
        number of times, then apply the output layer.

        Before every pass through the second layer an integer is drawn
        from [0, 10] (both ends included); drawing 0 ends the loop, so the
        second layer may be skipped entirely.
        """
        activations = F.relu(self.linear1(x))
        # The control flow is plain Python, so the depth can differ per call.
        # (A data-dependent condition such as `activations.norm() > 2` is
        # another possible choice here.)
        while True:
            if random.randint(0, 10) == 0:
                break
            print('2nd layer used')
            activations = F.relu(self.linear2(activations))
        return self.linear3(activations)

# Instantiate the custom network with the global layer sizes and show its layers
custom_model = CustomNet(in_features, hidden, out_features)
print(custom_model)

CustomNet(
  (linear1): Linear(in_features=10, out_features=20, bias=True)
  (linear2): Linear(in_features=20, out_features=20, bias=True)
  (linear3): Linear(in_features=20, out_features=1, bias=True)
)


In [6]:
# Run forward pass with same dummy variable `x` that was fed to the Sequential model.
# forward() prints '2nd layer used' once for every pass through the second layer.
output = custom_model(x)
print(output)

2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
2nd layer used
Variable containing:
1.00000e-02 *
 -9.7912
[torch.FloatTensor of size (1,1)]



In [7]:
# ConvNet example

![ConvNet](images/conv_functional2.png)

In [8]:
# Debug example
# Create Convnet
class ConvNet(nn.Module):
    def __init__(self, in_channels, hidden, out_features):
        """
        Build two parallel 3x3 convolutions (10 channels each), a 3x3
        convolution that merges their 20 stacked channels into one, and a
        final linear layer.

        `hidden` is the input width of the linear layer, i.e. the number of
        values per sample after the merged feature map has been flattened.
        """
        super(ConvNet, self).__init__()
        # Both parallel branches use the same configuration.
        branch_cfg = dict(in_channels=in_channels,
                          out_channels=10,
                          kernel_size=3,
                          padding=1)
        self.conv1_1 = nn.Conv2d(**branch_cfg)
        self.conv1_2 = nn.Conv2d(**branch_cfg)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=1,
                               kernel_size=3, padding=1)
        self.linear1 = nn.Linear(hidden, out_features)

    def forward(self, x):
        """
        Feed the input to both parallel convolutions, concatenate their
        activations along the channel dimension, merge them with conv2 and
        map the flattened result through the linear layer.
        """
        branches = [F.relu(conv(x)) for conv in (self.conv1_1, self.conv1_2)]
        stacked = torch.cat(branches, dim=1)
        feature_map = self.conv2(stacked)
        # Debug aid: show the size that determines the linear layer's input width
        print('x size (after conv2): {}'.format(feature_map.shape))
        flattened = feature_map.view(feature_map.size(0), -1)
        return self.linear1(flattened)
    
# hidden=576 matches the flattened conv2 output for a 24x24 input:
# 1 channel * 24 * 24 values per sample (forward() prints that size).
conv_model = ConvNet(in_channels=3, hidden=576, out_features=out_features)
# Create dummy input: batch_size random 3-channel 24x24 images
x_conv = Variable(torch.randn(batch_size, 3, 24, 24))

In [9]:
# Run forward pass; forward() also prints the feature-map size after conv2
output = conv_model(x_conv)
print(output)

x size (after conv2): torch.Size([1, 1, 24, 24])
Variable containing:
 0.3195
[torch.FloatTensor of size (1,1)]



In [10]:
## Dataset / DataLoader example
# Create a random Dataset
class RandomDataset(Dataset):
    """
    Dataset of random input/target pairs, optionally slow to read.

    Inputs have `in_features` values and targets `out_features` values per
    sample (both taken from the notebook globals). Every access adds fresh
    small Gaussian noise to the input, and can additionally burn CPU time to
    imitate expensive per-sample preprocessing.

    Args:
        nb_samples: number of samples to generate.
        consume_time: if True, run a busy loop on every item access.
        busy_iterations: length of that busy loop (only used when
            `consume_time` is True).
    """

    def __init__(self, nb_samples, consume_time=False, busy_iterations=5000000):
        self.data = torch.randn(nb_samples, in_features)
        self.target = torch.randn(nb_samples, out_features)
        self.consume_time = consume_time
        self.busy_iterations = busy_iterations

    def __getitem__(self, index):
        """Return the (noisy input, target) pair stored at `index`."""
        x = self.data[index]
        y = self.target[index]

        # Transform data: add Gaussian noise scaled by 1e-2; the stored
        # sample itself is left unchanged.
        x = x + torch.FloatTensor(x.shape).normal_() * 1e-2

        if self.consume_time:
            # Do some time consuming operation.
            # `range` instead of the Python-2-only `xrange`, so this branch
            # also runs on Python 3.
            for i in range(self.busy_iterations):
                _ = i + 1

        return x, y

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

In [11]:
# Training loop
# Loss: mean squared error between model output and target
criterion = nn.MSELoss()
# Optimizer: Adam over the parameters of the Sequential `model`, learning rate 1e-2
optimizer = optim.Adam(model.parameters(), lr=1e-2)
def train(loader):
    """
    Train the global `model` for one pass over `loader`.

    For every batch this clears the gradients via the global `optimizer`,
    runs the forward pass, evaluates the global `criterion`, backpropagates
    and applies one optimizer step, then prints the batch index and loss.

    Args:
        loader: iterable yielding `(data, target)` batches, e.g. a DataLoader.
    """
    for batch_idx, (data, target) in enumerate(loader):
        # Wrap data and target into a Variable
        data, target = Variable(data), Variable(target)

        # Clear gradients
        optimizer.zero_grad()

        # Forward pass
        output = model(data)

        # Calculate loss
        loss = criterion(output, target)

        # Backward pass
        loss.backward()

        # Weight update
        optimizer.step()

        # Depending on the PyTorch version the loss is a 1-element or a
        # 0-dim tensor; indexing a 0-dim array with [0] raises IndexError,
        # so flatten first to read the scalar in either case.
        loss_value = loss.data.numpy().ravel()[0]
        print('Batch {}\tLoss {}'.format(batch_idx, loss_value))

In [12]:
# Create Dataset: 30 random samples, no artificial per-sample delay
data = RandomDataset(nb_samples=30)
# Create DataLoader: shuffled batches of `batch_size` samples;
# num_workers=0 means the samples are loaded in the main process
loader = DataLoader(dataset=data,
                    batch_size=batch_size,
                    num_workers=0,
                    shuffle=True)

In [13]:
# Start training: one pass over the fast loader, timed with wall-clock time
t0 = time.time()
train(loader)
time_fast = time.time() - t0
print('Training finished in {:.2f} seconds'.format(time_fast))

Batch 0	Loss 0.0100054359064
Batch 1	Loss 0.325271308422
Batch 2	Loss 0.206538036466
Batch 3	Loss 0.643678188324
Batch 4	Loss 0.0062838695012
Batch 5	Loss 3.43429660797
Batch 6	Loss 2.78484129906
Batch 7	Loss 1.80068182945
Batch 8	Loss 1.17312073708
Batch 9	Loss 1.64429140091
Batch 10	Loss 0.14378747344
Batch 11	Loss 0.900345027447
Batch 12	Loss 2.21566462517
Batch 13	Loss 0.264903515577
Batch 14	Loss 0.652967095375
Batch 15	Loss 0.696649849415
Batch 16	Loss 0.0397122353315
Batch 17	Loss 1.01731193066
Batch 18	Loss 0.131678208709
Batch 19	Loss 0.00380472932011
Batch 20	Loss 0.0193933118135
Batch 21	Loss 1.26183438301
Batch 22	Loss 0.38179564476
Batch 23	Loss 1.83552241325
Batch 24	Loss 0.342501401901
Batch 25	Loss 1.9629894495
Batch 26	Loss 1.65481686592
Batch 27	Loss 1.07737565041
Batch 28	Loss 0.597848892212
Batch 29	Loss 1.65039634705
Training finished in 0.06 seconds


In [14]:
# Create time consuming Dataset: every item access runs the busy loop
data_slow = RandomDataset(nb_samples=30, consume_time=True)
# Same loader settings as before (num_workers=0), so only the dataset differs
loader_slow = DataLoader(dataset=data_slow,
                         batch_size=batch_size,
                         num_workers=0,
                         shuffle=True)
# Start training: one timed pass over the slow loader
t0 = time.time()
train(loader_slow)
time_slow = time.time() - t0
print('Training finished in {:.2f} seconds'.format(time_slow))

Batch 0	Loss 0.000563835317735
Batch 1	Loss 0.859951257706
Batch 2	Loss 0.21441693604
Batch 3	Loss 0.296080857515
Batch 4	Loss 1.13179647923
Batch 5	Loss 1.75049352646
Batch 6	Loss 0.643922448158
Batch 7	Loss 0.599510550499
Batch 8	Loss 0.901458740234
Batch 9	Loss 0.486395895481
Batch 10	Loss 1.80750954151
Batch 11	Loss 0.657498240471
Batch 12	Loss 0.00631656683981
Batch 13	Loss 0.657842814922
Batch 14	Loss 0.14444668591
Batch 15	Loss 1.89582431316
Batch 16	Loss 0.279611676931
Batch 17	Loss 0.223057597876
Batch 18	Loss 3.22094893456
Batch 19	Loss 2.88641214371
Batch 20	Loss 0.440565705299
Batch 21	Loss 4.95409011841
Batch 22	Loss 1.73405170441
Batch 23	Loss 3.30987095833
Batch 24	Loss 0.421838968992
Batch 25	Loss 1.42132568359
Batch 26	Loss 0.472670763731
Batch 27	Loss 5.29290962219
Batch 28	Loss 0.176814392209
Batch 29	Loss 1.1817561388
Training finished in 12.68 seconds


In [15]:
# Same slow dataset, but loaded with num_workers=4 instead of 0
loader_slow_multi_proc = DataLoader(dataset=data_slow,
                                    batch_size=batch_size,
                                    num_workers=4,
                                    shuffle=True)
# Start training: one timed pass, to compare against the num_workers=0 run
t0 = time.time()
train(loader_slow_multi_proc)
time_multi_proc = time.time() - t0
print('Training finished in {:.2f} seconds'.format(time_multi_proc))

Batch 0	Loss 0.258114695549
Batch 1	Loss 0.364760160446
Batch 2	Loss 4.24717903137
Batch 3	Loss 0.372549623251
Batch 4	Loss 1.21262454987
Batch 5	Loss 0.654698431492
Batch 6	Loss 1.12767219543
Batch 7	Loss 1.16502165794
Batch 8	Loss 1.72392129898
Batch 9	Loss 0.812119007111
Batch 10	Loss 0.0791780874133
Batch 11	Loss 1.08515655994
Batch 12	Loss 4.75121450424
Batch 13	Loss 0.227758780122
Batch 14	Loss 7.81665803515e-05
Batch 15	Loss 0.371630400419
Batch 16	Loss 0.380750238895
Batch 17	Loss 0.0600721649826
Batch 18	Loss 1.81422591209
Batch 19	Loss 0.851997315884
Batch 20	Loss 2.73495793343
Batch 21	Loss 0.132338806987
Batch 22	Loss 2.66250491142
Batch 23	Loss 2.19154214859
Batch 24	Loss 0.11320348084
Batch 25	Loss 0.596353948116
Batch 26	Loss 0.495449334383
Batch 27	Loss 1.47845554352
Batch 28	Loss 0.45920842886
Batch 29	Loss 0.0638920143247
Training finished in 7.44 seconds
