In [30]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim
import torchvision

In [None]:
class ProgNet(nn.Module):
    def __init__(self,depth):
        super(ProgNet,self).__init__()
        
        self.columns = nn.ModuleList([])
        self.depth = depth
        
    
    def forward(self,x,task_id=-1):
        assert self.columns
        inputs = [col[0](x) for col in self.columns]
        
        for l in range(1,self.depth):
            out = []
            
            for i,col in enumerate(self.columns):
                out.append(col[l](inputs[:i+1]))
        
        return out[task_id]
        
    def new_task(self):
        
        

In [3]:
net = nn.Sequential(
            nn.Linear(1,16),
            nn.Linear(16,32),
            nn.Linear(32,1))

In [5]:
dir(net)

['__call__',
 '__class__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backend',
 '_backward_hooks',
 '_buffers',
 '_construct',
 '_forward_hooks',
 '_forward_pre_hooks',
 '_get_item_by_idx',
 '_get_name',
 '_load_from_state_dict',
 '_load_state_dict_pre_hooks',
 '_modules',
 '_named_members',
 '_parameters',
 '_register_load_state_dict_pre_hook',
 '_register_state_dict_hook',
 '_save_to_state_dict',
 '_slow_forward',
 '_state_dict_hooks',
 '_tracing_name',
 '_version',
 'add_module',
 'apply',
 'buffers',
 'children',
 'cpu',
 'cuda',
 'double',
 'dump_patches',
 'eval',
 '

In [84]:
net[1]

Linear(in_features=16, out_features=32, bias=True)

# MNIST Classification

In [19]:
#data loading
batch_size_train = 64
batch_size_test = 1000
train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('./data', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('./data/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_test, shuffle=True)

In [20]:
data = next(iter(train_loader))

In [26]:
img,_ = data
print(img[0].view(-1).shape)

torch.Size([784])


In [76]:
#model definition
class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        
        self.conv_layers = nn.Sequential(
        nn.Conv2d(1,4,3,stride = 1, padding = 1), #out: (B,4,28,28)
        nn.ReLU(), 
        nn.BatchNorm2d(4),
        nn.MaxPool2d(2,stride = 2), #out: (B,4,14,14)
        nn.Conv2d(4,16,3,stride = 1, padding = 1), #out: (B,16,14,14)
        nn.ReLU(), 
        nn.BatchNorm2d(16),
        nn.MaxPool2d(2,stride = 2), #out: (B,16,7,7)
        nn.Conv2d(16,32,3,stride = 1, padding = 1), #out: (B,32,7,7)
        nn.ReLU(), 
        nn.BatchNorm2d(32),
        nn.MaxPool2d(2,stride = 2)) #out: (B,32,3,3)
        
        self.classifier = nn.Sequential(
        nn.Linear(in_features = 32*3*3,out_features = 128),
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(in_features = 128, out_features = 64), 
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(in_features = 64, out_features = 10))
        
    def forward(self,x):
        x = self.conv_layers(x)
        x = x.view(x.shape[0],-1)
        x = self.classifier(x)
        return x

In [77]:
model = MyNet()
params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Total number of parameters is: {}".format(params))
print(model)

Total number of parameters is: 51274
MyNet(
  (conv_layers): Sequential(
    (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(4, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=288, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inp

In [78]:
#Training
learning_rate = 0.001
num_epochs = 10

optimizer = optim.Adam(model.parameters(),
                       lr = learning_rate)
criterion = F.cross_entropy


In [79]:
train_losses = []
model.train()
for epoch in range(num_epochs):
    train_loss = 0
    for batch_idx, data in enumerate(train_loader):
        img, y = data
        optimizer.zero_grad()
        # forward
        y_pred = model(img)
        loss = criterion(y_pred, y,reduction = 'mean')
        # backward
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch + 1, num_epochs, train_loss / len(train_loader)))

    train_losses.append(train_loss/ len(train_loader))




epoch [1/10], loss:0.3411
epoch [2/10], loss:0.1091
epoch [3/10], loss:0.0876
epoch [4/10], loss:0.0727
epoch [5/10], loss:0.0657
epoch [6/10], loss:0.0600
epoch [7/10], loss:0.0523
epoch [8/10], loss:0.0496
epoch [9/10], loss:0.0469
epoch [10/10], loss:0.0454


In [45]:
288*128

36864

In [65]:
i = torch.randn(3,5,requires_grad = True) 
target = torch.randint(5,(3,), dtype = torch.int64)

In [66]:
target

tensor([3, 3, 2])

In [67]:
i

tensor([[-0.4914, -0.7180, -2.0485,  1.2676, -0.7600],
        [-0.4588,  0.2163,  0.7504, -0.7150,  0.9818],
        [ 0.5471, -1.5557, -0.8752,  0.5892, -1.0807]], requires_grad=True)

In [86]:
seq_net = nn.Sequential(
        nn.Conv2d(1,4,3,stride = 1, padding = 1), 
        nn.BatchNorm2d(4),
        nn.MaxPool2d(2,stride = 2),
        nn.Conv2d(4,16,3,stride = 1, padding = 1),
        nn.ReLU(), 
        nn.BatchNorm2d(16),
        nn.MaxPool2d(2,stride = 2), 
        nn.Conv2d(16,32,3,stride = 1, padding = 1), 
        nn.ReLU(), 
        nn.BatchNorm2d(32),
        nn.MaxPool2d(2,stride = 2),
        nn.Linear(in_features = 32*3*3,out_features = 128),
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(in_features = 128, out_features = 64), 
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(in_features = 64, out_features = 10))

In [89]:
for layer in seq_net: 
    print(layer.weight)

Parameter containing:
tensor([[[[ 0.0228, -0.2719, -0.1253],
          [ 0.1473, -0.3019, -0.0158],
          [ 0.0801,  0.3254,  0.2780]]],


        [[[-0.1355,  0.0735,  0.0446],
          [-0.2309,  0.0472,  0.0460],
          [ 0.3166, -0.1354,  0.1199]]],


        [[[ 0.2010, -0.2906,  0.1148],
          [-0.0604, -0.2402,  0.1401],
          [-0.1778, -0.2526,  0.2906]]],


        [[[ 0.2458, -0.1598, -0.3101],
          [-0.3148,  0.0281, -0.0030],
          [-0.0711,  0.0145,  0.1097]]]], requires_grad=True)
Parameter containing:
tensor([1., 1., 1., 1.], requires_grad=True)


AttributeError: 'MaxPool2d' object has no attribute 'weight'