In [37]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(3, 8, 5, padding=2)
        self.conv2 = nn.Conv2d(8, 8, 5, padding=2)
        self.conv3 = nn.Conv2d(8, 16, 3, padding=1)
        self.conv4 = nn.Conv2d(16, 16, 3, padding=1)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16*32*32, 64)
        self.fc2 = nn.Linear(64, 16)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, (2, 2))
        
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.max_pool2d(x, (2, 2))
        
        # If the size is a square you can only specify a single number
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


model = Net()
print(model)

Net(
  (conv1): Conv2d(3, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv2): Conv2d(8, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv3): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=16384, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=1, bias=True)
)


In [31]:
X = torch.randn(5, 3, 128, 128)
y = torch.randn(5, 1)

In [32]:
def train(X, y, model, epoch=10, batch_size=None, optimizer=None, criterion=None):
    
    if batch_size is None:
        batch_size = X.size()[0]
        
    for t in range(epoch):
        permutation = torch.randperm(X.size()[0])
        
        for i in range(0, X.size()[0], batch_size):

            indices = permutation[i:i+batch_size]
            batch_x, batch_y = X[indices], y[indices]
        
            batch_y_pred = model(batch_x)
            loss = criterion(batch_y_pred, batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        print ("epoch %d, loss %.6f"%(t, loss))

In [35]:
optimizer = optim.Adam(net.parameters())
criterion = nn.MSELoss()

In [36]:
train(X, y, net, optimizer=optimizer, criterion=criterion)

epoch 0, loss 0.448529
epoch 1, loss 0.316407
epoch 2, loss 0.415447
epoch 3, loss 0.331212
epoch 4, loss 0.287186
epoch 5, loss 0.297790
epoch 6, loss 0.305666
epoch 7, loss 0.296974
epoch 8, loss 0.276058
epoch 9, loss 0.254486


In [None]:
oof_preds = np.zeros(y_train.shape)

for n_fold, (trn_idx, val_idx) in enumerate(folds.split(X_train)):
    trn_x, val_x = X_train[trn_idx], X_train[val_idx]
    trn_y, val_y = y_train[trn_idx], y_train[val_idx]
        
    train(X, y, model, optimizer=optimizer, criterion=criterion)
    with torch.no_grad():
        oof_preds[val_idx] = model.forward(val_x)