In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# torch.backends.cudnn.benchmark = True

In [3]:
class LinearModel(nn.Module):
    def __init__(self, in_features, inter_features, out_features, bias=True):
        super(LinearModel, self).__init__()
        self.bias = bias
        self.training = True
        self.weight_param1 = nn.Parameter(
            torch.FloatTensor(inter_features, in_features).normal_(0.0, 0.01)
        )
        self.weight_param2 = nn.Parameter(
            torch.FloatTensor(out_features, inter_features).normal_(0.0, 0.01)
        )
        if self.bias:
            self.bias_param1 = nn.Parameter(
                torch.FloatTensor(inter_features).fill_(0.)
            )
            self.bias_param2 = nn.Parameter(
                torch.FloatTensor(out_features).fill_(0.)
            )
    def set_mode(self, training):
        self.training = training
    def forward(self, x):
        # the shape of x should be batch_size x in_features
        x = F.linear(input=x, weight=self.weight_param1,
            bias=self.bias_param1 if self.bias else None)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.linear(input=x, weight=self.weight_param2,
            bias=self.bias_param2 if self.bias else None)
        return x

In [4]:
if __name__ == "__main__":
    # define the model
    model = LinearModel(64, 32, 4, True)
    model = model.to(device)

    # loss function
    criterion = nn.MSELoss()

    # data, batch_size = 1000
    # the data is random numbers, so the following training results are meaningless
    input_data  = torch.randn(1000, 64).to(device)
    target_data = torch.randn(1000, 4).to(device)
    test_input  = torch.randn(200, 64).to(device)
    test_target = torch.randn(200, 4).to(device)

    # optimizer
    optimizer = optim.SGD(params=model.parameters(), lr=0.01)

    # train for 100 epochs
    for epoch in range(100): # iterate through epochs
        torch.cuda.empty_cache()
        for i in range(1000): # iterate through dataset
            # set to training mode
            model.set_mode(True)
            # clear grad cache
            model.zero_grad()
            optimizer.zero_grad()
            # forward
            input  = input_data[i,:].unsqueeze(0)
            target = target_data[i,:].unsqueeze(0)
            out = model(input)
            loss = criterion(out, target)
            # backward
            loss.backward()
            optimizer.step()
        #  after each epoch: check loss on test set
        model.set_mode(False)
        with torch.no_grad():
            out_test = model(test_input)
            loss_test = criterion(out_test, test_target)
            print("epoch %d: test loss %.4f" % (epoch, loss.item()))

epoch 0: test loss 2.1063
epoch 1: test loss 2.0717
epoch 2: test loss 2.0460
epoch 3: test loss 2.1756
epoch 4: test loss 2.0124
epoch 5: test loss 2.1441
epoch 6: test loss 1.8370
epoch 7: test loss 1.8810
epoch 8: test loss 1.9047
epoch 9: test loss 1.6981
epoch 10: test loss 1.7473
epoch 11: test loss 2.8441
epoch 12: test loss 2.0492
epoch 13: test loss 1.5215
epoch 14: test loss 1.5431
epoch 15: test loss 1.8616
epoch 16: test loss 1.5933
epoch 17: test loss 1.4065
epoch 18: test loss 0.9138
epoch 19: test loss 2.3036
epoch 20: test loss 1.7263
epoch 21: test loss 1.5956
epoch 22: test loss 1.4211
epoch 23: test loss 1.2523
epoch 24: test loss 1.7488
epoch 25: test loss 1.4299
epoch 26: test loss 2.0662
epoch 27: test loss 1.8751
epoch 28: test loss 0.7659
epoch 29: test loss 1.6802
epoch 30: test loss 1.7868
epoch 31: test loss 2.0200
epoch 32: test loss 0.9676
epoch 33: test loss 1.3913
epoch 34: test loss 1.5627
epoch 35: test loss 1.5456
epoch 36: test loss 0.7565
epoch 37: t