In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# torch.backends.cudnn.benchmark = True

In [3]:
class LinearModel(nn.Module):
    def __init__(self, in_features, inter_features, out_features, bias=True):
        super(LinearModel, self).__init__()
        self.layer = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=inter_features, bias=bias),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=inter_features, out_features=out_features, bias=bias)
        )
        self.reset_parameters()
    def reset_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
    def forward(self, x):
        # the shape of x should be batch_size x in_features
        x = self.layer(x)
        return x

In [4]:
if __name__ == "__main__":
    # define the model
    model = LinearModel(64, 32, 4, True)
    model = model.to(device)

    # loss function
    criterion = nn.MSELoss()

    # data, batch_size = 1000
    input_data  = torch.randn(1000, 64).to(device)
    target_data = torch.randn(1000, 4).to(device)
    test_input  = torch.randn(200, 64).to(device)
    test_target = torch.randn(200, 4).to(device)

    # optimizer
    optimizer = optim.SGD(params=model.parameters(), lr=0.01)

    # train for 100 epochs
    for epoch in range(100): # iterate through epochs
        torch.cuda.empty_cache()
        for i in range(1000): # iterate through dataset
            # set to training mode
            model.train()
            # clear grad cache
            model.zero_grad()
            optimizer.zero_grad()
            # forward
            input  = input_data[i,:].unsqueeze(0)
            target = target_data[i,:].unsqueeze(0)
            out = model(input)
            loss = criterion(out, target)
            # backward
            loss.backward()
            optimizer.step()
        #  after each epoch: check loss on test set
        model.eval()
        with torch.no_grad():
            out_test = model(test_input)
            loss_test = criterion(out_test, test_target)
            print("epoch %d: test loss %.4f" % (epoch, loss.item()))

epoch 0: test loss 1.9764
epoch 1: test loss 2.0172
epoch 2: test loss 1.9989
epoch 3: test loss 1.9635
epoch 4: test loss 1.9897
epoch 5: test loss 2.0383
epoch 6: test loss 2.5544
epoch 7: test loss 1.7550
epoch 8: test loss 2.2068
epoch 9: test loss 2.1446
epoch 10: test loss 1.8300
epoch 11: test loss 2.0583
epoch 12: test loss 2.2782
epoch 13: test loss 2.1492
epoch 14: test loss 2.2195
epoch 15: test loss 2.1914
epoch 16: test loss 1.8402
epoch 17: test loss 1.8640
epoch 18: test loss 1.6344
epoch 19: test loss 2.1087
epoch 20: test loss 1.9724
epoch 21: test loss 1.8955
epoch 22: test loss 1.9944
epoch 23: test loss 1.6867
epoch 24: test loss 1.3211
epoch 25: test loss 1.4139
epoch 26: test loss 1.7521
epoch 27: test loss 1.7928
epoch 28: test loss 1.9752
epoch 29: test loss 2.0323
epoch 30: test loss 2.0787
epoch 31: test loss 1.5781
epoch 32: test loss 1.7483
epoch 33: test loss 1.8882
epoch 34: test loss 1.7926
epoch 35: test loss 1.8656
epoch 36: test loss 1.7317
epoch 37: t