In [4]:
import torch
import os
import random
import numpy as np
from torch import nn
import torch.optim as optim



def seed_everything(seed=42):
    """Seed every random number generator this script relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices) and asks cuDNN for deterministic behaviour, so that repeated
    runs start from the same randomly initialised parameters.

    Args:
        seed: Integer seed applied to every generator. Defaults to 42.
    """
    random.seed(seed)
    # NOTE: PYTHONHASHSEED is read at interpreter start-up, so setting it here
    # only influences child processes launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; this call is silently
    # ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, so it has
    # to be off for results to be repeatable.
    torch.backends.cudnn.benchmark = False
# seed_everything() fixes the random seeds so that randomly initialised parameters are reproducible between runs

class Net(nn.Module):
    """Single-layer classifier: one affine transform followed by a softmax.

    Args:
        in_shape: Number of input features per example.
        out_shape: Number of output categories.
    """

    def __init__(self, in_shape: int, out_shape: int):
        super().__init__()
        # Affine layer; nn.Linear stores its weight as (out_shape x in_shape)
        # and adds a bias vector of length out_shape.
        self.linearTrans = nn.Linear(
            in_features=in_shape,
            out_features=out_shape,
            bias=True,
        )
        # Start from an all-zero bias; the weights keep nn.Linear's default
        # random initialisation.
        nn.init.zeros_(self.linearTrans.bias.data)
        # Normalise along dim 1 so that each row of the output sums to 1.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return softmax(linearTrans(x)), normalised over dim 1."""
        return self.softmax(self.linearTrans(x))


# Train the single-layer classifier on the saved training tensors and store its weights.
seed_everything()

# Number of target categories; shared by the network output size and the
# one-hot targets so the two can never disagree.
NUM_CLASSES = 4

filedir_in = '../Data/Output/Chapter8'
modelpath = '../Data/Output/Chapter8/torch_single_layer.pth'

# Feature matrix with one row per training example.
train_path = os.path.join(filedir_in, 'train.pt')
x_train = torch.load(train_path)

# The input width is taken from the data (300 according to the original author's note).
net = Net(in_shape=x_train.shape[1], out_shape=NUM_CLASSES)

train_label_path = os.path.join(filedir_in, 'train_label.pt')
y_label = torch.load(train_label_path)
# Turn the class indices into one-hot rows such as [0, 1, 0, 0]. num_classes is
# passed explicitly: otherwise one_hot infers the width from the largest label
# present, which no longer matches the network output if the highest class is
# missing from the labels.
y_label = torch.nn.functional.one_hot(
    y_label.long(), num_classes=NUM_CLASSES
).to(torch.float)

# Loss function.
# NOTE(review): BCELoss scores each of the NUM_CLASSES outputs as an independent
# binary target. For softmax outputs a categorical cross-entropy is the more
# usual choice; it is kept unchanged here so the recorded losses stay comparable.
criterion = nn.BCELoss()

# SGD with momentum; lr is the learning rate. The whole training set is fed in
# a single batch, so every epoch performs exactly one parameter update.
optimizer = optim.SGD(net.parameters(),
                      lr=0.001,
                      momentum=0.9)

for epoch in range(100):
    # Clear the gradients accumulated during the previous epoch.
    optimizer.zero_grad()
    # Forward pass (invokes Net.forward).
    y_pred = net(x_train)
    loss = criterion(y_pred, y_label)
    # Backpropagation: compute the gradient of the loss with respect to the
    # parameters being optimised.
    loss.backward()
    optimizer.step()

    # Report every 10th epoch, numbered from 1.
    if epoch % 10 == 0:
        print('epoch: {}, loss: {:.4f}'.format(epoch+1, loss.item()))

# Persist only the learned parameters (state_dict), not the whole module.
torch.save(net.state_dict(), modelpath)

epoch: 1, loss: 0.5623
epoch: 11, loss: 0.5619
epoch: 21, loss: 0.5609
epoch: 31, loss: 0.5599
epoch: 41, loss: 0.5588
epoch: 51, loss: 0.5577
epoch: 61, loss: 0.5566
epoch: 71, loss: 0.5555
epoch: 81, loss: 0.5545
epoch: 91, loss: 0.5535
