In [1]:
import math
import torch
import os
import random
import numpy as np
from torch import nn

def seed_everything(seed: int = 42) -> None:
    """Seed the Python, NumPy and PyTorch random generators.

    Args:
        seed: Integer seed applied to every generator.

    Note:
        ``PYTHONHASHSEED`` is read by the interpreter at start-up, so the
        value exported here only reaches processes launched afterwards; it
        does not change string hashing in the already-running interpreter.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU rather than only the current device; this call
    # is silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner can pick different kernels from run to run, which
    # would undermine the deterministic flag set above.
    torch.backends.cudnn.benchmark = False


class Net(nn.Module):
    """Single affine layer followed by a softmax over dimension 1.

    Args:
        in_shape: Number of input features per sample.
        out_shape: Number of output units (one per category).
    """

    def __init__(self, in_shape: int, out_shape: int):
        super().__init__()
        # nn.Linear holds a weight matrix of shape (out_shape, in_shape)
        # plus a bias vector of length out_shape.
        self.linearTrans = nn.Linear(in_features=in_shape, out_features=out_shape)
        # Start with an all-zero bias; the weights keep nn.Linear's default
        # initialisation.
        nn.init.zeros_(self.linearTrans.bias)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the softmax (taken along dim 1) of the affine map of ``x``."""
        return self.softmax(self.linearTrans(x))



# Script: run one forward/backward pass of Net over the saved training tensors
# and print the loss, the weight gradient, the targets and the predictions.
seed_everything()
filedir_in = '../Data/Output/Chapter8/'
# Number of target categories; shared by the network output width and the
# one-hot encoding so the two can never disagree.
NUM_CLASSES = 4

train_path = os.path.join(filedir_in, 'Ex70-train.pt')
x_train = torch.load(train_path)

# x_train.shape[1] is used as the per-sample feature count.
net = Net(in_shape=x_train.shape[1], out_shape=NUM_CLASSES)
y_pred = net(x_train)

train_label_path = os.path.join(filedir_in, 'ex70-train_label.pt')
y_label = torch.load(train_label_path)
# Pin num_classes: without it one_hot infers the width from the largest label
# present, which would not match the network output if the highest class
# index happened to be absent from the loaded labels.
y_label = torch.nn.functional.one_hot(
    y_label.long(), num_classes=NUM_CLASSES
).to(torch.float)

# NOTE(review): BCELoss scores every output element as an independent binary
# target and averages over all of them. With softmax outputs and one-hot
# labels, categorical cross-entropy may be what is intended -- confirm.
loss = nn.BCELoss()
output = loss(y_pred, y_label)
print(output)
# backward() only computes gradients and stores them in each parameter's
# .grad; the parameters themselves are not updated (no optimizer step here).
output.backward()
print(net.linearTrans.weight.grad)
print(y_label)
print(y_pred)
# Sanity check: the gradient has the same shape as the weight it belongs to.
assert net.linearTrans.weight.grad.shape == net.linearTrans.weight.shape
# Prints -4 * ln(0.25); presumably a hand-computed reference value for a
# uniform 4-class prediction -- confirm what it is meant to be compared with.
print(-math.log(0.25)*4)

tensor(0.5623, grad_fn=<BinaryCrossEntropyBackward0>)
tensor([[ 2.1097e-06, -1.6584e-06, -4.6904e-07,  ..., -1.1038e-06,
          1.3178e-06,  3.3685e-06],
        [-7.4744e-07,  5.0572e-07,  1.3389e-07,  ...,  4.3032e-07,
         -4.4466e-07, -1.1497e-06],
        [-7.0701e-07,  5.9917e-07,  1.7039e-07,  ...,  3.5053e-07,
         -4.5378e-07, -1.1540e-06],
        [-6.5526e-07,  5.5355e-07,  1.6476e-07,  ...,  3.2295e-07,
         -4.1934e-07, -1.0648e-06]])
tensor([[1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        ...,
        [0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.]])
tensor([[0.2500, 0.2500, 0.2500, 0.2500],
        [0.2500, 0.2500, 0.2500, 0.2500],
        [0.2500, 0.2500, 0.2500, 0.2500],
        ...,
        [0.2501, 0.2502, 0.2498, 0.2499],
        [0.2524, 0.2479, 0.2507, 0.2491],
        [0.2586, 0.2479, 0.2540, 0.2395]], grad_fn=<SoftmaxBackward0>)
5.545177444479562
