<a href="https://colab.research.google.com/github/MatteoGuglielmi-tech/Polarity-and-Subjectivity-Detection/blob/main/src/MyModel/NTN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### NTN (Neural Tensor Network) [[reference_paper](https://proceedings.neurips.cc/paper/2013/file/b337e84de8752b27eda3a12363109e80-Paper.pdf)]

<u><i>Goal</i></u> : state if two entities $(e_1, e_2)$ are in a certain relationship $R$.   
>Ex. defines whehter $$(e_1, R, e_2) = (\text{Bengal tiger}, \text{has part}, \text{tail})$$ is true and with which certainty.

- $e_1$ and $e_2$ are vector representations or features of the two entities.
- NTN, unlike a linear canoncical NN layer, uses a bilinear tensor layer that directly relates two entity vectors across differet dimensions.
- Model computes a score of how likely it is two entities are in a specific position following : $$g(e_1, R, e_2) = u_R^Tf\biggr(e_i^T W_R^{[1:K]}e_2+V_R \begin{align}
    \begin{bmatrix}
           e_{1} \\
           e_{2} \\
         \end{bmatrix}
  \end{align} + b_R\Biggl)$$  
where : 
- $f=\tanh$
- $W_R^{[1:K]} \in \mathbb{R}^{d\times d\times k}$ is a multi-dimensional tensor
- $e_1^TW_R^{[1:k]}e_2=h\in\mathbb{R}$ is the bilinear tensor
- $V_R \in \mathbb{R}^{k\times2d}$, $U \in \mathbb{R}^K$, $b_R\in \mathbb{R}^K$ are NN parameters




In [1]:
import torch
import torch.nn as nn
from typing import Tuple
import numpy as np

In [2]:
class NeuralTensorNetwork(nn.Module):
    def __init__(self, output_dim: int, input_dim: int, activation: str="tanh", mean: float=0.0, std: float=1.0, final_layer: str='linear'):
        
        super(NeuralTensorNetwork, self).__init__()

        # setting input and output dimensions
        self.k = output_dim
        self.d = input_dim # e1,e2

        # setting mean and std for random initialization
        self.mean = mean
        self.std = std

        self.activation = activation

        # parameters has been used in order to consider W, V, b as model parameters
        # inference -> they'll be optimized

        # normal sampling -> https://pytorch.org/docs/stable/generated/torch.normal.html
        # parameter -> https://pytorch.org/docs/stable/generated/torch.nn.parameter.Parameter.html#torch.nn.parameter.Parameter
        self.W = nn.Parameter(torch.normal(self.mean, self.std, size=(self.k, self.d, self.d)))
        self.V = nn.Parameter(torch.normal(self.mean, self.std, size=(2*self.d, self.k)))
        self.b = nn.Parameter(torch.zeros(size=(self.d,)))
        
        if activation == 'tanh':
            self.activation = nn.Tanh()
        elif activation == 'sigmoid':
            self.activation = nn.Sigmoid()
        elif self.activation == 'relu':
            self.activation = nn.ReLU()
        # checking for a good activation function
        else:
            raise ValueError('Possible activation choices are tanh, sigmoid or ReLU')

    def forward(self, inputs: Tuple[torch.Tensor, torch.Tensor]):

        # getting the entities
        e1 = inputs[0]
        e2 = inputs[1]

        # input tensor should be of shape (batch_size, padded_length, 768)
        batch_size = e1[0]
        k = self.k
        d = self.d

        # bilinear tensor + bias
        bil_bias = [torch.sum((e2 * torch.dot(e1, self.W[0])) + self.b, axis=1)]
        for i in range(1,k):
            bil_bias.append(torch.sum((e2*torch.dot(e1, self.W[i]))) + self.b, axis=1)
        bil_bias = torch.reshape(torch.cat(bil_bias, axis=0), (batch_size, k))

        # Vr * [e1, e2]
        rest = torch.dot(torch.cat([e1,e2]), self.V)

        e1_R_e2 = bil_bias + rest

        # applying activation
        f = self.activation(e1_R_e2)
        g = nn.Linear(f)

        return f


In [3]:
import numpy as np


# Dummy training data
x_train1 = np.random.random((1000, 300))
x_train2 = np.random.random((1000, 300))
y_train = np.random.random((1000, 1))

# Dummy validation data
x_val1 = np.random.random((100, 300))
x_val2 = np.random.random((100, 300))
y_val = np.random.random((100, 1))


print ('Shape of Training Data: ', x_train1.shape, x_train2.shape, y_train.shape)
print ('Shape of Validation Data', x_val1.shape, x_val2.shape, y_val.shape)

Shape of Training Data:  (1000, 300) (1000, 300) (1000, 1)
Shape of Validation Data (100, 300) (100, 300) (100, 1)


In [4]:
def get_mse() -> torch.nn.MSELoss:
    return nn.MSELoss()

In [5]:
def get_optimizer(model, lr):
    return torch.optim.Adam(model.parameters(), lr)

In [6]:
def training_step(model, data, optimizer, cf, device='cuda'):
    samples = 0.
    cumulative_loss = 0.
    cumulative_accuracy = 0.
  
    model.train() 
 
    # iterate over the training set
    for batch_idx, (inputs, targets) in enumerate(data):
        # load data into GPU
        inputs = inputs.to(device)
        targets = targets.to(device)
      
        # forward pass
        outputs = model(inputs)

        # loss computation
        loss = get_mse(outputs,targets)

        # backward pass
        loss.backward()
    
        # parameters update
        optimizer.step()

        # gradients reset
        optimizer.zero_grad()

        # fetch prediction and loss value
        samples += inputs.shape[0]
        cumulative_loss += loss.item()
        _, predicted = outputs.max(dim=1) # max() returns (maximum_value, index_of_maximum_value)

        # compute training accuracy
        cumulative_accuracy += predicted.eq(targets).sum().item()

    return cumulative_loss/samples, (cumulative_accuracy/samples)*100

In [7]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler


def batching_data(dataset, batch_size: int=64) -> Tuple[torch.utils.data.DataLoader]:
    
    # dataset will be of the form [x_train, x_train,]
    # defining DataLoader that batches ids randomly
    batched = DataLoader(dataset=dataset, sampler=RandomSampler(dataset), batch_size=batch_size, shuffle=False)


    return batched

In [8]:
def main():
    model = NeuralTensorNetwork(output_dim=32, input_dim=300, activation='relu')
    optimizer = get_optimizer(model, 0.001)
    loss = get_mse()

    data = batching_data([x_train1, x_train2])

    for e in range(0,5):
        train_loss, train_accuracy = training_step(model, data, optimizer, loss, 'cuda')
        print(f"Training loss: {train_loss} \n Training accuracy: {train_accuracy}")


In [9]:
main()

RuntimeError: ignored