In [1]:
import torch
from torch import optim
from torch import nn

class MAGNET(nn.Module):
  def __init__(self, input_size, hidden_size, adjacency, embeddings, heads=4, slope=0.01, dropout=0.5):
    super(MAGNET, self).__init__()
    self.embedding = nn.Embedding.from_pretrained(embeddings)
    self.biLSTM = nn.LSTM(input_size,hidden_size,batch_first=True,bidirectional=True)
    self.adjacency = nn.Parameter(adjacency)
    self.dropout = nn.Dropout(dropout)
    self.edge_weights = nn.Linear(hidden_size*2*2, 1, bias=False)
    self.activation = nn.LeakyReLU(slope)
    self.softmax = nn.Softmax(dim=1)
    self.tanh = nn.Tanh()
    self.heads = heads
    self.transform_dim1 = nn.Linear(input_size, hidden_size*2, bias=False)
    self.transform_dim2 = nn.Linear(hidden_size*2, hidden_size*2, bias=False)
    self.transform_dimensions = [self.transform_dim1, self.transform_dim2]

  def forward(self, token, label_embedding):
      #BILSTM part
      features = self.embedding(token)
      out, (h, _) = self.biLSTM(features)
      embedding = torch.cat([h[-2, :, :], h[-1, :, :]], dim=1)
      embedding = self.dropout(embedding)

      #GAT PART
      for td in self.transform_dimensions: #Two Multiheaded GAT layers
        outputs = []
        for head in range(self.heads):
          label_embed = td(label_embedding)
          n, embed_size = label_embed.shape

          label_embed_combinations = label_embed.unsqueeze(1).expand(-1, n, -1)
          label_embed_combinations = torch.cat([label_embed_combinations, label_embed.unsqueeze(0).expand(n, -1, -1)], dim=2)
          e = self.activation(self.edge_weights(label_embed_combinations).squeeze(2))

          attention_coefficients = self.tanh(torch.mul(e,self.adjacency))

          new_h = torch.matmul(attention_coefficients.to(label_embed.dtype), label_embed)
          outputs.append(new_h)
        outputs = self.activation(torch.mean(torch.stack(outputs, dim=0),dim=0))

        label_embedding = outputs
      attention_features = self.dropout(label_embedding)
      attention_features = attention_features.transpose(0, 1)
      predicted_labels = torch.matmul(embedding, attention_features)
      return predicted_labels
import os
def load_checkpoint(model, filename='/home/kh4ngptt/Documents/COURSE/DL/project/Real-time-icons-/checkpoint/MAGNET_best_model.pt'):
    """
    Load the best model checkpoint
    """
    if not os.path.exists(filename):
        print("No checkpoint found")
        return model

    checkpoint = torch.load(filename)
    model.load_state_dict(checkpoint['model_state_dict'])
    print(f"Loaded best model from epoch {checkpoint['epoch']} with loss {checkpoint['loss']:.5f}")
    return model
model = MAGNET(input_size=300, hidden_size=250, adjacency=torch.randn(10, 10), embeddings=torch.randn(10000, 128))

model = load_checkpoint(model)

  checkpoint = torch.load(filename)


RuntimeError: Error(s) in loading state_dict for MAGNET:
	size mismatch for adjacency: copying a param with shape torch.Size([90, 90]) from checkpoint, the shape in current model is torch.Size([10, 10]).
	size mismatch for embedding.weight: copying a param with shape torch.Size([24630, 300]) from checkpoint, the shape in current model is torch.Size([10000, 128]).