In [None]:
!pip install sentence-transformers
!pip install torchmetrics
import nltk
from sentence_transformers import SentenceTransformer

nltk.download('punkt')
rubert_sentence = SentenceTransformer('all-distilroberta-v1')

In [None]:
import numpy as np
# Load the chats and their targets from the two .npy files (chats first, then targets).
augmented_chats, augmented_targets = (
    np.load(npy_path)
    for npy_path in ("data/augmented_chats.npy", "data/augmented_targets.npy")
)
# Displayed as the cell output: the shape of the chat array.
augmented_chats.shape

In [None]:
# Encode the chats one at a time; each encode() result is appended in order,
# so chat_embeddings[k] belongs to augmented_chats[k].
chat_embeddings = []
for chat_index, chat in enumerate(augmented_chats):
  chat_embeddings.append(rubert_sentence.encode(chat))
  # Lightweight progress report every 10000 chats.
  if chat_index % 10000 == 0:
    print(chat_index)

In [None]:
import numpy as np

# Number of leading samples used for training/validation; everything after it is the test set.
TRAIN_SIZE = 2500

# Stack the embeddings once and keep only the targets that have an embedding,
# so both arrays are index-aligned and have the same length.
all_chat_embeddings = np.asarray(chat_embeddings)
all_targets = np.asarray(augmented_targets[:len(chat_embeddings)])

train_targets = all_targets[:TRAIN_SIZE]
train_chat_embeddings = all_chat_embeddings[:TRAIN_SIZE]

# Slice the test split from the aligned arrays. Slicing `augmented_targets` directly
# would yield more targets than embeddings whenever fewer chats were embedded than
# targets exist, and the dataset would then index past the end of the embeddings.
test_targets = all_targets[TRAIN_SIZE:]
test_chat_embeddings = all_chat_embeddings[TRAIN_SIZE:]

In [None]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
from torch.nn.functional import one_hot

class TargetDataset(Dataset):
    """Index-aligned pairs of (chat embedding, target).

    ``target_list[k]`` and ``chat_list[k]`` are treated as one sample. On access
    the target is converted to a ``torch.long`` tensor, while the chat entry is
    handed back exactly as it is stored in ``chat_list``.
    """

    def __init__(self, target_list, chat_list):
        self.chat_list = chat_list
        self.target_list = target_list

    def __len__(self):
        # The number of samples is taken from the targets alone.
        return len(self.target_list)

    def __getitem__(self, idx):
        label = torch.tensor(self.target_list[idx], dtype=torch.long)
        return self.chat_list[idx], label
    
# Wrap the training and test arrays so they can be served through a DataLoader.
training_dataset = TargetDataset(target_list=train_targets, chat_list=train_chat_embeddings)
test_dataset = TargetDataset(target_list=test_targets, chat_list=test_chat_embeddings)

## Training on the split Dataset

In [None]:
VAL_SIZE = 200  # samples held out of the training pool for validation
SPLIT_SEED = 0  # fixed so the train/validation partition is identical on every run

# The train length is derived from the dataset so the two lengths always sum to
# len(training_dataset); a dedicated generator keeps the split reproducible without
# touching the global RNG state.
train_set, val_set = torch.utils.data.random_split(
    training_dataset,
    [len(training_dataset) - VAL_SIZE, VAL_SIZE],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
train_dataloader = DataLoader(train_set, batch_size=8, shuffle=True)
val_dataloader = DataLoader(val_set, batch_size=8, shuffle=False)
# One sample per batch, so every per-batch test metric is a per-sample metric.
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [None]:
import torch
from torch import nn
from torch import optim
import numpy as np

# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs end to end on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class TargetDecoder(nn.Module):
    """Decode a flat feature vector into 7 channels of scores over a 3-D volume.

    A linear layer expands the input to 15680 values, which are reshaped into a
    64-channel 5x7x7 volume and passed through three transposed 3-D convolutions
    (ReLU between them). The last convolution emits 7 channels and no final
    activation is applied, so the output holds raw scores.
    """

    def __init__(self, features_dim=768):
        super().__init__()

        # 15680 == 64 * 5 * 7 * 7, the volume that forward() reshapes into.
        # The layer stays wrapped in nn.Sequential so its parameters keep the
        # `linear.0.*` names in the state dict.
        self.linear = nn.Sequential(nn.Linear(features_dim, 15680))

        deconv_layers = [
            nn.ConvTranspose3d(64, 64, kernel_size=3),
            nn.ReLU(),
            nn.ConvTranspose3d(64, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose3d(32, 7, kernel_size=3),
        ]
        self.cnn = nn.Sequential(*deconv_layers)

    def forward(self, x):
        batch_size = x.shape[0]
        volume = self.linear(x).reshape(batch_size, 64, 5, 7, 7)
        return self.cnn(volume)

    

# Cross-entropy between the decoder's score channels and the integer targets.
loss_function = nn.CrossEntropyLoss()
# Newly initialised decoder, moved to the selected device.
target_decoder = TargetDecoder().to(device)

In [None]:
from torchmetrics import Accuracy

accuracy = Accuracy()
EPOCHS = 100
optimizer = optim.Adam(target_decoder.parameters(), lr=1e-3)
count = 0  # exactly-matched samples, summed over all epochs (training + validation)
i = 0  # batches processed so far (training + validation); averaged per epoch after the loop
for epoch in range(EPOCHS):
  count_per_epoch = 0  # exactly-matched samples in this epoch (training + validation)

  # ---- training pass ----
  target_decoder.train()
  train_loss = []
  train_acc = []
  for target_tensor_input, target_tensor_target in train_dataloader:
    i += 1
    target_tensor_input = target_tensor_input.float().to(device)
    target_tensor_target = target_tensor_target.to(device)
    optimizer.zero_grad()
    predict = target_decoder(target_tensor_input)
    loss = loss_function(predict, target_tensor_target)
    _, pred_label = torch.max(predict, dim=1)
    # A sample counts only when every position of its predicted label map is right.
    # Done for the whole batch on the device instead of copying each sample to numpy.
    exact_matches = int((pred_label == target_tensor_target).flatten(start_dim=1).all(dim=1).sum())
    count += exact_matches
    count_per_epoch += exact_matches
    train_loss.append(loss.item())
    train_acc.append(accuracy(predict.detach().to("cpu"), target_tensor_target.to("cpu")))
    loss.backward()
    optimizer.step()
  train_loss = np.array(train_loss).mean()
  train_acc = np.array(train_acc).mean()

  # ---- validation pass ----
  target_decoder.eval()
  val_loss = []
  val_acc = []
  # No gradients are needed for validation; skipping the autograd graph saves memory and time.
  with torch.no_grad():
    for target_tensor_input, target_tensor_target in val_dataloader:
      i += 1
      target_tensor_input = target_tensor_input.float().to(device)
      target_tensor_target = target_tensor_target.to(device)
      predict = target_decoder(target_tensor_input)
      _, pred_label = torch.max(predict, dim=1)
      exact_matches = int((pred_label == target_tensor_target).flatten(start_dim=1).all(dim=1).sum())
      count += exact_matches
      count_per_epoch += exact_matches
      loss = loss_function(predict, target_tensor_target)
      val_loss.append(loss.item())
      val_acc.append(accuracy(predict.to("cpu"), target_tensor_target.to("cpu")))

  val_loss = np.array(val_loss).mean()
  val_acc = np.array(val_acc).mean()
  print(f"epoch: {epoch} | loss: {train_loss} | val_loss: {val_loss}")
  print(f"train_acc: {train_acc} | val_acc: {val_acc} | count: {count_per_epoch}")
# Average number of batches (training + validation) per epoch.
i = i/EPOCHS

In [None]:
from torchmetrics import F1
# F1 over 7 classes: class index 0 is ignored, and with "global" averaging the
# extra dimensions are flattened and treated as additional samples.
f1 = F1(
    num_classes=7,
    mdmc_average="global",
    ignore_index=0,
)

In [None]:
test_loss = []
test_acc = []
test_f1 = []
target_decoder.eval()
count = 0  # test samples whose whole label map is predicted correctly
i = 0  # number of test batches seen
# Evaluation only: disable autograd so no graph is built for the forward passes.
with torch.no_grad():
  for target_tensor_input, target_tensor_target in test_dataloader:
    i+=1
    target_tensor_input = target_tensor_input.float().to(device)
    target_tensor_target = target_tensor_target.to(device)
    predict = target_decoder(target_tensor_input)
    loss = loss_function(predict, target_tensor_target)
    _, pred_label = torch.max(predict, dim=1)
    # Count samples that match at every position, evaluated per sample on the device.
    count += int((pred_label == target_tensor_target).flatten(start_dim=1).all(dim=1).sum())
    test_loss.append(loss.item())
    test_acc.append(accuracy(predict.to("cpu"), target_tensor_target.to("cpu")))
    test_f1.append(f1(predict.to("cpu"), target_tensor_target.to("cpu")))
# Each reported number is the mean of the per-batch values.
test_loss = np.array(test_loss).mean()
test_acc = np.array(test_acc).mean()
test_f1 = np.array(test_f1).mean()
print(f"test_loss: {test_loss} | test_acc: {test_acc} | f1: {test_f1}")

## Final Training on the whole Dataset


In [None]:
# Dataset over every embedded chat and its target, served in shuffled mini-batches of 8.
all_training_dataset = TargetDataset(target_list=all_targets, chat_list=all_chat_embeddings)
all_train_dataloader = DataLoader(dataset=all_training_dataset, shuffle=True, batch_size=8)

In [None]:
import torch
from torch import nn
from torch import optim
import numpy as np

# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs end to end on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class TargetDecoder(nn.Module):
    """Decode a flat feature vector into 7 channels of scores over a 3-D volume.

    A linear layer expands the input to 15680 values, which are reshaped into a
    64-channel 5x7x7 volume and passed through three transposed 3-D convolutions
    (ReLU between them). The last convolution emits 7 channels and no final
    activation is applied, so the output holds raw scores.
    """

    def __init__(self, features_dim=768):
        super().__init__()

        # 15680 == 64 * 5 * 7 * 7, the volume that forward() reshapes into.
        # The layer stays wrapped in nn.Sequential so its parameters keep the
        # `linear.0.*` names in the state dict.
        self.linear = nn.Sequential(nn.Linear(features_dim, 15680))

        deconv_layers = [
            nn.ConvTranspose3d(64, 64, kernel_size=3),
            nn.ReLU(),
            nn.ConvTranspose3d(64, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose3d(32, 7, kernel_size=3),
        ]
        self.cnn = nn.Sequential(*deconv_layers)

    def forward(self, x):
        batch_size = x.shape[0]
        volume = self.linear(x).reshape(batch_size, 64, 5, 7, 7)
        return self.cnn(volume)

    

# Cross-entropy between the decoder's score channels and the integer targets.
loss_function = nn.CrossEntropyLoss()
# Re-create the decoder from scratch (new random weights) on the selected device.
target_decoder = TargetDecoder().to(device)

In [None]:
from torchmetrics import Accuracy

# NOTE(review): lr=1e-6 is 1000x smaller than the 1e-3 used for the split run, although
# the decoder is re-created just before this cell — confirm this value is intended.
optimizer = optim.Adam(target_decoder.parameters(), lr=1e-6)
accuracy = Accuracy()
EPOCHS = 50
count = 0  # exactly-matched samples, summed over all epochs
for epoch in range(EPOCHS):
  count_per_epoch = 0  # exactly-matched samples in this epoch
  # The weights are being updated in this loop, so the module must be in training
  # mode (the original switched it to eval() here).
  target_decoder.train()
  train_loss = []
  train_acc = []
  for target_tensor_input, target_tensor_target in all_train_dataloader:
    target_tensor_input = target_tensor_input.float().to(device)
    target_tensor_target = target_tensor_target.to(device)
    optimizer.zero_grad()
    predict = target_decoder(target_tensor_input)
    loss = loss_function(predict, target_tensor_target)
    _, pred_label = torch.max(predict, dim=1)
    # A sample counts only when every position of its predicted label map is right.
    # Done for the whole batch on the device instead of copying each sample to numpy.
    exact_matches = int((pred_label == target_tensor_target).flatten(start_dim=1).all(dim=1).sum())
    count += exact_matches
    count_per_epoch += exact_matches
    train_loss.append(loss.item())
    train_acc.append(accuracy(predict.detach().to("cpu"), target_tensor_target.to("cpu")))
    loss.backward()
    optimizer.step()
  # Epoch summary: mean of the per-batch loss and accuracy values.
  train_loss = np.array(train_loss).mean()
  train_acc = np.array(train_acc).mean()

  print(f"epoch: {epoch} | loss: {train_loss} | train_acc: {train_acc} | count: {count_per_epoch}")


In [None]:
# Persist only the decoder's parameters (its state dict), not the module object itself.
torch.save(obj=target_decoder.state_dict(), f='fragm_target_decoder.pth')