In [None]:
%cd ..

In [None]:
%ls

In [None]:
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter
import tqdm

import torch
import torch.optim as optim
from transformers import AutoModel, AutoTokenizer, RobertaForSequenceClassification
from sklearn.metrics import classification_report

from src.utils import *
from src.model import *
from src.env import *

# Pretrained PhoBERT tokenizer and encoder, both loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base-v2")
phobert = AutoModel.from_pretrained("vinai/phobert-base-v2")

# Prefer the GPU whenever PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('Current device:', device)

# Import data

In [None]:
data_name = 'UIT-ViSFD'

# Read the three splits in one pass and discard the columns this notebook never reads.
train_data, test_data, val_data = (
    pd.read_csv('./Data/' + data_name + '/' + split_file).drop(columns=['label', 'index', 'date_time'])
    for split_file in ('train.csv', 'test.csv', 'dev.csv')
)

# Shift every rating down by one (presumably 1..5 stars -> class ids 0..4; confirm against the dataset).
train_data['n_star'] = train_data['n_star'].sub(1)
test_data['n_star'] = test_data['n_star'].sub(1)
val_data['n_star'] = val_data['n_star'].sub(1)

In [None]:

# Bar chart of how many training reviews carry each rating, ordered by rating.
rating_counts = train_data['n_star'].value_counts().sort_index()
rating_counts.plot.bar(color=['red', 'blue', 'green'])
plt.title('Number of Reviews per Rating')
plt.xlabel('Rating')
plt.ylabel('Number of Reviews')
plt.show()

In [None]:
# Fraction of training reviews per rating, kept as a plain dict keyed by rating.
rating_share = train_data['n_star'].value_counts(normalize=True)
label_frequencies = rating_share.to_dict()
label_frequencies

In [None]:
# Length of every training comment, counted in whitespace-separated tokens.
content_lengths = train_data['comment'].map(lambda text: len(text.split()))

# One bin per unit of standard deviation, spanning zero up to the longest comment.
n_length_bins = int(content_lengths.std())
longest_comment = int(content_lengths.max())
content_lengths.plot.hist(bins=n_length_bins, range=(0, longest_comment), figsize=(10, 6))
plt.title('Length of content in the dataset')
plt.xlabel('Length of content')
plt.ylabel('Number of content')
plt.show()

print(content_lengths.describe())

In [None]:
# Store the whitespace-token length of every comment as its own column.
train_data['content_length'] = train_data['comment'].map(lambda text: len(text.split()))

# Shared upper bound so that every per-rating histogram uses the same x-range.
max_content_length = train_data['content_length'].max()

for i in train_data['n_star'].unique():
    # Lengths of the comments that received this rating.
    rating_lengths = train_data.loc[train_data['n_star'] == i, 'content_length']
    # Square-root rule: number of bins grows with the square root of the sample count.
    length_bins = int(np.sqrt(len(rating_lengths)))

    plt.figure(figsize=(10, 6))
    plt.hist(rating_lengths, bins=length_bins, range=(0, max_content_length), alpha=0.7, label=f'n_star {i}')
    plt.title('Length of content in the dataset')
    plt.xlabel('Length of content')
    plt.ylabel('Number of content')
    plt.legend()
    plt.show()

In [None]:
# Build one word cloud from all training comments joined into a single string.
all_comments_text = ' '.join(train_data['comment'])
wordcloud = WordCloud(width=800, height=400, background_color='black').generate(all_comments_text)

plt.figure(figsize=(10, 6))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

In [None]:
# Split the whole training corpus into whitespace tokens once and count them.
corpus_tokens = ' '.join(train_data['comment']).split()
token_counts = Counter(corpus_tokens)

# Number of distinct tokens in the corpus.
num_words = len(token_counts)
print('Number of words in the dataset:', num_words)

# Every distinct token with its count, most frequent first.
word_freq = token_counts.most_common(num_words)
word_freq_df = pd.DataFrame(word_freq, columns=['word', 'frequency'])
word_freq_df['frequency'] = word_freq_df['frequency'].astype(int)

# The (truncated) mean frequency is used both as the bin count and as the upper x-limit.
bins = int(word_freq_df['frequency'].mean())

word_freq_df['frequency'].plot.hist(bins=bins, range=(0, bins), figsize=(10, 5))
plt.title('Frequency of words in the dataset')
plt.xlabel('Frequency')
plt.ylabel('Number of words')
plt.show()
print(word_freq_df['frequency'].describe())

## Vectorize

In [None]:
# Drop every row that contains a missing value, in all three splits.
train_data, test_data, val_data = (
    split_frame.dropna() for split_frame in (train_data, test_data, val_data)
)

In [None]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

MAX_LEN = 50
BATCH_SIZE = 64


def encode_data(x):
    """Tokenize a list of comments into PyTorch tensors.

    Every sequence is padded or truncated to exactly ``MAX_LEN`` token ids.
    """
    return tokenizer(x, padding='max_length', truncation=True, max_length=MAX_LEN, return_tensors='pt')


train_data_comment = encode_data(train_data['comment'].tolist())
test_data_comment = encode_data(test_data['comment'].tolist())
val_data_comment = encode_data(val_data['comment'].tolist())

# Convert through NumPy instead of handing the Series to torch directly: after
# `dropna()` the index may have gaps, and torch reads a Series element by
# element using positions as index labels, which can raise KeyError on a gap.
train_labels = torch.tensor(train_data['n_star'].to_numpy()).long()
test_labels = torch.tensor(test_data['n_star'].to_numpy()).long()
val_labels = torch.tensor(val_data['n_star'].to_numpy()).long()

In [None]:
# Training split: (token ids, attention mask, label) triples, batches drawn in random order.
train_dataset = TensorDataset(train_data_comment['input_ids'], train_data_comment['attention_mask'], train_labels)
train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler)

# Test split: same triple layout, read sequentially.
test_dataset = TensorDataset(test_data_comment['input_ids'], test_data_comment['attention_mask'], test_labels)
test_sampler = SequentialSampler(test_dataset)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, sampler=test_sampler)

# Validation split: same triple layout, read sequentially.
val_dataset = TensorDataset(val_data_comment['input_ids'], val_data_comment['attention_mask'], val_labels)
val_sampler = SequentialSampler(val_dataset)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, sampler=val_sampler)

In [None]:
# Shape of the token-id tensor of the train, validation and test splits, in that order.
tuple(loader.dataset.tensors[0].shape for loader in (train_dataloader, val_dataloader, test_dataloader))

In [None]:
# Count how often every token id occurs in the encoded training set.
# The encoding computed earlier (`train_data_comment`) used the same comments,
# max length, truncation and padding, so it is reused here instead of
# tokenizing the corpus a second time. This also keeps the name `encode_data`
# bound to the tokenizing helper rather than to an encoding result.
indexs, counts = np.unique(train_data_comment['input_ids'].numpy(), return_counts=True)
total_token_count = np.sum(counts)

print('Percent of UNK token:', counts[indexs == tokenizer.unk_token_id].sum() / total_token_count * 100)
print('Percent of PAD token:', counts[indexs == tokenizer.pad_token_id].sum() / total_token_count * 100)
print('Percent of CLS token:', counts[indexs == tokenizer.cls_token_id].sum() / total_token_count * 100)

In [None]:
class AgentRLSentimentClassifier:
    """
    A reinforcement learning agent for sentiment classification.

    For every token position the actor-critic network yields a value and a
    distribution over actions. The chosen action is multiplied into the token
    embeddings, and the result is classified by a separate classifier network.

    NOTE(review): a later cell defines another class with this exact name; once
    that cell has run, it replaces this definition.
    """

    def __init__(self, num_class,
                 actor_critic_net: "ActorCriticNet", classifier_net: "Classifier", feature_extracter,
                 lr_actor_critic=1e-3, lr_classifier=1e-3, gamma=0.99, device='cpu'):
        """
        Args:
            num_class: Number of sentiment classes (used to one-hot encode labels).
            actor_critic_net: Network called on token embeddings; returns ``(values, policy)``.
            classifier_net: Network called on the filtered embeddings; returns class scores.
            feature_extracter: Callable taking token ids and ``attention_mask``; the first
                element of its output is used as the token embeddings.
            lr_actor_critic: Learning rate of the RMSprop optimizer for ``actor_critic_net``.
            lr_classifier: Learning rate of the Adam optimizer for ``classifier_net``.
            gamma: Discount factor applied to the values of the filtered input.
            device: Device that every batch is moved to.
        """
        self.num_class = num_class

        self.actor_critic_net = actor_critic_net
        self.classifier_net = classifier_net
        self.feature_extracter = feature_extracter

        self.optimizer_actor_critic = optim.RMSprop(actor_critic_net.parameters(), lr=lr_actor_critic, weight_decay=1e-5)
        self.optimizer_classifier = optim.Adam(classifier_net.parameters(), lr=lr_classifier, weight_decay=1e-5)

        self.gamma = gamma
        self.device = device
        # Probability of picking a fully random action; decayed once per epoch in `train`.
        self.epsilon = 1
        self.epsilon_decay = 0.9

    def step(self, batches_sentence: torch.Tensor, batches_label: torch.Tensor, batches_attention_mask: torch.Tensor, training=True):
        """
        Run one batch through feature extractor, actor-critic and classifier.

        Args:
            batches_sentence: Token ids of the batch.
            batches_label: Class index of every sample.
            batches_attention_mask: Attention mask passed to the feature extractor.
            training: When False, epsilon is 0, so actions are always sampled
                from the policy instead of being drawn uniformly at random.

        Returns:
            ``(actor_critic_loss, classifier_loss, accuracy, mean_reward)`` where the
            first three are tensors and the last one is a Python float.
            ``actor_critic_loss`` already contains ``classifier_loss``.
        """
        eps = self.epsilon if training else 0

        batches_label = batches_label.to(self.device)
        # The feature extractor is never trained here, so no graph is recorded for it.
        with torch.no_grad():
            batches_sentence_embedding = self.feature_extracter(
                batches_sentence.to(self.device, dtype=torch.long),
                attention_mask=batches_attention_mask.to(self.device, dtype=torch.long))[0]

        # NOTE(review): the noise is also added when training=False — confirm this is intended.
        noise = torch.randn_like(batches_sentence_embedding) * 0.1
        values, policy = self.actor_critic_net(batches_sentence_embedding + noise) # (batch_size, max_len, 1) and (batch_size, max_len, num_actions)
        action = self._choose_action(policy, eps)

        # Scale every token embedding by the action chosen for its position.
        filtered_batches_sentence_embedding = batches_sentence_embedding * action.unsqueeze(-1) + noise
        next_values, _ = self.actor_critic_net(filtered_batches_sentence_embedding)

        classifier_probs = self.classifier_net(filtered_batches_sentence_embedding)
        classifier_loss = F.cross_entropy(classifier_probs, batches_label)

        correct_predictions = (classifier_probs.argmax(dim=-1) == batches_label).float()
        # One scalar reward for the whole batch, reshaped to (1, 1) so it broadcasts over the values.
        rewards = self._get_reward(classifier_probs, batches_label).unsqueeze(-1).unsqueeze(-1)

        with torch.no_grad():  # Don't backpropagate through target calculation
            td_target = rewards + self.gamma * next_values

        advantage = td_target - values
        # Log-probability of the action that was actually chosen at every position.
        # (Multiplying the log-probabilities by the action index and summing does
        # not select the chosen entry: it gives 0 for action 0 and the sum of all
        # log-probabilities for action 1.)
        chosen_log_probs = torch.log(policy).gather(-1, action.unsqueeze(-1))
        actor_loss = (-chosen_log_probs * advantage.detach()).mean()
        actor_critic_loss = actor_loss + classifier_loss

        accuracy = correct_predictions.mean()

        return actor_critic_loss, classifier_loss, accuracy, rewards.mean().item()

    def _choose_action(self, policy, epsilon):
        """Epsilon-greedy style choice: uniform random actions with probability
        ``epsilon`` (decided once for the whole batch), otherwise a sample from ``policy``."""
        if np.random.rand() < epsilon:
            return torch.randint(0, policy.shape[-1], policy.shape[:-1]).to(self.device)

        return torch.distributions.Categorical(policy).sample()

    def _get_reward(self, predicted_probs, true_labels):
        """Return the negated batch-mean KL term between the classifier output and
        the one-hot labels, as a single scalar tensor."""
        true_class_probs = F.one_hot(true_labels, num_classes=self.num_class).float()
        # NOTE(review): F.kl_div expects its first argument to be log-probabilities;
        # whether classifier_net returns those is not visible here — TODO confirm.
        rewards = -F.kl_div(predicted_probs, true_class_probs, reduction='batchmean')
        return rewards

    def train(self, train_dataloader, val_dataloader, n_epochs=10):
        """
        Train for ``n_epochs`` epochs and validate after each of them.

        Both networks are saved under ``./Model/`` whenever the validation
        classifier loss improves. The classifier optimizer only steps on
        even-numbered epochs (0, 2, ...).

        Returns:
            Eight lists with one entry per epoch: training AC loss, classifier loss,
            accuracy and reward, followed by the same four for validation.
        """
        import os  # local import: only needed to make sure the checkpoint folder exists

        os.makedirs('./Model', exist_ok=True)

        best_val_loss = float('inf')
        self.training_ac_losses, self.training_classifier_losses, self.training_accuracies, self.training_rewards = [], [], [], []
        self.val_ac_losses, self.val_classifier_losses, self.val_accuracies, self.val_rewards = [], [], [], []
        n_train_batches, n_val_batches = len(train_dataloader), len(val_dataloader)

        for epoch in range(n_epochs):
            print('-' * 50)
            print(f'Epoch: {epoch + 1} / {n_epochs}')

            # ---------------------- Training ---------------------- #
            self.actor_critic_net.train()
            self.classifier_net.train()
            running_ac_loss, running_classifier_loss, running_accuracy, running_reward = 0.0, 0.0, 0.0, 0.0
            for data in tqdm.tqdm(train_dataloader):
                ac_loss, classifier_loss, accuracy, reward = self.step(data[0], data[2], data[1])

                self.optimizer_actor_critic.zero_grad()
                self.optimizer_classifier.zero_grad()

                # NOTE(review): ac_loss already includes classifier_loss (see `step`),
                # so the classifier loss is counted twice here — confirm this weighting is intended.
                total_loss = ac_loss + classifier_loss
                total_loss.backward()

                self.optimizer_actor_critic.step()

                if epoch % 2 == 0:
                    self.optimizer_classifier.step()

                # Accumulate plain floats so the histories hold no device tensors.
                running_ac_loss += ac_loss.item()
                running_classifier_loss += classifier_loss.item()
                running_accuracy += accuracy.item()
                running_reward += reward

            self.epsilon = max(0.1, self.epsilon * self.epsilon_decay)

            train_ac_mean = running_ac_loss / n_train_batches
            train_classifier_mean = running_classifier_loss / n_train_batches
            train_accuracy_mean = running_accuracy / n_train_batches
            train_reward_mean = running_reward / n_train_batches

            self.training_classifier_losses.append(train_classifier_mean)
            self.training_ac_losses.append(train_ac_mean)
            self.training_accuracies.append(train_accuracy_mean)
            self.training_rewards.append(train_reward_mean)

            # ---------------------- Validation ---------------------- #
            self.actor_critic_net.eval()
            self.classifier_net.eval()
            val_ac_loss, val_classifier_loss, val_accuracy, val_reward = 0.0, 0.0, 0.0, 0.0
            with torch.no_grad():
                for data in val_dataloader:
                    ac_loss, classifier_loss, accuracy, reward = self.step(data[0], data[2], data[1], training=False)
                    val_ac_loss += ac_loss.item()
                    val_classifier_loss += classifier_loss.item()
                    val_accuracy += accuracy.item()
                    val_reward += reward

            val_ac_mean = val_ac_loss / n_val_batches
            val_classifier_mean = val_classifier_loss / n_val_batches
            val_accuracy_mean = val_accuracy / n_val_batches
            val_reward_mean = val_reward / n_val_batches

            self.val_classifier_losses.append(val_classifier_mean)
            self.val_ac_losses.append(val_ac_mean)
            self.val_accuracies.append(val_accuracy_mean)
            self.val_rewards.append(val_reward_mean)

            print(f'Training AC Loss: {train_ac_mean:.2f}, Training Classifier Loss: {train_classifier_mean:.2f}, Training Accuracy: {train_accuracy_mean:.2f}, Training Reward: {train_reward_mean:.2f}, Epsilon: {self.epsilon:.2f}')
            print(f'Validation AC Loss: {val_ac_mean:.2f}, Validation Classifier Loss: {val_classifier_mean:.2f}, Validation Accuracy: {val_accuracy_mean:.2f}, Validation Reward: {val_reward_mean:.2f}')

            # Keep the weights of the epoch with the lowest validation classifier loss.
            if val_classifier_mean < best_val_loss:
                best_val_loss = val_classifier_mean
                torch.save(self.actor_critic_net.state_dict(), './Model/actor_critic_model.pth')
                torch.save(self.classifier_net.state_dict(), './Model/classifier_model.pth')
                print('Model saved.')

            # Release cached GPU blocks once per epoch rather than after every batch.
            torch.cuda.empty_cache()

        print('Finished Training')
        torch.cuda.empty_cache()
        return self.training_ac_losses, self.training_classifier_losses, self.training_accuracies, self.training_rewards, \
               self.val_ac_losses, self.val_classifier_losses, self.val_accuracies, self.val_rewards

In [None]:
from sklearn.metrics import f1_score

class AgentRLSentimentClassifier:
    """
    A reinforcement learning agent for sentiment classification.

    An actor network produces, for every token position, a distribution over
    actions. The chosen action is multiplied into the token embeddings, and a
    combined critic/classifier network turns the result into a value estimate
    and class scores.
    """

    def __init__(self, num_class,
                 actor_net: "ActorNet", critic_classifier_net: "CriticClassifierNet", feature_extracter,
                 lr_actor_critic=1e-3, lr_classifier=1e-3, gamma=0.99, device='cpu'):
        """
        Args:
            num_class: Number of sentiment classes.
            actor_net: Network called on token embeddings; returns the action distribution.
            critic_classifier_net: Network called on token embeddings; returns
                ``(values, class_scores)``.
            feature_extracter: Callable taking token ids and ``attention_mask``; the first
                element of its output is used as the token embeddings.
            lr_actor_critic: Learning rate of the RMSprop optimizer for ``actor_net``.
            lr_classifier: Learning rate of the Adam optimizer for ``critic_classifier_net``.
            gamma: Discount factor applied to the values of the filtered input.
            device: Device that every batch and the reward weights are moved to.
        """
        self.num_class = num_class

        self.actor_net = actor_net
        self.critic_classifier_net = critic_classifier_net
        self.feature_extracter = feature_extracter

        self.optimizer_actor_net = optim.RMSprop(self.actor_net.parameters(), lr=lr_actor_critic, weight_decay=1e-5)
        self.optimizer_critic_classifier = optim.Adam(self.critic_classifier_net.parameters(), lr=lr_classifier, weight_decay=1e-5)

        self.gamma = gamma
        self.device = device
        # Probability of picking a fully random action; decayed once per epoch in `train`.
        self.epsilon = 1
        self.epsilon_decay = 0.9

        self.reward_weights = self._calculate_reward_weights().to(device)
        print('Reward weights:', self.reward_weights)

    def _calculate_reward_weights(self):
        """Build one reward weight per label from the notebook-level
        ``label_frequencies`` dict: ``total / (frequency * number_of_labels)``,
        ordered by sorted label, so rarer labels get larger weights."""
        total_samples = sum(label_frequencies.values())
        n_labels = len(label_frequencies)
        reward_weights = torch.tensor(
            [total_samples / (label_frequencies[label] * n_labels) for label in sorted(label_frequencies)],
            dtype=torch.float)
        return reward_weights

    def _forward_batch(self, batches_sentence, batches_label, batches_attention_mask, training=True):
        """Shared forward pass behind `step` and `evaluate`.

        Returns the four values documented on `step` plus the predicted class
        index of every sample (computed from the filtered embeddings).
        """
        eps = self.epsilon if training else 0

        batches_label = batches_label.to(self.device)
        # The feature extractor is never trained here, so no graph is recorded for it.
        with torch.no_grad():
            batches_sentence_embedding = self.feature_extracter(
                batches_sentence.to(self.device, dtype=torch.long),
                attention_mask=batches_attention_mask.to(self.device, dtype=torch.long))[0]

        policy = self.actor_net(batches_sentence_embedding)
        values, _ = self.critic_classifier_net(batches_sentence_embedding)
        # NOTE(review): with training=False the action is still *sampled* from the
        # policy, whereas the notebook's final evaluation cell takes the argmax.
        action = self._choose_action(policy, eps)

        # Scale every token embedding by the action chosen for its position.
        filtered_batches_sentence_embedding = batches_sentence_embedding * action.unsqueeze(-1).float()
        next_values, classifier_probs = self.critic_classifier_net(filtered_batches_sentence_embedding)

        classifier_loss = F.cross_entropy(classifier_probs, batches_label)

        predictions = classifier_probs.argmax(dim=-1)
        correct_predictions = (predictions == batches_label).float()
        accuracy = correct_predictions.mean()

        with torch.no_grad():
            # NOTE(review): `rewards` holds one entry per sample; how it broadcasts
            # against the critic's values depends on CriticClassifierNet's output
            # shape, which is not visible here — TODO confirm.
            rewards = self._get_reward(correct_predictions, batches_label)
            td_target = rewards + self.gamma * next_values.detach()

        # NOTE(review): the advantage is only used detached, so no loss term ever
        # trains the value output of the critic — confirm this is intended.
        advantage = td_target - values
        # Log-probability of the action that was actually chosen at every position.
        # (Multiplying the log-probabilities by the action index and summing does
        # not select the chosen entry: it gives 0 for action 0 and the sum of all
        # log-probabilities for action 1.)
        chosen_log_probs = policy.log().gather(-1, action.unsqueeze(-1))
        actor_loss = (-chosen_log_probs * advantage.detach()).mean()
        actor_critic_loss = actor_loss + classifier_loss

        return actor_critic_loss, classifier_loss, accuracy, rewards.mean().item(), predictions

    def step(self,
             batches_sentence: torch.Tensor,
             batches_label: torch.Tensor,
             batches_attention_mask: torch.Tensor,
             training=True) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, float]:
        """
        Run one batch through feature extractor, actor and critic/classifier.

        Args:
            batches_sentence: Token ids of the batch.
            batches_label: Class index of every sample.
            batches_attention_mask: Attention mask passed to the feature extractor.
            training: When False, epsilon is 0, so actions are always sampled
                from the policy instead of being drawn uniformly at random.

        Returns:
            ``(actor_critic_loss, classifier_loss, accuracy, mean_reward)``.
            ``actor_critic_loss`` already contains ``classifier_loss``.
        """
        return self._forward_batch(batches_sentence, batches_label, batches_attention_mask, training=training)[:4]

    def _choose_action(self, policy, epsilon):
        """Epsilon-greedy style choice: uniform random actions with probability
        ``epsilon`` (decided once for the whole batch), otherwise a sample from ``policy``."""
        if np.random.rand() < epsilon:
            return torch.randint(0, policy.shape[-1], policy.shape[:-1], device=self.device)
        return torch.distributions.Categorical(policy).sample()

    def _get_reward(self, correct_predictions, true_labels) -> torch.Tensor:
        """Reward per sample: the weight of its true label when the prediction
        was correct, 0 otherwise."""
        return correct_predictions * self.reward_weights[true_labels]

    def evaluate(self, dataloader):
        """
        Evaluate on ``dataloader`` without updating any weights.

        Loss, accuracy, reward and macro F1 all come from the same forward pass
        on the filtered embeddings, so the reported numbers describe the same
        predictions.

        Returns:
            ``(mean_ac_loss, mean_classifier_loss, mean_accuracy, mean_reward, macro_f1)``,
            where the means are taken over batches.
        """
        self.actor_net.eval()
        self.critic_classifier_net.eval()
        running_ac_loss, running_classifier_loss, running_accuracy, running_reward = 0.0, 0.0, 0.0, 0.0
        y_true, y_pred = [], []
        with torch.no_grad():
            for data in dataloader:
                ac_loss, classifier_loss, accuracy, reward, predictions = self._forward_batch(
                    data[0], data[2], data[1], training=False)
                running_ac_loss += ac_loss.item()
                running_classifier_loss += classifier_loss.item()
                running_accuracy += accuracy.item()
                running_reward += reward

                y_true.extend(data[2].tolist())
                y_pred.extend(predictions.tolist())

        # Release cached GPU blocks once per evaluation rather than after every batch.
        torch.cuda.empty_cache()

        f1 = f1_score(y_true, y_pred, average='macro')

        n_batches = len(dataloader)
        return running_ac_loss / n_batches, \
               running_classifier_loss / n_batches, \
               running_accuracy / n_batches, \
               running_reward / n_batches, \
               f1

    def train(self, train_dataloader, val_dataloader, n_epochs=10):
        """
        Train for ``n_epochs`` epochs and validate after each of them.

        Both networks are saved under ``./Model/`` (file names include the
        notebook-level ``data_name``) whenever the validation classifier loss improves.

        Returns:
            Eight lists with one entry per epoch: training AC loss, classifier loss,
            accuracy and reward, followed by the same four for validation.
        """
        import os  # local import: only needed to make sure the checkpoint folder exists

        os.makedirs('./Model', exist_ok=True)

        best_val_loss = float('inf')
        self.training_ac_losses, self.training_classifier_losses, self.training_accuracies, self.training_rewards = [], [], [], []
        self.val_ac_losses, self.val_classifier_losses, self.val_accuracies, self.val_rewards = [], [], [], []
        n_train_batches = len(train_dataloader)

        for epoch in range(n_epochs):
            print('-' * 50)
            print(f'Epoch: {epoch + 1} / {n_epochs}')

            # ---------------------- Training ---------------------- #
            self.actor_net.train()
            self.critic_classifier_net.train()
            running_ac_loss, running_classifier_loss, running_accuracy, running_reward = 0.0, 0.0, 0.0, 0.0
            for data in tqdm.tqdm(train_dataloader):
                ac_loss, classifier_loss, accuracy, reward = self.step(data[0], data[2], data[1])

                self.optimizer_actor_net.zero_grad()
                self.optimizer_critic_classifier.zero_grad()

                # NOTE(review): ac_loss already includes classifier_loss (see `step`),
                # so the classifier loss is counted twice here — confirm this weighting is intended.
                total_loss = ac_loss + classifier_loss
                total_loss.backward()

                self.optimizer_actor_net.step()
                self.optimizer_critic_classifier.step()

                # Accumulate plain floats so the histories hold no device tensors.
                running_ac_loss += ac_loss.item()
                running_classifier_loss += classifier_loss.item()
                running_accuracy += accuracy.item()
                running_reward += reward

            self.epsilon = max(0.1, self.epsilon * self.epsilon_decay)

            train_ac_mean = running_ac_loss / n_train_batches
            train_classifier_mean = running_classifier_loss / n_train_batches
            train_accuracy_mean = running_accuracy / n_train_batches
            train_reward_mean = running_reward / n_train_batches

            self.training_classifier_losses.append(train_classifier_mean)
            self.training_ac_losses.append(train_ac_mean)
            self.training_accuracies.append(train_accuracy_mean)
            self.training_rewards.append(train_reward_mean)

            # ---------------------- Validation ---------------------- #
            val_ac_loss, val_classifier_loss, val_accuracy, val_reward, val_f1 = self.evaluate(val_dataloader)

            self.val_classifier_losses.append(val_classifier_loss)
            self.val_ac_losses.append(val_ac_loss)
            self.val_accuracies.append(val_accuracy)
            self.val_rewards.append(val_reward)

            print(f'Training AC Loss: {train_ac_mean:.2f}, Training Classifier Loss: {train_classifier_mean:.2f}, Training Accuracy: {train_accuracy_mean:.2f}, Training Reward: {train_reward_mean:.2f}, Epsilon: {self.epsilon:.2f}')
            print(f'Validation AC Loss: {val_ac_loss :.2f}, Validation Classifier Loss: {val_classifier_loss :.2f}, Validation Accuracy: {val_accuracy :.2f}, Validation Reward: {val_reward :.2f}, Validation F1: {val_f1 :.2f}')

            # Keep the weights of the epoch with the lowest validation classifier loss.
            if val_classifier_loss < best_val_loss:
                best_val_loss = val_classifier_loss
                torch.save(self.actor_net.state_dict(), './Model/actor_model_'+ data_name +'.pth')
                torch.save(self.critic_classifier_net.state_dict(), './Model/critic_classifier_model_'+ data_name +'.pth')
                print('Model saved.')

        print('Finished Training')
        torch.cuda.empty_cache()
        return self.training_ac_losses, self.training_classifier_losses, self.training_accuracies, self.training_rewards, \
               self.val_ac_losses, self.val_classifier_losses, self.val_accuracies, self.val_rewards

In [None]:
from src.model import ActorNet, CriticClassifierNet

In [None]:
# num_class = 5
# actor_critic_net = ActorCriticNet(input_dim=768, output_dim=2).to(device)
# classifier_net = Classifier(input_dim=768, output_dim=num_class).to(device)
# feature_extracter = phobert.to(device)
# feature_extracter.eval()

# agent = AgentRLSentimentClassifier(num_class=num_class, actor_critic_net=actor_critic_net, classifier_net=classifier_net, feature_extracter=feature_extracter, device=device, 
#                                    lr_actor_critic=1e-4, lr_classifier=1e-5, gamma=0.999)

In [None]:
num_class = 5

# Frozen feature extractor: PhoBERT on the target device, in evaluation mode.
feature_extracter = phobert.to(device).eval()

# Trainable parts: an actor with two actions per token, and the critic/classifier head.
actor_net = ActorNet(input_dim=768, output_dim=2).to(device)
critic_classifier_net = CriticClassifierNet(input_dim=768, seq_len=MAX_LEN, output_dim=num_class).to(device)

agent = AgentRLSentimentClassifier(
    num_class=num_class,
    actor_net=actor_net,
    critic_classifier_net=critic_classifier_net,
    feature_extracter=feature_extracter,
    device=device,
    lr_actor_critic=1e-5,
    lr_classifier=2e-5,
    gamma=0.999,
)

In [None]:
# Run the full schedule; `train` hands back eight per-epoch histories
# (four for training followed by four for validation).
training_history = agent.train(train_dataloader, val_dataloader, n_epochs=50)
(training_ac_losses, training_classifier_losses, training_accuracy, training_rewards,
 val_ac_losses, val_classifier_losses, val_accuracy, val_rewards) = training_history

In [None]:
# Actor-critic loss per epoch, training against validation.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(agent.training_ac_losses, label='Train AC Loss')
ax.plot(agent.val_ac_losses, label='Val AC Loss')
ax.set_title('Training and Validation Losses')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Classifier loss per epoch, training against validation.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(agent.training_classifier_losses, label='Train CLS Loss')
ax.plot(agent.val_classifier_losses, label='Val CLS Loss')
ax.set_title('Training and Validation Losses')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Mean reward per epoch, training against validation.
training_rewards = torch.tensor(agent.training_rewards)
val_rewards_curve = torch.tensor(agent.val_rewards).cpu()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(training_rewards.tolist(), label='Train Rewards')
ax.plot(val_rewards_curve, label='Val Rewards')
ax.set_title('Training and Validation Rewards')
ax.set_xlabel('Epoch')
ax.set_ylabel('Rewards')
ax.legend()
plt.show()

In [None]:
# Mean batch accuracy per epoch, training against validation.
training_accuracy = torch.tensor(agent.training_accuracies).cpu()
val_accuracy = torch.tensor(agent.val_accuracies).cpu()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(training_accuracy, label='Train Accuracy')
ax.plot(val_accuracy, label='Val Accuracy')
ax.set_title('Training and Validation Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

In [None]:
def evaluate_model(actor_model, critic_classifier_model, dataloader):
    """Collect predicted and true class indices over ``dataloader``.

    Embeddings come from the notebook-level ``phobert`` on ``device``. For
    every token the highest-scoring action of ``actor_model`` is multiplied
    into the embedding before ``critic_classifier_model`` scores the classes.

    Returns:
        ``(preds, labels)``: two flat lists with one entry per sample.
    """
    actor_model.eval()
    critic_classifier_model.eval()

    preds = []
    labels = []
    with torch.no_grad():
        for batch in tqdm.tqdm(dataloader):
            token_ids = batch[0].to(device)
            token_mask = batch[1].to(device)
            sentence_embedding = phobert(token_ids, attention_mask=token_mask)[0]

            # Deterministic choice: take the most probable action at every position.
            chosen_action = actor_model(sentence_embedding).argmax(dim=-1)
            filtered_sentence_embedding = sentence_embedding * chosen_action.unsqueeze(-1).float()

            _, classifier_preds = critic_classifier_model(filtered_sentence_embedding)
            preds.extend(classifier_preds.argmax(dim=-1).cpu().numpy())
            labels.extend(batch[2].cpu().numpy())
    return preds, labels

# Restore the best checkpoints written during training. `map_location` remaps
# the stored tensors onto the current device, so a checkpoint saved from a GPU
# run can still be loaded on a CPU-only machine.
actor_net.load_state_dict(torch.load('./Model/actor_model_'+ data_name +'.pth', map_location=device))
critic_classifier_net.load_state_dict(torch.load('./Model/critic_classifier_model_'+ data_name +'.pth', map_location=device))

# Per-class precision / recall / F1 on the held-out test split.
test_preds, test_labels = evaluate_model(actor_net, critic_classifier_net, test_dataloader)
print(classification_report(test_labels, test_preds))

In [None]:
# Collect the most probable action of the actor for every token of the training set.
actor_net.eval()
actions = []
with torch.no_grad():
    for i, data in enumerate(tqdm.tqdm(train_dataloader), 0):
        batch_sentences = data[0].to(device)
        sentence_embedding = phobert(batch_sentences, attention_mask=data[1].to(device))[0]
        policy = actor_net(sentence_embedding)
        # argmax over the action axis already removes that axis. An additional
        # `.squeeze()` would also drop the batch axis whenever a batch holds a
        # single sample, which would make the collected rows ragged.
        action = torch.argmax(policy, dim=-1)
        actions.extend(action.cpu().tolist())

# Unique action ids together with how often each one was chosen.
action_maked = np.array(actions)
action_distribuution = np.unique(action_maked, return_counts=True)

In [None]:
# Table with one row per action id and the number of times it was chosen.
unique_actions, action_counts = action_distribuution
action_distribuution_data = pd.DataFrame({'count': action_counts}, index=unique_actions)
action_distribuution_data