In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score
import torch
import torch.nn as nn
import torch.optim as optim
from sentence_transformers import SentenceTransformer




In [2]:
df = pd.read_csv("data.csv")

label_encoder = LabelEncoder()
df['Sentiment_Label'] = label_encoder.fit_transform(df['Sentiment'])

embedder = SentenceTransformer('all-MiniLM-L6-v2')
X = embedder.encode(df['Sentence'], show_progress_bar=True)
y = df['Sentiment_Label'].values

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/183 [00:00<?, ?it/s]

In [3]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# RL Environment
class SentimentEnv:
    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.index = 0

    def reset(self):
        self.index = 0
        return self.X[self.index]

    def step(self, action):
        reward = 2 if action == self.y[self.index] else -1
        self.index += 1
        done = self.index >= len(self.X)
        state = None if done else self.X[self.index]
        return state, reward, done

# Improved Policy Network
class PolicyNet(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(PolicyNet, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, output_dim),
            nn.Softmax(dim=-1)
        )

    def forward(self, x):
        return self.net(x)

In [6]:
# Initialize environment and model
env = SentimentEnv(X_train, y_train)
input_dim = X_train.shape[1]
output_dim = len(np.unique(y))
policy_net = PolicyNet(input_dim, output_dim)
optimizer = optim.Adam(policy_net.parameters(), lr=0.001, weight_decay=1e-5)

In [7]:
# Training with REINFORCE
epochs = 20
for epoch in range(epochs):
    state = env.reset()
    log_probs = []
    rewards = []
    while True:
        state_tensor = torch.FloatTensor(state)
        probs = policy_net(state_tensor)
        dist = torch.distributions.Categorical(probs)
        action = dist.sample()
        log_prob = dist.log_prob(action)
        log_probs.append(log_prob)

        next_state, reward, done = env.step(action.item())
        rewards.append(reward)

        if done:
            break
        state = next_state

    # Compute loss
    loss = -torch.stack(log_probs) * torch.FloatTensor(rewards)
    loss = loss.sum()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch+1}, Total reward: {sum(rewards)}")


Epoch 1, Total reward: -272
Epoch 2, Total reward: 40
Epoch 3, Total reward: -32
Epoch 4, Total reward: -17
Epoch 5, Total reward: -89
Epoch 6, Total reward: 355
Epoch 7, Total reward: 154
Epoch 8, Total reward: 352
Epoch 9, Total reward: 154
Epoch 10, Total reward: 412
Epoch 11, Total reward: 598
Epoch 12, Total reward: 694
Epoch 13, Total reward: 859
Epoch 14, Total reward: 925
Epoch 15, Total reward: 1030
Epoch 16, Total reward: 1135
Epoch 17, Total reward: 1462
Epoch 18, Total reward: 1534
Epoch 19, Total reward: 1750
Epoch 20, Total reward: 2041


In [8]:
# Evaluate
policy_net.eval()
preds = []
for x in X_test:
    with torch.no_grad():
        probs = policy_net(torch.FloatTensor(x))
        pred = torch.argmax(probs).item()
        preds.append(pred)


In [9]:
print("Accuracy:", accuracy_score(y_test, preds))
print("F1 Score:", f1_score(y_test, preds, average='weighted'))

Accuracy: 0.5320786997433704
F1 Score: 0.36957336821929243
