# Sentiment analysis using BiRNN

In [None]:
!pip install datasets

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from datasets import load_dataset
from transformers import AutoTokenizer

In [None]:
# Load the "behbudiy/uzbek-sentiment-analysis" dataset through the `datasets` library.
dataset = load_dataset("behbudiy/uzbek-sentiment-analysis")


In [None]:
# Load the pretrained tokenizer published as 'bert-base-uncased'.
# NOTE(review): the dataset name indicates Uzbek text, while this checkpoint
# name does not indicate Uzbek coverage — confirm this tokenizer is intended.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')


In [None]:
def tokenize_function(examples):
    """Tokenize the ``text`` field of ``examples`` with the module-level tokenizer.

    Every sequence is truncated and padded to exactly 128 tokens, so all
    encoded examples share the same length.
    """
    texts = examples['text']
    return tokenizer(
        texts,
        max_length=128,
        truncation=True,
        padding="max_length",
    )


In [None]:
# Tokenize the dataset in batches and expose the target column as "labels".
tokenized_datasets = dataset.map(tokenize_function, batched=True).rename_column(
    "label", "labels"
)
# Return only the columns the training loop reads, formatted as torch tensors.
tokenized_datasets.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)


In [None]:
# Sizes for an 80/20 hold-out split of the "train" split. int() truncates,
# so any remainder from the 80% computation ends up in the test portion.
train_size = int(0.8 * len(tokenized_datasets["train"]))  # 80% for training
test_size = len(tokenized_datasets["train"]) - train_size  # 20% for testing


In [None]:
from torch.utils.data import random_split

# Seed the split so the same examples land in train/test on every run;
# with an unseeded split the reported metrics are not reproducible.
train_dataset, test_dataset = random_split(
    tokenized_datasets["train"],
    [train_size, test_size],
    generator=torch.Generator().manual_seed(42),
)


In [None]:
# Wrap both subsets in batched loaders; only the training loader is shuffled.
batch_size = 64
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)


In [None]:
class BiRNN(nn.Module):
    """Bidirectional LSTM text classifier.

    Token ids are embedded, run through a (possibly multi-layer)
    bidirectional LSTM, and the encoder outputs at the first and last time
    steps are concatenated and projected to class logits.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_size: Dimension of each token embedding.
        num_hiddens: Hidden size of the LSTM per direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits. Defaults to 2 (binary
            classification).
    """

    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers, num_classes=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.encoder = nn.LSTM(embed_size, num_hiddens, num_layers=num_layers, bidirectional=True)
        # Each time step yields 2 * num_hiddens features (forward + backward);
        # forward() concatenates two time steps, hence 4 * num_hiddens inputs.
        self.decoder = nn.Linear(4 * num_hiddens, num_classes)

    def forward(self, inputs):
        """Return class logits of shape (batch size, num_classes).

        Args:
            inputs: Integer token ids of shape (batch size, time steps).
        """
        # The LSTM is not batch_first, so move the time axis to the front.
        embeddings = self.embedding(inputs.T)
        self.encoder.flatten_parameters()
        outputs, _ = self.encoder(embeddings)  # (time steps, batch size, 2 * num_hiddens)
        # NOTE(review): if inputs are right-padded to a fixed length, outputs[-1]
        # is read at a padding position for shorter texts — confirm acceptable.
        encoding = torch.cat((outputs[0], outputs[-1]), dim=1)
        return self.decoder(encoding)

In [None]:
# Model hyperparameters and the device that the model and batches are moved to.
vocab_size = tokenizer.vocab_size  # sizes the embedding table from the tokenizer's vocabulary
embed_size, num_hiddens, num_layers = 100, 100, 2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # use CUDA when available


In [None]:
# Build the classifier on the selected device, with cross-entropy loss and an
# Adam optimizer over all model parameters.
model = BiRNN(vocab_size, embed_size, num_hiddens, num_layers).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


In [None]:
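# NOTE: the commented-out code below is an earlier draft of train_model /
# evaluate_model (accuracy-only evaluation); the active definitions follow it.
# def train_model(model, train_loader, test_loader, num_epochs, criterion, optimizer):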
#     model.train()
#     for epoch in range(num_epochs):
#         total_loss = 0
#         for batch in train_loader:
#             inputs = batch["input_ids"].to(device)
#             labels = batch["labels"].to(device)


#             outputs = model(inputs)
#             loss = criterion(outputs, labels)

#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()

#             total_loss += loss.item()

#         print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss / len(train_loader):.4f}")

#     evaluate_model(model, test_loader)


# def evaluate_model(model, test_loader):
#     model.eval()
#     correct, total = 0, 0
#     with torch.no_grad():
#         for batch in test_loader:
#             inputs = batch["input_ids"].to(device)
#             labels = batch["labels"].to(device)
#             outputs = model(inputs)
#             predictions = torch.argmax(outputs, dim=1)
#             correct += (predictions == labels).sum().item()
#             total += labels.size(0)
#     print(f"Accuracy: {correct / total:.4f}")
from sklearn.metrics import precision_score, recall_score

def train_model(model, train_loader, test_loader, num_epochs, criterion, optimizer):
    """Train ``model`` for ``num_epochs`` epochs, then evaluate it once.

    Each batch is indexed with ``"input_ids"`` and ``"labels"``; both tensors
    are moved to the module-level ``device`` before the forward pass. The mean
    batch loss is printed after every epoch, and ``evaluate_model`` is run on
    ``test_loader`` after the last epoch.

    Args:
        model: Module called as ``model(inputs)``; its output is passed to
            ``criterion`` together with the labels.
        train_loader: Iterable of training batches.
        test_loader: Iterable of evaluation batches, forwarded to
            ``evaluate_model``.
        num_epochs: Number of passes over ``train_loader``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer whose ``zero_grad``/``step`` drive the updates.
    """
    for epoch in range(num_epochs):
        # Re-enter training mode every epoch so the loop stays correct even if
        # the model was switched to eval mode in between.
        model.train()
        total_loss = 0.0
        num_batches = 0
        for batch in train_loader:
            inputs = batch["input_ids"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # Count batches instead of calling len(train_loader): this also works
        # for loaders without __len__ and avoids ZeroDivisionError when the
        # loader yields nothing.
        mean_loss = total_loss / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {mean_loss:.4f}")

    evaluate_model(model, test_loader)


def evaluate_model(model, test_loader):
    """Print and return accuracy, precision and recall of ``model``.

    The model is put in eval mode and run without gradient tracking over
    every batch of ``test_loader``. Each batch is indexed with
    ``"input_ids"`` and ``"labels"``; the predicted class is the argmax over
    dimension 1 of the model output.

    Args:
        model: Module called as ``model(inputs)`` to produce per-class scores.
        test_loader: Iterable of evaluation batches.

    Returns:
        A dict with the keys ``"accuracy"``, ``"precision"`` and ``"recall"``.
    """
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in test_loader:
            inputs = batch["input_ids"].to(device)
            labels = batch["labels"].to(device)

            predictions = torch.argmax(model(inputs), dim=1)

            # tolist() yields plain Python ints, so no NumPy is needed below.
            all_preds.extend(predictions.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    # Accuracy is computed in plain Python so this function does not depend on
    # `np` being imported before it is called; an empty loader gives NaN.
    total = len(all_labels)
    correct = sum(pred == label for pred, label in zip(all_preds, all_labels))
    accuracy = correct / total if total else float("nan")
    precision = precision_score(all_labels, all_preds, zero_division=0)
    recall = recall_score(all_labels, all_preds, zero_division=0)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")

    return {"accuracy": accuracy, "precision": precision, "recall": recall}


In [None]:
import numpy as np

# Train for a fixed number of epochs; train_model runs evaluate_model on the
# test loader once training finishes.
num_epochs = 5
train_model(model, train_dataloader, test_dataloader, num_epochs, criterion, optimizer)


In [None]:
def predict_sentiment(model, tokenizer, text):
    """Classify ``text`` and return ``"positive"`` or ``"negative"``.

    The text is tokenized to a fixed length of 128 tokens, moved to the
    module-level ``device`` and scored by ``model`` in eval mode. Class index
    1 maps to "positive"; any other index maps to "negative".
    """
    model.eval()
    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=128,
    )
    # The model expects (batch, time steps) ids and transposes internally.
    input_ids = encoded["input_ids"].to(device)
    with torch.no_grad():
        logits = model(input_ids)
    predicted_class = logits.argmax(dim=1).item()
    if predicted_class == 1:
        return "positive"
    return "negative"


In [None]:

# Try the model on a single sample sentence and print the predicted label.
print(predict_sentiment(model, tokenizer, "Oka yaxwi chqmapt"))


In [None]:
import json

# Remove the `metadata.widgets` entry from the notebook file, if present.
filename = "BiRNN_sentiment.ipynb"

with open(filename, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Rewrite the file only when something was actually removed, so an
# already-clean notebook is left untouched and the final message is truthful.
if 'widgets' in data.get('metadata', {}):
    print("Fixing metadata.widgets...")
    del data['metadata']['widgets']

    with open(filename, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII cell text readable instead of
        # turning it into \uXXXX escapes.
        json.dump(data, f, indent=2, ensure_ascii=False)
        f.write("\n")

    print("Notebook metadata fixed!")
else:
    print("No metadata.widgets entry found; notebook left unchanged.")
