In [None]:
import torch
# Display the device that will be used: CUDA when a GPU is visible to PyTorch, otherwise the CPU.
torch.device("cuda" if torch.cuda.is_available() else "cpu")

device(type='cuda')

In [None]:
! pip install --upgrade datasets fsspec aiohttp
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader


from datasets import load_dataset

# Load the "sentiment" configuration of the cardiffnlp/tweet_eval dataset.
# The returned object is indexed below by split name ('train', 'validation', 'test').
dataset = load_dataset("cardiffnlp/tweet_eval", "sentiment")

Collecting datasets
  Downloading datasets-4.0.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec
  Downloading fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)
Collecting aiohttp
  Downloading aiohttp-3.12.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.6 kB)
Collecting fsspec
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-4.0.0-py3-none-any.whl (494 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m494.8/494.8 kB[0m [31m34.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading aiohttp-3.12.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m85.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, aiohttp,

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.78M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/901k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/167k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/45615 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/12284 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [None]:
from torch.nn.utils.rnn import pad_sequence
def build_vocab(data):
  """Build a token -> integer id mapping from an iterable of records.

  Each record must expose its raw string under the 'text' key. Text is
  lower-cased and split on whitespace. Ids 0 and 1 are reserved for the
  "<unk>" and "<pad>" specials; every other token receives the next free
  id in order of first appearance.

  Returns:
    dict mapping token string to id.
  """
  token_to_id = {"<unk>": 0, "<pad>": 1}
  for record in data:
    for word in record['text'].lower().split():
      # setdefault only inserts when the word is new, so existing ids never change.
      token_to_id.setdefault(word, len(token_to_id))
  return token_to_id


# The vocabulary is built from the training split only; tokens that appear
# solely in other splits are mapped to "<unk>" by text_to_ids.
vocab = build_vocab(dataset['train'])

# Number of distinct ids (including the two special tokens); used to size the embedding table.
vocab_size = len(vocab)


def text_to_ids(text):
  """Convert a raw string into a 1-D LongTensor of vocabulary ids.

  The text is lower-cased and split on whitespace, mirroring build_vocab.
  Tokens missing from the module-level `vocab` map to the "<unk>" id.
  """
  unk_id = vocab["<unk>"]
  id_list = []
  for word in text.lower().split():
    id_list.append(vocab.get(word, unk_id))
  return torch.tensor(id_list, dtype=torch.long)

class SentimentDataset(Dataset):
  """Map-style dataset that tokenises records lazily on access.

  Wraps an indexable collection whose items expose 'text' and 'label' keys.
  """

  def __init__(self, dataset):
    # Keep a reference only; no preprocessing happens up front.
    self.dataset = dataset

  def __len__(self):
    """Number of records in the wrapped collection."""
    return len(self.dataset)

  def __getitem__(self, idx):
    """Return (token_ids, label) for record `idx`; ids come from text_to_ids."""
    record = self.dataset[idx]
    token_ids = text_to_ids(record['text'])
    return token_ids, record['label']


# Wrap each split so that indexing yields (token_ids, label) pairs.
train_dataset = SentimentDataset(dataset['train'])
val_data = SentimentDataset(dataset["validation"])
test_data = SentimentDataset(dataset["test"])


def collate_fn(batch):
  """Merge a list of (token_ids, label) samples into one padded batch.

  Args:
    batch: list of (ids, label) pairs, where ids is a 1-D LongTensor of
      any length and label is an int.

  Returns:
    (padded_texts, labels): padded_texts is a LongTensor of shape
    (batch, longest_sequence) and labels is a LongTensor of shape (batch,).
  """
  texts, labels = zip(*batch)
  labels = torch.tensor(labels, dtype=torch.long)
  # Pad with the dedicated "<pad>" id rather than "<unk>": the embedding layer is
  # created with padding_idx=vocab['<pad>'], so only that id gets a fixed zero
  # vector. Padding with "<unk>" would feed a trainable "unknown word"
  # embedding into every padded position.
  padded_texts = pad_sequence(texts, batch_first=True, padding_value=vocab['<pad>'])
  return padded_texts, labels



# Batches of 32 variable-length samples, padded per batch by collate_fn.
# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False, collate_fn=collate_fn)



class SentimentLSTM(nn.Module):
  """Embedding -> single-layer LSTM -> linear classifier over the final hidden state.

  Args:
    vocab_size: number of rows in the embedding table.
    emb_dim: size of each token embedding.
    hidden_dim: size of the LSTM hidden state.
    output_dim: number of output classes (logits per sample).
    pad_idx: id of the padding token. Defaults to the module-level
      vocab['<pad>'] when omitted, which is what the original constructor used.
  """

  def __init__(self, vocab_size, emb_dim, hidden_dim, output_dim, pad_idx=None):
    super().__init__()
    if pad_idx is None:
      pad_idx = vocab['<pad>']
    # Plain int attribute: not part of the state_dict, so saved weights stay compatible.
    self.pad_idx = pad_idx
    self.embeddings = nn.Embedding(vocab_size, emb_dim, padding_idx=pad_idx)
    self.lstm = nn.LSTM(emb_dim, hidden_dim, batch_first=True)
    self.fc = nn.Linear(hidden_dim, output_dim)

  def forward(self, x):
    """Return class logits for a batch (batch, seq) or a single sequence (seq,) of token ids.

    For batched input the sequences are packed by their number of non-pad
    tokens, so the hidden state fed to the classifier is the one after the
    last real token and does not depend on how much padding the batch needed.
    This assumes padding is trailing, as produced by pad_sequence.
    """
    embedded = self.embeddings(x)
    if x.dim() == 1:
      # A lone unbatched sequence carries no padding; run it as-is.
      _, (hidden, _) = self.lstm(embedded)
      return self.fc(hidden[-1])

    # Count real tokens per row. clamp keeps an all-padding row at length 1,
    # because packing rejects zero-length sequences. Lengths must live on the CPU.
    lengths = (x != self.pad_idx).sum(dim=1).clamp(min=1).cpu()
    packed = nn.utils.rnn.pack_padded_sequence(
        embedded, lengths, batch_first=True, enforce_sorted=False)
    _, (hidden, _) = self.lstm(packed)
    return self.fc(hidden[-1])


# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SentimentLSTM(vocab_size, emb_dim=100, hidden_dim=128, output_dim=3).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10


def evaluate(loader):
  """Return the classification accuracy of `model` over every batch in `loader`.

  Puts the model in eval mode and disables gradient tracking. The model is
  left in eval mode afterwards, so callers must call model.train() before
  resuming training.
  """
  model.eval()
  correct = 0
  total = 0
  with torch.no_grad():
    for texts, labels in loader:
      texts, labels = texts.to(device), labels.to(device)
      preds = model(texts).argmax(dim=1)
      correct += (preds == labels).sum().item()
      total += labels.size(0)
  return correct / total


for epoch in range(1, epochs+1):
  model.train()
  total_loss = 0
  for texts, labels in train_loader:
    texts, labels = texts.to(device), labels.to(device)
    optimizer.zero_grad()
    outputs = model(texts)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    total_loss += loss.item()
  # Mean of the per-batch losses for this epoch.
  print(f"Epoch {epoch}/{epochs} ::: Loss {total_loss/len(train_loader):.4f}")
  print(f"Validation Accuracy: {evaluate(val_loader):.4f}")


# NOTE(review): validation accuracy is only reported, never used to select a
# checkpoint; the weights after the final epoch are the ones scored here.
print(f"Test Accuracy: {evaluate(test_loader):.4f}")



Epoch 1/10 ::: Loss 0.9784
Validation Accuracy: 0.5520
Epoch 2/10 ::: Loss 0.8064
Validation Accuracy: 0.6170
Epoch 3/10 ::: Loss 0.6359
Validation Accuracy: 0.6325
Epoch 4/10 ::: Loss 0.4632
Validation Accuracy: 0.6310
Epoch 5/10 ::: Loss 0.3000
Validation Accuracy: 0.6330
Epoch 6/10 ::: Loss 0.1777
Validation Accuracy: 0.6200
Epoch 7/10 ::: Loss 0.1023
Validation Accuracy: 0.6195
Epoch 8/10 ::: Loss 0.0613
Validation Accuracy: 0.6140
Epoch 9/10 ::: Loss 0.0419
Validation Accuracy: 0.6085
Epoch 10/10 ::: Loss 0.0309
Validation Accuracy: 0.6045
Test Accuracy: 0.5374


In [None]:
# Display the model's parameters as an ordered mapping of parameter name -> tensor.
model.state_dict()

OrderedDict([('embeddings.weight',
              tensor([[-0.8579, -0.0078,  0.4120,  ...,  1.7053,  0.5173,  1.6322],
                      [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
                      [-0.6267, -1.3040,  0.8679,  ..., -1.8982,  0.3001,  1.0238],
                      ...,
                      [-0.6462, -0.5976, -0.1683,  ..., -1.7469, -0.6650, -0.4156],
                      [-1.3915,  1.7011, -0.3309,  ...,  0.6398,  0.5068,  0.1470],
                      [-2.1128, -0.7503, -0.0606,  ...,  0.3798,  1.1922,  0.4164]],
                     device='cuda:0')),
             ('lstm.weight_ih_l0',
              tensor([[-0.0422, -0.2280,  0.1530,  ..., -0.1096, -0.0288, -0.1209],
                      [-0.1299,  0.1253,  0.1629,  ..., -0.1734, -0.2779, -0.0584],
                      [ 0.1167, -0.1972,  0.0915,  ...,  0.1164, -0.5339, -0.1028],
                      ...,
                      [ 0.1969,  0.1016, -0.0188,  ...,  0.0231,  0.0514,  0.02

In [None]:
# Save only the learned weights (the state_dict), not the model object itself.
# NOTE(review): the `vocab` mapping is not written here, yet the same token -> id
# mapping is needed to feed text to a reloaded model.
model_path = 'sentiment_llm_bata.pth'
torch.save(model.state_dict(), model_path)

In [None]:
def predict_sentiment(text):
  """Classify one piece of text as "negative", "neutral" or "positive".

  Args:
    text: raw input string; it is tokenised with text_to_ids.

  Returns:
    The label name for the highest-scoring class.

  Raises:
    ValueError: if `text` is empty or whitespace-only. Such input yields no
      tokens, and the LSTM cannot process a zero-length sequence.
  """
  label_map = {0: "negative", 1: "neutral", 2: "positive"}
  ids = text_to_ids(text)
  if ids.numel() == 0:
    raise ValueError("text must contain at least one token")

  model.eval()
  # Add a leading batch dimension so inference uses the same (batch, seq)
  # layout the model sees during training.
  batch = ids.unsqueeze(0).to(device)
  with torch.no_grad():
    logits = model(batch)
  pred = logits.argmax(dim=1).item() # 0, 1, 2
  return label_map[pred]



# Example call on a sentence containing a negation; the cell displays the returned label.
text = "icecream was not that bad"
predict_sentiment(text)

'neutral'