<a href="https://colab.research.google.com/github/OSAKAAI/DEEP_LEARNING/blob/main/Sentiment_Analysis_RNN_LAB_11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch
!pip install torchtext
!pip install spacy
!python -m spacy download en_core_web_sm

Collecting torchtext
  Downloading torchtext-0.18.0-cp312-cp312-manylinux1_x86_64.whl.metadata (7.9 kB)
Downloading torchtext-0.18.0-cp312-cp312-manylinux1_x86_64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m44.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchtext
Successfully installed torchtext-0.18.0
Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m99.4 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# 1. Simple dataset (positive/negative)

# Each training example is written as (sentence, label) so a sentence and its
# label cannot drift out of alignment. 1 = positive, 0 = negative.
labelled_examples = [
    ("I love this course it is amazing", 1),
    ("The explanation was very clear and helpful", 1),
    ("I really enjoy learning from this teacher", 1),
    ("This class is terrible and confusing", 0),
    ("I hate the way the course was taught", 0),
    ("The material is bad and useless", 0),
    ("the course is extremely bad", 0),
    # The label of this last sentence is a judgement call; 'decent' is treated
    # as slightly positive here.
    ("the course is decent not extremely good", 1),
]

# Split back into the two parallel lists used by the rest of the notebook.
sentences = [text for text, _ in labelled_examples]
labels = [label for _, label in labelled_examples]


# 2. Build vocabulary

def tokenize(text):
    """Lower-case ``text`` and split it on runs of whitespace.

    Punctuation is not stripped, so "good." and "good" are different tokens.
    """
    lowered = text.lower()
    return lowered.split()

# Map every distinct token to an integer id in order of first appearance.
# Ids start at 1 because 0 is reserved for padding.
vocab = {}

for sentence in sentences:
    for token in tokenize(sentence):
        # setdefault leaves already-seen tokens untouched, so each new token
        # receives the next unused id (current vocabulary size + 1).
        vocab.setdefault(token, len(vocab) + 1)

idx = len(vocab) + 1  # next free id after the last assigned one
vocab_size = idx      # number of real tokens + 1 for padding index 0
print("Vocabulary:", vocab)
print("Vocab size:", vocab_size)


# Convert sentence → integer sequence

def encode_sentence(sentence):
    """Convert a sentence into a list of vocabulary ids.

    The sentence is tokenized with ``tokenize`` and each token is looked up in the
    module-level ``vocab`` mapping.

    Args:
        sentence: Raw text to encode.

    Returns:
        list[int]: Ids of the known tokens, in order. Tokens that are not in
        ``vocab`` are skipped, so the result may be shorter than the token count
        (and is empty when no token is known).
    """
    tokens = tokenize(sentence)
    # Words never seen while building `vocab` have no id (and no embedding row).
    # Skipping them lets prediction run on arbitrary text instead of raising
    # KeyError on the first out-of-vocabulary word.
    return [vocab[token] for token in tokens if token in vocab]


# Integer-encode every training sentence.
encoded_sentences = list(map(encode_sentence, sentences))


# 3. Padding sequences (make equal length)

# The longest encoded sentence sets the common length for all sequences.
max_len = max(map(len, encoded_sentences))

def pad(seq):
    """Right-pad ``seq`` with zeros up to the module-level ``max_len``.

    A sequence that is already ``max_len`` long or longer gets nothing appended;
    it is never truncated. A new list is returned in every case.
    """
    missing = max_len - len(seq)
    # range() of a non-positive number is empty, so no zeros are added then.
    return seq + [0 for _ in range(missing)]

# Bring every encoded sentence to the same length so they stack into one tensor.
padded = list(map(pad, encoded_sentences))

X = torch.tensor(padded)                       # integer ids, shape: (num_samples, max_len)
y = torch.tensor(labels, dtype=torch.float32)  # float targets for BCELoss, shape: (num_samples,)


# 4. RNN MODEL

class SentimentRNN(nn.Module):
    """Binary sentiment classifier: embedding -> vanilla RNN -> linear -> sigmoid.

    Input sequences are expected to be padded with ``pad_idx``. The prediction is
    read from the RNN output at each sequence's last non-padding token, so the
    amount of padding appended to a sentence does not change its score.

    Args:
        vocab_size: Number of rows in the embedding table (largest token id + 1).
        embed_dim: Size of each token embedding.
        hidden_dim: Size of the RNN hidden state.
        pad_idx: Token id used for padding. Its embedding is fixed at zero and
            receives no gradient updates.
    """

    def __init__(self, vocab_size, embed_dim=16, hidden_dim=32, pad_idx=0):
        super().__init__()

        self.pad_idx = pad_idx
        # padding_idx keeps the padding row at zero so it never learns a meaning.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)
        self.rnn = nn.RNN(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Score a batch of padded token-id sequences.

        Args:
            x: Integer tensor of shape (batch, seq_len).

        Returns:
            Tensor of shape (batch, 1) with values in (0, 1).
        """
        embedded = self.embedding(x)   # (batch, seq_len, embed_dim)
        out, _ = self.rnn(embedded)    # out: (batch, seq_len, hidden_dim)

        # Position of the last non-padding token in each row. Reading out[:, -1]
        # would instead return the state after the RNN has also consumed the
        # trailing padding, making the score depend on the padding length.
        # Rows made up entirely of padding fall back to position 0.
        positions = torch.arange(x.size(1), device=x.device)
        last_idx = ((x != self.pad_idx) * positions).max(dim=1).values
        rows = torch.arange(x.size(0), device=x.device)
        last_output = out[rows, last_idx]          # (batch, hidden_dim)

        return self.sigmoid(self.fc(last_output))  # (batch, 1)


# Seed PyTorch's RNG before the model is built so that weight initialisation, and
# therefore the training curve and the predictions below, are the same on every
# Restart & Run All.
torch.manual_seed(42)

model = SentimentRNN(vocab_size)
criterion = nn.BCELoss()  # the model already applies a sigmoid, so plain BCE on probabilities
optimizer = optim.Adam(model.parameters(), lr=0.01)


# 5. Train model

epochs = 200

# Nothing inside the loop switches the model to eval mode, so training mode only
# needs to be set once.
model.train()

for epoch in range(epochs):
    # Full-batch gradient step: the whole dataset is a single batch.
    optimizer.zero_grad()

    # Drop only the trailing singleton dimension: (num_samples, 1) -> (num_samples,).
    # A bare .squeeze() would also collapse the batch dimension for a one-sample
    # dataset, producing a 0-d tensor that no longer matches y's shape.
    preds = model(X).squeeze(1)
    loss = criterion(preds, y)

    loss.backward()
    optimizer.step()

    if epoch % 40 == 0:
        print(f"Epoch {epoch} | Loss = {loss.item():.4f}")


# 6. Test on new text

def predict_sentiment(text):
    """Classify ``text`` with the trained module-level ``model``.

    The text is encoded with ``encode_sentence``, padded with ``pad`` and passed
    through the model as a batch of one, with gradients disabled.

    Returns:
        tuple: ("Positive" or "Negative", score), where score is the model's
        sigmoid output as a Python float; scores above 0.5 are "Positive".
    """
    model.eval()
    token_ids = pad(encode_sentence(text))
    batch = torch.tensor([token_ids])  # batch of size 1

    with torch.no_grad():
        score = model(batch).item()

    label = "Positive" if score > 0.5 else "Negative"
    return label, score


# Try your example
# Example sentences to score with the trained model.
text = "The course explanation was very good"
text1 = "The course explanation was terrible"
text2 = "The course explanation was decent not very good"

# Run each example through the same predict-and-report steps.
for sample in (text, text1, text2):
    sentiment, score = predict_sentiment(sample)

    print("\nText:", sample)
    print("Sentiment:", sentiment)
    print("Score:", score)




Vocabulary: {'i': 1, 'love': 2, 'this': 3, 'course': 4, 'it': 5, 'is': 6, 'amazing': 7, 'the': 8, 'explanation': 9, 'was': 10, 'very': 11, 'clear': 12, 'and': 13, 'helpful': 14, 'really': 15, 'enjoy': 16, 'learning': 17, 'from': 18, 'teacher': 19, 'class': 20, 'terrible': 21, 'confusing': 22, 'hate': 23, 'way': 24, 'taught': 25, 'material': 26, 'bad': 27, 'useless': 28, 'extremely': 29, 'decent': 30, 'not': 31, 'good': 32}
Vocab size: 33
Epoch 0 | Loss = 0.6863
Epoch 40 | Loss = 0.0007
Epoch 80 | Loss = 0.0004
Epoch 120 | Loss = 0.0003
Epoch 160 | Loss = 0.0002

Text: The course explanation was very good
Sentiment: Positive
Score: 0.5981553196907043

Text: The course explanation was terrible
Sentiment: Negative
Score: 0.05820850282907486

Text: The course explanation was decent not very good
Sentiment: Positive
Score: 0.5542441606521606
