In [None]:
!pip install torchtext==0.6.0 torch==2.0.0

Collecting torch==2.0.0
  Downloading torch-2.0.0-cp310-cp310-manylinux1_x86_64.whl (619.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m619.9/619.9 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu11==11.7.99 (from torch==2.0.0)
  Downloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl (21.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.0/21.0 MB[0m [31m78.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu11==11.7.99 (from torch==2.0.0)
  Downloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.3/849.3 kB[0m [31m74.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu11==11.7.101 (from torch==2.0.0)
  Downloading nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl (11.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.8/11.8 MB

In [None]:
import spacy
from spacy.cli.download import download

# Only fetch the English pipeline when it is not installed yet, so that
# re-running the notebook does not download the package again.
if not spacy.util.is_package("en_core_web_sm"):
    download(model="en_core_web_sm")

[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import IMDB
from torchtext.data import Field, LabelField, TabularDataset, BucketIterator

from sklearn.metrics import classification_report

# Load the small English spaCy pipeline once; only its tokenizer is used below.
spacy_en = spacy.load('en_core_web_sm')


def tokenize_en(text):
    """Split a raw string into a list of token strings with the spaCy tokenizer."""
    return [tok.text for tok in spacy_en.tokenizer(text)]


# The tokenizer has to be passed by keyword: the first positional parameter of
# `Field` is `sequential`, so passing the tokenizer positionally leaves
# `tokenize` at its default (plain whitespace splitting). The tokenizer must
# also return strings, because `lower=True` lowercases each token afterwards.
TEXT = Field(tokenize=tokenize_en, lower=True, include_lengths=True)
LABEL = LabelField(dtype=torch.float)

train_data, test_data = IMDB.splits(TEXT, LABEL)
# Build vocabulary (text vocabulary is capped and backed by 100-d GloVe vectors)
TEXT.build_vocab(train_data, max_size=25000, vectors="glove.6B.100d")
LABEL.build_vocab(train_data)

# Create iterators for batching
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# sort_within_batch=True orders each batch by length, which the model's
# pack_padded_sequence call relies on.
train_iterator, test_iterator = BucketIterator.splits(
    (train_data, test_data), batch_size=64, device=device, sort_within_batch=True)





In [None]:
class SentimentLSTM(nn.Module):
    """LSTM sentence classifier on top of pretrained word embeddings.

    Args:
        embedding_dim: Size of each embedding vector (input size of the LSTM).
        hidden_dim: Hidden size of the LSTM, per direction.
        output_dim: Number of output logits.
        n_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
        dropout: Dropout probability, used between LSTM layers and on the
            final hidden state before the linear layer.
        pad_idx: Vocabulary index of the padding token.
        pretrained_vectors: Optional 2-D float tensor of embedding weights.
            When omitted, the notebook-level ``TEXT.vocab.vectors`` is used,
            which is what this class always did before the parameter existed.
    """

    def __init__(self, embedding_dim, hidden_dim, output_dim, n_layers, bidirectional, dropout, pad_idx,
                 pretrained_vectors=None):
        super().__init__()
        if pretrained_vectors is None:
            # Backward-compatible default: fall back to the global vocabulary.
            pretrained_vectors = TEXT.vocab.vectors
        # Remembered so forward() can pick the right final hidden state(s).
        self.bidirectional = bidirectional
        # NOTE: from_pretrained keeps the embedding weights frozen by default.
        self.embedding = nn.Embedding.from_pretrained(pretrained_vectors, padding_idx=pad_idx)
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_dim,
            num_layers=n_layers,
            bidirectional=bidirectional,
            # Inter-layer dropout is meaningless (and warns) with a single layer.
            dropout=dropout if n_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):
        """Return one row of logits per sequence in the batch.

        Args:
            text: Token-index tensor laid out sequence-first (seq_len, batch),
                as required by the non-batch_first LSTM and packing call.
            text_lengths: Length of every sequence in ``text``; must be sorted
                in decreasing order because packing uses the default
                ``enforce_sorted=True``.
        """
        embedded = self.embedding(text)
        # pack_padded_sequence needs the lengths on the CPU.
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'))
        # Only the final hidden state is needed; the per-step outputs are not.
        _, (hidden, _) = self.lstm(packed_embedded)
        if self.bidirectional:
            # Last layer: forward direction is hidden[-2], backward is hidden[-1].
            final_hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        else:
            # Unidirectional: the last layer's hidden state alone matches fc's input size.
            final_hidden = hidden[-1, :, :]
        return self.fc(self.dropout(final_hidden))

# Hyperparameters
embedding_dim = 100  # must match the 100-d GloVe vectors attached to TEXT.vocab
hidden_dim = 256
output_dim = 1  # a single logit, consumed by BCEWithLogitsLoss
n_layers = 2
bidirectional = True
dropout = 0.2
pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = SentimentLSTM(embedding_dim, hidden_dim, output_dim, n_layers, bidirectional, dropout, pad_idx)
model = model.to(device)

# The model constructor already initialises the embedding layer from
# TEXT.vocab.vectors, so copying the same vectors in again is unnecessary.
# Only the padding row still has to be zeroed.
with torch.no_grad():
    model.embedding.weight[pad_idx].zero_()

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters())


def binary_accuracy(preds, y):
    """Fraction of entries where the thresholded prediction equals the label.

    ``preds`` are raw logits: they go through a sigmoid and are rounded to
    0/1 before being compared element-wise with ``y``. Returns a 0-d tensor.
    """
    hard_labels = torch.sigmoid(preds).round()
    hits = hard_labels.eq(y).float()
    return hits.sum() / len(hits)

def train(model, iterator, optimizer, criterion):
    """Run one optimisation pass over ``iterator``.

    Each batch must expose ``batch.text`` as a ``(tokens, lengths)`` pair and
    ``batch.label``. Returns ``(loss, accuracy)``, each being the mean of the
    per-batch values (sum divided by ``len(iterator)``).
    """
    model.train()
    running_loss, running_acc = 0, 0

    for batch in iterator:
        tokens, lengths = batch.text
        optimizer.zero_grad()
        # The model emits one logit per row; drop the trailing singleton dim.
        logits = model(tokens, lengths).squeeze(1)
        batch_loss = criterion(logits, batch.label)
        batch_acc = binary_accuracy(logits, batch.label)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()
        running_acc += batch_acc.item()

    return running_loss / len(iterator), running_acc / len(iterator)


def evaluate(model, iterator, criterion):
    """Score ``model`` on ``iterator`` without updating any weights.

    The model is switched to eval mode and gradients are disabled. Returns
    ``(loss, accuracy)``, each being the mean of the per-batch values
    (sum divided by ``len(iterator)``).
    """
    model.eval()
    running_loss, running_acc = 0, 0

    with torch.no_grad():
        for batch in iterator:
            tokens, lengths = batch.text
            # One logit per row; drop the trailing singleton dimension.
            logits = model(tokens, lengths).squeeze(1)
            running_loss += criterion(logits, batch.label).item()
            running_acc += binary_accuracy(logits, batch.label).item()

    return running_loss / len(iterator), running_acc / len(iterator)


# Train for a fixed number of epochs, reporting metrics after each one.
# NOTE: the "Val." figures are computed on test_iterator, i.e. the test split
# doubles as the validation set here.
N_EPOCHS = 50
for epoch in range(N_EPOCHS):
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, test_iterator, criterion)
    print(
        f'Epoch: {epoch+1:02}',
        f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%',
        f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%',
        sep='\n',
    )

# Evaluate the model
def test_model(model, iterator):
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for batch in iterator:
            text, text_lengths = batch.text
            predictions = model(text, text_lengths).squeeze(1)
            rounded_preds = torch.round(torch.sigmoid(predictions))
            y_true.extend(batch.label.cpu().numpy())
            y_pred.extend(rounded_preds.cpu().numpy())
    return y_true, y_pred

# Gather labels and hard predictions over test_iterator, then print
# scikit-learn's per-class classification report for them.
y_true, y_pred = test_model(model, test_iterator)
print(classification_report(y_true, y_pred))

Epoch: 01
	Train Loss: 0.660 | Train Acc: 59.92%
	 Val. Loss: 0.573 |  Val. Acc: 71.19%
Epoch: 02
	Train Loss: 0.469 | Train Acc: 78.27%
	 Val. Loss: 0.391 |  Val. Acc: 82.42%
Epoch: 03
	Train Loss: 0.381 | Train Acc: 83.13%
	 Val. Loss: 0.350 |  Val. Acc: 84.57%
Epoch: 04
	Train Loss: 0.339 | Train Acc: 85.28%
	 Val. Loss: 0.335 |  Val. Acc: 85.35%
Epoch: 05
	Train Loss: 0.308 | Train Acc: 86.89%
	 Val. Loss: 0.315 |  Val. Acc: 86.26%
Epoch: 06
	Train Loss: 0.287 | Train Acc: 87.93%
	 Val. Loss: 0.302 |  Val. Acc: 86.91%
Epoch: 07
	Train Loss: 0.259 | Train Acc: 89.29%
	 Val. Loss: 0.306 |  Val. Acc: 87.45%
Epoch: 08
	Train Loss: 0.230 | Train Acc: 90.82%
	 Val. Loss: 0.314 |  Val. Acc: 86.84%
Epoch: 09
	Train Loss: 0.206 | Train Acc: 92.07%
	 Val. Loss: 0.308 |  Val. Acc: 87.39%
Epoch: 10
	Train Loss: 0.175 | Train Acc: 93.29%
	 Val. Loss: 0.333 |  Val. Acc: 86.62%
Epoch: 11
	Train Loss: 0.145 | Train Acc: 94.66%
	 Val. Loss: 0.362 |  Val. Acc: 87.07%
Epoch: 12
	Train Loss: 0.120 | T

In [None]:
import os

from googleapiclient.discovery import build

# Read the YouTube Data API key from the YOUTUBE_API_KEY environment variable
# so the real key never has to be written into the notebook. The placeholder
# below is only a fallback; replace it if you cannot set the variable.
API_KEY = os.environ.get('YOUTUBE_API_KEY', 'YOUR_API_KEY')

# Create a YouTube Data API client
youtube = build('youtube', 'v3', developerKey=API_KEY)


In [None]:
def fetch_limited_video_comments(video_id, max_results=1000, client=None):
    """Fetch up to ``max_results`` top-level comments of a video as plain text.

    Args:
        video_id: ID of the video whose comment threads are listed.
        max_results: Upper bound on the number of comments returned.
        client: API client exposing ``commentThreads().list(...).execute()``.
            Defaults to the notebook-level ``youtube`` client.

    Returns:
        A list of comment strings, in the order the API returned them. It is
        shorter than ``max_results`` when the video has fewer comments.
    """
    if client is None:
        client = youtube

    comments = []
    page_token = None

    while len(comments) < max_results:
        response = client.commentThreads().list(
            part='snippet',
            videoId=video_id,
            textFormat='plainText',
            # Ask for at most 100 comments per page, or fewer if fewer are still needed.
            maxResults=min(100, max_results - len(comments)),
            pageToken=page_token,
        ).execute()

        # A page without an 'items' key is treated as empty.
        for item in response.get('items', []):
            comments.append(item['snippet']['topLevelComment']['snippet']['textDisplay'])

        page_token = response.get('nextPageToken')
        if not page_token:
            # No further pages: the video has no more comment threads.
            break

    # Guard against a page that holds more items than were requested.
    return comments[:max_results]

from urllib.parse import parse_qs, urlparse


def extract_video_id(link):
    """Extract the video ID from a YouTube link.

    Handles ``watch?v=<id>`` links regardless of the position of ``v`` among
    the query parameters, ``youtu.be/<id>`` short links, ``/embed/<id>``,
    ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths, and a bare ID.

    Raises:
        ValueError: if no video ID can be found in ``link``.
    """
    link = link.strip()
    # Links pasted without a scheme would otherwise be parsed as a bare path.
    if '://' not in link and ('/' in link or '?' in link):
        link = 'https://' + link

    parsed = urlparse(link)
    host = (parsed.hostname or '').lower()
    path_parts = [part for part in parsed.path.split('/') if part]

    if host == 'youtu.be' or host.endswith('.youtu.be'):
        if path_parts:
            return path_parts[0]
    else:
        ids = parse_qs(parsed.query).get('v')
        if ids:
            return ids[0]
        if len(path_parts) >= 2 and path_parts[0] in ('embed', 'shorts', 'live', 'v'):
            return path_parts[1]
        # A lone token with no host and no '=' is taken to be the ID itself.
        if not host and len(path_parts) == 1 and '=' not in link:
            return path_parts[0]

    raise ValueError(f'Could not find a YouTube video ID in {link!r}')


if __name__ == '__main__':
    # https://www.youtube.com/watch?v=VuNIsY6JdUw
    youTube_link = input('Enter YouTube video link: ')
    # Splitting on '=' breaks on links with extra query parameters or without
    # a query string, so the link is parsed properly instead.
    video_id = extract_video_id(youTube_link)
    max_results = 10000  # Maximum number of comments to fetch

    video_comments = fetch_limited_video_comments(video_id, max_results)
    print("Total comments: ", len(video_comments))


Enter YouTube video link: https://www.youtube.com/watch?v=tSbScjc0cIk
Total comments:  115


In [None]:
def predict_sentiment(model, sentence, TEXT, device):
    """Score a single sentence with the trained model.

    Args:
        model: Callable as ``model(tokens, lengths)`` and returning one logit.
        sentence: Raw input string.
        TEXT: The text field used for training. Its ``preprocess`` method and
            ``vocab.stoi`` mapping are used to turn the string into indices.
        device: Device on which the token tensor is created.

    Returns:
        The sigmoid of the model's logit as a Python float in [0, 1]. Callers
        in this notebook treat values >= 0.5 as positive sentiment.
    """
    model.eval()
    # Reuse the field's own preprocessing (its tokenizer plus lowercasing) so
    # the tokens looked up here match the ones the vocabulary was built from.
    tokens = TEXT.preprocess(sentence)
    if not tokens:
        # An empty sequence cannot be packed; score a lone unknown token instead.
        tokens = [TEXT.unk_token]
    indexed = [TEXT.vocab.stoi[t] for t in tokens]
    # Shape (seq_len, 1): the model takes sequence-first input with a batch of one.
    tensor = torch.tensor(indexed, dtype=torch.long, device=device).unsqueeze(1)
    # Lengths stay on the CPU, where the packing step needs them.
    length_tensor = torch.tensor([len(indexed)], dtype=torch.long)
    with torch.no_grad():  # inference only, no autograd graph needed
        prediction = torch.sigmoid(model(tensor, length_tensor))
    return prediction.item()

# Smoke test: classify one hand-written sentence with the trained model
test_sentence = "I don't like this movie."

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
prediction = predict_sentiment(model, test_sentence, TEXT, device)

# Scores of 0.5 and above are reported as positive.
print("Positive sentiment" if prediction >= 0.5 else "Negative sentiment")

Negative sentiment


In [None]:
# Classify every fetched comment and count how many land on each side of 0.5
positive = 0
negative = 0

for comment in video_comments:
    if not comment:
        continue  # nothing to score for an empty comment
    prediction = predict_sentiment(model, comment, TEXT, device)
    if prediction >= 0.5:
        print('Positive: ', comment)
        positive += 1
    else:
        print('Negative: ', comment)
        negative += 1

print(positive, negative)

Negative:  This is supposed to be a bad album? I love it!
Positive:  Rather an interesting than a good experiment. Lots of good elements spread all over the album for sure, but somehow collectively, it's not the magic formula that convinces me fully.
Positive:  John Bonham on Drums.
Negative:  Essential stuff, will never be in your constant rotation. Yet, you'll be more complete as a rocker. Ssh, some consider it better than Stooges, Velvet Underground & MC5, LOL ;)
Negative:  bruh this is killer
Positive:  owned the original
Negative:  insane !!!
Negative:  Bought this when it came out   just awful
Positive:  Listen to anything by Danzig and you’ll hear how Lord Sutch’s vocals were far ahead of their time.
Positive:  He sang In the first track with jimmy page u can’t go wrong that’s fn amazing this beat almost could pass for Achilles Last stand
Positive:  No doubt jimmy page abs bonzo playing on the first track toss is awesome
Positive:  It’s typical Lord Sutch and the music is typica

In [None]:
# Share of scored comments that were classified as positive. When no comment
# was scored the ratio is undefined, so report NaN instead of dividing by zero.
scored_total = positive + negative
print('positivity: ', positive / scored_total if scored_total else float('nan'))

positivity:  0.5304347826086957
