In [None]:
import pandas as pd
import numpy as np
from googleapiclient.discovery import build
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer


In [None]:
pip install retrying

Collecting retrying
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Installing collected packages: retrying
Successfully installed retrying-1.3.4


In [None]:
import os
import googleapiclient.discovery
from config import API_KEY
import retrying


# YouTube Data API v3 client, authenticated with the developer key imported from config.
youtube = googleapiclient.discovery.build(
    "youtube",
    "v3",
    developerKey=API_KEY,
)

# IDs of the videos whose comment threads will be scraped.
video_ids = [
    'H3jaIhSj23E',
    'cBpGq-vDr2Y',
    'h89uOvUDVO4',
    '9TX212oemWY',
    'znG4vIsnqTw',
    'Tuw8hxrFBH8',
]

# Function to retrieve comments for a video
@retrying.retry(wait_fixed=2000, stop_max_attempt_number=3)  # Retry with 2-second delay, up to 3 times
def get_video_comments(youtube, **kwargs):
    """Collect the top-level comment texts of a video, following pagination.

    Args:
        youtube: API client exposing ``commentThreads().list(**kwargs).execute()``.
        **kwargs: Request parameters forwarded to ``commentThreads().list``
            (e.g. ``part``, ``videoId``, ``maxResults``). ``pageToken`` is
            set internally when follow-up pages are requested.

    Returns:
        list: The ``textDisplay`` value of every top-level comment, in the
        order the API returned them. Paging stops once at least 100 comments
        have been gathered or the response carries no ``nextPageToken``; the
        last fetched page is kept whole, so the list can exceed 100 entries.
    """
    comments = []
    results = youtube.commentThreads().list(**kwargs).execute()

    while results:
        # .get() tolerates a page that carries no "items" key at all.
        for item in results.get("items", []):
            comments.append(item["snippet"]["topLevelComment"]["snippet"]["textDisplay"])

        next_page_token = results.get("nextPageToken")
        # Decide *before* fetching so no request is spent on a page we would discard.
        if len(comments) >= 100 or not next_page_token:
            break

        # Actually request the next page. Without this re-fetch the loop would
        # keep re-reading the same response, duplicating its comments (or
        # spinning forever on an empty page).
        kwargs["pageToken"] = next_page_token
        results = youtube.commentThreads().list(**kwargs).execute()

    return comments

# Gather the comments of every listed video into one flat list.
# Request options shared by all videos; only videoId varies per call.
request_params = dict(part="snippet", textFormat="plainText", maxResults=100)

all_comments = []
for video_id in video_ids:
    # `comments` is deliberately a plain loop-level name: after the loop it
    # still holds the comments of the last video processed.
    comments = get_video_comments(youtube, videoId=video_id, **request_params)
    all_comments += comments

# Echo each collected comment with a 1-based running number.
for i, comment in enumerate(all_comments, start=1):
    print(f"Comment {i}: {comment}")


Comment 1: This was Nolans worst movie by far. The worst soundtrack ever - PAINFUL. A terrible terrible movie with absolutely no substance, no characters, no story, no nothing - BORING and PAINFUL. Go watch "Dr Strangelove instead".
Comment 2: Cillian Murphy is Cilling it! Cilling it!
Comment 3: There are definitely VFX shots, like the shot of rockets going through the sky, the shot of the US nuclear missiles lined up near the end, even the map being shown with cities lighting up, it's not completely VFX-free, that would be crazy.
Comment 4: How could he be viewed as a hero? He helped kill thousands of people.
Comment 5: Looked like garbage to me. Nothing but a movie stuck in the past. So tired of these type movies
Comment 6: Growing up with subtitles, I believe we have an audio edge over not having them.

For this reason I think there’s a case for non-SDH English subs for media. As a support for non-native speakers and films with… divisive mixing choices.
Comment 7: 25% of dialogue mi

In [None]:
import nltk#natural language toolkit
from nltk.sentiment.vader import SentimentIntensityAnalyzer


# VADER scores text against a word lexicon that must be downloaded before first use.
nltk.download('vader_lexicon')

analyzer = SentimentIntensityAnalyzer()

# The scraping loop leaves `comments` bound to the comments of the LAST video
# only. Rebind it to the full scraped corpus so this cell, and any later cell
# that reads `comments` together with `sentiment_list`, covers every video and
# the two lists stay the same length.
comments = list(all_comments)

# One label per entry of `comments`, in the same order.
sentiment_list = []

for i, comment in enumerate(comments, start=1):
    sentiment_scores = analyzer.polarity_scores(comment)

    # Bucket the 'compound' score: >= 0.05 positive, <= -0.05 negative,
    # anything in between neutral.
    compound_score = sentiment_scores['compound']
    if compound_score >= 0.05:
        sentiment_label = "Positive"
    elif compound_score <= -0.05:
        sentiment_label = "Negative"
    else:
        sentiment_label = "Neutral"

    print(f"Comment {i}: {comment}")
    print(f"Sentiment: {sentiment_label}")
    sentiment_list.append(sentiment_label)


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


Comment 1: Steve Jobs: The Exclusive Biography - https://amzn.to/3zKeTM6

Thank you so much for watching, if you enjoyed the video please leave a like and share this video with someone who could use some inspiration. God bless you all and I hope you all accomplish what you set out to do today. 😊
Sentiment: Positive
Comment 2: *I began my investment journey at the age of 27, primarily through hard work and dedication. I am to share that my passive income exceeded $100k in a single month for the first time. This success reinforces the importance of the advice mentioned earlier. It is not about achieving quick wealth, but rather ensuring long-term financial prosperity.*
Sentiment: Positive
Comment 3: Moral of the story: "Never put a baby for adoption. He could be a CEO of Apple Company".
Sentiment: Neutral
Comment 4: I dream of having my own house. 
help me realize my dream... 2202206205239390
Sentiment: Positive
Comment 5: Ajeeeee, zase jsem pinďa, co potřebuje motivaci :D

Jste legendy.

In [None]:
# Encoder that maps each distinct sentiment string to an integer code.
label_encoder = LabelEncoder()

# One row per comment: raw text, its sentiment label, and the encoded label.
df = pd.DataFrame({'comments': comments, 'sentiment': sentiment_list}).assign(
    sentiment_encoded=lambda frame: label_encoder.fit_transform(frame['sentiment'])
)


In [None]:
# Preview the first rows to sanity-check the comment / sentiment / encoded-label columns.
df.head()

Unnamed: 0,comments,sentiment,sentiment_encoded
0,Steve Jobs: The Exclusive Biography - https://...,Positive,2
1,*I began my investment journey at the age of 2...,Positive,2
2,"Moral of the story: ""Never put a baby for adop...",Neutral,1
3,I dream of having my own house. \nhelp me real...,Positive,2
4,"Ajeeeee, zase jsem pinďa, co potřebuje motivac...",Positive,2


In [None]:
# Bag-of-words features: at most 5000 vocabulary terms, English stop words dropped.
vectorizer = CountVectorizer(stop_words='english', max_features=5000)

# Learn the vocabulary from the comments, then densify the count matrix for the model.
bow_counts = vectorizer.fit_transform(df['comments'])
X = bow_counts.toarray()

# Integer-encoded sentiment labels are the prediction target.
y = df['sentiment_encoded']


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



In [None]:
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout

# Feed-forward classifier over the bag-of-words count vectors.
#
# CountVectorizer produces one unordered vector of word counts per comment,
# not a sequence of token ids. An Embedding/LSTM stack would treat every count
# as a vocabulary index and the vocabulary axis as time steps, so it cannot
# learn anything meaningful from this input (and is very slow on it).
# Dense layers consume the count vector directly.
model = Sequential()
# The input width (vocabulary size) is inferred from the data on the first fit/predict call.
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))  # regularize: the vocabulary is large relative to the number of comments
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))  # Three classes (negative, neutral, positive)

# Targets are integer class ids, hence the *sparse* categorical cross-entropy.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [None]:
# Train for 10 epochs in mini-batches of 64, scoring the held-out split after every epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=64,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7c953886aad0>

In [None]:
# Model output for every held-out comment (one row of class scores per sample).
predictions = model.predict(X_test)

# Index of the highest-scoring class in each row.
# Presumably 0 = negative, 1 = neutral, 2 = positive; confirm against the label encoder.
predicted_labels = predictions.argmax(axis=1)




In [None]:
# evaluate() yields the loss followed by the compiled metrics, so despite its
# name `accuracy` is a [loss, accuracy] pair and index 1 is the accuracy value.
accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {accuracy[1]:.2f}")


Test Accuracy: 0.55


In [None]:
# Take the class names from the fitted encoder rather than hard-coding them:
# the encoder numbers only the labels it actually saw, so a hand-written
# ["Negative", "Neutral", "Positive"] list is silently wrong whenever one
# sentiment is absent from the training data.
sentiment_labels = list(label_encoder.classes_)

# Make predictions on new comments, reusing the vocabulary learned during training
new_comments = ["THIS IS VERY BAD I DISLIKE IT", "This is terrible.", "It's okay."]
new_comments_vectorized = vectorizer.transform(new_comments).toarray()

predictions = model.predict(new_comments_vectorized)

# Interpret predictions and display sentiment names
predicted_labels = np.argmax(predictions, axis=1)

for new_comment, class_index in zip(new_comments, predicted_labels):
    # The softmax has a fixed 3 outputs; guard against an index the encoder never assigned.
    if class_index < len(sentiment_labels):
        predicted_name = sentiment_labels[class_index]
    else:
        predicted_name = f"Unknown (class {class_index})"
    print(f"Comment: {new_comment}")
    print(f"Predicted Sentiment: {predicted_name}")


Comment: THIS IS VERY BAD I DISLIKE IT
Predicted Sentiment: Positive
Comment: This is terrible.
Predicted Sentiment: Positive
Comment: It's okay.
Predicted Sentiment: Positive
