In [37]:
import googleapiclient.discovery
import googleapiclient.errors
import pandas as pd
import re
import string

# Import API key from config.py
from config import API_KEY

In [46]:
api_service_name = "youtube"
api_version = "v3"

youtube = googleapiclient.discovery.build(
    api_service_name, api_version, developerKey=API_KEY)

request = youtube.commentThreads().list(
    part="snippet",
    videoId="pL064KP46UE",
    maxResults=100
)
response = request.execute()

comments = []

for item in response['items']:
    comment_text = item['snippet']['topLevelComment']['snippet']['textDisplay']
    comments.append(comment_text)

df = pd.DataFrame(comments, columns=['comment'])

# df.to_csv('comments.csv', index=False)

df.head(100)

Unnamed: 0,comment
0,Test your Office know-how with The Office Holi...
1,&quot;I threw the keys in out of anger&quot; <...
2,phyllis snorting &amp; dwight opening the door...
3,Tho half of this actually made the final cut ..
4,"<a href=""https://www.youtube.com/watch?v=pL064..."
...,...
95,I threw my keys in out of anger
96,When Creed says does it have to be clean..... ...
97,"<a href=""https://www.youtube.com/watch?v=pL064..."
98,That toilet mesh invention needs to be a real ...


In [39]:
# data type of the dataframe
print(df.dtypes)

comment    object
dtype: object


In [40]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('vader_lexicon')

[nltk_data] Downloading package punkt to /Users/aazain/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/aazain/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/aazain/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [41]:
def preprocess_text(text):
    # Remove HTML tags
    text = re.sub(r'<.*?>', '', text)

    # Convert text to lowercase
    text = text.lower()

    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    word_tokens = word_tokenize(text)
    filtered_text = [word for word in word_tokens if word not in stop_words]

    return ' '.join(filtered_text)

In [42]:
# Apply preprocessing to each comment
df['processed_comment'] = df['comment'].apply(preprocess_text)

In [43]:
def analyze_sentiment_vader(comment):
    sia = SentimentIntensityAnalyzer()
    score = sia.polarity_scores(comment)
    compound = score['compound']
    if compound >= 0.05:
        return 'Positive'
    elif compound <= -0.05:
        return 'Negative'
    else:
        return 'Neutral'


In [44]:
# Apply sentiment analysis on the processed comments
df['sentiment'] = df['processed_comment'].apply(analyze_sentiment_vader)

# Display the DataFrame
display(df.head(10))


Unnamed: 0,comment,processed_comment,sentiment
0,Test your Office know-how with The Office Holi...,test office knowhow office holiday quiz premie...,Positive
1,&quot;I threw the keys in out of anger&quot; <...,quoti threw keys angerquot killed 😂,Negative
2,phyllis snorting &amp; dwight opening the door...,phyllis snorting amp dwight opening door w but...,Neutral
3,Tho half of this actually made the final cut ..,tho half actually made final cut,Negative
4,"<a href=""https://www.youtube.com/watch?v=pL064...",141 love dwight exhibits objects like art gall...,Positive
5,what do you mean so we don&#39;t have to?..the...,mean don39t tothese good,Positive
6,How is Michael more mature in the deleted scenes,michael mature deleted scenes,Positive
7,Why were these scenes deleted is all I want to...,scenes deleted want knowanyone,Positive
8,Not Michael calling Pam pervy.. 😭😂,michael calling pam pervy 😭😂,Neutral
9,"I love the self awareness @<a href=""https://ww...",love self awareness 412 🤣,Positive
