In [3]:
import googleapiclient.discovery
from googleapiclient.errors import HttpError
from textblob import TextBlob
import matplotlib.pyplot as plt
import pandas as pd
from langdetect import detect, LangDetectException
from datetime import datetime, timedelta

In [4]:
# Set up YouTube Data API
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = "AIzaSyAMVbpj1t3zoXDLcZYBRp_i8hrt8uAn_JY"

youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey=DEVELOPER_KEY)

In [5]:
def search_videos(query, max_results=200):
    try:
        request = youtube.search().list(
            q=query,
            part="snippet",
            # order="relevance",
            maxResults=max_results
        )
        response = request.execute()
        
        videos = []
        for item in response['items']:
            if item['id']['kind'] == 'youtube#video':
                videos.append({
                    'videoId': item['id']['videoId'],
                    'title': item['snippet']['title'],
                    'publishedAt': item['snippet']['publishedAt']
                })
        return videos
    except HttpError as e:
        print(f"An HTTP error {e.resp.status} occurred: {e.content}")
        return []


In [6]:
def get_comments(video_id, video_title, language='en'):
    comments = []
    six_months_ago = datetime.now() - timedelta(days=180)
    
    try:
        request = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            textFormat="plainText",
        )
        response = request.execute()
        
        while request:
            for item in response['items']:
                comment_snippet = item['snippet']['topLevelComment']['snippet']
                comment_text = comment_snippet['textOriginal']
                print(comment_snippet['publishedAt'])
                # comment_date = datetime.strptime(comment_snippet['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")
                comment_date = datetime.strptime(comment_snippet['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")

                # detect language
                try: 
                    detected_language = detect(comment_text)
                except LangDetectException:
                    detected_language = None
                
                # if comment_date >= six_months_ago and 'neutrogena' in comment_text and detected_language == language:
                if comment_date >= six_months_ago and detected_language == language:
                    comments.append({
                        'Date': comment_date.strftime('%Y-%m-%d'),
                        'Title': video_title,
                        'Comment': comment_text,
                        'Url': f'https://www.youtube.com/watch?v={video_id}',
                        # 'videoID': video_id,
                        'Source': 'youtube',
                        'Language': detected_language,
                        # 'sentiment': ''
                    })
            
            if 'nextPageToken' in response:
                request = youtube.commentThreads().list(
                    part="snippet",
                    videoId=video_id,
                    pageToken=response['nextPageToken'],
                    textFormat="plainText"
                )
                response = request.execute()
            else:
                break
    except HttpError as e:
        print(f"An HTTP error {e.resp.status} occurred: {e.content}")
    
    return comments

In [7]:
def create_dataframe(comments):
    df = pd.DataFrame(comments, columns=['Date', 'Title', 'Comment', 'Url', 'Source', 'Language'])
    return df

In [8]:
query = "neutrogena"
videos = search_videos(query)
    
all_comments = []
for video in videos:
    video_comments = get_comments(video['videoId'], video['title'])
    all_comments.extend(video_comments)
    
df = create_dataframe(all_comments)

df

An HTTP error 403 occurred: b'{\n  "error": {\n    "code": 403,\n    "message": "The video identified by the \\u003ccode\\u003e\\u003ca href=\\"/youtube/v3/docs/commentThreads/list#videoId\\"\\u003evideoId\\u003c/a\\u003e\\u003c/code\\u003e parameter has disabled comments.",\n    "errors": [\n      {\n        "message": "The video identified by the \\u003ccode\\u003e\\u003ca href=\\"/youtube/v3/docs/commentThreads/list#videoId\\"\\u003evideoId\\u003c/a\\u003e\\u003c/code\\u003e parameter has disabled comments.",\n        "domain": "youtube.commentThread",\n        "reason": "commentsDisabled",\n        "location": "videoId",\n        "locationType": "parameter"\n      }\n    ]\n  }\n}\n'
2021-10-31T18:28:47Z
2024-05-19T15:36:45Z
2024-05-12T02:03:29Z
2024-05-08T09:41:51Z
2024-05-06T03:53:55Z
2024-05-04T00:04:22Z
2024-05-01T21:10:14Z
2024-05-01T21:08:22Z
2024-04-01T20:22:18Z
2024-03-10T09:48:25Z
2024-02-29T02:03:48Z
2024-02-21T02:02:55Z
2024-02-12T18:59:21Z
2024-02-03T06:38:42Z
2024-02-0

Unnamed: 0,Date,Title,Comment,Url,Source,Language
0,2024-05-19,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,So ChatGPT told me about this lol. \nBut I hav...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en
1,2024-05-12,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I recently started using the water gel and I’m...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en
2,2024-05-06,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,(Subbed) good info thank you.🙏🏽,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en
3,2024-05-01,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I might stick to my g&g hydration station,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en
4,2024-05-01,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I guess now i know beauty josoen glow seurm di...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en
...,...,...,...,...,...,...
645,2023-12-28,How to use Neutrogena Hydro Boost Water Gel,Hy thanks for your review pls do i need to app...,https://www.youtube.com/watch?v=u7usUGrgjmY,youtube,en
646,2024-04-17,Sunscreen Showdown | WATCH BEFORE YOU BUY ⚠️ S...,Thanks for for doing this. I found you whil...,https://www.youtube.com/watch?v=EQTLIAJsBmI,youtube,en
647,2024-01-28,Sunscreen Showdown | WATCH BEFORE YOU BUY ⚠️ S...,Thank you so much for the review!!! Very infor...,https://www.youtube.com/watch?v=EQTLIAJsBmI,youtube,en
648,2024-04-21,Neutrogena Sunscreen Review,You are too slow and it's annoying. In the fir...,https://www.youtube.com/watch?v=p8u0G6I6lAo,youtube,en


In [9]:
df

Unnamed: 0,Date,Title,Comment,Url,Source,Language
0,2024-05-19,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,So ChatGPT told me about this lol. \nBut I hav...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en
1,2024-05-12,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I recently started using the water gel and I’m...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en
2,2024-05-06,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,(Subbed) good info thank you.🙏🏽,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en
3,2024-05-01,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I might stick to my g&g hydration station,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en
4,2024-05-01,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I guess now i know beauty josoen glow seurm di...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en
...,...,...,...,...,...,...
645,2023-12-28,How to use Neutrogena Hydro Boost Water Gel,Hy thanks for your review pls do i need to app...,https://www.youtube.com/watch?v=u7usUGrgjmY,youtube,en
646,2024-04-17,Sunscreen Showdown | WATCH BEFORE YOU BUY ⚠️ S...,Thanks for for doing this. I found you whil...,https://www.youtube.com/watch?v=EQTLIAJsBmI,youtube,en
647,2024-01-28,Sunscreen Showdown | WATCH BEFORE YOU BUY ⚠️ S...,Thank you so much for the review!!! Very infor...,https://www.youtube.com/watch?v=EQTLIAJsBmI,youtube,en
648,2024-04-21,Neutrogena Sunscreen Review,You are too slow and it's annoying. In the fir...,https://www.youtube.com/watch?v=p8u0G6I6lAo,youtube,en


In [10]:
csv_filename = 'youtube_comments.csv'
df.to_csv(csv_filename, index=False)

In [11]:
from transformers import pipeline, AutoTokenizer

model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(model_name)
sentiment_pipeline = pipeline("sentiment-analysis", model=model_name)

def truncate_text(text, max_tokens=510):
    tokens = tokenizer.tokenize(text)
    if len(tokens) > max_tokens:
        tokens = tokens[:max_tokens]
    return tokenizer.convert_tokens_to_string(tokens)

def get_sentiment_score(text):
    truncated_text = truncate_text(text)
    result = sentiment_pipeline(truncated_text)[0]
    return result['label'], result['score']

  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [12]:
# apply sentiment analysis
df['Sentiment'], df['Sentiment_Score'] = zip(*df['Comment'].apply(get_sentiment_score))

In [13]:
df

Unnamed: 0,Date,Title,Comment,Url,Source,Language,Sentiment,Sentiment_Score
0,2024-05-19,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,So ChatGPT told me about this lol. \nBut I hav...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,positive,0.630520
1,2024-05-12,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I recently started using the water gel and I’m...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,positive,0.971291
2,2024-05-06,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,(Subbed) good info thank you.🙏🏽,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,positive,0.972633
3,2024-05-01,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I might stick to my g&g hydration station,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,neutral,0.784086
4,2024-05-01,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I guess now i know beauty josoen glow seurm di...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,negative,0.886157
...,...,...,...,...,...,...,...,...
645,2023-12-28,How to use Neutrogena Hydro Boost Water Gel,Hy thanks for your review pls do i need to app...,https://www.youtube.com/watch?v=u7usUGrgjmY,youtube,en,neutral,0.537940
646,2024-04-17,Sunscreen Showdown | WATCH BEFORE YOU BUY ⚠️ S...,Thanks for for doing this. I found you whil...,https://www.youtube.com/watch?v=EQTLIAJsBmI,youtube,en,positive,0.894170
647,2024-01-28,Sunscreen Showdown | WATCH BEFORE YOU BUY ⚠️ S...,Thank you so much for the review!!! Very infor...,https://www.youtube.com/watch?v=EQTLIAJsBmI,youtube,en,positive,0.986510
648,2024-04-21,Neutrogena Sunscreen Review,You are too slow and it's annoying. In the fir...,https://www.youtube.com/watch?v=p8u0G6I6lAo,youtube,en,negative,0.914460


# Sentiment Analysis by Week #

In [14]:
!pip install nbformat --upgrade
import plotly.graph_objects as go

# Convert the 'Date' column to datetime
df['Date'] = pd.to_datetime(df['Date'])

# Group by week and sentiment
df['Week'] = df['Date'].dt.to_period('W').apply(lambda r: r.start_time)
weekly_sentiment = df.groupby(['Week', 'Sentiment']).size().unstack(fill_value=0)

# Create the plot
fig = go.Figure()

# Add positive sentiments
if 'positive' in weekly_sentiment:
    fig.add_trace(go.Bar(
        x=weekly_sentiment.index,
        y=weekly_sentiment['positive'],
        name='Positive',
        marker_color='green'
    ))

# Add neutral sentiments
if 'neutral' in weekly_sentiment:
    fig.add_trace(go.Bar(
        x=weekly_sentiment.index,
        y=weekly_sentiment['neutral'],
        name='Neutral',
        marker_color='orange'
    ))

# Add negative sentiments (inverted to point downwards)
if 'negative' in weekly_sentiment:
    fig.add_trace(go.Bar(
        x=weekly_sentiment.index,
        y=-weekly_sentiment['negative'],
        name='Negative',
        marker_color='red'
    ))

# Update layout
fig.update_layout(
    title='Sentiment Over Time',
    xaxis_title='Time',
    yaxis_title='Count',
    barmode='group',
    bargap=0.2,
    bargroupgap=0.1,
    legend_title_text='Sentiment'
)

# Show the plot
fig.show()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


You should consider upgrading via the '/Users/syahrezapratama/.pyenv/versions/3.9.10/bin/python3.9 -m pip install --upgrade pip' command.[0m


In [15]:
df

Unnamed: 0,Date,Title,Comment,Url,Source,Language,Sentiment,Sentiment_Score,Week
0,2024-05-19,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,So ChatGPT told me about this lol. \nBut I hav...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,positive,0.630520,2024-05-13
1,2024-05-12,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I recently started using the water gel and I’m...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,positive,0.971291,2024-05-06
2,2024-05-06,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,(Subbed) good info thank you.🙏🏽,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,positive,0.972633,2024-05-06
3,2024-05-01,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I might stick to my g&g hydration station,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,neutral,0.784086,2024-04-29
4,2024-05-01,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I guess now i know beauty josoen glow seurm di...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,negative,0.886157,2024-04-29
...,...,...,...,...,...,...,...,...,...
645,2023-12-28,How to use Neutrogena Hydro Boost Water Gel,Hy thanks for your review pls do i need to app...,https://www.youtube.com/watch?v=u7usUGrgjmY,youtube,en,neutral,0.537940,2023-12-25
646,2024-04-17,Sunscreen Showdown | WATCH BEFORE YOU BUY ⚠️ S...,Thanks for for doing this. I found you whil...,https://www.youtube.com/watch?v=EQTLIAJsBmI,youtube,en,positive,0.894170,2024-04-15
647,2024-01-28,Sunscreen Showdown | WATCH BEFORE YOU BUY ⚠️ S...,Thank you so much for the review!!! Very infor...,https://www.youtube.com/watch?v=EQTLIAJsBmI,youtube,en,positive,0.986510,2024-01-22
648,2024-04-21,Neutrogena Sunscreen Review,You are too slow and it's annoying. In the fir...,https://www.youtube.com/watch?v=p8u0G6I6lAo,youtube,en,negative,0.914460,2024-04-15


# Sentiment Analysis by Day #

In [16]:
# Group by day and sentiment
df['Day'] = df['Date'].dt.to_period('D').apply(lambda r: r.start_time)
daily_sentiment = df.groupby(['Day', 'Sentiment']).size().unstack(fill_value=0)

# Create the plot
fig = go.Figure()

# Add positive sentiments
if 'positive' in daily_sentiment:
    fig.add_trace(go.Bar(
        x=daily_sentiment.index,
        y=daily_sentiment['positive'],
        name='Positive',
        marker_color='green'
    ))

# Add neutral sentiments
if 'neutral' in daily_sentiment:
    fig.add_trace(go.Bar(
        x=daily_sentiment.index,
        y=daily_sentiment['neutral'],
        name='Neutral',
        marker_color='orange'
    ))

# Add negative sentiments (inverted to point downwards)
if 'negative' in daily_sentiment:
    fig.add_trace(go.Bar(
        x=daily_sentiment.index,
        y=-daily_sentiment['negative'],
        name='Negative',
        marker_color='red'
    ))

# Update layout
fig.update_layout(
    title='Daily Sentiment Over Time',
    xaxis_title='Time',
    yaxis_title='Count',
    barmode='group',  # Change to group bars side by side
    bargap=0.2,
    bargroupgap=0.1,
    legend_title_text='Sentiment',
)

# Show the plot
fig.show()

In [17]:
df

Unnamed: 0,Date,Title,Comment,Url,Source,Language,Sentiment,Sentiment_Score,Week,Day
0,2024-05-19,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,So ChatGPT told me about this lol. \nBut I hav...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,positive,0.630520,2024-05-13,2024-05-19
1,2024-05-12,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I recently started using the water gel and I’m...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,positive,0.971291,2024-05-06,2024-05-12
2,2024-05-06,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,(Subbed) good info thank you.🙏🏽,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,positive,0.972633,2024-05-06,2024-05-06
3,2024-05-01,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I might stick to my g&g hydration station,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,neutral,0.784086,2024-04-29,2024-05-01
4,2024-05-01,Dr. Sugai Compares: Neutrogena Hydro Boost Wat...,I guess now i know beauty josoen glow seurm di...,https://www.youtube.com/watch?v=b3ab3eAnxZw,youtube,en,negative,0.886157,2024-04-29,2024-05-01
...,...,...,...,...,...,...,...,...,...,...
645,2023-12-28,How to use Neutrogena Hydro Boost Water Gel,Hy thanks for your review pls do i need to app...,https://www.youtube.com/watch?v=u7usUGrgjmY,youtube,en,neutral,0.537940,2023-12-25,2023-12-28
646,2024-04-17,Sunscreen Showdown | WATCH BEFORE YOU BUY ⚠️ S...,Thanks for for doing this. I found you whil...,https://www.youtube.com/watch?v=EQTLIAJsBmI,youtube,en,positive,0.894170,2024-04-15,2024-04-17
647,2024-01-28,Sunscreen Showdown | WATCH BEFORE YOU BUY ⚠️ S...,Thank you so much for the review!!! Very infor...,https://www.youtube.com/watch?v=EQTLIAJsBmI,youtube,en,positive,0.986510,2024-01-22,2024-01-28
648,2024-04-21,Neutrogena Sunscreen Review,You are too slow and it's annoying. In the fir...,https://www.youtube.com/watch?v=p8u0G6I6lAo,youtube,en,negative,0.914460,2024-04-15,2024-04-21


In [18]:
csv_filename = 'youtube_comments_with_sentiment.csv'
df.to_csv(csv_filename, index=False)