In [2]:
import googleapiclient.discovery
from googleapiclient.errors import HttpError
from textblob import TextBlob
import matplotlib.pyplot as plt
import pandas as pd
from langdetect import detect, LangDetectException

In [3]:
# Set up YouTube Data API
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = "AIzaSyAMVbpj1t3zoXDLcZYBRp_i8hrt8uAn_JY"

youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey=DEVELOPER_KEY)

In [4]:
from datetime import datetime, timedelta

# setting timeframe
six_months_ago = datetime.now() - timedelta(days=180)

In [5]:
# get channel metadata
def get_channel_metadata(channel_name):
    request = youtube.search().list(
        q=channel_name,
        part="snippet",
        type="channel"
    )
    response = request.execute()
    # print(response)
    
    for item in response['items']:
        if item['snippet']['title'].lower() == channel_name.lower():
            # return item['snippet']['channelId']
            return item['snippet']
    
    raise ValueError("Channel not found")

In [6]:
def get_latest_videos(query, max_results=2):
    request = youtube.search().list(
        q=query,
        # channelId=channel_id,
        part="snippet",
        order="relevance",
        maxResults=max_results,
        # q=search_query
    )
    response = request.execute()
    print('response', response)
    
    videos = []
    for item in response['items']:
        if item['id']['kind'] == 'youtube#video':
            videos.append({
            'videoId': item['id']['videoId'],
            'title': item['snippet']['title'],
            'publishedAt': item['snippet']['publishedAt']
        })
    
    return videos


In [7]:
data = get_latest_videos('neutrogena')

response {'kind': 'youtube#searchListResponse', 'etag': '4UWeNH3EhhZ4RT5ccqlcF-pJ7D4', 'nextPageToken': 'CAIQAA', 'regionCode': 'DE', 'pageInfo': {'totalResults': 1000000, 'resultsPerPage': 2}, 'items': [{'kind': 'youtube#searchResult', 'etag': 'uoUMzsVj3yOXh_b8FQYxfn65xYo', 'id': {'kind': 'youtube#channel', 'channelId': 'UCNGr_3LSXp3KL55jZW7Pvhg'}, 'snippet': {'publishedAt': '2011-05-05T20:05:45Z', 'channelId': 'UCNGr_3LSXp3KL55jZW7Pvhg', 'title': 'Neutrogena', 'description': 'Welcome to the Neutrogena® YouTube Channel, where you can find anything from makeup tutorials to beauty guru reviews.', 'thumbnails': {'default': {'url': 'https://yt3.ggpht.com/ytc/AIdro_nrCCIwr97K0vWpJwrzubNlb2B9pSCjPvnqQCPvn-3TJ20=s88-c-k-c0xffffffff-no-rj-mo'}, 'medium': {'url': 'https://yt3.ggpht.com/ytc/AIdro_nrCCIwr97K0vWpJwrzubNlb2B9pSCjPvnqQCPvn-3TJ20=s240-c-k-c0xffffffff-no-rj-mo'}, 'high': {'url': 'https://yt3.ggpht.com/ytc/AIdro_nrCCIwr97K0vWpJwrzubNlb2B9pSCjPvnqQCPvn-3TJ20=s800-c-k-c0xffffffff-no-rj-

In [8]:
def get_comments(video_id, max_results=50):
    comments = []
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        # maxResults=max_results,
        textFormat="plainText",
    )
    try:
        response = request.execute()
        print(response)
    except HttpError as e:
        if e.resp.status == 403:
            error_details = e.error_details
            for detail in error_details:
                if detail.get('reason') == 'commentsDisabled':
                    print(f"Comments are disabled for video: {video_id}")
                    return comments
        else:
            raise e
    
    # while request and len(comments) < max_results:
    while request:
        for item in response['items']:
            comment_snippet = item['snippet']['topLevelComment']['snippet']
            comment_text = comment_snippet['textOriginal']
            comment_date = datetime.strptime(comment_snippet['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")
            if comment_date >= six_months_ago and 'neutrogena' in comment_text:
                comments.append(comment_text)
            # comments.append(comment_text)
            # language = TextBlob(comment).detect_language()
            # if language == 'en':
            #     comments.append(comment)
        
        if 'nextPageToken' in response:
            request = youtube.commentThreads().list(
                part="snippet",
                videoId=video_id,
                # maxResults=max_results - len(comments),
                pageToken=response['nextPageToken'],
                textFormat="plainText"
            )
            response = request.execute()
        else:
            break
    
    return comments


In [9]:
neutrogena_comment = get_comments('GJOHSaNyb4E')
neutrogena_comment

{'kind': 'youtube#commentThreadListResponse', 'etag': 'ZItkK3Y4muKZRlhWd9wRlnXZ1FM', 'nextPageToken': 'Z2V0X25ld2VzdF9maXJzdC0tQ2dnSWdBUVZGN2ZST0JJRkNKMGdHQUVTQlFpSUlCZ0FFZ1VJaHlBWUFCSUZDSWtnR0FBU0JRaW9JQmdBR0FBaURnb01DS0Q5dmJFR0VPQ1U5NVVC', 'pageInfo': {'totalResults': 20, 'resultsPerPage': 20}, 'items': [{'kind': 'youtube#commentThread', 'etag': 'f8M_NkxIzENF8k94BwFVXKH0I9Q', 'id': 'UgwamWHFztZueyn8jGZ4AaABAg', 'snippet': {'channelId': 'UCnxmUrGMtpQT844Yd_l7Zyg', 'videoId': 'GJOHSaNyb4E', 'topLevelComment': {'kind': 'youtube#comment', 'etag': 'Z5VaN6X2M6ljehAYDUdbdSyGZTw', 'id': 'UgwamWHFztZueyn8jGZ4AaABAg', 'snippet': {'channelId': 'UCnxmUrGMtpQT844Yd_l7Zyg', 'videoId': 'GJOHSaNyb4E', 'textDisplay': 'Which one of these would you recommend for someone with seborrheic dermatitis?', 'textOriginal': 'Which one of these would you recommend for someone with seborrheic dermatitis?', 'authorDisplayName': '@elizabethhunt5382', 'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AIdro_n5jhBMg

["Hello , I noticed that in the neutrogena hydro boost gel, the bigger tube's 1.7 do have the ingredient blue 1 while the smaller ones do not.why is that? Same with the hydro body gel and cream",
 'the new formula of neutrogena hydro boost water gel and gel cream both have tocopherol and citric acid🙂 these breakout my sensitive  combination-acne prone skin. The aveeno calm+restore oat gel moisturizer also re-formulated their moisturizer last year and added citric acid and tocopherol🙂\n\n@Dr Dray please recommend some good non-comedogenic moisturizers withoout citric acid,tocopherol and any sort of oils😭',
 'I love neutrogena skincare! They also discontinued the Daily moisturizer with AHA and Spf 15...heartbroken!',
 'I picked up the water gel randomly as my first moisturizer. I LOVE the way this feels when I put it on! I get so excited. It’s refreshing. \n\nIdk if it’s because I’m 36 or if I have some issue, but in the past year my forehead has developed wrinkles and I now have mouth l

In [10]:
len(neutrogena_comment)

10

In [21]:
def search_videos(query, max_results=200):
    try:
        request = youtube.search().list(
            q=query,
            part="snippet",
            # order="relevance",
            maxResults=max_results
        )
        response = request.execute()
        
        videos = []
        for item in response['items']:
            if item['id']['kind'] == 'youtube#video':
                videos.append({
                    'videoId': item['id']['videoId'],
                    'title': item['snippet']['title'],
                    'publishedAt': item['snippet']['publishedAt']
                })
        return videos
    except HttpError as e:
        print(f"An HTTP error {e.resp.status} occurred: {e.content}")
        return []


In [22]:
def get_comments(video_id, video_title, language='en'):
    comments = []
    six_months_ago = datetime.now() - timedelta(days=180)
    
    try:
        request = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            textFormat="plainText",
        )
        response = request.execute()
        
        while request:
            for item in response['items']:
                comment_snippet = item['snippet']['topLevelComment']['snippet']
                comment_text = comment_snippet['textOriginal']
                comment_date = datetime.strptime(comment_snippet['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")

                # detect language
                try: 
                    detected_language = detect(comment_text)
                except LangDetectException:
                    detected_language = None
                
                # if comment_date >= six_months_ago and 'neutrogena' in comment_text and detected_language == language:
                if comment_date >= six_months_ago and detected_language == language:
                    comments.append({
                        'Date': comment_date.strftime('%Y-%m-%d'),
                        'Title': video_title,
                        'Comment': comment_text,
                        'Url': f'https://www.youtube.com/watch?v={video_id}',
                        # 'videoID': video_id,
                        'Source': 'youtube',
                        'Language': detected_language,
                        # 'sentiment': ''
                    })
            
            if 'nextPageToken' in response:
                request = youtube.commentThreads().list(
                    part="snippet",
                    videoId=video_id,
                    pageToken=response['nextPageToken'],
                    textFormat="plainText"
                )
                response = request.execute()
            else:
                break
    except HttpError as e:
        print(f"An HTTP error {e.resp.status} occurred: {e.content}")
    
    return comments

In [25]:
def create_dataframe(comments):
    df = pd.DataFrame(comments, columns=['Date', 'Title', 'Comment', 'Url', 'Source', 'Language'])
    return df

In [26]:
query = "neutrogena"
videos = search_videos(query)
    
all_comments = []
for video in videos:
    video_comments = get_comments(video['videoId'], video['title'])
    all_comments.extend(video_comments)
    
df = create_dataframe(all_comments)

df

An HTTP error 403 occurred: b'{\n  "error": {\n    "code": 403,\n    "message": "The video identified by the \\u003ccode\\u003e\\u003ca href=\\"/youtube/v3/docs/commentThreads/list#videoId\\"\\u003evideoId\\u003c/a\\u003e\\u003c/code\\u003e parameter has disabled comments.",\n    "errors": [\n      {\n        "message": "The video identified by the \\u003ccode\\u003e\\u003ca href=\\"/youtube/v3/docs/commentThreads/list#videoId\\"\\u003evideoId\\u003c/a\\u003e\\u003c/code\\u003e parameter has disabled comments.",\n        "domain": "youtube.commentThread",\n        "reason": "commentsDisabled",\n        "location": "videoId",\n        "locationType": "parameter"\n      }\n    ]\n  }\n}\n'
An HTTP error 403 occurred: b'{\n  "error": {\n    "code": 403,\n    "message": "The video identified by the \\u003ccode\\u003e\\u003ca href=\\"/youtube/v3/docs/commentThreads/list#videoId\\"\\u003evideoId\\u003c/a\\u003e\\u003c/code\\u003e parameter has disabled comments.",\n    "errors": [\n      {\n

Unnamed: 0,Date,Title,Comment,Url,Source,Language
0,2024-04-21,Neutrogena ultra sheer dry touch sunblock suns...,Yeh review hua ya sunscreen application guide....,https://www.youtube.com/watch?v=hFgNhm862ow,youtube,en
1,2024-01-16,Neutrogena ultra sheer dry touch sunblock suns...,They have increased their price .. I have swit...,https://www.youtube.com/watch?v=hFgNhm862ow,youtube,en
2,2024-03-23,"In one week, Jennifer Garner&#39;s Skin Transf...",Why is the product being marketed to very youn...,https://www.youtube.com/watch?v=n714NHr7neI,youtube,en
3,2024-06-08,BEST AND WORST OF NEUTROGENA #shorts,Neutrogena gel cream tried it hate its absolut...,https://www.youtube.com/watch?v=asKKlL027CU,youtube,en
4,2024-05-22,BEST AND WORST OF NEUTROGENA #shorts,What about sunscreen form Neutrogena,https://www.youtube.com/watch?v=asKKlL027CU,youtube,en
...,...,...,...,...,...,...
551,2024-05-02,Neutrogena Retinol Serum | Our Point Of View,Is this serum oily?,https://www.youtube.com/watch?v=nRqIExW_44Q,youtube,en
552,2024-05-31,DOCTOR V Reviews NEUTROGENA for skin of colour...,But some says that dimethicone isn't good for ...,https://www.youtube.com/watch?v=AtGl9vj5ijE,youtube,en
553,2024-04-06,DOCTOR V Reviews NEUTROGENA for skin of colour...,I tried had massive breakouts,https://www.youtube.com/watch?v=AtGl9vj5ijE,youtube,en
554,2024-03-14,DOCTOR V Reviews NEUTROGENA for skin of colour...,Skin color has nothing to do with race.Caucasi...,https://www.youtube.com/watch?v=AtGl9vj5ijE,youtube,en


In [27]:
csv_filename = 'youtube_comments.csv'
df.to_csv(csv_filename, index=False)

### the code below this is currently not being used for creating the csv file ###

In [15]:
df

Unnamed: 0,date,text,source,videoID,videoURL,language,sentiment
0,2024-03-23,Why is the product being marketed to very youn...,youtube,n714NHr7neI,https://www.youtube.com/watch?v=n714NHr7neI,en,
1,2024-06-08,Neutrogena gel cream tried it hate its absolut...,youtube,asKKlL027CU,https://www.youtube.com/watch?v=asKKlL027CU,en,
2,2024-05-22,What about sunscreen form Neutrogena,youtube,asKKlL027CU,https://www.youtube.com/watch?v=asKKlL027CU,en,
3,2024-05-21,Their deep clean face wash is the best. I can ...,youtube,asKKlL027CU,https://www.youtube.com/watch?v=asKKlL027CU,en,
4,2024-05-18,Yes I agree that this should be a series with ...,youtube,asKKlL027CU,https://www.youtube.com/watch?v=asKKlL027CU,en,
...,...,...,...,...,...,...,...
618,2024-02-18,how much it iss ?,youtube,p8u0G6I6lAo,https://www.youtube.com/watch?v=p8u0G6I6lAo,en,
619,2023-12-17,White cast Sunscreens look great under foundat...,youtube,p8u0G6I6lAo,https://www.youtube.com/watch?v=p8u0G6I6lAo,en,
620,2024-02-27,You’re out of your depth,youtube,PHmKQ6J8IHg,https://www.youtube.com/watch?v=PHmKQ6J8IHg,en,
621,2024-04-17,Thanks for for doing this. I found you whil...,youtube,EQTLIAJsBmI,https://www.youtube.com/watch?v=EQTLIAJsBmI,en,


In [16]:
start_date = '2024-01-01'
end_date = '2024-06-01'

filtered_df = df[(df['date'] >= start_date) & (df['date'] <= end_date)]
filtered_df

Unnamed: 0,date,text,source,videoID,videoURL,language,sentiment
0,2024-03-23,Why is the product being marketed to very youn...,youtube,n714NHr7neI,https://www.youtube.com/watch?v=n714NHr7neI,en,
2,2024-05-22,What about sunscreen form Neutrogena,youtube,asKKlL027CU,https://www.youtube.com/watch?v=asKKlL027CU,en,
3,2024-05-21,Their deep clean face wash is the best. I can ...,youtube,asKKlL027CU,https://www.youtube.com/watch?v=asKKlL027CU,en,
4,2024-05-18,Yes I agree that this should be a series with ...,youtube,asKKlL027CU,https://www.youtube.com/watch?v=asKKlL027CU,en,
5,2024-05-18,What about Witch Hazel astringent,youtube,asKKlL027CU,https://www.youtube.com/watch?v=asKKlL027CU,en,
...,...,...,...,...,...,...,...
617,2024-04-21,You are too slow and it's annoying. In the fir...,youtube,p8u0G6I6lAo,https://www.youtube.com/watch?v=p8u0G6I6lAo,en,
618,2024-02-18,how much it iss ?,youtube,p8u0G6I6lAo,https://www.youtube.com/watch?v=p8u0G6I6lAo,en,
620,2024-02-27,You’re out of your depth,youtube,PHmKQ6J8IHg,https://www.youtube.com/watch?v=PHmKQ6J8IHg,en,
621,2024-04-17,Thanks for for doing this. I found you whil...,youtube,EQTLIAJsBmI,https://www.youtube.com/watch?v=EQTLIAJsBmI,en,


In [17]:
from transformers import pipeline, AutoTokenizer

model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
sentiment_pipeline = pipeline("sentiment-analysis", model=model_name)

  from .autonotebook import tqdm as notebook_tqdm


In [18]:
def truncate_text(text, max_tokens=510):
    tokens = tokenizer.tokenize(text)
    if len(tokens) > max_tokens:
        tokens = tokens[:max_tokens]
    return tokenizer.convert_tokens_to_string(tokens)

def get_sentiment_score(text):
    truncated_text = truncate_text(text)
    result = sentiment_pipeline(truncated_text)[0]
    return result['label'], result['score']

In [19]:
df['sentiment'], df['sentiment_score'] = zip(*df['text'].apply(get_sentiment_score))

Token indices sequence length is longer than the specified maximum sequence length for this model (612 > 512). Running this sequence through the model will result in indexing errors


In [20]:
df

Unnamed: 0,date,text,source,videoID,videoURL,language,sentiment,sentiment_score
0,2024-03-23,Why is the product being marketed to very youn...,youtube,n714NHr7neI,https://www.youtube.com/watch?v=n714NHr7neI,en,NEGATIVE,0.998036
1,2024-06-08,Neutrogena gel cream tried it hate its absolut...,youtube,asKKlL027CU,https://www.youtube.com/watch?v=asKKlL027CU,en,NEGATIVE,0.999554
2,2024-05-22,What about sunscreen form Neutrogena,youtube,asKKlL027CU,https://www.youtube.com/watch?v=asKKlL027CU,en,NEGATIVE,0.996303
3,2024-05-21,Their deep clean face wash is the best. I can ...,youtube,asKKlL027CU,https://www.youtube.com/watch?v=asKKlL027CU,en,POSITIVE,0.999797
4,2024-05-18,Yes I agree that this should be a series with ...,youtube,asKKlL027CU,https://www.youtube.com/watch?v=asKKlL027CU,en,POSITIVE,0.932193
...,...,...,...,...,...,...,...,...
618,2024-02-18,how much it iss ?,youtube,p8u0G6I6lAo,https://www.youtube.com/watch?v=p8u0G6I6lAo,en,NEGATIVE,0.981490
619,2023-12-17,White cast Sunscreens look great under foundat...,youtube,p8u0G6I6lAo,https://www.youtube.com/watch?v=p8u0G6I6lAo,en,POSITIVE,0.998665
620,2024-02-27,You’re out of your depth,youtube,PHmKQ6J8IHg,https://www.youtube.com/watch?v=PHmKQ6J8IHg,en,POSITIVE,0.967587
621,2024-04-17,Thanks for for doing this. I found you whil...,youtube,EQTLIAJsBmI,https://www.youtube.com/watch?v=EQTLIAJsBmI,en,POSITIVE,0.998847
