## Getting API Keys

In [23]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Read the YouTube Data API key and stop right here when it is missing,
# rather than handing None on to the API client in a later cell.
YT_KEY = os.getenv("YOUTUBE_API_KEY")
if not YT_KEY:
    raise RuntimeError(
        "YOUTUBE_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

## Importing Necessary Libraries

In [24]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from youtube_transcript_api import YouTubeTranscriptApi
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd

## 🔑 Authenticate with YouTube Data API v3

In [25]:
# Build the YouTube Data API v3 client using the key loaded from the environment.
YOUTUBE_API_KEY = YT_KEY
youtube = build(
    serviceName='youtube',
    version='v3',
    developerKey=YOUTUBE_API_KEY,
)

# Single VADER analyzer instance reused for every comment and transcript.
analyzer = SentimentIntensityAnalyzer()

## 🔥 Get Trending Videos (from YouTube)

In [26]:
def get_trending_videos(region_code='US', max_results=50):
    """Fetch the "mostPopular" video chart from the YouTube Data API.

    Args:
        region_code: Value sent to the API as ``regionCode``.
        max_results: Value sent to the API as ``maxResults``.

    Returns:
        A list of dicts, one per returned video, with the keys
        ``video_id``, ``title``, ``channel`` and ``description``.
        The list is empty when the response contains no ``items``.
    """
    request = youtube.videos().list(
        part="snippet,statistics",
        chart="mostPopular",
        regionCode=region_code,
        maxResults=max_results
    )
    response = request.execute()

    videos = []
    # A response without 'items' is treated as "no videos" instead of raising
    # KeyError, mirroring the guard used in get_comments.
    for item in response.get('items', []):
        snippet = item['snippet']
        videos.append({
            'video_id': item['id'],
            'title': snippet['title'],
            'channel': snippet['channelTitle'],
            'description': snippet['description'],
        })
    return videos

## 🧠 Extract Transcripts

In [27]:
def get_transcript(video_id):
    """Return a video's transcript as a single space-joined string.

    Args:
        video_id: YouTube video ID to fetch the transcript for.

    Returns:
        The transcript text, or "" when the transcript could not be
        fetched or parsed (a warning is printed in that case).
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(entry['text'] for entry in transcript)
    except Exception as e:
        # Best-effort lookup: any library/network failure means "no transcript".
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so a long collection run can still be stopped,
        # and the printed reason keeps failures from going unnoticed.
        print(f"⚠️ No transcript for video {video_id}: {type(e).__name__}")
        return ""

## 💬 Fetch Comments

In [28]:
def get_comments(video_id, max_comments=5):
    """Fetch the text of top-level comments for a video.

    Args:
        video_id: YouTube video ID whose comment threads are requested.
        max_comments: Value sent to the API as ``maxResults``.

    Returns:
        A list of plain-text comment strings. The list is empty when the
        response has no ``items`` or when the request raised ``HttpError``
        (a message describing the failure is printed).
    """
    import json  # local import: only needed to inspect error payloads

    try:
        request = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=max_comments,
            textFormat="plainText"
        )
        response = request.execute()
        return [
            item['snippet']['topLevelComment']['snippet']['textDisplay']
            for item in response.get('items', [])
        ]
    except HttpError as e:
        # A 403 does not always mean "comments disabled"; the API uses the
        # same status for other conditions (e.g. an exhausted quota). Read the
        # machine-readable reason codes from the error body so the
        # "disabled" message is only printed when that is really the cause.
        try:
            errors = json.loads(e.content)['error']['errors']
            reasons = {err.get('reason') for err in errors}
        except (AttributeError, KeyError, TypeError, ValueError):
            # Unparseable or unexpected payload: fall back to the generic message.
            reasons = set()

        if e.resp.status == 403 and 'commentsDisabled' in reasons:
            print(f"🚫 Comments disabled for video: {video_id}")
        else:
            print(f"⚠️ Error for video {video_id}: {e}")
        return []

## 🧪 Put It Together

In [29]:
%%time
# Collect trending videos that have usable comments, score every comment and
# each video's transcript with VADER, and write one row per comment to CSV.
desired_video_count = 15   # stop after this many videos with comments
comments_per_video = 50    # maxResults requested per video
output_csv = "../data/youtube_data.csv"
# Fixed column order so that even an empty run yields a DataFrame with the
# expected schema (later cells index df["video_title"]).
output_columns = [
    'video_title',
    'channel',
    'comment',
    'comment_sentiment',
    'transcript_sentiment',
    'video_url',
]
collected = 0
data = []

all_videos = get_trending_videos(max_results=50)

for vid in all_videos:
    if collected >= desired_video_count:
        break

    video_id = vid['video_id']
    comments = get_comments(video_id, max_comments=comments_per_video)

    if not comments:
        continue  # Skip video if no usable comments

    # get_transcript returns "" when no transcript could be fetched.
    # NOTE(review): an empty transcript presumably scores 0.0, which is then
    # indistinguishable from a genuinely neutral transcript — confirm whether
    # downstream analysis needs to tell the two apart.
    transcript = get_transcript(video_id)
    transcript_sentiment = analyzer.polarity_scores(transcript)['compound']

    video_url = f"https://www.youtube.com/watch?v={video_id}"

    # One output row per comment; video-level fields repeat on every row.
    for comment in comments:
        data.append({
            'video_title': vid['title'],
            'channel': vid['channel'],
            'comment': comment,
            'comment_sentiment': analyzer.polarity_scores(comment)['compound'],
            'transcript_sentiment': transcript_sentiment,
            'video_url': video_url
        })

    collected += 1

df = pd.DataFrame(data, columns=output_columns)
# Create the output folder first so the write does not fail when it is missing.
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
df.to_csv(output_csv, index=False)
print("✅ Finished collecting data.")


✅ Finished collecting data.
CPU times: user 309 ms, sys: 28 ms, total: 337 ms
Wall time: 9.69 s


In [30]:
# Preview the last 15 rows of the collected data.
df.tail(15)

Unnamed: 0,video_title,channel,comment,comment_sentiment,transcript_sentiment,video_url
735,Star Wars Zero Company | Official Announce Tra...,EA Star Wars,Slop,0.0,0.0,https://www.youtube.com/watch?v=rcxnRaZ6slU
736,Star Wars Zero Company | Official Announce Tra...,EA Star Wars,Wish they'd chosen a more interesting time per...,0.7425,0.0,https://www.youtube.com/watch?v=rcxnRaZ6slU
737,Star Wars Zero Company | Official Announce Tra...,EA Star Wars,YESSSSSSSSSSSSSSSSSSS\n\nINJECT THIS INTO MY V...,0.0,0.0,https://www.youtube.com/watch?v=rcxnRaZ6slU
738,Star Wars Zero Company | Official Announce Tra...,EA Star Wars,Yes!!,0.508,0.0,https://www.youtube.com/watch?v=rcxnRaZ6slU
739,Star Wars Zero Company | Official Announce Tra...,EA Star Wars,Is that an Umbaran that's crazy,-0.34,0.0,https://www.youtube.com/watch?v=rcxnRaZ6slU
740,Star Wars Zero Company | Official Announce Tra...,EA Star Wars,When I saw mention of an xcom-style Star Wars ...,0.6476,0.0,https://www.youtube.com/watch?v=rcxnRaZ6slU
741,Star Wars Zero Company | Official Announce Tra...,EA Star Wars,An RTS is the last thing I ever expected. Defi...,0.5093,0.0,https://www.youtube.com/watch?v=rcxnRaZ6slU
742,Star Wars Zero Company | Official Announce Tra...,EA Star Wars,What is this a mobile game? Another EA fail. ...,-0.5423,0.0,https://www.youtube.com/watch?v=rcxnRaZ6slU
743,Star Wars Zero Company | Official Announce Tra...,EA Star Wars,The United Kingdom\r\n\r\nThe UK’s Health and ...,-0.9075,0.0,https://www.youtube.com/watch?v=rcxnRaZ6slU
744,Star Wars Zero Company | Official Announce Tra...,EA Star Wars,Make Star Wars great again!,0.2003,0.0,https://www.youtube.com/watch?v=rcxnRaZ6slU


In [31]:
# Summarize the columns: dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750 entries, 0 to 749
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   video_title           750 non-null    object 
 1   channel               750 non-null    object 
 2   comment               750 non-null    object 
 3   comment_sentiment     750 non-null    float64
 4   transcript_sentiment  750 non-null    float64
 5   video_url             750 non-null    object 
dtypes: float64(2), object(4)
memory usage: 35.3+ KB


In [32]:
# Count rows per video title (each row is one collected comment).
df["video_title"].value_counts()

video_title
Brawl Talk: A NEW BRAWLER RARITY?!                                                                      50
$1 vs $25,000 Build Challenge                                                                           50
Doechii - Anxiety (Official Video)                                                                      50
THE GASLIGHT DISTRICT: PILOT                                                                            50
Remontada épica. Abajo por 2 goles, el Barcelona ganó 4-3 a Celta de Vigo. Raphinha, héroe | La Liga    50
My Daughter Survives TEN BROTHERS                                                                       50
HIM | Official Teaser Trailer                                                                           50
Man. United vs. Lyon: Extended Highlights | UEL Quarter-final Leg 2 | CBS Sports Golazo - Europe        50
The Best Things In Life Are Unexpected... WE'RE PREGNANT!                                               50
The Fantastic Four: First

## 💾 Save to CSV

In [33]:
# Write the DataFrame to CSV without the index column.
# NOTE(review): the collection cell above already writes this same path,
# so this save looks redundant — confirm before removing either one.
df.to_csv("../data/youtube_data.csv", index=False)

### 🔮 Suggested Combined Analysis for Master Notebook
**Once Reddit and YouTube data are merged:**

✅ Sentiment Comparison  
Compare Reddit vs. YouTube sentiment for the same topic.

Highlight where a video's transcript sentiment diverges from the sentiment of its comments.

✅ Engagement Analysis  
Plot word count vs. sentiment.  

Plot comment likes vs. sentiment score (YouTube only).  

✅ Timeline Tracking  
If timestamped, see how sentiment evolves over time across platforms.  

✅ Word Cloud or Topic Modeling  
Extract common themes using LDA or nltk.FreqDist.