## Getting API Keys

In [12]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment.
load_dotenv()

# Read the key from the environment so it is never hard-coded in the notebook.
YT_KEY = os.getenv("YOUTUBE_API_KEY")
if not YT_KEY:
    # Fail fast: without a key every later API call would fail with a far
    # less obvious error.
    raise RuntimeError(
        "YOUTUBE_API_KEY is not set; add it to your environment or .env file."
    )

## Importing Necessary Libraries

In [13]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from youtube_transcript_api import YouTubeTranscriptApi
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd

## 🔑 Authenticate with YouTube Data API v3

In [14]:
# Alias the key loaded from the environment under the name used for the client.
YOUTUBE_API_KEY = YT_KEY

# Shared YouTube Data API v3 client used by the fetch helpers in this notebook.
youtube = build(
    serviceName="youtube",
    version="v3",
    developerKey=YOUTUBE_API_KEY,
)

# Single VADER analyzer instance reused for every comment and transcript.
analyzer = SentimentIntensityAnalyzer()

## 🔥 Get Trending Videos (from YouTube)

In [15]:
def get_trending_videos(region_code='US', max_results=50):
    """Fetch the "mostPopular" chart for a region from the YouTube Data API.

    Args:
        region_code: Region code passed to the API as ``regionCode``.
        max_results: Value passed to the API as ``maxResults``.

    Returns:
        A list of dicts, one per returned item, with the keys ``video_id``,
        ``title``, ``channel`` and ``description``. The ``statistics`` part is
        requested from the API but is not included in the returned dicts.
    """
    query = {
        "part": "snippet,statistics",
        "chart": "mostPopular",
        "regionCode": region_code,
        "maxResults": max_results,
    }
    payload = youtube.videos().list(**query).execute()

    trending = []
    for entry in payload['items']:
        snippet = entry['snippet']
        trending.append({
            'video_id': entry['id'],
            'title': snippet['title'],
            'channel': snippet['channelTitle'],
            'description': snippet['description'],
        })
    return trending

## 🧠 Extract Transcripts

In [16]:
def get_transcript(video_id):
    """Return a video's transcript as one space-joined string.

    Best-effort: any ordinary failure while fetching or joining the transcript
    yields an empty string so the caller can keep going, and a short warning
    naming the exception type is printed so the failure is visible.

    Args:
        video_id: YouTube video id to fetch the transcript for.

    Returns:
        The ``'text'`` fields of all transcript entries joined with single
        spaces, or ``""`` when the transcript could not be retrieved.
    """
    # NOTE(review): newer youtube-transcript-api releases appear to have
    # changed/removed the static get_transcript call - confirm against the
    # installed version if every video reports a failure here.
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(entry['text'] for entry in transcript)
    except Exception as exc:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt / SystemExit still stop a long-running cell.
        print(f"⚠️ No transcript for video {video_id}: {type(exc).__name__}")
        return ""

## 💬 Fetch Comments

In [17]:
def _http_error_reason(err):
    """Return the first API error ``reason`` from an HttpError body, or None."""
    import json

    try:
        payload = json.loads(err.content)
        return payload['error']['errors'][0]['reason']
    except (AttributeError, ValueError, TypeError, KeyError, IndexError):
        # Body missing or not in the expected JSON error shape.
        return None


def get_comments(video_id, max_comments=5):
    """Fetch the text of top-level comments for a video.

    Best-effort: any ``HttpError`` is reported with ``print`` and results in an
    empty list rather than an exception.

    Args:
        video_id: YouTube video id whose comment threads are requested.
        max_comments: Value passed to the API as ``maxResults``.

    Returns:
        A list of ``textDisplay`` strings, one per returned comment thread;
        empty when the response has no items or the request failed.
    """
    try:
        request = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=max_comments,
            textFormat="plainText"
        )
        response = request.execute()
        return [
            item['snippet']['topLevelComment']['snippet']['textDisplay']
            for item in response.get('items', [])
        ]
    except HttpError as e:
        # A 403 is not always "comments disabled" (quota and permission errors
        # also use 403), so only say so when the API reason confirms it.
        if e.resp.status == 403 and _http_error_reason(e) == 'commentsDisabled':
            print(f"🚫 Comments disabled for video: {video_id}")
        else:
            print(f"⚠️ Error for video {video_id}: {e}")
        return []

## 🧪 Put It Together

In [None]:
%%time
desired_video_count = 15
collected = 0
data = []

all_videos = get_trending_videos(max_results=50)

for vid in all_videos:
    if collected >= desired_video_count:
        break

    video_id = vid['video_id']
    comments = get_comments(video_id, max_comments=50)

    if not comments:
        continue  # Skip video if no usable comments

    transcript = get_transcript(video_id)
    transcript_sentiment = analyzer.polarity_scores(transcript)['compound']

    video_url = f"https://www.youtube.com/watch?v={video_id}"

    for comment in comments:
        data.append({
            'video_title': vid['title'],
            'channel': vid['channel'],
            'comment': comment,
            'comment_sentiment': analyzer.polarity_scores(comment)['compound'],
            'transcript_sentiment': transcript_sentiment,
            'video_url': video_url  # ✅ added here
        })

    collected += 1

df = pd.DataFrame(data)
df.to_csv("../data/youtube_data.csv", index=False)
print("✅ Finished collecting data.")


✅ Finished collecting data.
CPU times: user 249 ms, sys: 34.8 ms, total: 284 ms
Wall time: 10.2 s


In [19]:
# Peek at the last 15 rows of the collected comment-level frame.
df.tail(15)

Unnamed: 0,video_title,channel,comment,comment_sentiment,transcript_sentiment,video_url
59,EBK Jaaybo - Homebody (Official Music Video),EBK Jaaybo,Gzz,0.0,0.0,https://www.youtube.com/watch?v=sI0VDF65Cac
60,EBK Jaaybo - Homebody (Official Music Video),EBK Jaaybo,Standover mUsik nexr,0.0,0.0,https://www.youtube.com/watch?v=sI0VDF65Cac
61,EBK Jaaybo - Homebody (Official Music Video),EBK Jaaybo,GOOD MIX . KEEP ENGINEER,0.5622,0.0,https://www.youtube.com/watch?v=sI0VDF65Cac
62,EBK Jaaybo - Homebody (Official Music Video),EBK Jaaybo,Gzz,0.0,0.0,https://www.youtube.com/watch?v=sI0VDF65Cac
63,EBK Jaaybo - Homebody (Official Music Video),EBK Jaaybo,this nigga on a gen run like yb every song top...,0.5106,0.0,https://www.youtube.com/watch?v=sI0VDF65Cac
64,아스트로(ASTRO) - 꿈속의 문(Moon) : Memory of the Moon,ASTRO 아스트로,😢😢😢😢😘😘,-0.7783,0.0,https://www.youtube.com/watch?v=UUFJlUy5aA4
65,아스트로(ASTRO) - 꿈속의 문(Moon) : Memory of the Moon,ASTRO 아스트로,La cancion es increible u toca el corazon de u...,0.0,0.0,https://www.youtube.com/watch?v=UUFJlUy5aA4
66,아스트로(ASTRO) - 꿈속의 문(Moon) : Memory of the Moon,ASTRO 아스트로,Beautiful song! I cried like a baby and felt t...,0.9686,0.0,https://www.youtube.com/watch?v=UUFJlUy5aA4
67,아스트로(ASTRO) - 꿈속의 문(Moon) : Memory of the Moon,ASTRO 아스트로,I've been an Astro fan for a lot of years. I ...,-0.8591,0.0,https://www.youtube.com/watch?v=UUFJlUy5aA4
68,아스트로(ASTRO) - 꿈속의 문(Moon) : Memory of the Moon,ASTRO 아스트로,This song was soo strong.... I am not an Astro...,0.9323,0.0,https://www.youtube.com/watch?v=UUFJlUy5aA4


In [20]:
# Summarize column dtypes and non-null counts to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74 entries, 0 to 73
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   video_title           74 non-null     object 
 1   channel               74 non-null     object 
 2   comment               74 non-null     object 
 3   comment_sentiment     74 non-null     float64
 4   transcript_sentiment  74 non-null     float64
 5   video_url             74 non-null     object 
dtypes: float64(2), object(4)
memory usage: 3.6+ KB


In [21]:
# Count how many comment rows were collected for each video title.
df["video_title"].value_counts()

video_title
Brawl Talk: A NEW BRAWLER RARITY?!                                                                      5
$1 vs $25,000 Build Challenge                                                                           5
Doechii - Anxiety (Official Video)                                                                      5
Remontada épica. Abajo por 2 goles, el Barcelona ganó 4-3 a Celta de Vigo. Raphinha, héroe | La Liga    5
My Daughter Survives TEN BROTHERS                                                                       5
HIM | Official Teaser Trailer                                                                           5
Man. United vs. Lyon: Extended Highlights | UEL Quarter-final Leg 2 | CBS Sports Golazo - Europe        5
Telling My Husband I'm Pregnant                                                                         5
The Best Things In Life Are Unexpected... WE'RE PREGNANT!                                               5
The Fantastic Four: First Steps | 

## 💾 Save to CSV

In [22]:
# Writes the frame to the same CSV path that the collection cell above already wrote.
df.to_csv("../data/youtube_data.csv", index=False)

### 🔮 Suggested Combined Analysis for Master Notebook
**Once Reddit and YouTube data are merged:**

✅ Sentiment Comparison  
Compare Reddit vs. YouTube sentiment for the same topic.

Highlight divergence between video (transcript) sentiment and comment sentiment.

✅ Engagement Analysis  
Plot word count vs. sentiment.  

Comment likes vs. sentiment score (for YouTube).  

✅ Timeline Tracking  
If timestamped, see how sentiment evolves over time across platforms.  

✅ Word Cloud or Topic Modeling  
Extract common themes using LDA or nltk.FreqDist.