## Getting API Keys

In [None]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment.
load_dotenv()

# Look up the YouTube Data API key; the result is None when the variable is unset.
YT_KEY = os.environ.get("YOUTUBE_API_KEY")



## Importing Necessary Libraries

In [None]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from youtube_transcript_api import YouTubeTranscriptApi
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd

## 🔑 Authenticate with YouTube Data API v3

In [None]:
# SECURITY: never embed the API key in the notebook. It is read from the
# YOUTUBE_API_KEY environment variable, which load_dotenv() in the first cell
# populates from a local .env file. Any key that was previously committed in
# this cell should be treated as compromised and rotated.
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
if not YOUTUBE_API_KEY:
    # Fail fast with a clear message instead of a confusing API error later.
    raise RuntimeError(
        "YOUTUBE_API_KEY is not set. Add it to your environment or to a .env file."
    )

# Shared YouTube Data API v3 client and VADER analyzer used by the cells below.
youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
analyzer = SentimentIntensityAnalyzer()

## 🔥 Get Trending Videos (from YouTube)

In [None]:
def get_trending_videos(region_code='US', max_results=50):
    """Fetch the "mostPopular" video chart for a region.

    Args:
        region_code: Value sent to the API as ``regionCode``.
        max_results: Value sent to the API as ``maxResults``.

    Returns:
        A list with one dict per item in the response, each holding the keys
        ``video_id``, ``title``, ``channel`` and ``description``.
    """
    response = youtube.videos().list(
        part="snippet,statistics",
        chart="mostPopular",
        regionCode=region_code,
        maxResults=max_results
    ).execute()

    videos = []
    for item in response['items']:
        video_id = item['id']
        snippet = item['snippet']
        videos.append({
            'video_id': video_id,
            'title': snippet['title'],
            'channel': snippet['channelTitle'],
            'description': snippet['description']
        })
    return videos

## 🧠 Extract Transcripts

In [None]:
def get_transcript(video_id):
    """Return a video's transcript as a single space-joined string.

    Retrieval is best-effort: if fetching or joining the transcript fails with
    an ordinary exception, an empty string is returned so the caller can
    continue with the next video.

    Args:
        video_id: ID of the video whose transcript should be fetched.

    Returns:
        The ``text`` field of every transcript entry joined by single spaces,
        or ``""`` when the transcript could not be retrieved.
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(entry['text'] for entry in transcript)
    except Exception:
        # Deliberately not a bare `except`: KeyboardInterrupt and SystemExit
        # must still propagate so a long-running cell can be interrupted.
        return ""

## 💬 Fetch Comments

In [None]:
def get_comments(video_id, max_comments=5):
    """Fetch the text of top-level comments for a video.

    Args:
        video_id: ID of the video whose comment threads are requested.
        max_comments: Value sent to the API as ``maxResults``.

    Returns:
        A list with the ``textDisplay`` of each returned thread's top-level
        comment. An empty list is returned when the response has no ``items``
        key or when the request raises ``HttpError``; in the latter case a
        message is printed first. Every HTTP 403 is reported as "comments
        disabled".
    """
    try:
        response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=max_comments,
            textFormat="plainText"
        ).execute()

        if 'items' not in response:
            return []

        comment_texts = []
        for thread in response['items']:
            top_level = thread['snippet']['topLevelComment']
            comment_texts.append(top_level['snippet']['textDisplay'])
        return comment_texts
    except HttpError as e:
        message = (
            f"🚫 Comments disabled for video: {video_id}"
            if e.resp.status == 403
            else f"⚠️ Error for video {video_id}: {e}"
        )
        print(message)
        return []

## 🧪 Put It Together

In [None]:
%%time
desired_video_count = 5
collected = 0
data = []

all_videos = get_trending_videos(max_results=50)

for vid in all_videos:
    if collected >= desired_video_count:
        break

    video_id = vid['video_id']
    comments = get_comments(video_id, max_comments=5)

    if not comments:
        continue  # Skip video if no usable comments

    transcript = get_transcript(video_id)
    transcript_sentiment = analyzer.polarity_scores(transcript)['compound']

    for comment in comments:
        data.append({
            'video_title': vid['title'],
            'channel': vid['channel'],
            'comment': comment,
            'comment_sentiment': analyzer.polarity_scores(comment)['compound'],
            'transcript_sentiment': transcript_sentiment
        })

    collected += 1

# Converting the data to DataFrame
df = pd.DataFrame(data)
print("✅ Finished collecting data.")

✅ Finished collecting data.
CPU times: user 3.76 s, sys: 53.2 ms, total: 3.82 s
Wall time: 11.3 s


In [None]:
# Preview the first rows of the collected (video, comment) records.
df.head()

Unnamed: 0,video_title,channel,comment,comment_sentiment,transcript_sentiment
0,Doechii - Anxiety (Official Video),IamdoechiiVEVO,آقا جهانی شدیم❤❤❤❤❤❤,0.9803,-0.9945
1,Doechii - Anxiety (Official Video),IamdoechiiVEVO,"Your videos are always so helpful, thank you!",0.7708,-0.9945
2,Doechii - Anxiety (Official Video),IamdoechiiVEVO,Just a cheap version of somebody thay I used t...,0.0,-0.9945
3,Doechii - Anxiety (Official Video),IamdoechiiVEVO,I am saying now. Doechii will be part of the E...,0.0,-0.9945
4,Doechii - Anxiety (Official Video),IamdoechiiVEVO,Her white socks getting dirty from dancing on ...,-0.34,-0.9945


In [None]:
# Summarize column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 5 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   video_title           25 non-null     object 
 1   channel               25 non-null     object 
 2   comment               25 non-null     object 
 3   comment_sentiment     25 non-null     float64
 4   transcript_sentiment  25 non-null     float64
dtypes: float64(2), object(3)
memory usage: 1.1+ KB


In [None]:
# Count rows per video title; each row is one comment.
df["video_title"].value_counts()

Unnamed: 0_level_0,count
video_title,Unnamed: 1_level_1
Doechii - Anxiety (Official Video),5
THE GASLIGHT DISTRICT: PILOT,5
Man. United vs. Lyon: Extended Highlights | UEL Quarter-final Leg 2 | CBS Sports Golazo - Europe,5
The Fantastic Four: First Steps | Official Trailer | Only in Theaters July 25,5
The Best Things In Life Are Unexpected... WE'RE PREGNANT!,5


## 💾 Save to CSV

In [None]:
# Create the output folder first: to_csv does not create missing parent
# directories, so the write would fail on a fresh checkout without it.
os.makedirs("data", exist_ok=True)
df.to_csv("data/youtube_data.csv", index=False)

### 🔮 Suggested Combined Analysis for Master Notebook
**Once Reddit and YouTube data are merged:**

✅ Sentiment Comparison  
Compare Reddit vs. YouTube sentiment for the same topic.

Highlight divergence between video (transcript) sentiment and comment sentiment.

✅ Engagement Analysis  
Plot word count vs. sentiment.  

Comment likes vs. sentiment score (for YouTube).  

✅ Timeline Tracking  
If timestamped, see how sentiment evolves over time across platforms.  

✅ Word Cloud or Topic Modeling  
Extract common themes using LDA or `nltk.FreqDist`.