In [None]:
# --------------------------------------------
# 1. Install + Import Required Libraries
# --------------------------------------------
!pip install --quiet google-api-python-client textblob nltk

import os
import pandas as pd
import re
import nltk
from textblob import TextBlob
from googleapiclient.discovery import build
from nltk.corpus import stopwords
from collections import Counter

# Download the NLTK data packages this notebook requests.
for nltk_resource in ('stopwords', 'punkt'):
    nltk.download(nltk_resource)

# --------------------------------------------
# 2. Mount Google Drive & Load API Key
# --------------------------------------------
from google.colab import drive
# Mount Drive (forcing a remount) so the key file path below is reachable.
drive.mount('/content/drive', force_remount=True)

# The API key is read from a text file on Drive; surrounding whitespace
# (e.g. a trailing newline) is stripped.
key_path = '/content/drive/MyDrive/Colab/DAI_AID/keys/google_key.txt'
with open(key_path) as key_file:
    api_key = key_file.read().strip()

# --------------------------------------------
# 3. Search & Collect Comments
# --------------------------------------------
youtube = build('youtube', 'v3', developerKey=api_key)
search_term = "Sennheiser Momentum 4 Wireless review"
max_results = 5

# Columns of the resulting DataFrame. Passing them explicitly keeps the schema
# stable even when no comments could be collected, so later column lookups
# such as df['comment'] do not fail with a KeyError on an empty frame.
COMMENT_COLUMNS = ["video_id", "video_title", "comment", "likes"]


def _fetch_top_level_comments(client, video_id, title):
    """Collect the top-level comment of every comment thread on one video.

    Pages through ``commentThreads().list`` (100 threads per request) until
    the response no longer carries a ``nextPageToken``.

    Args:
        client: API client object returned by ``build('youtube', 'v3', ...)``.
        video_id: ID of the video whose comment threads are requested.
        title: Video title, copied into every returned row.

    Returns:
        A list of dicts with the keys ``video_id``, ``video_title``,
        ``comment`` and ``likes``. If a request fails, the error is printed
        and the rows gathered so far are returned (best effort).
    """
    rows = []
    page_token = None
    try:
        while True:
            params = {
                'part': 'snippet',
                'videoId': video_id,
                'maxResults': 100,
                'textFormat': 'plainText',
            }
            # Only send pageToken for follow-up pages.
            if page_token:
                params['pageToken'] = page_token
            response = client.commentThreads().list(**params).execute()

            for item in response.get('items', []):
                comment = item['snippet']['topLevelComment']['snippet']
                rows.append({
                    "video_id": video_id,
                    "video_title": title,
                    "comment": comment['textDisplay'],
                    "likes": comment['likeCount']
                })

            page_token = response.get('nextPageToken')
            if not page_token:
                break
    except Exception as e:
        # Deliberately best-effort: one failing video must not abort the
        # whole collection run; keep whatever was fetched before the error.
        print(f"Error: {e}")
    return rows


search_response = youtube.search().list(
    q=search_term,
    part='id,snippet',
    type='video',
    maxResults=max_results
).execute()

# (video id, title) pairs for each search hit.
videos = [(item['id']['videoId'], item['snippet']['title']) for item in search_response['items']]

all_data = []
for video_id, title in videos:
    print(f"Fetching comments for: {title}")
    all_data.extend(_fetch_top_level_comments(youtube, video_id, title))

df = pd.DataFrame(all_data, columns=COMMENT_COLUMNS)

# --------------------------------------------
# 4. Clean & Process Comments (Preserve Expression)
# --------------------------------------------
import re

def preserve_meaningful_clean(text):
    """Lightly clean a comment while keeping its wording and punctuation.

    Removes ``http...`` links and the ``[removed]`` / ``[deleted]``
    placeholders, then collapses every run of whitespace to a single space
    and trims the ends. Case, punctuation and emoji are left untouched.

    Args:
        text: The raw comment. Non-string values are converted with ``str``;
            ``None`` is treated as an empty comment.

    Returns:
        The cleaned comment string (possibly empty).
    """
    if text is None:
        # Avoid turning a missing comment into the literal word "None".
        return ""
    text = str(text)
    text = re.sub(r"http\S+", "", text)                    # remove links
    text = re.sub(r"\[(?:removed|deleted)\]", "", text)    # remove placeholders
    # Normalize whitespace last so the gaps left behind by the removals above
    # do not survive as double spaces inside the text.
    text = re.sub(r"\s+", " ", text)
    return text.strip()

# Store the cleaned text in its own column; the raw 'comment' column is kept.
df['cleaned'] = df['comment'].map(preserve_meaningful_clean)

# --------------------------------------------
# 5. Sentiment Score + Weight
# --------------------------------------------
def get_sentiment_score(text):
    """Return the TextBlob sentiment polarity of ``text``."""
    return TextBlob(text).sentiment.polarity

# Per-comment features: polarity of the cleaned text and its word count
# (whitespace-separated tokens).
df['sentiment'] = df['cleaned'].map(get_sentiment_score)
df['length'] = df['cleaned'].map(lambda cleaned: len(cleaned.split()))

# Likes and length are scaled by (column max + 1), which also avoids a zero
# denominator when the column maximum is 0.
like_share = df['likes'] / (df['likes'].max() + 1)
length_share = df['length'] / (df['length'].max() + 1)

# Weighted blend: 50% sentiment, 30% likes, 20% length.
df['weight'] = df['sentiment'] * 0.5 + like_share * 0.3 + length_share * 0.2

# --------------------------------------------
# 6. Save to Drive
# --------------------------------------------
# Destination folder and file on the mounted Drive.
output_dir = "/content/drive/MyDrive/Colab/DAI_AID/extracted_data/youtube_reviews"
output_path = os.path.join(output_dir, "sennheiser_momentum4_youtube_comments.csv")

# Create the folder if needed, then write the frame without the index column.
os.makedirs(output_dir, exist_ok=True)
df.to_csv(output_path, index=False)

print(f"Saved {len(df)} comments to {output_path}")


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Mounted at /content/drive
Fetching comments for: Sennheiser Momentum 4 Wireless Review | vs. Sony XM5
Fetching comments for: Sennheiser Momentum 4 Wireless Review | LIFE IN SPEED
Fetching comments for: Sennheiser Momentum 4 Vs Sony 1000XM5 Vs Sony 1000XM4
Fetching comments for: Flash Versus - Bose QC Ultra vs Sennheiser Momentum 4
Fetching comments for: Hands On Review - Sennheiser Momentum 4 Over-Ear Headphones
Saved 809 comments to /content/drive/MyDrive/Colab/DAI_AID/extracted_data/youtube_reviews/sennheiser_momentum4_youtube_comments.csv
