# YouTube Data Extraction and Transcript Translation
This notebook extracts YouTube video metadata, fetches transcripts, and translates them to English.

In [None]:
from googleapiclient.discovery import build
import pandas as pd

# Placeholder credential: replace it with your own YouTube Data API key
# before running the notebook.
API_KEY = "YOUR_API_KEY"  # Here the real API key is hidden
# Client for version 3 of the YouTube Data API, authenticated with API_KEY.
youtube = build('youtube', 'v3', developerKey=API_KEY)

def get_video_details(video_ids):
    """Fetch title, channel, publish date and statistics for YouTube videos.

    Uses the module-level ``youtube`` client. IDs are requested in batches
    (``videos.list`` accepts a comma-separated list of up to 50 IDs) so that
    each batch costs a single API call instead of one call per video.

    Args:
        video_ids: Iterable of YouTube video ID strings.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Video_ID``, ``Title``,
        ``Channel``, ``Published``, ``Views``, ``Likes`` and ``Comments``,
        one row per requested ID that the API returned, in input order.
        IDs unknown to the API are skipped. The columns are present even
        when there are no rows, so a later merge on ``Video_ID`` still works.
    """
    columns = [
        "Video_ID", "Title", "Channel", "Published",
        "Views", "Likes", "Comments",
    ]
    video_ids = list(video_ids)
    batch_size = 50  # Maximum number of IDs accepted by one videos.list call.

    items_by_id = {}
    for start in range(0, len(video_ids), batch_size):
        batch = video_ids[start:start + batch_size]
        request = youtube.videos().list(
            part="snippet,statistics", id=",".join(batch)
        )
        response = request.execute()
        for item in response.get("items", []):
            items_by_id[item["id"]] = item

    all_data = []
    # Walk the input list (not the API response) so that row order and
    # duplicated IDs mirror what the caller asked for.
    for video_id in video_ids:
        item = items_by_id.get(video_id)
        if item is None:
            continue
        snippet = item["snippet"]
        stats = item.get("statistics", {})
        all_data.append({
            "Video_ID": video_id,
            "Title": snippet["title"],
            "Channel": snippet["channelTitle"],
            "Published": snippet["publishedAt"],
            # The API reports counts as strings and omits hidden ones;
            # convert to int so each column has a single numeric type.
            "Views": int(stats.get("viewCount", 0)),
            "Likes": int(stats.get("likeCount", 0)),
            "Comments": int(stats.get("commentCount", 0)),
        })
    return pd.DataFrame(all_data, columns=columns)


In [None]:
import textwrap

from deep_translator import GoogleTranslator
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from youtube_transcript_api import NoTranscriptFound, VideoUnavailable

def _split_for_translation(text, max_chars=4500):
    """Split *text* into pieces of at most *max_chars* characters each.

    Text that already fits is returned unchanged as a single piece. Longer
    text is broken at whitespace where possible (runs of whitespace are
    collapsed) and mid-word only when a single word exceeds the limit.
    """
    if len(text) <= max_chars:
        return [text]
    return textwrap.wrap(
        text, width=max_chars, break_long_words=True, break_on_hyphens=False
    )


def _translate_to_english(text):
    """Translate *text* to English piece by piece and re-join the result."""
    translator = GoogleTranslator(source='auto', target='en')
    # The Google backend rejects inputs of 5000 characters or more, which
    # most full transcripts exceed, so translate in smaller pieces.
    pieces = _split_for_translation(text)
    return " ".join(translator.translate(piece) or "" for piece in pieces)


def get_transcripts(video_ids):
    """Fetch each video's transcript and translate it to English.

    Args:
        video_ids: Iterable of YouTube video ID strings.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Video_ID`` and
        ``Transcript``, one row per input ID. ``Transcript`` is an empty
        string when the video has transcripts disabled, has no transcript
        available, is unavailable, or has an empty transcript. The columns
        are present even when ``video_ids`` is empty.
    """
    transcripts = []
    for vid in video_ids:
        try:
            result = YouTubeTranscriptApi.get_transcript(vid)
        except (TranscriptsDisabled, NoTranscriptFound, VideoUnavailable):
            # A missing transcript for one video must not abort the batch.
            transcripts.append({"Video_ID": vid, "Transcript": ""})
            continue

        text = " ".join(r["text"] for r in result)
        # Nothing to translate; avoid a pointless (or rejected) request.
        translated = _translate_to_english(text) if text.strip() else ""
        transcripts.append({"Video_ID": vid, "Transcript": translated})
    return pd.DataFrame(transcripts, columns=["Video_ID", "Transcript"])


In [None]:
# Example usage: fetch metadata and translated transcripts for a list of
# videos, combine them, and export the result (replace with real video IDs).
video_ids = ['dQw4w9WgXcQ', 'nCgQDjiotG0']  # Example placeholders
# Metadata table from get_video_details.
video_df = get_video_details(video_ids)
# Translated transcript table from get_transcripts.
trans_df = get_transcripts(video_ids)
# Left join on Video_ID: every metadata row is kept, and videos without a
# matching transcript row get a missing value in the Transcript column.
final_df = pd.merge(video_df, trans_df, on='Video_ID', how='left')
# Write the combined table to an Excel file without the index column.
final_df.to_excel("youtube_transcripts_translated.xlsx", index=False)
# Preview the first rows; as the last expression of the cell, the notebook
# displays the result.
final_df.head()
