In [None]:
!pip install --upgrade google-api-python-client
!pip install isodate




In [None]:
import os
from getpass import getpass
from time import sleep

import pandas as pd
from googleapiclient.discovery import build
from isodate import parse_duration


In [None]:
# Never store the API key in the notebook: read it from the YOUTUBE_API_KEY
# environment variable, and fall back to a hidden interactive prompt.
API_KEY = os.environ.get('YOUTUBE_API_KEY') or getpass('YouTube Data API key: ')

# Client for the YouTube Data API v3, authenticated with the developer key.
youtube = build('youtube', 'v3', developerKey=API_KEY)
channel_id = 'UCAuUUnT6oDeKwE6v1NGQxug'  # TED Channel


In [None]:
# Step 4: Build a lookup table from categoryId to the category's title,
# using the video categories listed for the US region.
categories_request = youtube.videoCategories().list(part="snippet", regionCode="US")
categories_res = categories_request.execute()

category_map = {
    entry['id']: entry['snippet']['title']
    for entry in categories_res['items']
}


In [None]:
# Fetch the channel's contentDetails, which carry the ID of its "uploads" playlist.
channel_res = youtube.channels().list(
    part='contentDetails',
    id=channel_id
).execute()

# Fail with a clear message when the lookup returns no channel, instead of
# surfacing a bare KeyError/IndexError from the indexing below.
channel_items = channel_res.get('items', [])
if not channel_items:
    raise ValueError(f"No YouTube channel found for channel_id={channel_id!r}")

uploads_playlist_id = channel_items[0]['contentDetails']['relatedPlaylists']['uploads']


In [None]:
# Collect the ID of every video in the uploads playlist, requesting one page
# of up to 50 playlist items at a time.
video_ids = []
next_page_token = None
more_pages = True

while more_pages:
    page_request = youtube.playlistItems().list(
        part='snippet',
        playlistId=uploads_playlist_id,
        maxResults=50,
        pageToken=next_page_token
    )
    playlist_res = page_request.execute()

    video_ids.extend(
        entry['snippet']['resourceId']['videoId']
        for entry in playlist_res['items']
    )

    # A response without nextPageToken ends the loop.
    next_page_token = playlist_res.get('nextPageToken')
    more_pages = bool(next_page_token)
    if more_pages:
        sleep(1)  # pause between consecutive page requests


In [None]:
# Request snippet, statistics and contentDetails for the collected videos in
# batches of 50 IDs, flattening each returned video into one dict in all_data.
all_data = []

for batch_start in range(0, len(video_ids), 50):
    id_batch = video_ids[batch_start:batch_start + 50]
    videos_request = youtube.videos().list(
        part='snippet,statistics,contentDetails',
        id=','.join(id_batch)
    )
    response = videos_request.execute()

    for item in response['items']:
        snippet = item['snippet']
        # statistics / contentDetails fall back to {} so the .get() defaults below apply.
        stats = item.get('statistics', {})
        details = item.get('contentDetails', {})

        record = {
            'Title': snippet['title'],
            'Published At': snippet['publishedAt'],
            'Video ID': item['id'],
            'Video URL': f"https://www.youtube.com/watch?v={item['id']}",
            # Counts absent from the response are recorded as 0.
            'Views': int(stats.get('viewCount', 0)),
            'Likes': int(stats.get('likeCount', 0)),
            'Comments': int(stats.get('commentCount', 0)),
            'Duration': details.get('duration', ''),
            'Category': category_map.get(snippet.get('categoryId', ''), 'Unknown'),
            # Joining an empty list yields '', so videos without tags get ''.
            'Tags': ', '.join(snippet.get('tags', [])),
            'Description': snippet.get('description', '')
        }
        all_data.append(record)

    sleep(1)  # pause after each batch request


In [None]:
# 📌 Step 8: Convert to DataFrame and format duration
# all_data is a list of per-video dicts; each dict becomes one row and its keys
# become the column names.
df = pd.DataFrame(all_data)

# Convert ISO 8601 duration to readable format
def format_duration(iso_duration):
    """Turn an ISO 8601 duration string into ``H:MM:SS`` or ``M:SS`` text.

    Args:
        iso_duration: Duration string such as ``'PT1H2M5S'``. Missing values
            (``None``, NaN or any other non-string, and empty or
            whitespace-only strings) are accepted.

    Returns:
        ``'H:MM:SS'`` when the duration is one hour or longer, otherwise
        ``'M:SS'``. An empty string when ``iso_duration`` is missing.

    Raises:
        Whatever ``parse_duration`` raises for a non-empty string it cannot
        parse.
    """
    # The fetch step records '' when a video has no duration, and an empty
    # string cannot be parsed as a duration, so report "no duration" as ''.
    if not isinstance(iso_duration, str) or not iso_duration.strip():
        return ''

    duration = parse_duration(iso_duration)
    total_seconds = int(duration.total_seconds())
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)

    if hours > 0:
        return f"{hours}:{minutes:02}:{seconds:02}"
    return f"{minutes}:{seconds:02}"

# Add a human-readable duration column derived element-wise from 'Duration'.
df['Readable Duration'] = df['Duration'].map(format_duration)


In [None]:
# Write the collected data to CSV; index=False leaves the DataFrame index out of the file.
df.to_csv('ted_full_data.csv', index=False)

# google.colab is importable only inside Colab. Guard the import so the cell
# still succeeds elsewhere, where the CSV simply stays in the working directory.
try:
    from google.colab import files
except ImportError:
    print("google.colab not available; 'ted_full_data.csv' was saved to the working directory.")
else:
    files.download('ted_full_data.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>