# **Data Collection YouTube**

In [2]:
import pandas as pd
from googleapiclient.discovery import build
from datetime import datetime, timedelta
import config

In [35]:
# API setup
# The key comes from the local `config` module rather than being written into the notebook.
api_key = config.key6
# Module-level YouTube Data API v3 client; fetch_videos() below reads this global.
youtube = build('youtube', 'v3', developerKey=api_key)

In [4]:
def fetch_videos(query, max_results, days_back, client=None):
    """Search YouTube for `query` and return per-video metadata as a DataFrame.

    The search walks backwards in time through consecutive windows of
    `days_back` days, starting at the current UTC time. Only the first result
    page of each window is requested (no ``nextPageToken`` paging), so one
    window contributes at most 50 videos. The walk ends when `max_results`
    video IDs have been collected or when a window returns no videos.

    Parameters
    ----------
    query : str
        Search term passed as ``q`` to ``search().list``.
    max_results : int
        Upper bound on the number of videos to collect. The per-request page
        size is capped to the remaining budget so the bound is not exceeded.
    days_back : int
        Width of each publication-date window, in days.
    client : optional
        YouTube Data API client to use. Defaults to the notebook-level
        ``youtube`` client, which keeps existing three-argument calls working.

    Returns
    -------
    pandas.DataFrame
        One row per video with columns ``Title``, ``Channel``,
        ``Published At``, ``Video ID``, ``Likes``, ``Dislikes`` and
        ``Comments``. A statistic missing from the API response is stored as
        the string ``'Not available'``. The frame has no columns when nothing
        was found.
    """
    # Function-scope import: the notebook's import cell only brings in
    # datetime and timedelta.
    from datetime import timezone

    page_cap = 50  # Max results per page allowed by API
    client = youtube if client is None else client

    all_videos = []
    # Naive UTC "now"; replaces datetime.utcnow(), deprecated since Python 3.12.
    end_date = datetime.now(timezone.utc).replace(tzinfo=None)
    start_date = end_date - timedelta(days=days_back)

    while max_results > 0:
        published_before = end_date.isoformat("T") + "Z"
        published_after = start_date.isoformat("T") + "Z"

        search_response = client.search().list(
            q=query,
            part='snippet',
            # Never ask for more than the remaining budget.
            maxResults=min(page_cap, max_results),
            type='video',
            publishedBefore=published_before,
            publishedAfter=published_after
        ).execute()

        video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]
        if not video_ids:
            # An empty window ends the walk.
            break

        # search().list carries no statistics, so look the IDs up in one batched call.
        video_response = client.videos().list(
            part='snippet,statistics',
            id=','.join(video_ids)
        ).execute()

        for video in video_response.get('items', []):
            snippet = video['snippet']
            stats = video.get('statistics', {})
            all_videos.append({
                'Title': snippet['title'],
                'Channel': snippet['channelTitle'],
                'Published At': snippet['publishedAt'],
                'Video ID': video['id'],
                'Likes': stats.get('likeCount', 'Not available'),
                # NOTE(review): every row in this notebook's outputs shows
                # 'Not available' here; the API appears not to expose
                # dislikeCount publicly. Confirm before relying on the column.
                'Dislikes': stats.get('dislikeCount', 'Not available'),
                'Comments': stats.get('commentCount', 'Not available')
            })

        max_results -= len(video_ids)
        # Slide the window one step further into the past.
        end_date = start_date
        start_date -= timedelta(days=days_back)

    return pd.DataFrame(all_videos)

## **Collect data - Tea**

In [5]:
# First batch of tea-history search terms; each one is passed to fetch_videos() in a later cell.
search_queries_tea1 = [
    "history of tea",
    "origin of tea drinking",
    "first tea plantation",
    "tea trade history",
    "ancient tea culture",
]

In [6]:
# Second batch of tea-history search terms, collected in its own loop further down.
search_queries_tea2 = [
    "tea in ancient China",
    "introduction of tea to Europe",
    "British tea culture history",
    "tea and the silk road",
    "oldest tea varieties",
]

In [8]:
# Collect data for each query
# Gather one frame per query and concatenate once at the end, instead of
# re-copying the growing result on every iteration.
frames_tea1 = []
for query in search_queries_tea1:
    print(f"Fetching data for query: {query}")
    data_df = fetch_videos(query, max_results=1000, days_back=730)
    frames_tea1.append(data_df)
# pd.concat raises on an empty list, so fall back to an empty frame.
final_data_tea1 = pd.concat(frames_tea1, ignore_index=True) if frames_tea1 else pd.DataFrame()

Fetching data for query: history of tea
Fetching data for query: origin of tea drinking
Fetching data for query: first tea plantation
Fetching data for query: tea trade history
Fetching data for query: ancient tea culture


In [9]:
# Spot-check the first 10 rows collected for the first tea query batch.
final_data_tea1.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,The History Of Tea - Historia [Animated History],Mechanix Illustrated,2023-02-19T21:28:20Z,TX78tKMdJsA,287,Not available,27
1,Tea: How Britain Screwed India and China,Elliot Sang,2023-09-08T17:00:11Z,2-1obrIBYN0,5730,Not available,357
2,The History of Tea | The Origin Of Tea | Tea's...,KidsMathTV,2023-01-09T11:03:57Z,N6uzMv9XpKY,108,Not available,0
3,Robert Fortune on How Tea Was Stolen From the ...,SLICE Full Doc,2024-07-03T12:30:21Z,YbqtP-lH734,1552,Not available,192
4,How did tea become so popular in Great Britain...,Absolute History,2023-04-05T21:00:24Z,uhLl7YxQnjA,7291,Not available,85
5,A brief history of tea,Neom,2022-10-25T19:00:04Z,9wfj-E5s-ws,94,Not available,65
6,Commoner Queens of England,History Tea Time with Lindsay Holiday,2024-04-16T16:00:05Z,8LEoH6SSCwM,6196,Not available,556
7,"Anne, Queen of Great Britain & Ireland",History Tea Time with Lindsay Holiday,2024-09-03T16:00:41Z,jcNBX4UvHmk,3777,Not available,222
8,The History Of Afternoon Tea | Documentary,HistoricHospitality,2024-02-23T17:00:11Z,caFzG39lBmE,192,Not available,35
9,An Introduction to Tea in East Asia - ASMR His...,ASMR Historian,2024-02-28T07:00:10Z,TUvEMzi7r10,67,Not available,13


In [12]:
# Collect data for each query
# Gather one frame per query and concatenate once at the end, instead of
# re-copying the growing result on every iteration.
frames_tea2 = []
for query in search_queries_tea2:
    print(f"Fetching data for query: {query}")
    data_df = fetch_videos(query, max_results=1000, days_back=730)
    frames_tea2.append(data_df)
# pd.concat raises on an empty list, so fall back to an empty frame.
final_data_tea2 = pd.concat(frames_tea2, ignore_index=True) if frames_tea2 else pd.DataFrame()

Fetching data for query: tea in ancient China
Fetching data for query: introduction of tea to Europe
Fetching data for query: British tea culture history
Fetching data for query: tea and the silk road
Fetching data for query: oldest tea varieties


In [13]:
# Spot-check the first 10 rows collected for the second tea query batch.
final_data_tea2.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,Traditional tea processing techniques and asso...,UNESCO,2022-11-29T15:36:01Z,Oe6kGFmFLnc,Not available,Not available,Not available
1,"Uncut Chinese Tea Ceremony - Wang Fu Teahouse,...",Jesse's Teahouse,2023-05-16T12:49:41Z,w8PXCOBg4EY,2613,Not available,82
2,The ancient ritual of the Chinese tea ceremony...,ABC News (Australia),2023-03-30T05:00:16Z,PWWOeA1UVfA,51,Not available,Not available
3,The History of Tea | The Origin Of Tea | Tea's...,KidsMathTV,2023-01-09T11:03:57Z,N6uzMv9XpKY,108,Not available,0
4,[ASMR] Traditional Chinese Tea Ceremony,PaprTape,2023-02-02T23:54:55Z,PIkvN5B1y98,1481,Not available,85
5,Ancient Chinese Tea Art: Exploring the Mysteri...,tenmoku glaze,2023-02-17T09:57:21Z,24v3yigcy24,19,Not available,2
6,Unveiling Ancient Chinese Tea Culture,Rewind History,2024-08-21T12:00:18Z,mrINYr3cGrk,0,Not available,0
7,What Did People Actually Eat In Ancient China?,Pages In History,2023-03-10T22:00:52Z,j4tcDm6m4xk,250,Not available,21
8,The Fascinating History of Tea: From Ancient C...,Easy2learn,2024-09-29T21:19:46Z,SgtMFk-7wt0,1,Not available,0
9,Exploring the Elegance of Ancient Chinese Tea ...,TheOneTimeTraveller,2023-12-28T13:15:02Z,ZrISiu4WqXM,64,Not available,0


Combine data frames

In [14]:
# Stack both tea batches into one frame with a fresh 0..n-1 index.
# NOTE(review): nothing de-duplicates by 'Video ID' here; the previews above show
# the same ID (N6uzMv9XpKY) in both batches, so df_tea can contain repeated videos.
df_tea = pd.concat([final_data_tea1, final_data_tea2], ignore_index=True)
df_tea.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,The History Of Tea - Historia [Animated History],Mechanix Illustrated,2023-02-19T21:28:20Z,TX78tKMdJsA,287,Not available,27
1,Tea: How Britain Screwed India and China,Elliot Sang,2023-09-08T17:00:11Z,2-1obrIBYN0,5730,Not available,357
2,The History of Tea | The Origin Of Tea | Tea's...,KidsMathTV,2023-01-09T11:03:57Z,N6uzMv9XpKY,108,Not available,0
3,Robert Fortune on How Tea Was Stolen From the ...,SLICE Full Doc,2024-07-03T12:30:21Z,YbqtP-lH734,1552,Not available,192
4,How did tea become so popular in Great Britain...,Absolute History,2023-04-05T21:00:24Z,uhLl7YxQnjA,7291,Not available,85
5,A brief history of tea,Neom,2022-10-25T19:00:04Z,9wfj-E5s-ws,94,Not available,65
6,Commoner Queens of England,History Tea Time with Lindsay Holiday,2024-04-16T16:00:05Z,8LEoH6SSCwM,6196,Not available,556
7,"Anne, Queen of Great Britain & Ireland",History Tea Time with Lindsay Holiday,2024-09-03T16:00:41Z,jcNBX4UvHmk,3777,Not available,222
8,The History Of Afternoon Tea | Documentary,HistoricHospitality,2024-02-23T17:00:11Z,caFzG39lBmE,192,Not available,35
9,An Introduction to Tea in East Asia - ASMR His...,ASMR Historian,2024-02-28T07:00:10Z,TUvEMzi7r10,67,Not available,13


In [15]:
# (rows, columns) of the combined tea frame; rows include any video returned by more than one query.
df_tea.shape

(4896, 7)

In [26]:
# Save the DataFrame to a CSV file
# index=False leaves the row index out, so the file holds only the data columns.
df_tea.to_csv('data_tea.csv', index=False)

## **Collect data - Coffee**

In [17]:
# First batch of coffee-history search terms; each one is passed to fetch_videos() in a later cell.
search_queries_coffee1 = [
    "history of coffee",
    "origin of coffee drinking",
    "first coffee house",
    "coffee trade history",
    "coffee in Ethiopia history"
]

In [18]:
# Second batch of coffee-history search terms, collected in its own loop further down.
search_queries_coffee2 = [
    "introduction of coffee to Europe",
    "coffee in the Ottoman Empire",
    "spread of coffee culture",
    "first coffee plantation",
    "historical impact of coffee",
]

In [20]:
# Collect data for each query
# Gather one frame per query and concatenate once at the end, instead of
# re-copying the growing result on every iteration.
frames_coffee1 = []
for query in search_queries_coffee1:
    print(f"Fetching data for query: {query}")
    data_df = fetch_videos(query, max_results=1000, days_back=730)
    frames_coffee1.append(data_df)
# pd.concat raises on an empty list, so fall back to an empty frame.
final_data_coffee1 = pd.concat(frames_coffee1, ignore_index=True) if frames_coffee1 else pd.DataFrame()

Fetching data for query: history of coffee
Fetching data for query: origin of coffee drinking
Fetching data for query: first coffee house
Fetching data for query: coffee trade history
Fetching data for query: coffee in Ethiopia history


In [21]:
# Spot-check the first 10 rows collected for the first coffee query batch.
final_data_coffee1.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,How humanity got hooked on coffee - Jonathan M...,TED-Ed,2024-03-05T16:01:19Z,xQ156y4TtJs,33183,Not available,560
1,Caffeine Chronicles: The Transformative Journe...,Get.factual,2023-12-14T16:00:30Z,YaTjjYBxAw8,1693,Not available,107
2,When Coffee was Illegal,Tasting History with Max Miller,2022-12-02T16:00:21Z,qvOTev_r4-8,30140,Not available,1655
3,Coffee and what it does to your body - BBC Wor...,BBC World Service,2024-05-16T12:00:06Z,62wEk02YKs0,30887,Not available,1892
4,The History of Coffee || Dr Roy Casagranda,Age of Empires,2024-10-06T17:47:01Z,3k2g-W2UDEg,10990,Not available,433
5,Story of the Origin of Coffee: The Legend of K...,Secrets of History,2023-03-04T02:45:45Z,Q5GtZRjhSxs,21,Not available,1
6,The History of: Coffee | The History Of,The History Of,2024-08-24T20:50:14Z,08Unj_j6x_g,4,Not available,3
7,Why Single-Origin Coffee Is So Expensive | So ...,Business Insider,2023-02-28T16:00:08Z,Dmpnrtey3YU,25607,Not available,1018
8,How to Brew Civil War Coffee with Sweet Potatoes,Tasting History with Max Miller,2023-06-27T15:00:21Z,ZfOanv9oUv4,42346,Not available,3244
9,Chocolate Instead of Coffee in Early America,Townsends,2023-04-10T18:50:03Z,LRPHNOqABlA,26247,Not available,1005


In [23]:
# Collect data for each query
# Gather one frame per query and concatenate once at the end, instead of
# re-copying the growing result on every iteration.
frames_coffee2 = []
for query in search_queries_coffee2:
    print(f"Fetching data for query: {query}")
    data_df = fetch_videos(query, max_results=1000, days_back=730)
    frames_coffee2.append(data_df)
# pd.concat raises on an empty list, so fall back to an empty frame.
final_data_coffee2 = pd.concat(frames_coffee2, ignore_index=True) if frames_coffee2 else pd.DataFrame()

Fetching data for query: introduction of coffee to Europe
Fetching data for query: coffee in the Ottoman Empire
Fetching data for query: spread of coffee culture
Fetching data for query: first coffee plantation
Fetching data for query: historical impact of coffee


In [24]:
# Spot-check the first 10 rows collected for the second coffee query batch.
final_data_coffee2.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,The Origin of Coffee | HOW did COFFEE come to ...,8Grams - Italian Coffee Specialists,2023-09-03T15:55:52Z,n7t9axFrm8I,13,Not available,1
1,How humanity got hooked on coffee - Jonathan M...,TED-Ed,2024-03-05T16:01:19Z,xQ156y4TtJs,33183,Not available,560
2,How Turkey Brought Coffee to Europe,Sketchy History,2023-01-18T01:51:20Z,sf2Z14f1uiQ,24,Not available,1
3,Turk introduce coffee to Europe,Property Turkey,2023-01-13T15:27:11Z,yN5Me6dVPfc,18,Not available,0
4,The medieval history of coffee,Xeritern Perspectives,2023-12-28T06:30:07Z,D9tHLE8jAsE,57,Not available,0
5,How Coffee Spread to Europe,Lizpresso,2023-10-04T09:06:32Z,Ume_eeu73iY,1,Not available,0
6,The Shocking Story of Coffee’s Arrival in Europe!,Morning Roar,2024-08-28T13:17:31Z,TD6MEZn5QXc,9,Not available,0
7,How Coffee Came to Europe,Interesting World,2024-08-08T11:00:21Z,The-I3QbSCk,1,Not available,0
8,Coffee's European Journey: From Controversy to...,Canned History,2024-05-04T20:00:16Z,yrORfXb4k2c,80,Not available,1
9,The History of Coffee: From Goatherd's Discove...,Time Tales,2024-03-19T04:18:27Z,cQ79xb7dMXQ,5,Not available,1


Combine data frames

In [27]:
# Stack both coffee batches into one frame with a fresh 0..n-1 index.
# NOTE(review): nothing de-duplicates by 'Video ID' here; the previews above show
# the same ID (xQ156y4TtJs) in both batches, so df_coffee can contain repeated videos.
df_coffee = pd.concat([final_data_coffee1, final_data_coffee2], ignore_index=True)
df_coffee.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,How humanity got hooked on coffee - Jonathan M...,TED-Ed,2024-03-05T16:01:19Z,xQ156y4TtJs,33183,Not available,560
1,Caffeine Chronicles: The Transformative Journe...,Get.factual,2023-12-14T16:00:30Z,YaTjjYBxAw8,1693,Not available,107
2,When Coffee was Illegal,Tasting History with Max Miller,2022-12-02T16:00:21Z,qvOTev_r4-8,30140,Not available,1655
3,Coffee and what it does to your body - BBC Wor...,BBC World Service,2024-05-16T12:00:06Z,62wEk02YKs0,30887,Not available,1892
4,The History of Coffee || Dr Roy Casagranda,Age of Empires,2024-10-06T17:47:01Z,3k2g-W2UDEg,10990,Not available,433
5,Story of the Origin of Coffee: The Legend of K...,Secrets of History,2023-03-04T02:45:45Z,Q5GtZRjhSxs,21,Not available,1
6,The History of: Coffee | The History Of,The History Of,2024-08-24T20:50:14Z,08Unj_j6x_g,4,Not available,3
7,Why Single-Origin Coffee Is So Expensive | So ...,Business Insider,2023-02-28T16:00:08Z,Dmpnrtey3YU,25607,Not available,1018
8,How to Brew Civil War Coffee with Sweet Potatoes,Tasting History with Max Miller,2023-06-27T15:00:21Z,ZfOanv9oUv4,42346,Not available,3244
9,Chocolate Instead of Coffee in Early America,Townsends,2023-04-10T18:50:03Z,LRPHNOqABlA,26247,Not available,1005


In [28]:
# (rows, columns) of the combined coffee frame; rows include any video returned by more than one query.
df_coffee.shape

(4783, 7)

In [29]:
# Save the DataFrame to a CSV file
# index=False leaves the row index out, so the file holds only the data columns.
df_coffee.to_csv('data_coffee.csv', index=False)

## **Collect data - Tea vs. Coffee**

In [30]:
# First batch of combined tea-and-coffee search terms; each one is passed to fetch_videos() in a later cell.
search_queries_general1 = [
    
    "tea vs coffee history",
    "cultural impact of tea and coffee",
    "economic history of tea and coffee",
    "tea and coffee in colonial times",
    "how tea and coffee changed the world",
]


In [31]:
# Second batch of combined tea-and-coffee search terms, collected in its own loop further down.
search_queries_general2 = [
    "tea and coffee rituals around the world",
    "historical tea and coffee ceremonies",
    "evolution of tea and coffee consumption",
    "tea and coffee in literature",
    "famous historical figures and their tea/coffee habits"
]

In [33]:
# Collect data for each query
# Gather one frame per query and concatenate once at the end, instead of
# re-copying the growing result on every iteration.
frames_general1 = []
for query in search_queries_general1:
    print(f"Fetching data for query: {query}")
    data_df = fetch_videos(query, max_results=1000, days_back=730)
    frames_general1.append(data_df)
# pd.concat raises on an empty list, so fall back to an empty frame.
final_data_general1 = pd.concat(frames_general1, ignore_index=True) if frames_general1 else pd.DataFrame()

Fetching data for query: tea vs coffee history
Fetching data for query: cultural impact of tea and coffee
Fetching data for query: economic history of tea and coffee
Fetching data for query: tea and coffee in colonial times
Fetching data for query: how tea and coffee changed the world


In [34]:
# Spot-check the first 10 rows collected for the first tea-vs-coffee query batch.
final_data_general1.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,The Epic Battle: Tea vs. Coffee!,Shrubby,2024-08-08T15:51:42Z,xEFMH2iFCdY,14,Not available,5
1,Coffee vs Tea: Which is Healthier? The Answer ...,CBN News,2022-11-02T20:00:00Z,25hV1udOhKo,866,Not available,78
2,WHAT DO EUROPEANS LIKE MORE TEA OR COFFEE?,Macko's Maps,2023-10-06T14:00:39Z,pj0VHEWSOOQ,23,Not available,0
3,Tea or coffee - what do you prefer?,CGTN Europe,2023-05-25T17:15:38Z,Av7alcwXEHs,11,Not available,0
4,🍵☕TEA or COFFEE of #countryballs ?,Rukavov,2024-02-10T17:46:03Z,sLFA0RUYfxk,5378,Not available,399
5,Tea or coffee? #map,World Map,2024-01-20T10:39:14Z,XFxTJTW28PY,206,Not available,30
6,COFFEE VS. TEA!,Lizpresso,2023-10-04T09:14:40Z,EB0HyWlXzIc,31,Not available,1
7,Does Your Country Prefer Tea or Coffee,Xenelock Geography,2023-10-19T11:00:14Z,DkvYsOaTvAU,Not available,Not available,29
8,Experience the REAL Beauty of Shalimar Gardens...,Celtic Mick Walks,2024-10-12T10:45:02Z,eHfXDJR4adA,56,Not available,18
9,Coffee or Tea? #shorts #fyp,Venus Mapping,2023-09-07T10:49:00Z,nUurJ7Ih6WU,873,Not available,250


In [36]:
# Collect data for each query
# Gather one frame per query and concatenate once at the end, instead of
# re-copying the growing result on every iteration.
frames_general2 = []
for query in search_queries_general2:
    print(f"Fetching data for query: {query}")
    data_df = fetch_videos(query, max_results=1000, days_back=730)
    frames_general2.append(data_df)
# pd.concat raises on an empty list, so fall back to an empty frame.
final_data_general2 = pd.concat(frames_general2, ignore_index=True) if frames_general2 else pd.DataFrame()

Fetching data for query: tea and coffee rituals around the world
Fetching data for query: historical tea and coffee ceremonies
Fetching data for query: evolution of tea and coffee consumption
Fetching data for query: tea and coffee in literature
Fetching data for query: famous historical figures and their tea/coffee habits


In [37]:
# Spot-check the first 10 rows collected for the second tea-vs-coffee query batch.
final_data_general2.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,"Global Sips: tea, coffee or hot chocolate Disc...",Travelacy,2024-03-27T20:29:49Z,CDTBXRjAOoc,18,Not available,2
1,How Tea and Coffee Shaped the Modern World,Tea & Coffee Explorers,2024-08-21T19:10:47Z,DozQxnEf-ok,8,Not available,0
2,Discover Kyoto's Art of Traditional Matcha Tea...,SocialTravels,2024-10-02T18:33:27Z,U17xSsAuGrE,0,Not available,0
3,International Coffee Rituals #coffee #rituals ...,Brewhilda.com,2023-12-11T05:44:52Z,BrSLxIOM6dw,3,Not available,0
4,Coffee Rituals,Phinix Coffee Cafe,2024-06-15T11:31:36Z,f1Mh8RypZCQ,365,Not available,0
5,The Enigmatic Traditions of the Japanese Tea C...,Coffee Minute,2023-06-05T16:23:00Z,-k6ZJ1iV1rQ,4,Not available,0
6,Coffees Mesmerizing Journey From Mystical Ritu...,Let's Talk Religion,2023-11-13T13:00:56Z,vlyGWgpg6uk,513,Not available,22
7,Uncovering the Mind Blowing Secrets of the Jap...,Coffee Minute,2023-06-05T16:20:00Z,a_ZZQrBIXh0,0,Not available,0
8,Life in AZERBAIJAN: The PARADISE and CHEAP Cou...,Global Life Discoveries,2024-10-11T11:30:34Z,nt7KIZkyud8,98,Not available,17
9,Experience tea ceremony at a traditional Korea...,아니리아띠 Aniriatti & Loving Gugak,2023-07-26T15:57:18Z,BnGdzescbVU,29,Not available,1


Combine data frames

In [38]:
# Stack both tea-vs-coffee batches into one frame with a fresh 0..n-1 index.
# NOTE(review): nothing de-duplicates by 'Video ID' here, so a video returned by
# more than one query would appear more than once — verify before analysis.
df_general = pd.concat([final_data_general1, final_data_general2], ignore_index=True)
df_general.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,The Epic Battle: Tea vs. Coffee!,Shrubby,2024-08-08T15:51:42Z,xEFMH2iFCdY,14,Not available,5
1,Coffee vs Tea: Which is Healthier? The Answer ...,CBN News,2022-11-02T20:00:00Z,25hV1udOhKo,866,Not available,78
2,WHAT DO EUROPEANS LIKE MORE TEA OR COFFEE?,Macko's Maps,2023-10-06T14:00:39Z,pj0VHEWSOOQ,23,Not available,0
3,Tea or coffee - what do you prefer?,CGTN Europe,2023-05-25T17:15:38Z,Av7alcwXEHs,11,Not available,0
4,🍵☕TEA or COFFEE of #countryballs ?,Rukavov,2024-02-10T17:46:03Z,sLFA0RUYfxk,5378,Not available,399
5,Tea or coffee? #map,World Map,2024-01-20T10:39:14Z,XFxTJTW28PY,206,Not available,30
6,COFFEE VS. TEA!,Lizpresso,2023-10-04T09:14:40Z,EB0HyWlXzIc,31,Not available,1
7,Does Your Country Prefer Tea or Coffee,Xenelock Geography,2023-10-19T11:00:14Z,DkvYsOaTvAU,Not available,Not available,29
8,Experience the REAL Beauty of Shalimar Gardens...,Celtic Mick Walks,2024-10-12T10:45:02Z,eHfXDJR4adA,56,Not available,18
9,Coffee or Tea? #shorts #fyp,Venus Mapping,2023-09-07T10:49:00Z,nUurJ7Ih6WU,873,Not available,250


In [39]:
# (rows, columns) of the combined tea-vs-coffee frame; rows include any video returned by more than one query.
df_general.shape

(4629, 7)

In [40]:
# Save the DataFrame to a CSV file
# index=False leaves the row index out, so the file holds only the data columns.
df_general.to_csv('data_tea_vs_coffee.csv', index=False)

### **Combine all data frames**

In [41]:
# Stack the tea, coffee and tea-vs-coffee frames (in that order) with a fresh 0..n-1 index.
# NOTE(review): no de-duplication by 'Video ID' happens here either; repeats already
# present in the inputs carry over, and videos shared between topics would add more.
df_final = pd.concat([df_tea, df_coffee, df_general], ignore_index=True)
df_final.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,The History Of Tea - Historia [Animated History],Mechanix Illustrated,2023-02-19T21:28:20Z,TX78tKMdJsA,287,Not available,27
1,Tea: How Britain Screwed India and China,Elliot Sang,2023-09-08T17:00:11Z,2-1obrIBYN0,5730,Not available,357
2,The History of Tea | The Origin Of Tea | Tea's...,KidsMathTV,2023-01-09T11:03:57Z,N6uzMv9XpKY,108,Not available,0
3,Robert Fortune on How Tea Was Stolen From the ...,SLICE Full Doc,2024-07-03T12:30:21Z,YbqtP-lH734,1552,Not available,192
4,How did tea become so popular in Great Britain...,Absolute History,2023-04-05T21:00:24Z,uhLl7YxQnjA,7291,Not available,85
5,A brief history of tea,Neom,2022-10-25T19:00:04Z,9wfj-E5s-ws,94,Not available,65
6,Commoner Queens of England,History Tea Time with Lindsay Holiday,2024-04-16T16:00:05Z,8LEoH6SSCwM,6196,Not available,556
7,"Anne, Queen of Great Britain & Ireland",History Tea Time with Lindsay Holiday,2024-09-03T16:00:41Z,jcNBX4UvHmk,3777,Not available,222
8,The History Of Afternoon Tea | Documentary,HistoricHospitality,2024-02-23T17:00:11Z,caFzG39lBmE,192,Not available,35
9,An Introduction to Tea in East Asia - ASMR His...,ASMR Historian,2024-02-28T07:00:10Z,TUvEMzi7r10,67,Not available,13


In [42]:
# (rows, columns) of the full dataset; the row count is the sum of the three topic frames.
df_final.shape

(14308, 7)

In [43]:
# Save the DataFrame to a CSV file
# index=False leaves the row index out, so the file holds only the data columns.
df_final.to_csv('data_combined.csv', index=False)

#### **Get comments**

In [47]:
def initialize_youtube_client(api_key):
    """Return a YouTube Data API v3 client built with `api_key` as the developer key."""
    service_name, api_version = 'youtube', 'v3'
    client = build(service_name, api_version, developerKey=api_key)
    return client

In [48]:
def get_comments(youtube, video_id):
    """Collect every top-level comment of one video.

    Pages through ``commentThreads().list`` until the response carries no
    ``nextPageToken``. Every request, including follow-up pages, asks for 100
    threads, so long threads need fewer round trips than with the API's
    default page size.

    Parameters
    ----------
    youtube
        YouTube Data API client exposing ``commentThreads().list(...)``.
    video_id : str
        ID of the video whose comment threads are read.

    Returns
    -------
    list of dict
        One dict per top-level comment with keys ``video_id``, ``author`` and
        ``comment``, in the order the API returned them. Replies are not
        included.

    Errors raised by ``execute()`` are not caught here and propagate to the
    caller.
    """
    comments = []
    # Shared request parameters; only pageToken changes between pages.
    params = {'part': 'snippet', 'videoId': video_id, 'maxResults': 100}

    while True:
        response = youtube.commentThreads().list(**params).execute() or {}

        for item in response.get('items', []):
            top_level = item['snippet']['topLevelComment']['snippet']
            comments.append({
                'video_id': video_id,
                'author': top_level['authorDisplayName'],
                'comment': top_level['textOriginal'],
            })

        next_token = response.get('nextPageToken')
        if not next_token:
            break
        params['pageToken'] = next_token

    return comments

In [49]:
def fetch_comments_batch(api_key, df, batch_size=10):
    """Fetch top-level comments for every video listed in `df`.

    Parameters
    ----------
    api_key : str
        Developer key used to build the YouTube client.
    df : pandas.DataFrame
        Must contain a ``'Video ID'`` column. IDs are processed in row order,
        once per occurrence, so repeated IDs are fetched repeatedly;
        de-duplicate beforehand to avoid the extra requests.
    batch_size : int, default 10
        Number of rows taken per outer-loop step. It only groups the
        iteration; every row is processed regardless of its value.

    Returns
    -------
    pandas.DataFrame
        Columns ``video_id``, ``author``, ``comment`` with a 0..n-1 index.
        The columns are present even when no comments were found.

    Errors raised while fetching a video's comments are not caught and abort
    the run.
    """
    youtube = initialize_youtube_client(api_key)
    columns = ['video_id', 'author', 'comment']
    video_ids = df['Video ID']

    # Accumulate plain records and build the frame once; concatenating a
    # DataFrame per video re-copies everything collected so far each time.
    records = []
    for start in range(0, len(df), batch_size):
        # .iloc keeps the slice positional regardless of the frame's index.
        for video_id in video_ids.iloc[start:start + batch_size]:
            print(f"Fetching comments for video ID: {video_id}")
            records.extend(get_comments(youtube, video_id))

    return pd.DataFrame(records, columns=columns)