# **YouTube Data Collection**

In [2]:
from datetime import datetime, timedelta, timezone

import pandas as pd
from googleapiclient.discovery import build

import config

In [22]:
# Build the YouTube Data API v3 client; the developer key is read from the
# local `config` module rather than being written into the notebook.
api_key = config.key4
youtube = build(serviceName='youtube', version='v3', developerKey=api_key)

In [4]:
def fetch_videos(query, max_results, days_back, client=None):
    """Collect metadata and statistics for YouTube videos matching a query.

    The search walks backwards in time through consecutive windows of
    ``days_back`` days, starting at the current UTC time. Each window
    contributes a single page of search results (at most 50 videos, the
    per-page maximum), and the statistics for that page are fetched with one
    ``videos().list`` call. Collection stops once ``max_results`` videos have
    been requested or as soon as a window returns no videos.

    Parameters
    ----------
    query : str
        Search phrase passed to ``search().list`` as ``q``.
    max_results : int
        Upper bound on the number of videos to request in total. The final
        page is shrunk so this budget is never exceeded.
    days_back : int
        Width, in days, of each publication-date window.
    client : optional
        YouTube Data API service object exposing ``search()`` and
        ``videos()``. When omitted, the notebook-level ``youtube`` client is
        used.

    Returns
    -------
    pandas.DataFrame
        One row per video with the columns ``Title``, ``Channel``,
        ``Published At``, ``Video ID``, ``Likes``, ``Dislikes`` and
        ``Comments``. Statistics the API does not return are filled with the
        string ``'Not available'``. The columns are present even when no video
        was found.
    """
    service = youtube if client is None else client

    page_limit = 50  # Max results per page allowed by API
    timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
    columns = ['Title', 'Channel', 'Published At', 'Video ID',
               'Likes', 'Dislikes', 'Comments']

    records = []
    remaining = max_results
    window_end = datetime.now(timezone.utc)
    window_start = window_end - timedelta(days=days_back)

    while remaining > 0:
        search_response = service.search().list(
            q=query,
            part='snippet',
            # Never ask for more than the caller's remaining budget.
            maxResults=min(page_limit, remaining),
            type='video',
            publishedBefore=window_end.strftime(timestamp_format),
            publishedAfter=window_start.strftime(timestamp_format)
        ).execute()

        video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]
        if not video_ids:
            # An empty window ends the collection; older windows are not tried.
            break

        video_response = service.videos().list(
            part='snippet,statistics',
            id=','.join(video_ids)
        ).execute()

        for video in video_response.get('items', []):
            snippet = video['snippet']
            # Tolerate a missing statistics object instead of raising KeyError.
            stats = video.get('statistics', {})
            records.append({
                'Title': snippet['title'],
                'Channel': snippet['channelTitle'],
                'Published At': snippet['publishedAt'],
                'Video ID': video['id'],
                'Likes': stats.get('likeCount', 'Not available'),
                # NOTE(review): every row previewed in this notebook shows
                # 'Not available' here; the column is kept so the schema
                # stays unchanged.
                'Dislikes': stats.get('dislikeCount', 'Not available'),
                'Comments': stats.get('commentCount', 'Not available')
            })

        remaining -= len(video_ids)
        # Slide the window one step further into the past.
        window_end = window_start
        window_start -= timedelta(days=days_back)

    return pd.DataFrame(records, columns=columns)

## **Collect data - Tea**

In [5]:
# First batch of tea-history search phrases; each entry is sent to
# fetch_videos as its own query.
search_queries_tea1 = [
    "history of tea",
    "origin of tea drinking",
    "first tea plantation",
    "tea trade history",
    "ancient tea culture",
]

In [6]:
# Second batch of tea-history search phrases; each entry is sent to
# fetch_videos as its own query.
search_queries_tea2 = [
    "tea in ancient China",
    "introduction of tea to Europe",
    "British tea culture history",
    "tea and the silk road",
    "oldest tea varieties",
]

In [8]:
# Collect data for each query in the first tea batch.
# The per-query frames are gathered in a list and concatenated once, which
# avoids re-copying the accumulated frame on every iteration.
frames_tea1 = []
for query in search_queries_tea1:
    print(f"Fetching data for query: {query}")
    frames_tea1.append(fetch_videos(query, max_results=1000, days_back=730))
final_data_tea1 = pd.concat(frames_tea1, ignore_index=True)

Fetching data for query: history of tea
Fetching data for query: origin of tea drinking
Fetching data for query: first tea plantation
Fetching data for query: tea trade history
Fetching data for query: ancient tea culture


In [9]:
# Preview the first ten rows collected for the first tea batch.
final_data_tea1.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,The History Of Tea - Historia [Animated History],Mechanix Illustrated,2023-02-19T21:28:20Z,TX78tKMdJsA,287,Not available,27
1,Tea: How Britain Screwed India and China,Elliot Sang,2023-09-08T17:00:11Z,2-1obrIBYN0,5730,Not available,357
2,The History of Tea | The Origin Of Tea | Tea's...,KidsMathTV,2023-01-09T11:03:57Z,N6uzMv9XpKY,108,Not available,0
3,Robert Fortune on How Tea Was Stolen From the ...,SLICE Full Doc,2024-07-03T12:30:21Z,YbqtP-lH734,1552,Not available,192
4,How did tea become so popular in Great Britain...,Absolute History,2023-04-05T21:00:24Z,uhLl7YxQnjA,7291,Not available,85
5,A brief history of tea,Neom,2022-10-25T19:00:04Z,9wfj-E5s-ws,94,Not available,65
6,Commoner Queens of England,History Tea Time with Lindsay Holiday,2024-04-16T16:00:05Z,8LEoH6SSCwM,6196,Not available,556
7,"Anne, Queen of Great Britain & Ireland",History Tea Time with Lindsay Holiday,2024-09-03T16:00:41Z,jcNBX4UvHmk,3777,Not available,222
8,The History Of Afternoon Tea | Documentary,HistoricHospitality,2024-02-23T17:00:11Z,caFzG39lBmE,192,Not available,35
9,An Introduction to Tea in East Asia - ASMR His...,ASMR Historian,2024-02-28T07:00:10Z,TUvEMzi7r10,67,Not available,13


In [12]:
# Collect data for each query in the second tea batch.
# The per-query frames are gathered in a list and concatenated once, which
# avoids re-copying the accumulated frame on every iteration.
frames_tea2 = []
for query in search_queries_tea2:
    print(f"Fetching data for query: {query}")
    frames_tea2.append(fetch_videos(query, max_results=1000, days_back=730))
final_data_tea2 = pd.concat(frames_tea2, ignore_index=True)

Fetching data for query: tea in ancient China
Fetching data for query: introduction of tea to Europe
Fetching data for query: British tea culture history
Fetching data for query: tea and the silk road
Fetching data for query: oldest tea varieties


In [13]:
# Preview the first ten rows collected for the second tea batch.
final_data_tea2.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,Traditional tea processing techniques and asso...,UNESCO,2022-11-29T15:36:01Z,Oe6kGFmFLnc,Not available,Not available,Not available
1,"Uncut Chinese Tea Ceremony - Wang Fu Teahouse,...",Jesse's Teahouse,2023-05-16T12:49:41Z,w8PXCOBg4EY,2613,Not available,82
2,The ancient ritual of the Chinese tea ceremony...,ABC News (Australia),2023-03-30T05:00:16Z,PWWOeA1UVfA,51,Not available,Not available
3,The History of Tea | The Origin Of Tea | Tea's...,KidsMathTV,2023-01-09T11:03:57Z,N6uzMv9XpKY,108,Not available,0
4,[ASMR] Traditional Chinese Tea Ceremony,PaprTape,2023-02-02T23:54:55Z,PIkvN5B1y98,1481,Not available,85
5,Ancient Chinese Tea Art: Exploring the Mysteri...,tenmoku glaze,2023-02-17T09:57:21Z,24v3yigcy24,19,Not available,2
6,Unveiling Ancient Chinese Tea Culture,Rewind History,2024-08-21T12:00:18Z,mrINYr3cGrk,0,Not available,0
7,What Did People Actually Eat In Ancient China?,Pages In History,2023-03-10T22:00:52Z,j4tcDm6m4xk,250,Not available,21
8,The Fascinating History of Tea: From Ancient C...,Easy2learn,2024-09-29T21:19:46Z,SgtMFk-7wt0,1,Not available,0
9,Exploring the Elegance of Ancient Chinese Tea ...,TheOneTimeTraveller,2023-12-28T13:15:02Z,ZrISiu4WqXM,64,Not available,0


Combine data frames

In [14]:
# Stack both tea batches into a single frame with a fresh 0..n-1 index.
# NOTE(review): the same video can be returned by more than one query (Video ID
# N6uzMv9XpKY shows up in the previews of both batches), so df_tea can contain
# repeated videos; no de-duplication is done here.
df_tea = pd.concat([final_data_tea1, final_data_tea2], ignore_index=True)
df_tea.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,The History Of Tea - Historia [Animated History],Mechanix Illustrated,2023-02-19T21:28:20Z,TX78tKMdJsA,287,Not available,27
1,Tea: How Britain Screwed India and China,Elliot Sang,2023-09-08T17:00:11Z,2-1obrIBYN0,5730,Not available,357
2,The History of Tea | The Origin Of Tea | Tea's...,KidsMathTV,2023-01-09T11:03:57Z,N6uzMv9XpKY,108,Not available,0
3,Robert Fortune on How Tea Was Stolen From the ...,SLICE Full Doc,2024-07-03T12:30:21Z,YbqtP-lH734,1552,Not available,192
4,How did tea become so popular in Great Britain...,Absolute History,2023-04-05T21:00:24Z,uhLl7YxQnjA,7291,Not available,85
5,A brief history of tea,Neom,2022-10-25T19:00:04Z,9wfj-E5s-ws,94,Not available,65
6,Commoner Queens of England,History Tea Time with Lindsay Holiday,2024-04-16T16:00:05Z,8LEoH6SSCwM,6196,Not available,556
7,"Anne, Queen of Great Britain & Ireland",History Tea Time with Lindsay Holiday,2024-09-03T16:00:41Z,jcNBX4UvHmk,3777,Not available,222
8,The History Of Afternoon Tea | Documentary,HistoricHospitality,2024-02-23T17:00:11Z,caFzG39lBmE,192,Not available,35
9,An Introduction to Tea in East Asia - ASMR His...,ASMR Historian,2024-02-28T07:00:10Z,TUvEMzi7r10,67,Not available,13


In [15]:
# (rows, columns) of the combined tea dataset.
df_tea.shape

(4896, 7)

In [26]:
# Save the combined tea DataFrame to a CSV file in the working directory;
# index=False leaves the row index out of the file.
df_tea.to_csv('data_tea.csv', index=False)

## **Collect data - Coffee**

In [17]:
# First batch of coffee-history search phrases; each entry is sent to
# fetch_videos as its own query.
search_queries_coffee1 = [
    "history of coffee",
    "origin of coffee drinking",
    "first coffee house",
    "coffee trade history",
    "coffee in Ethiopia history"
]

In [18]:
# Second batch of coffee-history search phrases; each entry is sent to
# fetch_videos as its own query.
search_queries_coffee2 = [
    "introduction of coffee to Europe",
    "coffee in the Ottoman Empire",
    "spread of coffee culture",
    "first coffee plantation",
    "historical impact of coffee",
]

In [20]:
# Collect data for each query in the first coffee batch.
# The per-query frames are gathered in a list and concatenated once, which
# avoids re-copying the accumulated frame on every iteration.
frames_coffee1 = []
for query in search_queries_coffee1:
    print(f"Fetching data for query: {query}")
    frames_coffee1.append(fetch_videos(query, max_results=1000, days_back=730))
final_data_coffee1 = pd.concat(frames_coffee1, ignore_index=True)

Fetching data for query: history of coffee
Fetching data for query: origin of coffee drinking
Fetching data for query: first coffee house
Fetching data for query: coffee trade history
Fetching data for query: coffee in Ethiopia history


In [21]:
# Preview the first ten rows collected for the first coffee batch.
final_data_coffee1.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,How humanity got hooked on coffee - Jonathan M...,TED-Ed,2024-03-05T16:01:19Z,xQ156y4TtJs,33183,Not available,560
1,Caffeine Chronicles: The Transformative Journe...,Get.factual,2023-12-14T16:00:30Z,YaTjjYBxAw8,1693,Not available,107
2,When Coffee was Illegal,Tasting History with Max Miller,2022-12-02T16:00:21Z,qvOTev_r4-8,30140,Not available,1655
3,Coffee and what it does to your body - BBC Wor...,BBC World Service,2024-05-16T12:00:06Z,62wEk02YKs0,30887,Not available,1892
4,The History of Coffee || Dr Roy Casagranda,Age of Empires,2024-10-06T17:47:01Z,3k2g-W2UDEg,10990,Not available,433
5,Story of the Origin of Coffee: The Legend of K...,Secrets of History,2023-03-04T02:45:45Z,Q5GtZRjhSxs,21,Not available,1
6,The History of: Coffee | The History Of,The History Of,2024-08-24T20:50:14Z,08Unj_j6x_g,4,Not available,3
7,Why Single-Origin Coffee Is So Expensive | So ...,Business Insider,2023-02-28T16:00:08Z,Dmpnrtey3YU,25607,Not available,1018
8,How to Brew Civil War Coffee with Sweet Potatoes,Tasting History with Max Miller,2023-06-27T15:00:21Z,ZfOanv9oUv4,42346,Not available,3244
9,Chocolate Instead of Coffee in Early America,Townsends,2023-04-10T18:50:03Z,LRPHNOqABlA,26247,Not available,1005


In [23]:
# Collect data for each query in the second coffee batch.
# The per-query frames are gathered in a list and concatenated once, which
# avoids re-copying the accumulated frame on every iteration.
frames_coffee2 = []
for query in search_queries_coffee2:
    print(f"Fetching data for query: {query}")
    frames_coffee2.append(fetch_videos(query, max_results=1000, days_back=730))
final_data_coffee2 = pd.concat(frames_coffee2, ignore_index=True)

Fetching data for query: introduction of coffee to Europe
Fetching data for query: coffee in the Ottoman Empire
Fetching data for query: spread of coffee culture
Fetching data for query: first coffee plantation
Fetching data for query: historical impact of coffee


In [24]:
# Preview the first ten rows collected for the second coffee batch.
final_data_coffee2.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,The Origin of Coffee | HOW did COFFEE come to ...,8Grams - Italian Coffee Specialists,2023-09-03T15:55:52Z,n7t9axFrm8I,13,Not available,1
1,How humanity got hooked on coffee - Jonathan M...,TED-Ed,2024-03-05T16:01:19Z,xQ156y4TtJs,33183,Not available,560
2,How Turkey Brought Coffee to Europe,Sketchy History,2023-01-18T01:51:20Z,sf2Z14f1uiQ,24,Not available,1
3,Turk introduce coffee to Europe,Property Turkey,2023-01-13T15:27:11Z,yN5Me6dVPfc,18,Not available,0
4,The medieval history of coffee,Xeritern Perspectives,2023-12-28T06:30:07Z,D9tHLE8jAsE,57,Not available,0
5,How Coffee Spread to Europe,Lizpresso,2023-10-04T09:06:32Z,Ume_eeu73iY,1,Not available,0
6,The Shocking Story of Coffee’s Arrival in Europe!,Morning Roar,2024-08-28T13:17:31Z,TD6MEZn5QXc,9,Not available,0
7,How Coffee Came to Europe,Interesting World,2024-08-08T11:00:21Z,The-I3QbSCk,1,Not available,0
8,Coffee's European Journey: From Controversy to...,Canned History,2024-05-04T20:00:16Z,yrORfXb4k2c,80,Not available,1
9,The History of Coffee: From Goatherd's Discove...,Time Tales,2024-03-19T04:18:27Z,cQ79xb7dMXQ,5,Not available,1


Combine data frames

In [27]:
# Stack both coffee batches into a single frame with a fresh 0..n-1 index.
# NOTE(review): the same video can be returned by more than one query (Video ID
# xQ156y4TtJs shows up in the previews of both batches), so df_coffee can
# contain repeated videos; no de-duplication is done here.
df_coffee = pd.concat([final_data_coffee1, final_data_coffee2], ignore_index=True)
df_coffee.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,How humanity got hooked on coffee - Jonathan M...,TED-Ed,2024-03-05T16:01:19Z,xQ156y4TtJs,33183,Not available,560
1,Caffeine Chronicles: The Transformative Journe...,Get.factual,2023-12-14T16:00:30Z,YaTjjYBxAw8,1693,Not available,107
2,When Coffee was Illegal,Tasting History with Max Miller,2022-12-02T16:00:21Z,qvOTev_r4-8,30140,Not available,1655
3,Coffee and what it does to your body - BBC Wor...,BBC World Service,2024-05-16T12:00:06Z,62wEk02YKs0,30887,Not available,1892
4,The History of Coffee || Dr Roy Casagranda,Age of Empires,2024-10-06T17:47:01Z,3k2g-W2UDEg,10990,Not available,433
5,Story of the Origin of Coffee: The Legend of K...,Secrets of History,2023-03-04T02:45:45Z,Q5GtZRjhSxs,21,Not available,1
6,The History of: Coffee | The History Of,The History Of,2024-08-24T20:50:14Z,08Unj_j6x_g,4,Not available,3
7,Why Single-Origin Coffee Is So Expensive | So ...,Business Insider,2023-02-28T16:00:08Z,Dmpnrtey3YU,25607,Not available,1018
8,How to Brew Civil War Coffee with Sweet Potatoes,Tasting History with Max Miller,2023-06-27T15:00:21Z,ZfOanv9oUv4,42346,Not available,3244
9,Chocolate Instead of Coffee in Early America,Townsends,2023-04-10T18:50:03Z,LRPHNOqABlA,26247,Not available,1005


In [28]:
# (rows, columns) of the combined coffee dataset.
df_coffee.shape

(4783, 7)

In [29]:
# Save the combined coffee DataFrame to a CSV file in the working directory;
# index=False leaves the row index out of the file.
df_coffee.to_csv('data_coffee.csv', index=False)