# **Data Collection YouTube**

In [2]:
import pandas as pd
from googleapiclient.discovery import build
from datetime import datetime, timedelta
import config

In [11]:
# API setup
api_key = config.key2
youtube = build('youtube', 'v3', developerKey=api_key)

In [4]:
def fetch_videos(query, max_results, days_back):
    all_videos = []
    end_date = datetime.utcnow()
    start_date = end_date - timedelta(days=days_back)

    while max_results > 0:
        published_before = end_date.isoformat("T") + "Z"
        published_after = start_date.isoformat("T") + "Z"

        search_response = youtube.search().list(
            q=query,
            part='snippet',
            maxResults=50,  # Max results per page allowed by API
            type='video',
            publishedBefore=published_before,
            publishedAfter=published_after
        ).execute()

        video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]
        if video_ids:
            video_response = youtube.videos().list(
                part='snippet,statistics',
                id=','.join(video_ids)
            ).execute()

            for video in video_response.get('items', []):
                all_videos.append({
                    'Title': video['snippet']['title'],
                    'Channel': video['snippet']['channelTitle'],
                    'Published At': video['snippet']['publishedAt'],
                    'Video ID': video['id'],
                    'Likes': video['statistics'].get('likeCount', 'Not available'),
                    'Dislikes': video['statistics'].get('dislikeCount', 'Not available'),
                    'Comments': video['statistics'].get('commentCount', 'Not available')
                })

        max_results -= len(video_ids)
        end_date = start_date
        start_date -= timedelta(days=days_back)

        if not video_ids:
            break 

    return pd.DataFrame(all_videos)

## **Collect data  - Tea**

In [5]:
search_queries_tea1 = [
    "history of tea",
    "origin of tea drinking",
    "first tea plantation",
    "tea trade history",
    "ancient tea culture",
]

In [6]:
search_queries_tea2 = [
    "tea in ancient China",
    "introduction of tea to Europe",
    "British tea culture history",
    "tea and the silk road",
    "oldest tea varieties",
]

In [8]:
# Collect data for each query
final_data_tea1 = pd.DataFrame()
for query in search_queries_tea1:
    print(f"Fetching data for query: {query}")
    data_df = fetch_videos(query, max_results=1000, days_back=730)
    final_data_tea1 = pd.concat([final_data_tea1, data_df], ignore_index=True)

Fetching data for query: history of tea
Fetching data for query: origin of tea drinking
Fetching data for query: first tea plantation
Fetching data for query: tea trade history
Fetching data for query: ancient tea culture


In [9]:
final_data_tea1.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,The History Of Tea - Historia [Animated History],Mechanix Illustrated,2023-02-19T21:28:20Z,TX78tKMdJsA,287,Not available,27
1,Tea: How Britain Screwed India and China,Elliot Sang,2023-09-08T17:00:11Z,2-1obrIBYN0,5730,Not available,357
2,The History of Tea | The Origin Of Tea | Tea's...,KidsMathTV,2023-01-09T11:03:57Z,N6uzMv9XpKY,108,Not available,0
3,Robert Fortune on How Tea Was Stolen From the ...,SLICE Full Doc,2024-07-03T12:30:21Z,YbqtP-lH734,1552,Not available,192
4,How did tea become so popular in Great Britain...,Absolute History,2023-04-05T21:00:24Z,uhLl7YxQnjA,7291,Not available,85
5,A brief history of tea,Neom,2022-10-25T19:00:04Z,9wfj-E5s-ws,94,Not available,65
6,Commoner Queens of England,History Tea Time with Lindsay Holiday,2024-04-16T16:00:05Z,8LEoH6SSCwM,6196,Not available,556
7,"Anne, Queen of Great Britain & Ireland",History Tea Time with Lindsay Holiday,2024-09-03T16:00:41Z,jcNBX4UvHmk,3777,Not available,222
8,The History Of Afternoon Tea | Documentary,HistoricHospitality,2024-02-23T17:00:11Z,caFzG39lBmE,192,Not available,35
9,An Introduction to Tea in East Asia - ASMR His...,ASMR Historian,2024-02-28T07:00:10Z,TUvEMzi7r10,67,Not available,13


In [12]:
# Collect data for each query
final_data_tea2 = pd.DataFrame()
for query in search_queries_tea2:
    print(f"Fetching data for query: {query}")
    data_df = fetch_videos(query, max_results=1000, days_back=730)
    final_data_tea2 = pd.concat([final_data_tea2, data_df], ignore_index=True)

Fetching data for query: tea in ancient China
Fetching data for query: introduction of tea to Europe
Fetching data for query: British tea culture history
Fetching data for query: tea and the silk road
Fetching data for query: oldest tea varieties


In [13]:
final_data_tea2.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,Traditional tea processing techniques and asso...,UNESCO,2022-11-29T15:36:01Z,Oe6kGFmFLnc,Not available,Not available,Not available
1,"Uncut Chinese Tea Ceremony - Wang Fu Teahouse,...",Jesse's Teahouse,2023-05-16T12:49:41Z,w8PXCOBg4EY,2613,Not available,82
2,The ancient ritual of the Chinese tea ceremony...,ABC News (Australia),2023-03-30T05:00:16Z,PWWOeA1UVfA,51,Not available,Not available
3,The History of Tea | The Origin Of Tea | Tea's...,KidsMathTV,2023-01-09T11:03:57Z,N6uzMv9XpKY,108,Not available,0
4,[ASMR] Traditional Chinese Tea Ceremony,PaprTape,2023-02-02T23:54:55Z,PIkvN5B1y98,1481,Not available,85
5,Ancient Chinese Tea Art: Exploring the Mysteri...,tenmoku glaze,2023-02-17T09:57:21Z,24v3yigcy24,19,Not available,2
6,Unveiling Ancient Chinese Tea Culture,Rewind History,2024-08-21T12:00:18Z,mrINYr3cGrk,0,Not available,0
7,What Did People Actually Eat In Ancient China?,Pages In History,2023-03-10T22:00:52Z,j4tcDm6m4xk,250,Not available,21
8,The Fascinating History of Tea: From Ancient C...,Easy2learn,2024-09-29T21:19:46Z,SgtMFk-7wt0,1,Not available,0
9,Exploring the Elegance of Ancient Chinese Tea ...,TheOneTimeTraveller,2023-12-28T13:15:02Z,ZrISiu4WqXM,64,Not available,0


Combine data frames

In [14]:
df_tea = pd.concat([final_data_tea1, final_data_tea2], ignore_index=True)
df_tea.head(10)

Unnamed: 0,Title,Channel,Published At,Video ID,Likes,Dislikes,Comments
0,The History Of Tea - Historia [Animated History],Mechanix Illustrated,2023-02-19T21:28:20Z,TX78tKMdJsA,287,Not available,27
1,Tea: How Britain Screwed India and China,Elliot Sang,2023-09-08T17:00:11Z,2-1obrIBYN0,5730,Not available,357
2,The History of Tea | The Origin Of Tea | Tea's...,KidsMathTV,2023-01-09T11:03:57Z,N6uzMv9XpKY,108,Not available,0
3,Robert Fortune on How Tea Was Stolen From the ...,SLICE Full Doc,2024-07-03T12:30:21Z,YbqtP-lH734,1552,Not available,192
4,How did tea become so popular in Great Britain...,Absolute History,2023-04-05T21:00:24Z,uhLl7YxQnjA,7291,Not available,85
5,A brief history of tea,Neom,2022-10-25T19:00:04Z,9wfj-E5s-ws,94,Not available,65
6,Commoner Queens of England,History Tea Time with Lindsay Holiday,2024-04-16T16:00:05Z,8LEoH6SSCwM,6196,Not available,556
7,"Anne, Queen of Great Britain & Ireland",History Tea Time with Lindsay Holiday,2024-09-03T16:00:41Z,jcNBX4UvHmk,3777,Not available,222
8,The History Of Afternoon Tea | Documentary,HistoricHospitality,2024-02-23T17:00:11Z,caFzG39lBmE,192,Not available,35
9,An Introduction to Tea in East Asia - ASMR His...,ASMR Historian,2024-02-28T07:00:10Z,TUvEMzi7r10,67,Not available,13


In [15]:
df_tea.shape

(4896, 7)

In [16]:
# Save the DataFrame to a CSV file
df_tea.to_csv('final_tea_data.csv', index=False)