In [1]:
pip install google-api-python-client 

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [None]:
from googleapiclient.discovery import build
from dotenv import load_dotenv
import os
import pandas as pd


# Load variables from a local .env file into the process environment
load_dotenv()

# The YouTube API key is read from the environment so it never appears in the notebook
API_KEY = os.getenv("API_KEY")
if not API_KEY:
    # Fail here with a clear message instead of on the first API request later on
    raise RuntimeError(
        "API_KEY is not set; add it to your .env file or environment "
        "before running this notebook."
    )

# Create the YouTube Data API v3 client used by the cells below
youtube = build('youtube', 'v3', developerKey=API_KEY)


In [113]:
# Channel IDs whose statistics and uploads are fetched by the cells below.
# IDs must be exact: stray whitespace or a missing character means the
# channel cannot be matched.

channel_ids = ["UCq-Fj5jknLsUf-MWSy4_brA", # T-Series (leading space removed)
               "UCupvZG-5ko_eiXAupbDfxWw", # CNN
               "UCvC4D8onUfXzvjTOM-dBfEA", # Marvel
               "UCMiJRAwDNSNzuYeN2uWa0pA", # Tech channel
               "UCYPvAwZP8pZhSMW8qs7cVCw", # India Today
               "UCb-xXZ7ltTvrh9C6DgB9H-Q", # Prasad Tech in Telugu
               "UCBi2mrWuNuyYy4gbM6fU18Q", # ABC News
               "UCnQC_G5Xsjhp9fEJKuIcrSw", # Ben Shapiro
               "UCXuqSBlHAE6Xw-yeJA0Tunw", # Linus Tech
               "UCsTcErHg8oDvUnTzoqsYeNw", # Unbox Therapy
               "UCOmcA3f_RrH6b9NmcNa4tdg", # CNET
               "UCOpcACMWblDls9Z6GERVi1A", # Screen Junkies
               "UCVtL1edhT8qqY-j2JIndMzg", # CineFix
               "UCLXo7UDZvByw2ixzpQCufnA", # Vox
               "UCvJJ_dzjViJCoLf5uKUTwoA", # CNBC
               # NOTE(review): this entry was 23 characters and lacked the "UC"
               # prefix every other ID has; the leading "U" was restored.
               # Confirm it is the intended channel.
               "UCoUxsWakJucWg46KW5RsvPw", # Financial
               "UCX6b17PVsYBQ0ip5gyeme-Q", # CrashCourse
               "UC4a-Gbdw7vOaccHmFo40b9g", # Khan Academy
               "UCsooa4yRKGN_zEE8iknghZA", # TED-Ed
               "UCq2E1mIwUKMWzCA4liA_XGQ", # Pick Up Limes
               "UCJ24N4O0bP7LGLBDvye7oCA", # Matt
               "UCSPYNpQ2fHv9HJ-q6MIMaPw", # Financial Diet
               "UCFKE7WVJfvaHW5q283SxchA", # Yoga with Adriene
              ]


In [None]:


def get_channel_stats(youtube, channel_ids):
    """
    Retrieve channel statistics from the YouTube API.

    IDs are stripped of surrounding whitespace and requested in batches of 50
    (the same batch size used by get_video_details). Channels the API does not
    return (for example, invalid IDs) are simply absent from the result.

    Parameters:
    youtube (googleapiclient.discovery.Resource): An authenticated instance of the YouTube API.
    channel_ids (list): A list of YouTube channel IDs for which to retrieve statistics.
    Returns:
    list: A list of dictionaries, where each dictionary contains the following keys:
          - Channel_name: The name of the YouTube channel.
          - Subscribers: The number of subscribers to the channel, or None when
            the API response does not include a subscriber count.
          - Views: The total number of views on the channel.
          - Total_videos: The total number of videos uploaded to the channel.
          - playlist_id: The ID of the channel's upload playlist.
          An empty list is returned when channel_ids is empty or nothing matches.
    """
    batch_size = 50

    # Surrounding whitespace (e.g. " UC...") would stop an ID from matching its
    # channel, so normalise the IDs and drop blank entries before querying.
    cleaned_ids = [cid.strip() for cid in channel_ids if cid and cid.strip()]

    all_data = []
    for start in range(0, len(cleaned_ids), batch_size):
        request = youtube.channels().list(
                    part='snippet,contentDetails,statistics',
                    id=','.join(cleaned_ids[start:start + batch_size]))
        response = request.execute()

        # 'items' can be missing from the response when no channel matched
        for item in response.get('items', []):
            statistics = item['statistics']
            data = dict(Channel_name = item['snippet']['title'],
                        # a channel's subscriber count may be omitted (hidden)
                        Subscribers = statistics.get('subscriberCount'),
                        Views = statistics['viewCount'],
                        Total_videos = statistics['videoCount'],
                        playlist_id = item['contentDetails']['relatedPlaylists']['uploads'])
            all_data.append(data)

    return all_data


In [None]:
# Fetch the statistics for every channel listed in channel_ids
channel_statistics = get_channel_stats(youtube=youtube, channel_ids=channel_ids)

# Tabulate the per-channel dictionaries, one row per returned channel
channel_data = pd.DataFrame(data=channel_statistics)

channel_data

Unnamed: 0,Channel_name,Subscribers,Views,Total_videos,playlist_id
0,ABC News,18300000,16356416062,98666,UUBi2mrWuNuyYy4gbM6fU18Q
1,CineFix - IGN Movies and TV,3720000,1030413778,2231,UUVtL1edhT8qqY-j2JIndMzg
2,CNN,17800000,17539093528,170258,UUupvZG-5ko_eiXAupbDfxWw
3,CNET,3960000,1899160408,23220,UUOmcA3f_RrH6b9NmcNa4tdg
4,Matt D'Avella,3910000,311765298,435,UUJ24N4O0bP7LGLBDvye7oCA
5,The Financial Diet,1230000,156608748,1344,UUSPYNpQ2fHv9HJ-q6MIMaPw
6,Vox,12500000,3670018009,1788,UULXo7UDZvByw2ixzpQCufnA
7,Prasadtechintelugu,4750000,1795326121,4670,UUb-xXZ7ltTvrh9C6DgB9H-Q
8,Ben Shapiro,7230000,4285799966,8107,UUnQC_G5Xsjhp9fEJKuIcrSw
9,Marvel Entertainment,21200000,6477131431,9469,UUvC4D8onUfXzvjTOM-dBfEA


In [117]:
# Collect video IDs from each channel's uploads playlist.
# Only the first page of each playlist (at most 50 items) is read; there is no pagination.
video_ids = []

for uploads_playlist_id in channel_data['playlist_id']:
    page = youtube.playlistItems().list(
        part='snippet',
        playlistId=uploads_playlist_id,
        maxResults=50,
    ).execute()

    # Each playlist item carries the ID of the video it points to
    video_ids.extend(
        entry['snippet']['resourceId']['videoId'] for entry in page['items']
    )

In [118]:
def get_video_details(youtube, video_ids):
    """
    Fetch title, publication time, category and engagement counts for videos.

    The IDs are sent to the API in consecutive batches of 50 per request.

    Parameters:
    youtube (googleapiclient.discovery.Resource): An authenticated instance of the YouTube API.
    video_ids (list): The YouTube video IDs to look up.

    Returns:
    list: One dictionary per video returned by the API, with the keys:
          - Title: The title of the video.
          - Published_date: The publication timestamp reported by the API.
          - Views: The view count (0 when the API omits it).
          - Likes: The like count (0 when the API omits it).
          - Categories: The category ID of the video.
          - Comments: The comment count (0 when the API omits it).
    """
    batch_size = 50
    collected = []
    offset = 0

    while offset < len(video_ids):
        chunk = video_ids[offset:offset + batch_size]
        offset += batch_size

        payload = youtube.videos().list(
            part="snippet,statistics",
            id=",".join(chunk),
        ).execute()

        # A response without 'items' contributes no rows
        for entry in payload.get("items", []):
            snippet = entry["snippet"]
            row = {
                "Title": snippet["title"],
                "Published_date": snippet["publishedAt"],
            }
            counts = entry["statistics"]
            # Individual counters can be absent from 'statistics'; fall back to 0
            row["Views"] = counts.get("viewCount", 0)
            row["Likes"] = counts.get("likeCount", 0)
            row["Categories"] = snippet["categoryId"]
            row["Comments"] = counts.get("commentCount", 0)
            collected.append(row)

    return collected


In [None]:
# Look up the details of every collected video ID
video_details = get_video_details(youtube=youtube, video_ids=video_ids)

# Tabulate the per-video dictionaries, one row per video
video_data = pd.DataFrame(data=video_details)

video_data.head(n=100)

Unnamed: 0,Title,Published_date,Views,Likes,Categories,Comments
0,"Nightline Full Broadcast — Friday, March 14, 2025",2025-03-15T10:33:16Z,1025,89,25,32
1,"LIVE: ABC News Live - Friday, March 14",2025-03-15T03:58:13Z,87905,762,25,0
2,Space expert on the SpaceX launch of 4 astrona...,2025-03-15T03:45:02Z,2985,34,25,6
3,"ABC News Live Prime: Friday, March 14, 2025",2025-03-15T03:06:48Z,13193,120,25,31
4,SpaceX mission launches to ISS to bring astron...,2025-03-15T03:06:16Z,9089,408,25,42
...,...,...,...,...,...,...
95,Top 10 International Feature Oscar Winners of ...,2024-03-08T14:00:29Z,68312,2075,24,231
96,Rear Window Is Hitchcock’s Most Technically Im...,2024-03-03T14:00:08Z,23426,633,24,116
97,Denis Villeneuve Picks a Favorite Shot From Ea...,2024-02-26T15:00:03Z,709020,19367,24,639
98,Monty Python And The Holy Grail Gleefully Skew...,2024-02-25T14:15:04Z,29352,685,24,158


In [121]:
# Replace the category IDs in video_data['Categories'] with category titles.
categories = video_data['Categories']

# Query each distinct ID once instead of issuing one request per video row.
# Only digit strings are treated as IDs, so re-running this cell after the
# column already holds titles does not send those titles to the API.
category_ids = sorted(
    {str(value) for value in categories.dropna() if str(value).isdigit()},
    key=int,
)

category_names = {}
if category_ids:
    response = youtube.videoCategories().list(
        part='snippet',
        id=','.join(category_ids)
    ).execute()

    # Build an ID -> title lookup; 'items' may be missing if nothing matched
    category_names = {
        item['id']: item['snippet']['title']
        for item in response.get('items', [])
    }

# Map by ID rather than by position, so a category the API does not return
# cannot shift the titles of later rows; unresolved values are left as they are.
video_data['Categories'] = categories.map(
    lambda value: category_names.get(str(value), value)
)

In [126]:
# Show every row when a DataFrame is displayed (no row truncation)
pd.set_option('display.max_rows', None)

# Write the collected video data to a CSV file, leaving out the index column
video_data.to_csv(path_or_buf='youtube_data.csv', index=False)
