In [2]:
import pandas as pd
import numpy as np
import requests

In [25]:
class YouTubeAPI:
    def __init__(self, api_key, channel_id):
        self.api_key = api_key
        self.channel_id = channel_id

    def get_channel_videos(self):
        """Return video_id,video_title,video_publishTime from all the video of YouTube channel"""

        # Initialize an empty DataFrame to store the results
    
        df = pd.DataFrame(columns=['video_ID', 'video_title', 'video_publishTime'])
    
        # Set the initial page token to an empty string
        page_token = ""

        # Set the maximum number of results per page
        max_results = 50

        # Set the base URL for the YouTube API search endpoint
        base_url = 'https://www.googleapis.com/youtube/v3/search'

        # Set a flag to indicate whether there are more pages of results
        more_results = True

        while more_results:
            # Construct the full URL with the appropriate query parameters
            url = f"{base_url}?key={self.api_key}&channelId={self.channel_id}&part=snippet&type=video&maxResults={max_results}&pageToken={page_token}"

            # Make the request to the YouTube API
            try:
                response = requests.get(url).json()
            except Exception as e:
                # Log the error and retry the request
                print(f"Error making request: {e}")
                break

            # Check if the API returned an error
            if 'error' in response:
                # Log the error and retry the request
                print(f"API error: {response['error']['message']}")
                break

            # Extract the relevant information from the response
            for video in response['items']:
                if video['id']['kind'] == 'youtube#video':
                    video_ID = video['id']['videoId']
                    video_title = video['snippet']['title']
                    video_publishTime = video['snippet']['publishTime'].split('T')[0]

                # Create a temporary DataFrame with the new data
                temp_df = pd.DataFrame({'video_ID': [video_ID],
                                        'video_title': [video_title],
                                        'video_publishTime': [video_publishTime]})

                # Concatenate the temporary DataFrame with df
                df = pd.concat([df, temp_df], ignore_index=True)

            # Check if there is a next page of results
            if 'nextPageToken' in response:
                page_token = response['nextPageToken']
            else:
                more_results = False
        return df
    
    def get_video_stats(self, video_id_list):
        # Initialize an empty DataFrame to store the results
        df = pd.DataFrame(columns=['video_Id','video_viewCount', 'video_likeCount', 'video_favoriteCount', 'video_commentCount'])

        # Set the base URL for the YouTube API video endpoint
        base_url = 'https://www.googleapis.com/youtube/v3/videos'

        # Split the list of video IDs into chunks of 50
        for i in range(0, len(video_id_list), 50):
            video_ids = ",".join(video_id_list[i:i+50])

            # Construct the full URL with the appropriate query parameters
            url = f"{base_url}?id={video_ids}&part=statistics&key={self.api_key}"

            # Make the request to the YouTube API
            try:
                response = requests.get(url).json()
            except Exception as e:
                # Log the error and retry the request
                print(f"Error making request: {e}")

            # Check if the API returned an error
            if 'error' in response:
                # Log the error and retry the request
                print(f"API error: {response['error']['message']}")

            # Extract the relevant information from the response
            for video in response['items']:
                video_ID = video['id']
                video_viewCount = video['statistics']['viewCount']
                video_likeCount = video['statistics']['likeCount']
                video_favoriteCount = video['statistics']['favoriteCount']
                video_commentCount = video['statistics']['commentCount']

                # Create a temporary DataFrame with the new data
                temp_df = pd.DataFrame({'video_Id': [video_ID],
                                        'video_viewCount': [video_viewCount],
                                        'video_likeCount': [video_likeCount],
                                        'video_favoriteCount': [video_favoriteCount],
                                        'video_commentCount': [video_commentCount]})

                # Concatenate the temporary DataFrame with df
                df = pd.concat([df, temp_df], ignore_index=True)

        return df
    def get_comments(self,video_id_list):
        comment_df = pd.DataFrame(columns=['video_id','comment_desc'])

        for v in video_id_list:
            url_v_c='https://www.googleapis.com/youtube/v3/commentThreads?key='+api_key+'&videoId='+v+'&part=snippet,replies&maxResults=1000'
        
            try:
                response_c = requests.get(url_v_c).json()
            except Exception as e:
                # Log the error and retry the request
                print(f"Error making request: {e}") 
                
            if 'error' in response_c:
                # Log the error and retry the request
                print(f"API error: {response_c['error']['message']}")
                
                
            # Initialize temp_df inside the loop
            temp_df = pd.DataFrame(columns=['video_id','comment_desc','comment_publish'])
            for c in response_c['items']:
                video_id = c['snippet']['videoId']
                comment_desc = c['snippet']['topLevelComment']['snippet']['textDisplay']
                temp_df = temp_df.append({'video_id':video_id,
                                         'comment_desc':comment_desc}, ignore_index=True)
            # Append temp_df to comment_df after the inner loop
            comment_df = comment_df.append(temp_df, ignore_index=True)
        return comment_df


                
        

In [26]:
api_key = "AIzaSyAcUG_OXZMh-Rpi_YoDU-OF8rZVc64yLxI"
channel_id = "UCKZozRVHRYsYHGEyNKuhhdA"
#UCKZozRVHRYsYHGEyNKuhhdA
#AIzaSyAcUG_OXZMh-Rpi_YoDU-OF8rZVc64yLxI

In [27]:
api = YouTubeAPI(api_key, channel_id)
videos = api.get_channel_videos()
videos.head()

Unnamed: 0,video_ID,video_title,video_publishTime
0,hHGlDhJkDKI,Walmart vs Small Local Vendors: American Expre...,2021-06-15
1,-_jOwk6GsXo,How to pick the next Stock Market GOLDMINE? (i...,2021-07-13
2,YCngR6uSEVo,How to build Brand loyalty like Volvo?,2021-06-11
3,A1k62pSQ7so,This simple idea changed FMCG Market forever |...,2021-05-11
4,-NtB3rf8paE,Zomato’s SECRET Plan to Become Profitable: Zom...,2021-11-19


In [28]:
videos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   video_ID           200 non-null    object
 1   video_title        200 non-null    object
 2   video_publishTime  200 non-null    object
dtypes: object(3)
memory usage: 4.8+ KB


In [29]:
videos.to_csv("channel_videos.csv")

In [30]:

L = []

for i in range(videos['video_ID'].size):
  L.append(videos['video_ID'][i])


print(L)

['hHGlDhJkDKI',
 '-_jOwk6GsXo',
 'YCngR6uSEVo',
 'A1k62pSQ7so',
 '-NtB3rf8paE',
 'wzsYAuxhxSw',
 'h7xd2PqtglQ',
 'h9dlG5WfDfI',
 'SGIKsejp7-0',
 '5JKTbU6RnA0',
 'BiO2WRPBzSA',
 '0wVAPXswE6E',
 '5vMbXNjgSjw',
 'cbTEAeteaI8',
 'X2f9IiflXaQ',
 'o-gGiyY9s9w',
 'eLT1To94V-Q',
 'WWpSZY1RAAM',
 'RoD3kThcl6U',
 'X-CLMLrHcqU',
 'WnfqgKTz3fk',
 'S5x54ZlNHX0',
 '-d7f71CFACo',
 '8rulgRgwHYc',
 'jRRpGLy0TBg',
 '8tBuAM0XHmY',
 'pK887oMqxRY',
 'vgV9HNuUxKY',
 '-DE7KXOK_qU',
 '8352JG8CCoE',
 'KNblG_-QOL0',
 'jp_CSPtSQWY',
 'vkrBQCQLQG0',
 'iQRe7OHtHXg',
 'kotO3sUv-xo',
 'TqZRgHOlKmU',
 'JqSeQ3c4_qI',
 'yhpVAki47RI',
 'fxjFr54LLOw',
 'tI9fRLNmNkY',
 'p2RCPyv95SE',
 'dVtnSPZn6BU',
 'K7uvd1Muc80',
 'glHQPkrcf8A',
 'TCboTQr8Ecg',
 'er752AxyziE',
 'urLoa8PkQpU',
 'OEB_UviHDq0',
 'lSF--3zgjKQ',
 'Z7P-t_yc8gE',
 'vkrBQCQLQG0',
 'kotO3sUv-xo',
 'JqSeQ3c4_qI',
 '8rulgRgwHYc',
 'yhpVAki47RI',
 'fxjFr54LLOw',
 'tI9fRLNmNkY',
 'iQRe7OHtHXg',
 '8tBuAM0XHmY',
 'p2RCPyv95SE',
 'dVtnSPZn6BU',
 'glHQPkrcf8A',
 'er752A

In [31]:
video_id_list = L
stats = api.get_video_stats(video_id_list)
print(stats.head())

stats.to_csv("channel_statistics.csv")

      video_Id video_viewCount video_likeCount video_favoriteCount  \
0  hHGlDhJkDKI          129409           10444                   0   
1  -_jOwk6GsXo          382926           21017                   0   
2  YCngR6uSEVo          335968           18130                   0   
3  A1k62pSQ7so          410373           25167                   0   
4  -NtB3rf8paE          283101           11762                   0   

  video_commentCount  
0                554  
1                823  
2                923  
3               1246  
4                550  


In [32]:
# Get the comments for a list of video IDs
comment = api.get_comments(video_id_list)

comment.head()
comment.to_csv("channel_comment.csv")

      video_id                                       comment_desc  \
0  hHGlDhJkDKI  amex is not a company , its a feeling<br>they ...   
1  hHGlDhJkDKI  american express is the future !! <br>use our ...   
2  hHGlDhJkDKI  Ganesh, you are brilliant, your business partn...   
3  hHGlDhJkDKI               This is philanthropy not capitalism.   
4  hHGlDhJkDKI  I need to discuss with you sir inbox ..Busines...   

  comment_publish  
0             NaN  
1             NaN  
2             NaN  
3             NaN  
4             NaN  
