In [13]:
import pandas as pd
from googleapiclient.discovery import build
import googleapiclient.errors

# Hindi: 'hi'
# Tamil: 'ta'
# Telugu: 'te'
# Kannada: 'kn'
# Malayalam: 'ml'
# Bengali: 'bn'
# Marathi: 'mr'
# Gujarati: 'gu'
# Punjabi: 'pa'
# Odia: 'or'

# Language-wise population distribution in India based on the 2011 Census data:
# 1 Hindi  52,83,47,193 43.63 yes
# 2 Bengali 9,72,37,669 8.03 yes
# 3 Marathi 8,30,26,680 6.86 yes
# 4 Telugu 8,11,27,740 6.70 yes
# 5 Tamil 6,90,26,881 5.70 yes
# 6 Gujarati 5,54,92,554 4.58 yes
# 7 Urdu 5,07,72,631 4.19 yes
# 8 Kannada 4,37,06,512 3.61 yes
# 9 Odia 3,75,21,324 3.10 yes
# 10 Malayalam 3,48,38,819 2.88 yes

# List of API keys
api_keys = ["AIzaSyDPkNg5OIyRc2M3i2eOyR8hMvU5MTH-N7U"]  

# Function to create a new YouTube API client using the next available API key
def get_youtube_client(api_keys):
    for api_key in api_keys:
        try:
            return build('youtube', 'v3', developerKey=api_key)
        except googleapiclient.errors.HttpError:
            print(f"API key {api_key} is not valid or has reached its quota.")
    raise Exception("All API keys have reached their quota.")

# Set up the YouTube Data API client
youtube = get_youtube_client(api_keys)

# List of keywords to search for
keywords = ['bjp 2019 ప్రచారం']
published_after = '2019-01-01T00:00:00Z'
published_before = '2019-04-10T00:00:00Z'

try:
    search_result_list = []
    comments_list = []

    for keyword in keywords:
        search_response = youtube.search().list(
            q=keyword,
            part='snippet',
            type='video',
            regionCode='IN',
            publishedAfter=published_after,
            publishedBefore=published_before,
            relevanceLanguage='te',
            maxResults=50
        ).execute()

        while search_response.get('items', []):
            for search_result in search_response.get('items', []):            
                yt_search_keyword = keywords
                ytsearch_regionCode = 'IN'
                ytsearch_kind = search_result['kind']
                ytsearch_etag = search_result['etag']
                ytsearch_video_kind = search_result['id']['kind']
                video_id = search_result['id']['videoId']
                yt_channelId = search_result['snippet']['channelId']
                yt_channelTitle = search_result['snippet']['channelTitle']
                ytvideo_publishedAt = search_result['snippet']['publishedAt']
                ytvideo_publishTime = search_result['snippet']['publishTime']
                yt_video_title = search_result['snippet']['title']
                yt_video_description = search_result['snippet']['description']
                yt_video_liveBroadcastContent = search_result['snippet']['liveBroadcastContent']

                search_result_data = {
                    'yt_search_keyword': yt_search_keyword,
                    'ytsearch_regionCode': ytsearch_regionCode,
                    'ytsearch_kind': ytsearch_kind,
                    'ytsearch_etag': ytsearch_etag,
                    'ytsearch_video_kind': ytsearch_video_kind,
                    'video_id': video_id,
                    'yt_channelId': yt_channelId,
                    'yt_channelTitle': yt_channelTitle,
                    'ytvideo_publishedAt': ytvideo_publishedAt,
                    'ytvideo_publishTime': ytvideo_publishTime,
                    'yt_video_title': yt_video_title,
                    'yt_video_description': yt_video_description,
                    'yt_video_liveBroadcastContent': yt_video_liveBroadcastContent
                }
                search_result_list.append(search_result_data)

                try:
                    comment_response = youtube.commentThreads().list(
                        part='snippet',
                        videoId=video_id,
                        textFormat='plainText',
                        maxResults=10000
                    ).execute()

                    comments_list.append(comment_response)

                    for comment_result in comment_response.get('items', []):
                        commentThread_kind = comment_result['kind']
                        commentThread_etag = comment_result['etag']
                        comment_id = comment_result['snippet']['topLevelComment']['id']
                        comment_etag = comment_result['snippet']['topLevelComment']['etag']
                        comment_kind = comment_result['snippet']['topLevelComment']['kind']
                        video_id = comment_result['snippet']['topLevelComment']['snippet']['videoId']
                        comment_textDisplay = comment_result['snippet']['topLevelComment']['snippet']['textDisplay']
                        comment_textOriginal = comment_result['snippet']['topLevelComment']['snippet']['textOriginal']
                        comment_authorDisplayName = comment_result['snippet']['topLevelComment']['snippet']['authorDisplayName']
                        comment_authorChannelUrl = comment_result['snippet']['topLevelComment']['snippet']['authorChannelUrl']
                        comment_canRate = comment_result['snippet']['topLevelComment']['snippet']['canRate']
                        comment_viewerRating = comment_result['snippet']['topLevelComment']['snippet']['viewerRating']
                        comment_likeCount = comment_result['snippet']['topLevelComment']['snippet']['likeCount']
                        comment_publishedAt = comment_result['snippet']['topLevelComment']['snippet']['publishedAt']
                        comment_updatedAt = comment_result['snippet']['topLevelComment']['snippet']['updatedAt']
                        comment_canReply = comment_result['snippet']['canReply']
                        comment_totalReplyCount = comment_result['snippet']['totalReplyCount']   
                        comment_isPublic = comment_result['snippet']['isPublic']
                        
                        comment_data = {
                        'commentThread_kind': commentThread_kind,
                        'commentThread_etag': commentThread_etag,
                        'comment_id': comment_id,
                        'comment_etag': comment_etag,
                        'comment_kind': comment_kind,
                        'video_id': video_id,
                        'comment_textDisplay': comment_textDisplay,
                        'comment_textOriginal': comment_textOriginal,
                        'comment_authorDisplayName': comment_authorDisplayName,
                        'comment_authorChannelUrl': comment_authorChannelUrl,
                        'comment_canRate': comment_canRate,
                        'comment_viewerRating': comment_viewerRating,
                        'comment_likeCount': comment_likeCount,
                        'comment_publishedAt': comment_publishedAt,
                        'comment_updatedAt': comment_updatedAt,
                        'comment_canReply': comment_canReply,
                        'comment_totalReplyCount': comment_totalReplyCount,
                        'comment_isPublic': comment_isPublic
                        }
                        comments_list.append(comment_data)

                except googleapiclient.errors.HttpError as e:
                    error_message = e.content.decode('utf-8') if isinstance(e.content, bytes) else str(e)
                    if e.resp.status == 403 and "commentsDisabled" in error_message:
                        print(f"Comments are disabled for video with videoId: {video_id}")
                    else:
                        print(f"An error occurred while processing videoId: {video_id} - {error_message}")

            if 'nextPageToken' in search_response:
                next_page_token = search_response['nextPageToken']
                search_response = youtube.search().list(
                    q=keyword,
                    part='snippet',
                    type='video',
                    regionCode='IN',
                    relevanceLanguage='te',
                    maxResults=50,
                    pageToken=next_page_token
                ).execute()
            else:
                break

except googleapiclient.errors.HttpError as e:
    print("An error occurred:", e)

# Create DataFrames from the lists
df1 = pd.DataFrame(search_result_list)
df2 = pd.DataFrame(comments_list)

yt_search_df = df1[df1.video_id.notnull()]
yt_comments_df = df2[df2.video_id.notnull()].drop(["kind", "etag","nextPageToken", "pageInfo","items"], axis = 1)

print(yt_search_df.shape[0])
print(yt_search_df.shape[1])
print(yt_comments_df.shape[0])
print(yt_comments_df.shape[1])

master_df=pd.merge(yt_comments_df, yt_search_df, on='video_id', how='left')
print(master_df.shape[0])
print(master_df.shape[1])

yt_search_df.to_csv("D:\\0_SHU_31018584\\Data\\Raw_files_Split\\youtube_search_result_73.csv",index=False)
yt_comments_df.to_csv("D:\\0_SHU_31018584\\Data\\Raw_files_Split\\youtube_comments_result_73.csv",index=False)
master_df.to_csv("D:\\0_SHU_31018584\\Data\\Final_data\\youtube_apidata_73.csv",index=False)

Comments are disabled for video with videoId: 2JlryqLYOWM
Comments are disabled for video with videoId: h_JBp_aw2Ug
Comments are disabled for video with videoId: VHBF3tTg8W8
Comments are disabled for video with videoId: AWN9gNwn5js
Comments are disabled for video with videoId: zJDD-ook5y0
Comments are disabled for video with videoId: ykKIcLZkRaM
Comments are disabled for video with videoId: QmRZl_67xuo
Comments are disabled for video with videoId: IpdERco_08w
Comments are disabled for video with videoId: v5KU7NLFO4s
Comments are disabled for video with videoId: mUeg1hLdLwU
Comments are disabled for video with videoId: 7v_LBhK3-Gk
Comments are disabled for video with videoId: tFmrAakyz0s
Comments are disabled for video with videoId: PF0_C0KNChU
Comments are disabled for video with videoId: LHQOsVo3-O4
Comments are disabled for video with videoId: tFmrAakyz0s
Comments are disabled for video with videoId: f6UHW48_3J4
Comments are disabled for video with videoId: yuNgWr_46_A
Comments are d

In [2]:
master_df.to_csv("D:\\0_SHU_31018584\\Data\\Final_data\\youtube_apidata_37.csv",index=False,escapechar='\\')