In [None]:
!pip install google-api-python-client
import pandas as pd

# Position of the first title to process; every title before it is skipped.
# NOTE(review): presumably the point where an earlier run stopped - confirm
# before changing.
START_INDEX = 1187

# Read the CSV file and keep the movie titles from START_INDEX onwards
df = pd.read_csv('finalprototype.csv')
movie_titles = df['title'].tolist()[START_INDEX:]

from googleapiclient.discovery import build

# Developer key passed to the YouTube client; it is blank here and has to be
# filled in. Author's note: each API key can only extract ~30 to 100 movies.
API_KEY = ''

# Client object through which every YouTube Data API v3 request below is made
youtube = build(serviceName='youtube', version='v3', developerKey=API_KEY)


from googleapiclient.errors import HttpError
# Get a YouTube video's engagement data (view count, like count, comment count)
def get_video_engagement(video_id):
    """Fetch the engagement statistics of a single YouTube video.

    Args:
        video_id: ID of the video to look up.

    Returns:
        A ``(view_count, like_count, comment_count)`` tuple of ints. A
        statistic that is missing from the response counts as 0, and
        ``(0, 0, 0)`` is returned when the response holds no item at all.
    """
    request = youtube.videos().list(
        part='statistics',
        id=video_id
    )
    response = request.execute()

    # Test the list itself, not just the presence of the key: an 'items' key
    # holding an empty list would otherwise fail on the [0] lookup below.
    items = response.get('items')
    if not items:
        return 0, 0, 0

    video_stats = items[0].get('statistics', {})
    view_count = int(video_stats.get('viewCount', 0))
    like_count = int(video_stats.get('likeCount', 0))
    comment_count = int(video_stats.get('commentCount', 0))

    return view_count, like_count, comment_count

# Get a YouTube video's top comments (10 by default)
def get_video_comments(video_id, max_results=10):
    """Return the text of a video's top-level comments, ordered by relevance.

    Args:
        video_id: ID of the video whose comment threads are requested.
        max_results: Value passed to the API as ``maxResults``.

    Returns:
        A list with the ``textDisplay`` of each returned top-level comment,
        or an empty list when the API reports that comments are disabled.

    Raises:
        HttpError: For any API failure other than disabled comments.
    """
    thread_query = youtube.commentThreads().list(
        videoId=video_id,
        part='snippet',
        textFormat='plainText',
        order='relevance',
        maxResults=max_results,
    )

    try:
        payload = thread_query.execute()
    except HttpError as err:
        # Only a closed comment section is tolerated; everything else is
        # handed back to the caller untouched.
        if 'commentsDisabled' not in str(err):
            raise
        print(f'Comments are disabled for the video with ID: {video_id}')
        return []

    texts = []
    for thread in payload['items']:
        top_level = thread['snippet']['topLevelComment']
        texts.append(top_level['snippet']['textDisplay'])
    return texts


# Create lists to store information. They grow in lockstep: one entry per
# movie for which a video was found, so index i describes the same movie in
# every list.
matched_movie_titles = []
titles = []
video_ids = []
views = []
likes = []
comments = []
top_comments_list = []

try:
    for movie_title in movie_titles:
        # Perform a search for the movie title, keeping only the first hit
        request = youtube.search().list(
            q=movie_title,
            part='id,snippet',
            type='video',
            maxResults=1)
        response = request.execute()

        # Skip the movie when the search has no hit; checking only for the
        # 'items' key would let an empty list through to the [0] lookup.
        items = response.get('items')
        if not items:
            print(f'No YouTube video found for: {movie_title}')
            continue

        # Extract relevant information from the API response
        video = items[0]
        title = video['snippet']['title']
        video_id = video['id']['videoId']

        # Get video engagement metrics
        view_count, like_count, comment_count = get_video_engagement(video_id)

        # Get top comments
        top_comments = get_video_comments(video_id, max_results=10)

        # Print statements
        print(f'Movie Title: {movie_title}')
        print(f'YouTube Video Title: {title}')
        print(f'YouTube Video ID: {video_id}')
        print(f'Views: {view_count}')
        print(f'Likes: {like_count}')
        print(f'Comments: {comment_count}')
        print('Top Comments:')
        for i, comment in enumerate(top_comments, start=1):
            print(f'Top Comment #{i}: {comment}')
        print('---')

        # Store information in lists only once every request for this movie
        # has succeeded. The movie title is recorded with its own row so a
        # skipped movie cannot shift the MovieTitle column.
        matched_movie_titles.append(movie_title)
        titles.append(title)
        video_ids.append(video_id)
        views.append(view_count)
        likes.append(like_count)
        comments.append(comment_count)
        top_comments_list.append(top_comments)
finally:
    # Runs even when the loop is cut short by an error, so the rows collected
    # so far are still written out; the error itself keeps propagating.

    # Create a new DataFrame with the collected information
    result_df = pd.DataFrame({
        'MovieTitle': matched_movie_titles,
        'YouTube Video Title': titles,
        'YouTube Video ID': video_ids,
        'Views': views,
        'Likes': likes,
        'Comments': comments,
        'Top 10 Comments': ['|, '.join(top) for top in top_comments_list]})

    # Save the DataFrame to a new CSV file
    result_df.to_csv('Youtube_Movie_Data.csv', index=False)