In [2]:
import pandas as pd
from googleapiclient.discovery import build

# Replace 'YOUR_API_KEY' with your actual API key.
api_key = 'Your key'

# Create the YouTube service using the API key
youtube = build('youtube', 'v3', developerKey=api_key)

def get_videos_from_year(year, num_videos):
    # Calculate the date range for the given year
    published_after = f'{year}-01-01T00:00:00Z'
    published_before = f'{year + 1}-01-01T00:00:00Z'
    
    # Initialize a list to store the videos
    videos = []

    # Keep track of the next page token for pagination
    next_page_token = None

    # Retrieve up to `num_videos` videos from the given year
    while len(videos) < num_videos:
        # Use the search.list() method to search for videos within the date range
        search_request = youtube.search().list(
            part='id,snippet',
            type='video',
            publishedAfter=published_after,
            publishedBefore=published_before,
            maxResults=50,  # Maximum number of results per request (up to 50)
            pageToken=next_page_token  # Use the next page token for pagination
        )
        
        # Execute the search request and get the response
        search_response = search_request.execute()
        
        # Extract video IDs and publication dates from the search results
        video_ids = []
        for item in search_response['items']:
            video_id = item['id']['videoId']
            published_at = item['snippet']['publishedAt']
            videos.append({'id': video_id, 'published_at': published_at})
            video_ids.append(video_id)
        
        # Use the videos.list() method to retrieve details about the videos
        if video_ids:
            video_request = youtube.videos().list(
                part='contentDetails,statistics',
                id=','.join(video_ids)
            )
            
            # Execute the video request and get the response
            video_response = video_request.execute()
            
            # Add length, views, likes, dislikes, and comments to the videos list
            for video in video_response['items']:
                # Find the video in the list by ID and update its information
                for video_info in videos:
                    if video_info['id'] == video['id']:
                        # Retrieve and convert views, likes, dislikes, and comments to integers
                        views = int(video['statistics'].get('viewCount', 0))
                        likes = int(video['statistics'].get('likeCount', 0))
                        dislikes = int(video['statistics'].get('dislikeCount', 0))
                        comments = int(video['statistics'].get('commentCount', 0))
                        
                        # Retrieve length (duration) in ISO 8601 format
                        length = video['contentDetails'].get('duration')
                        
                        # Only add videos with non-zero views and valid length
                        if views > 0 and length:
                            video_info['views'] = views
                            video_info['likes'] = likes
                            video_info['dislikes'] = dislikes
                            video_info['comments'] = comments
                            video_info['length'] = length
                        else:
                            # Remove video_info if it doesn't meet criteria
                            videos.remove(video_info)
        
        # Get the next page token for pagination
        next_page_token = search_response.get('nextPageToken')
        
        # If there are no more pages, break out of the loop
        if not next_page_token:
            break
    
    # Return the list of videos (limited to `num_videos`)
    return videos[:num_videos]

# Define the range of years you want to retrieve videos from
years = range(2019, 2024)

# Initialize a dictionary to store the DataFrames for each year
videos_by_year_df = {}

# Retrieve 500 videos from each year in the specified range and convert to DataFrame
for year in years:
    videos = get_videos_from_year(year, 500)
    # Convert the list of videos to a pandas DataFrame
    df = pd.DataFrame(videos)
    # Store the DataFrame in the dictionary
    videos_by_year_df[year] = df
    print(f'Fetched {len(df)} videos for the year {year}')

# Output the dictionary of DataFrames
videos_by_year_df


Fetched 194 videos for the year 2019
Fetched 155 videos for the year 2020
Fetched 160 videos for the year 2021
Fetched 225 videos for the year 2022
Fetched 196 videos for the year 2023


{2019:               id          published_at    views   likes  dislikes  comments  \
 0    SumDHcnCRuU  2019-10-30T11:15:00Z  8752785  249401         0     11035   
 1    J1Yv24cM2os  2019-03-10T15:17:38Z  6004648  154414         0     13352   
 2    lBCcmedmbhM  2019-01-11T21:43:02Z  2112655   35947         0      1208   
 3    0LS0Z8fgiII  2019-07-20T17:58:46Z  6020587   74426         0      3310   
 4    B3C6XVjpSJ4  2019-07-17T22:05:07Z  2468658   39187         0      2542   
 ..           ...                   ...      ...     ...       ...       ...   
 189  rBdV1iam5Ts  2019-03-03T05:54:10Z     6098      63         0         6   
 190  aDYZuHLfQmw  2019-04-30T02:00:01Z   752581    4366         0        87   
 191  NSBabkVHeNE  2019-07-18T06:10:22Z  1587105    5293         0       105   
 192  79UGt02ow5o  2019-02-22T22:25:47Z     3584      65         0        11   
 193  4LomZbNJnZQ  2019-03-16T02:30:00Z  5163034   24660         0       526   
 
        length  
 0     PT3M15S 

In [3]:
for year in years:
    videos = get_videos_from_year(year, 500)
    # Convert the list of videos to a pandas DataFrame
    df = pd.DataFrame(videos)
    # Store the DataFrame in the dictionary
    videos_by_year_df[year] = df
    print(f'Fetched {len(df)} videos for the year {year}')
    
    # Export the DataFrame to a CSV file
    file_path = f'videos_{year}.csv'
    df.to_csv(file_path, index=False)
    print(f'DataFrame for year {year} exported to {file_path}')

Fetched 214 videos for the year 2019
DataFrame for year 2019 exported to videos_2019.csv
Fetched 152 videos for the year 2020
DataFrame for year 2020 exported to videos_2020.csv
Fetched 227 videos for the year 2021
DataFrame for year 2021 exported to videos_2021.csv
Fetched 150 videos for the year 2022
DataFrame for year 2022 exported to videos_2022.csv
Fetched 195 videos for the year 2023
DataFrame for year 2023 exported to videos_2023.csv
