In [49]:
import os
import re
from datetime import datetime

import pandas as pd
from googleapiclient.discovery import build

In [50]:
# Function to convert duration string to seconds
def convert_duration(duration_str):
    """Convert an ISO 8601 duration string (e.g. ``'PT1H2M3S'``) to seconds.

    The accepted shape is ``P[nW][nD][T[nH][nM][nS]]`` with every component
    optional and every count a non-negative integer. The whole string must
    match; text with trailing or unsupported content is rejected rather than
    partially parsed.

    Args:
        duration_str: Duration text such as ``'PT4M13S'`` or ``'P1DT2H'``.

    Returns:
        The total number of seconds as an ``int``, or ``None`` when the input
        is not a string or does not match the supported format.
    """
    if not isinstance(duration_str, str):
        return None

    # In ISO 8601 the count precedes its designator ("1D", not "D1"), and the
    # date part (weeks/days) comes before the "T" that opens the time part.
    match = re.fullmatch(
        r'P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?',
        duration_str,
    )
    if match is None:
        return None

    # Components absent from the string come back as None and count as zero.
    weeks, days, hours, minutes, seconds = (int(part or 0) for part in match.groups())
    return weeks * 604800 + days * 86400 + hours * 3600 + minutes * 60 + seconds

# Helper: turn a 'publishedAt' timestamp into a compact YYMMDD string.
def _format_upload_date(published_at):
    """Format a ``publishedAt`` timestamp as ``YYMMDD``.

    Accepts ``YYYY-MM-DDTHH:MM:SSZ`` with or without fractional seconds.
    Returns ``'Unknown'`` when the value is missing or cannot be parsed.
    """
    for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S.%fZ"):
        try:
            return datetime.strptime(published_at, fmt).strftime("%y%m%d")
        except (TypeError, ValueError):
            # TypeError: value is not a string (e.g. None); ValueError: wrong format.
            continue
    return 'Unknown'


def get_videos(api_key, duration, num_videos=10, min_duration=None, max_duration=None):
    """Fetch videos of a given duration class and return their details.

    Runs ``search.list`` (no query text is sent, only ``type='video'`` and the
    duration class), then ``videos.list`` for the returned ids, and finally
    filters the results by exact length.

    Args:
        api_key: YouTube Data API key passed to the client as ``developerKey``.
        duration: Value forwarded as the ``videoDuration`` search parameter
            (e.g. ``'short'``, ``'medium'`` or ``'long'``).
        num_videos: Forwarded as ``maxResults`` for the search call. The
            returned list may be shorter because of the length filter.
            NOTE(review): the API reference documents an upper bound for
            ``maxResults``; confirm before passing large values.
        min_duration: If not ``None``, drop videos shorter than this many seconds.
        max_duration: If not ``None``, drop videos longer than this many seconds.

    Returns:
        A list of dicts with keys ``'ID'``, ``'Title'``, ``'Length (seconds)'``,
        ``'Views'``, ``'Rating'``, ``'Region'``, ``'Upload Date'`` and
        ``'Category'``, sorted by ascending length. Empty when the search
        returns nothing.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)

    # Step 1: search across all categories for ids in the requested duration class.
    search_response = youtube.search().list(
        part='id',
        type='video',
        videoDuration=duration,
        maxResults=num_videos
    ).execute()

    video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]
    if not video_ids:
        # Nothing to look up, so skip the second API call entirely.
        return []

    # Step 2: fetch snippet/contentDetails/statistics for every id in one call.
    video_response = youtube.videos().list(
        part='snippet,contentDetails,statistics',
        id=','.join(video_ids)
    ).execute()

    videos = []
    for video_result in video_response.get('items', []):
        snippet = video_result['snippet']
        # Statistics (or individual counters) can be absent; treat them as zero.
        stats = video_result.get('statistics', {})

        # Named length_seconds so the `duration` parameter is not shadowed.
        raw_duration = video_result.get('contentDetails', {}).get('duration')
        length_seconds = convert_duration(raw_duration) if raw_duration else None
        if length_seconds is None:
            continue
        if min_duration is not None and length_seconds < min_duration:
            continue
        if max_duration is not None and length_seconds > max_duration:
            continue

        likes = int(stats.get('likeCount', 0))
        dislikes = int(stats.get('dislikeCount', 0))
        # NOTE(review): every row of the recorded output shows Rating 1.0 and
        # Region 'Unknown', which suggests the responses carry neither
        # 'dislikeCount' nor a snippet 'regionCode' — confirm against the API.
        videos.append({
            'ID': video_result['id'],
            'Title': snippet['title'],
            'Length (seconds)': length_seconds,
            'Views': int(stats.get('viewCount', 0)),
            # Share of likes among likes + dislikes; the max() avoids dividing by zero.
            'Rating': float(likes) / max(likes + dislikes, 1),
            'Region': snippet.get('regionCode', 'Unknown'),
            'Upload Date': _format_upload_date(snippet.get('publishedAt')),
            'Category': snippet.get('categoryId', 'Unknown'),
        })

    # Sort videos by duration (shortest first).
    videos.sort(key=lambda video: video['Length (seconds)'])

    return videos

In [51]:
# Read the YouTube Data API key from the environment so the credential is
# never stored in the notebook or leaked through version control.
# NOTE: the key that used to be hardcoded here should be treated as
# compromised and rotated.
api_key = os.environ.get('YOUTUBE_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable to your YouTube Data API key."
    )

num_videos = 10

# Search for short videos, keeping only those of at most 60 seconds
short_videos = get_videos(api_key, 'short', num_videos, max_duration=60)
short_df = pd.DataFrame(short_videos)

# Search for medium-length videos (7-20 minutes)
medium_videos = get_videos(api_key, 'medium', num_videos, min_duration=7 * 60, max_duration=20 * 60)
medium_df = pd.DataFrame(medium_videos)

# Search for long videos (>50 minutes)
long_videos = get_videos(api_key, 'long', num_videos, min_duration=50 * 60)
long_df = pd.DataFrame(long_videos)

In [52]:
# Display the short-video table (videos of at most 60 seconds, ordered by length).
short_df

Unnamed: 0,ID,Title,Length (seconds),Views,Rating,Region,Upload Date,Category
0,yMdJyZm7ZDQ,Almost had it. 😅😅 #shorts,17,365713431,1.0,Unknown,220619,17
1,9rWvE4Uyh2A,don't move!!!!!! #squidgame,26,176915865,1.0,Unknown,211002,24
2,0LMKF0iCT9g,Fool-Proof Way To Quickly Grow Dragon Fruit Fr...,27,1931298,1.0,Unknown,210130,26
3,LJaj3E29N_4,The right way to carve a Turkey #shorts,31,3106566,1.0,Unknown,211121,22
4,fQaf8wrRkzA,Cinnamon Roll Banana Cake! Recipe tutorial #Sh...,33,195728,1.0,Unknown,210824,24
5,OVVGWSggcB0,@mouse11121 #gladucame,33,218786832,1.0,Unknown,230607,10
6,dfrcAwNYsK0,BBQ Ribs for my husbands lunch 🍖🔥 #lunchformyh...,46,13585220,1.0,Unknown,220131,22
7,e89R-kfufyE,"agarwood incense, meditation, relaxing, healing",51,211397139,1.0,Unknown,220914,10
8,o0oM1OdmwTA,5 Shoulder Pain Relief Exercises in 60 seconds,54,493995,1.0,Unknown,220603,27
9,VDdamDi6oes,Don't Put Liquid Nitrogen In a Steam Engine,55,46602769,1.0,Unknown,210719,28
