In [86]:
!pip install beautifulsoup4 requests isodate

Collecting isodate
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: isodate
Successfully installed isodate-0.6.1


In [92]:
def get_channel_id_by_username(username, api_key, timeout=10):
    """Resolve a YouTube channel username to its channel ID.

    Sends a GET request to the ``/channels`` endpoint of the
    ``youtube-v31.p.rapidapi.com`` host with ``forUsername=username``.

    Args:
        username: Channel username to look up.
        api_key: RapidAPI key, sent in the ``X-RapidAPI-Key`` header.
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        The ``'id'`` of the first item in the response, or ``None`` when the
        status code is not 200 or the response contains no items. Both
        failure cases are reported with ``print``.

    Raises:
        Exceptions raised by ``requests.get`` (including a timeout) are not
        caught here and propagate to the caller.
    """
    url = "https://youtube-v31.p.rapidapi.com/channels"
    querystring = {"part": "snippet,contentDetails,statistics", "forUsername": username}
    headers = {
        "X-RapidAPI-Key": api_key,
        "X-RapidAPI-Host": "youtube-v31.p.rapidapi.com"
    }

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection and the notebook cell never finishes.
    response = requests.get(url, headers=headers, params=querystring, timeout=timeout)
    if response.status_code != 200:
        print("Failed to fetch data:", response.status_code, response.text)
        return None

    items = response.json().get('items', [])
    if not items:
        print("Channel not found.")
        return None
    return items[0]['id']

In [91]:
import requests
import isodate

def get_youtube_videos(channel_id, api_key, min_minutes, count, timeout=10):
    """Collect up to ``count`` channel videos that are at least ``min_minutes`` long.

    Pages through the ``/search`` endpoint of the ``youtube-v31`` RapidAPI
    host (50 results per page, ``order=date``), looks up each page's video
    IDs with ``get_video_details`` and keeps the entries whose
    ``'duration_seconds'`` is at least ``min_minutes * 60``.

    Args:
        channel_id: ID of the channel to search.
        api_key: RapidAPI key, sent in the ``X-RapidAPI-Key`` header.
        min_minutes: Minimum video length in minutes.
        count: Maximum number of videos to return.
        timeout: Seconds to wait for each search request, passed to
            ``requests.get``.

    Returns:
        A list of at most ``count`` detail dicts as produced by
        ``get_video_details``. Paging stops early when a request returns a
        non-200 status (reported with ``print``) or when the response has no
        ``nextPageToken``.
    """
    url = "https://youtube-v31.p.rapidapi.com/search"
    # The headers are identical for every page, so build them once.
    headers = {
        "X-RapidAPI-Key": api_key,
        "X-RapidAPI-Host": "youtube-v31.p.rapidapi.com"
    }
    min_seconds = min_minutes * 60
    all_videos = []
    next_page_token = None

    while len(all_videos) < count:
        querystring = {
            "channelId": channel_id,
            "part": "snippet",
            "order": "date",
            "maxResults": "50",
            # None on the first page; requests leaves None-valued params out
            # of the query string.
            "pageToken": next_page_token
        }

        # Without a timeout a stalled connection would hang the loop forever.
        response = requests.get(url, headers=headers, params=querystring, timeout=timeout)
        if response.status_code != 200:
            print("Failed to fetch data:", response.status_code, response.text)
            break

        # Decode the body once and reuse it for both the items and the token.
        payload = response.json()
        # Search results can include non-video entries; those have no 'videoId'.
        video_ids = [
            item['id']['videoId']
            for item in payload.get('items', [])
            if 'videoId' in item['id']
        ]

        # Fetch video details including duration
        video_details = get_video_details(video_ids, api_key)

        # Filter videos by duration
        all_videos.extend(
            video for video in video_details
            if video['duration_seconds'] >= min_seconds
        )

        # Check for next page token
        next_page_token = payload.get('nextPageToken')
        if not next_page_token:
            break

    return all_videos[:count]

def get_video_details(video_ids, api_key, timeout=10):
    """Fetch title, watch URL and duration for a list of video IDs.

    Queries the ``/videos`` endpoint of the ``youtube-v31`` RapidAPI host in
    batches of at most 50 IDs.

    Args:
        video_ids: List of YouTube video ID strings.
        api_key: RapidAPI key, sent in the ``X-RapidAPI-Key`` header.
        timeout: Seconds to wait for each request, passed to ``requests.get``.

    Returns:
        A list of dicts with the keys ``'title'``, ``'url'``, ``'duration'``
        (the duration string exactly as returned by the API) and
        ``'duration_seconds'`` (that string parsed with
        ``isodate.parse_duration`` and converted via ``total_seconds()``).
        When a batch returns a non-200 status the failure is printed and the
        details gathered so far are returned.
    """
    url = "https://youtube-v31.p.rapidapi.com/videos"
    # The headers are identical for every batch, so build them once.
    headers = {
        "X-RapidAPI-Key": api_key,
        "X-RapidAPI-Host": "youtube-v31.p.rapidapi.com"
    }
    video_details = []

    for start in range(0, len(video_ids), 50):  # API allows max 50 ids per request
        querystring = {
            "part": "contentDetails,snippet",
            "id": ','.join(video_ids[start:start + 50])
        }

        # Without a timeout a stalled connection would hang the loop forever.
        response = requests.get(url, headers=headers, params=querystring, timeout=timeout)
        if response.status_code != 200:
            print("Failed to fetch video details:", response.status_code, response.text)
            break

        for video in response.json().get('items', []):
            raw_duration = video['contentDetails']['duration']
            video_details.append({
                'title': video['snippet']['title'],
                'url': f"https://www.youtube.com/watch?v={video['id']}",
                'duration': raw_duration,
                'duration_seconds': isodate.parse_duration(raw_duration).total_seconds()
            })

    return video_details



In [103]:

import os

# Prefer the RAPIDAPI_KEY environment variable so the real key never has to be
# saved inside the notebook. If it is not set, the placeholder below is used;
# replace it with your actual RapidAPI key.
api_key = os.environ.get("RAPIDAPI_KEY", "Enter your RapidAPI key its free")

channel_user_name = 'PowerfulJRE'
channel_id = get_channel_id_by_username(channel_user_name, api_key)

In [104]:
# Show the channel ID returned by get_channel_id_by_username as the cell output.
channel_id

'UCzQUP1qoWDoEbmsQxvdjxgQ'

In [112]:
# Fetch the channel's videos that meet the minimum length and print a summary.
min_minutes = 10  # Minimum duration in minutes
count = 2  # Number of videos to return
videos = get_youtube_videos(channel_id, api_key, min_minutes, count)
print(f"Found {len(videos)} videos with a minimum duration of {min_minutes} minutes.")
# One line per video; 'duration' is the raw duration string from the API (e.g. PT2H31M48S).
for video in videos:
    print(f"Title: {video['title']}, URL: {video['url']}, Duration: {video['duration']}")

Found 2 videos with a minimum duration of 10 minutes.
Title: Joe Rogan Experience #2158 - Harland Williams, URL: https://www.youtube.com/watch?v=pCGCu_v77Do, Duration: PT2H31M48S
Title: Joe Rogan Experience #2157 - Duncan Trussell, URL: https://www.youtube.com/watch?v=01O3uTGtX7w, Duration: PT3H4M11S


In [109]:
# Print the raw list of video dicts held in `videos`.
print(videos)

[{'title': 'Joe Rogan Experience #2158 - Harland Williams', 'url': 'https://www.youtube.com/watch?v=pCGCu_v77Do', 'duration': 'PT2H31M48S', 'duration_seconds': 9108.0}, {'title': 'Joe Rogan Experience #2157 - Duncan Trussell', 'url': 'https://www.youtube.com/watch?v=01O3uTGtX7w', 'duration': 'PT3H4M11S', 'duration_seconds': 11051.0}, {'title': 'JRE MMA Show #157 with Craig Jones', 'url': 'https://www.youtube.com/watch?v=FzAxtfdDHnY', 'duration': 'PT2H9M40S', 'duration_seconds': 7780.0}, {'title': 'Joe Rogan Experience #2156 - Jeremie & Edouard Harris', 'url': 'https://www.youtube.com/watch?v=c6JdeL90ans', 'duration': 'PT2H22M32S', 'duration_seconds': 8552.0}, {'title': 'Joe Rogan Experience #2155 - Brian Redban', 'url': 'https://www.youtube.com/watch?v=1L6mboUGXtE', 'duration': 'PT2H31M56S', 'duration_seconds': 9116.0}, {'title': 'Joe Rogan Experience #2154 - Remi Warren', 'url': 'https://www.youtube.com/watch?v=VlrLibwDTZU', 'duration': 'PT2H13M13S', 'duration_seconds': 7993.0}, {'tit

In [110]:
!pip install youtube-transcript-api




In [130]:
def _extract_video_id(video_url):
    """Return the YouTube video ID contained in ``video_url``."""
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(video_url)
    # Standard watch URLs carry the ID in the ``v`` query parameter. Reading it
    # explicitly keeps trailing parameters such as ``&t=30s`` out of the ID.
    query_ids = parse_qs(parsed.query).get('v')
    if query_ids:
        return query_ids[0]
    # Short links (https://youtu.be/<id>) keep the ID in the path instead.
    if parsed.netloc.lower() in ('youtu.be', 'www.youtu.be'):
        short_id = parsed.path.strip('/').split('/')[0]
        if short_id:
            return short_id
    # Anything else falls back to the text after the last '=' (a bare ID is
    # returned unchanged).
    return video_url.split('=')[-1]


def get_transcripts(video_urls):
    """Download the transcript text for each YouTube video URL.

    Args:
        video_urls: Iterable of video URL strings.

    Returns:
        A dict keyed by the URL exactly as given. The value is the transcript
        with its entries joined by newlines, or the string
        ``"Transcript not available: <error>"`` when fetching it raised an
        exception.

    Raises:
        ImportError: If the ``youtube_transcript_api`` package is not installed.
    """
    # Imported here, outside the try block below, so a missing package fails
    # loudly instead of being recorded as "Transcript not available" for
    # every single video.
    from youtube_transcript_api import YouTubeTranscriptApi

    transcripts = {}
    for video_url in video_urls:
        video_id = _extract_video_id(video_url)
        try:
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
            transcripts[video_url] = "\n".join(entry['text'] for entry in transcript_list)
        except Exception as e:
            # Best effort: one failing video must not abort the whole batch,
            # so the error is stored in place of the transcript.
            transcripts[video_url] = f"Transcript not available: {e}"
    return transcripts


In [131]:
def get_video_urls(videos):
    """Collect the ``'url'`` value of every video dict, keeping the input order."""
    collected = []
    for entry in videos:
        collected.append(entry['url'])
    return collected

In [132]:
# Extract just the watch URLs from the fetched video dicts.
urls=get_video_urls(videos)

In [133]:
# Build a dict mapping each video URL to its transcript text (or an error message).
transcriptions=get_transcripts(urls)

In [134]:
# Display the URL -> transcript dict as the cell output.
# NOTE(review): this renders every transcript in full, which makes the saved
# output very large — consider showing only a slice of each value.
transcriptions

{'https://www.youtube.com/watch?v=pCGCu_v77Do': "Joe Rogan podcast check it out The Joe\nRogan Experience Train by day Joe Rogan\npodcast by night all\nday oh I I think I've known you for 30\nyears you know how crazy that is 31 is\nit really yeah wow I remember the day we\nmet Baskin Robins on Melrose really yeah\ndo we made it Basin Robins Basin Robbins\nI got one of those memories like uh the\ngirl from Taxi oh do you really got mint\nchocolate chip double scoop and I had\npeanut butter and chocolate I think\nyou're making this up well I think\nyou're making this up he's making this\nup I like let's see how this goes yeah\nas soon as he was saying he's got a\nmemory like the lady from Taxi I was\nlike and I couldn't even remember her\nname that lady's got a crazy memory she\ndoes she can tell you like dates in 1972\nwhat day it was dude I bumped into it a\nsushi joint once and she reenacted the\nday lost my virginity it's unbelievable\nwow she remembered it how she know it\nshe alien