In [45]:
import requests
from dotenv import load_dotenv
from urllib.parse import urlparse
from urllib.parse import parse_qs
from os import getenv
from requests.models import PreparedRequest
from requests import get

load_dotenv()

True

In [46]:
# Sample YouTube watch URL used to try out the helper functions in this notebook.
u = 'https://www.youtube.com/watch?v=9yDDnurRmIY'


# helper functions 

def add_key(params):
    """Return a new dict holding the API key plus every entry of ``params``.

    The key is read from the ``API_KEY`` environment variable and stored
    under ``"key"``. Entries of ``params`` are applied afterwards, so a
    ``"key"`` entry supplied by the caller takes precedence. ``params``
    itself is not modified.
    """
    api_key = getenv('API_KEY')
    return {"key": api_key, **params}


def get_video_key(p_url):
    """Extract the YouTube video id from a video URL.

    Supports the standard watch URL (``.../watch?v=<id>``) and the common
    path-based forms ``youtu.be/<id>``, ``/embed/<id>``, ``/shorts/<id>``,
    ``/live/<id>`` and ``/v/<id>``.

    Args:
        p_url: The video URL as a string.

    Returns:
        The video id as a string. When the ``v`` query parameter appears
        several times, the first value is returned.

    Raises:
        KeyError: If no video id can be located in the URL. This is the same
            exception type a missing ``v`` query parameter has always raised.
    """
    parsed_url = urlparse(p_url)

    # Preferred form: the id travels in the ``v`` query parameter.
    query_ids = parse_qs(parsed_url.query).get('v')
    if query_ids:
        return query_ids[0]

    # Fallbacks: the id is part of the path.
    segments = [segment for segment in parsed_url.path.split('/') if segment]
    host = (parsed_url.hostname or '').lower()
    if host in ('youtu.be', 'www.youtu.be') and segments:
        # Short links: https://youtu.be/<id>
        return segments[0]
    if len(segments) >= 2 and segments[0] in ('embed', 'shorts', 'live', 'v'):
        # e.g. https://www.youtube.com/embed/<id>
        return segments[1]

    raise KeyError('v')

# Quick check: extract the video id from the sample URL `u`.
get_video_key(u)

'9yDDnurRmIY'

# Understanding the Results of the Video

We call the [Search: list](https://developers.google.com/youtube/v3/docs/search/list) API, which searches for the query and returns a list of matching videos.


| Part                        | What is it for                             |
|-----------------------------|--------------------------------------------|
| contentDetails              | Has the duration of the video              |
| snippet                     | Has tags and more info on the video        |

In [47]:
def search_results(q, max_results=5, order="relevance", timeout=10):
    """Query the YouTube Data API v3 search endpoint.

    Args:
        q: The search query string.
        max_results: Value sent as the ``maxResults`` parameter (default 5).
        order: Value sent as the ``order`` parameter (default ``"relevance"``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, a stalled connection would block the kernel indefinitely.

    Returns:
        The ``requests.Response`` of the GET request; call ``.json()`` on it
        to obtain the decoded body.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    url = 'https://www.googleapis.com/youtube/v3/search'
    params = add_key({
        'q': q,
        'part': 'snippet',
        'maxResults': max_results,
        'order': order,
    })
    # requests encodes the query string itself, so there is no need to build
    # the final URL by hand through a PreparedRequest first.
    return get(url, params=params, timeout=timeout)

In [48]:
# Run a sample search and show the full decoded JSON response.
search_results("how to make a chocolate cake, can you help with me it").json()

{'kind': 'youtube#searchListResponse',
 'etag': 'mZ6fGYrhVR-WfphYONIAPls1ze8',
 'nextPageToken': 'CAUQAA',
 'regionCode': 'DK',
 'pageInfo': {'totalResults': 1000000, 'resultsPerPage': 5},
 'items': [{'kind': 'youtube#searchResult',
   'etag': 'e_jA1IFiwxzGxeExpA2ln6gvdAU',
   'id': {'kind': 'youtube#video', 'videoId': 'rL4sDLdfn7A'},
   'snippet': {'publishedAt': '2022-03-11T00:54:54Z',
    'channelId': 'UC_35hRJlT4PEmvFCcU6l_3Q',
    'title': 'Chocolate Cake made in a PAN! (No oven)',
    'description': 'shorts #chocolatecake #easyrecipe #onepan #simplerecipe #cake #baking.',
    'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/rL4sDLdfn7A/default.jpg',
      'width': 120,
      'height': 90},
     'medium': {'url': 'https://i.ytimg.com/vi/rL4sDLdfn7A/mqdefault.jpg',
      'width': 320,
      'height': 180},
     'high': {'url': 'https://i.ytimg.com/vi/rL4sDLdfn7A/hqdefault.jpg',
      'width': 480,
      'height': 360}},
    'channelTitle': 'Patrick Zeinali',
    'liveBroad

In [49]:
# Inspect only the first item of the search response.
search_results("how to make a chocolate cake ?").json()["items"][0]

{'kind': 'youtube#searchResult',
 'etag': 'utShvsRpCgVJvNuF6-rdyQQRPTQ',
 'id': {'kind': 'youtube#video', 'videoId': 'mlLTGsUlMZw'},
 'snippet': {'publishedAt': '2019-11-04T14:34:30Z',
  'channelId': 'UC3ngJBRN2DbBC8OFLwU8kQA',
  'title': 'Simple Moist Chocolate Cake Recipe | Basic recipe for beginners',
  'description': 'This recipe is an easy chocolate cake recipe which is moist and dense. this is a quick recipe even beginners can do easily.',
  'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/mlLTGsUlMZw/default.jpg',
    'width': 120,
    'height': 90},
   'medium': {'url': 'https://i.ytimg.com/vi/mlLTGsUlMZw/mqdefault.jpg',
    'width': 320,
    'height': 180},
   'high': {'url': 'https://i.ytimg.com/vi/mlLTGsUlMZw/hqdefault.jpg',
    'width': 480,
    'height': 360}},
  'channelTitle': 'Yummy Treats',
  'liveBroadcastContent': 'none',
  'publishTime': '2019-11-04T14:34:30Z'}}

# Listing the Available Caption Tracks

First, we can use the [Captions](https://www.googleapis.com/youtube/v3/captions) API to see what kinds of captions YouTube offers for a video.

This API can be skipped; it is listed here in case it is useful later.

In [50]:
def get_caption_items(u, timeout=10):
    """List the caption tracks of a video via the YouTube Data API v3.

    Args:
        u: URL of the video; its video id is extracted with
            ``get_video_key``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, a stalled connection would block the kernel indefinitely.

    Returns:
        The ``requests.Response`` of the GET request; call ``.json()`` on it
        to obtain the decoded body.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
        Exception: Whatever ``get_video_key`` raises for a URL it cannot
            parse.
    """
    c_url = 'https://www.googleapis.com/youtube/v3/captions'
    params = add_key({
        'videoId': get_video_key(u),
        'part': 'snippet',
    })
    # requests encodes the query string itself, so there is no need to build
    # the final URL by hand through a PreparedRequest first.
    return get(c_url, params=params, timeout=timeout)

In [51]:
# List the caption tracks of the sample video and decode the JSON body.
resp = get_caption_items('https://www.youtube.com/watch?v=9yDDnurRmIY')
m = resp.json()
# Show the whole response alongside the id of its first caption item.
m, m["items"][0]["id"]


({'kind': 'youtube#captionListResponse',
  'etag': 'uVf_piGN-xSWbyPeCeOU4GPQHJM',
  'items': [{'kind': 'youtube#caption',
    'etag': 'bKbKMAcHAlLeKidAUViGFIPe_pg',
    'id': 'AUieDaY6naV-hW1_Wyk23jAtiGs3uwq3QI8f7zvazbRr5a-lko0',
    'snippet': {'videoId': '9yDDnurRmIY',
     'lastUpdated': '2023-01-13T19:46:11.310475Z',
     'trackKind': 'asr',
     'language': 'en',
     'name': '',
     'audioTrackType': 'unknown',
     'isCC': False,
     'isLarge': False,
     'isEasyReader': False,
     'isDraft': False,
     'isAutoSynced': False,
     'status': 'serving'}}]},
 'AUieDaY6naV-hW1_Wyk23jAtiGs3uwq3QI8f7zvazbRr5a-lko0')

# Getting the Transcripts

Since we are only interested in the (English) transcripts, we need the [youtube-transcript-api](https://github.com/jdepoix/youtube-transcript-api) package (which may already have been installed during the project setup).

In [52]:
from youtube_transcript_api import YouTubeTranscriptApi

# Client object from the youtube-transcript-api package.
ytt_api = YouTubeTranscriptApi()

# Fetch the transcript of the sample video, addressed by its video id.
fc = ytt_api.fetch(get_video_key('https://www.youtube.com/watch?v=9yDDnurRmIY'))

In [53]:
# Summarise the fetched transcript, one field per line: video id, language
# name, language code, and whether the captions were generated by YouTube
# (as opposed to being created manually).
print("\n".join(
    str(field)
    for field in (fc.video_id, fc.language, fc.language_code, fc.is_generated)
))

9yDDnurRmIY
English (auto-generated)
en
True


In [54]:
# Preview a few raw transcript entries (indices 2 through 5).
fc.to_raw_data()[2:6]

[{'text': 'hey get in who the hell are you a really',
  'start': 6.48,
  'duration': 4.079},
 {'text': "bad start to your day that's who were",
  'start': 9.0,
  'duration': 3.059},
 {'text': 'you just waiting for me out here all day',
  'start': 10.559,
  'duration': 3.54},
 {'text': 'yes sir well do you have like an app to',
  'start': 12.059,
  'duration': 3.301}]

# Conclusion

We got the transcript for the video, and we also have a way to get the timestamps for each piece of text.