Load Libraries

In [43]:
# General
import re
import os
import webbrowser
import numpy as np
import pandas as pd
from IPython.display import display, HTML

In [44]:
# DL
import torch

In [45]:
!pip install youtube-search-python
!pip install youtube-transcript-api



In [46]:
# Youtube
from youtubesearchpython import VideosSearch
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound

In [47]:
# Huggingface
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM
from transformers import DPRQuestionEncoder, DPRContextEncoder, DPRQuestionEncoderTokenizer, DPRContextEncoderTokenizer

In [48]:
!pip install faiss-cpu



In [49]:
# Vector Databases
import faiss

In [50]:
!pip install langchain



In [51]:
!pip install langchain-community



In [52]:
# Langchain | Elevenlabs | Langchain Agents
from langchain import LLMChain
from langchain.prompts import PromptTemplate
from langchain.agents import load_tools, initialize_agent, AgentType
from langchain_community.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool

In [53]:
!pip install ollama



In [54]:
# Ollama
import ollama

# Load Constants

In [55]:
# Constants (Predefined Inputs)
DESTINATION = "Amsterdam"
PREFERENCES = ["mUSEUMS", "Outdoor Activities"]
MAX_RESULTS = 20 # Number of videos to fetch
MIN_VIEWS = 10000 # Minimum number of views for a video to be considered
LLM = "facebook/bart-large-cnn"
LOCAL_LLM = "llama3"
MAX_TOKENS = 1000

# Build PipeLine Step by Step

1.Search YouTube Videos

In [72]:
def parse_views(views_str):
  """ Parses the views string from YouTube and converts it to an integer.
  5,909 views -> 5909"""
  # Remove the views part and any commas
  views_str = views_str.lower().replace('views','').replace(',','').strip()
  return int(views_str) # In case of any parsing error


  # Check if the string ends with "K" (thousand)
  # if "K" in views_str:
  #  return int(float(views_str.replace("K", "")) * 1000)

def fetch_youtube_videos(destination, preferences, MIN_VIEWS, max_results=10):
  """
  Fetches relevant YouTube videos based on the destination and user preferences.
  detination (str)
  preferences (list)
  MIN_VIEWS (int)
  max_results (int)
  """
  # Combine preferences into a search query
  preferences_query = " ".join(preferences)
  search_query = f"{destination} travel guide {preferences_query} Netherlands"

  # Initialize VideosSearch
  videos_search = VideosSearch(search_query, limit=max_results)

  # Execute search
  search_results = videos_search.result()

  videos = []
  for video in search_results["result"]:
    views_str = video["viewCount"]["text"]
    views = parse_views(views_str)
    print(f"parseview {views}")

    # Filter out videos with fewer than MIN_VIEWS
    if views >= MIN_VIEWS:
      videos.append(video)
    video_data = {
        'Title': video['title'],
        'Duration': video['duration'],
        'Channel': video['channel']['name'],
        'Views': views, # Store as integer
        'Link': video['link']
    }
    videos.append(video_data)

  return videos

In [73]:
print("Fetching relevant YouTube videos about traveling in Amsterdam ...\n")
videos = fetch_youtube_videos(DESTINATION, PREFERENCES, MIN_VIEWS, MAX_RESULTS)
print("Done!")

Fetching relevant YouTube videos about traveling in Amsterdam ...

parseview 284072
parseview 91805
parseview 6255
parseview 5421
parseview 1070435
parseview 1019464
parseview 376463
parseview 4596
parseview 82
parseview 5410
parseview 238771
parseview 14454
parseview 18164
parseview 107137
parseview 676088
parseview 2000
parseview 451
parseview 62857
parseview 2536
Done!


In [96]:
len(videos)

30

In [75]:
videos

[{'type': 'video',
  'id': '3izVLop9iKg',
  'title': 'Top 10 Places to Visit in Amsterdam 2024 | Netherlands Travel Guide',
  'publishedTime': '1 year ago',
  'duration': '12:01',
  'viewCount': {'text': '284,072 views', 'short': '284K views'},
  'thumbnails': [{'url': 'https://i.ytimg.com/vi/3izVLop9iKg/hq720.jpg?sqp=-oaymwEcCOgCEMoBSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLC54729-gi5kBpbgzUC9_6yY104zA',
    'width': 360,
    'height': 202},
   {'url': 'https://i.ytimg.com/vi/3izVLop9iKg/hq720.jpg?sqp=-oaymwEcCNAFEJQDSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLBKrVrHEM1RCELafsC9Rvwtt8CajQ',
    'width': 720,
    'height': 404}],
  'richThumbnail': {'url': 'https://i.ytimg.com/an_webp/3izVLop9iKg/mqdefault_6s.webp?du=3000&sqp=CKCvoLgG&rs=AOn4CLCuDpH_VJeblSOXKI0RMlOMqCXcAg',
   'width': 320,
   'height': 180},
  'descriptionSnippet': [{'text': 'Welcome to the vibrant '},
   {'text': 'city', 'bold': True},
   {'text': ' of '},
   {'text': 'Amsterdam', 'bold': True},
   {'text': ', where hi

In [88]:
videos_df = pd.DataFrame(videos)
print(videos_df)

     type           id                                              title  \
0   video  3izVLop9iKg  Top 10 Places to Visit in Amsterdam 2024 | Net...   
1     NaN          NaN                                                NaN   
2   video  QEAQ8N6SK6w  Top 7 Best Things To Do in Amsterdam (2024) | ...   
3     NaN          NaN                                                NaN   
4     NaN          NaN                                                NaN   
5     NaN          NaN                                                NaN   
6   video  GRrXVAFh_XU  The BEST things to do in Amsterdam 🇳🇱🍻 handpic...   
7     NaN          NaN                                                NaN   
8   video  4i5KYDePHDU  10 Amazing Places to Visit in the Netherlands ...   
9     NaN          NaN                                                NaN   
10  video  rRFsyg7VJ9Y      Things To Do In Amsterdam: 3 Day Travel Guide   
11    NaN          NaN                                                NaN   

In [89]:
print(videos_df[['Title','Duration','Views','Channel']].to_string(index=False))

                                                                                           Title Duration     Views                   Channel
                                                                                             NaN      NaN       NaN                       NaN
                             Top 10 Places to Visit in Amsterdam 2024 | Netherlands Travel Guide    12:01  284072.0               TRIP XTREME
                                                                                             NaN      NaN       NaN                       NaN
                             Top 7 Best Things To Do in Amsterdam (2024) | Ultimate Travel Guide     8:45   91805.0         Travel Awaits You
                        Things to do in Amsterdam with kids | Amsterdam Family Travel Guide 2024     6:44    6255.0       Family Travel Guide
                                  Amsterdam Travel Guide | Exploring the Gems of the Netherlands     8:23    5421.0               Mr. Dibbles
      

In [97]:
def display_videos(videos):
  """
  Displays the list of videos in a pandas DataFrame and optionally opens the videos in the browser.
  Parameters:
  videos (list): A list of video dictionaries.
  """

  if not videos:
    print("No videos found with more than 10,000 views.")
    return

  # Create a DataFrame for better display
  videos_df = pd.DataFrame(videos)
  print("\nFetched YouTuv=be videos (Filtered by > 10.000 views):")
  print(videos_df[['Title','Duration','Views','Channel']].to_string(index=False))

  # Optionally, ask the user if they want to open the videos in the browser
  """ open_browser = input("\nDo you want to open the fetched videos in your web? (y/n): ").strip().lower()
  if open_browser == 'y':
    for video in videos:
      webbrowser.open(video['Link'])"""

display_videos(videos)


Fetched YouTuv=be videos (Filtered by > 10.000 views):
                                                                                           Title Duration     Views                   Channel
                                                                                             NaN      NaN       NaN                       NaN
                             Top 10 Places to Visit in Amsterdam 2024 | Netherlands Travel Guide    12:01  284072.0               TRIP XTREME
                                                                                             NaN      NaN       NaN                       NaN
                             Top 7 Best Things To Do in Amsterdam (2024) | Ultimate Travel Guide     8:45   91805.0         Travel Awaits You
                        Things to do in Amsterdam with kids | Amsterdam Family Travel Guide 2024     6:44    6255.0       Family Travel Guide
                                  Amsterdam Travel Guide | Exploring the Gems of the Netherl

In [91]:
print(videos_df[['Title','Duration','Views','Channel']].to_string(index=False))

                                                                                           Title Duration     Views                   Channel
                                                                                             NaN      NaN       NaN                       NaN
                             Top 10 Places to Visit in Amsterdam 2024 | Netherlands Travel Guide    12:01  284072.0               TRIP XTREME
                                                                                             NaN      NaN       NaN                       NaN
                             Top 7 Best Things To Do in Amsterdam (2024) | Ultimate Travel Guide     8:45   91805.0         Travel Awaits You
                        Things to do in Amsterdam with kids | Amsterdam Family Travel Guide 2024     6:44    6255.0       Family Travel Guide
                                  Amsterdam Travel Guide | Exploring the Gems of the Netherlands     8:23    5421.0               Mr. Dibbles
      

In [100]:
# Display clickable links
print("\n### Watch These Videos:")
for idx, video in enumerate(videos, 1):
  # Display as markdown link
  display(HTML(f"{idx}. <a href='{video['link']}' target='_blank'>{idx}. {video['title']}</a>"))


### Watch These Videos:


KeyError: 'link'

# 2.Transcript YouTube Videos



























  

In [92]:
def extract_video_id(youtube_url):
  """
  Extracts the video ID from a YouTube URL.
  Parameters:
  youtube_url (str): The URL of the YouTube video.
  Returns:
  str: The video ID if found, otherwise None.
  """
  # Regular expression to extract video ID
  video_id_match = re.search(r'(?:v=|\/)([0-9A-Za-z]{11})', youtube_url)

  if video_id_match:
    return video_id_match.group(1)
  else:
    return None

In [93]:
# Example
extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
# Output: 'dQw4w9WgXcQ'

'dQw4w9WgXcQ'

In [94]:
def extract_transcript(videos_df):
  # Initialize a new column for transcripts
  videos_df['Transcript'] = None

  # Iterate over each video and fetch the transcript
  for index, row in videos_df.iterrows():
    youtube_url = row['Link']
    video_title = row['Title']
    video_id = extract_video_id(youtube_url)

    if video_id:
      try:
        # Fetch the transcript using the video ID
        transcript = YouTubeTranscriptApi.get_transcript(video_id)

        # Combine the transcript segments into a single string
        transcript = ' '.join([segment['text'] for segment in transcript_list])

        # Store the transcript in the DataFrame
        videos_df.at[index, 'Transcript'] = transcript
        print(f"Transcript fetched for '{video_title}'")

      except TranscriptsDisabled:
        print(f"Transcripts are disabled for '{video_title}'")
        videos_df.at[index, 'Transcript'] = "Transcripts are disabled for this video."

      except NoTranscriptFound:
        print(f"No transcript found for '{video_title}'.")
        videos_df.at[index, 'Transcript'] = "No transcript found for this video."

      except Exception as e:
        print(f"An error occurred while fetching the transcript for '{video_title}'. Error: {e}")
        videos_df.at[index, 'Transcript'] = f"An error occurred while fetching the transcript: {e}"

      else:
        print(f"Could not extract video ID from URL: {youtube_url}")
        videos_df.at[index, 'Transcript'] = "Invalid YouTube URL."

    return videos_df