## Installing

In [1]:
!pip install pytubefix
!pip install torch torchvision torchaudio
!pip install transformers torch faiss-cpu
!pip install openai-whisper
!pip install gradio

Collecting pytubefix
  Downloading pytubefix-8.7.0-py3-none-any.whl.metadata (5.3 kB)
Downloading pytubefix-8.7.0-py3-none-any.whl (85 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.1/85.1 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytubefix
Successfully installed pytubefix-8.7.0
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0.post1
Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25h



# Development


### Import all the necessary libraries

In [2]:
from pytubefix import YouTube
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize
import whisper
import torch
import torchvision
import torchaudio
import re
import gradio as gr
import os

  from tqdm.autonotebook import tqdm, trange



## Extract lyrics as a string from a YouTube URL, using `pytubefix` to download the audio of the video and `whisper` (from the `openai-whisper` library) to transcribe it
data_URL_handling.py

**Note: Issues with other environments**

- `pytubefix` has restrictions on its use, specifically in the po_token_verifier function, which tries to read data from the standard input (input) to obtain a token necessary to access YouTube. This is incompatible with deployed applications, since there is no direct interaction with the terminal.

- `yt_dlp` has the same restrictions

- `YouTube Data API v3` is useful for data extraction, but it cannot download the video or audio needed for transcription

These issues don't happen in Colab because it automatically uses the configured Google account data.


In [6]:
import re
import whisper
import torch
from pytubefix import YouTube

def is_valid_youtube_url(url):
  """
  Check whether a string looks like a YouTube URL.

  The check is purely syntactic: an optional http(s) scheme, an optional
  `www.`, `m.` or `music.` prefix, the host `youtube.com` or `youtu.be`,
  and a non-empty path. It does not verify that the video exists.

  Args:
      url (str): URL to be validated.
  Returns:
      bool: True if valid, False otherwise (non-string input is never valid).
  """
  # Guard against None / numbers so the validator answers False instead of
  # raising TypeError from the regex engine.
  if not isinstance(url, str):
    return False

  # The short host must be spelled `youtu.be`; making the dot optional would
  # also accept the bare, TLD-less host `youtube/...`.
  youtube_regex = re.compile(
      r"^(https?://)?((www|m|music)\.)?(youtube\.com|youtu\.be)/.+$"
  )
  return youtube_regex.match(url) is not None

# Whisper models already loaded in this kernel, keyed by (model name, device).
# Loading a model is expensive, so it is done once and reused across calls.
_WHISPER_MODELS = {}


def _get_whisper_model(model_name, device):
  """Return the Whisper model for (model_name, device), loading it on first use."""
  key = (model_name, device)
  if key not in _WHISPER_MODELS:
    _WHISPER_MODELS[key] = whisper.load_model(model_name, device=device)
  return _WHISPER_MODELS[key]


def get_details_from_youtube_url(youtube_url, model_name="base"):
  """
    Extract the lyrics from a YouTube video using openai-whisper.

    Args:
        youtube_url: The URL of the YouTube video.
        model_name: Name of the Whisper checkpoint to use. Defaults to
            "base"; larger ones such as "large-v2" or "large" can be passed.

    Returns:
        A dictionary with the keys "title", "author" and "lyrics", where
        "lyrics" is a list with the text of each transcribed segment.

    Raises:
        ValueError: If the URL is not a valid YouTube URL, if the video has
            no audio stream, or if downloading/transcribing fails (the
            original exception is chained as the cause).
    """
  if not is_valid_youtube_url(youtube_url):
    raise ValueError(f"Error: The URL '{youtube_url}' is invalid. Try another")

  try:
    # Use the GPU when one is available, otherwise fall back to the CPU
    # (e.g. when running locally or in a Hugging Face Space).
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = _get_whisper_model(model_name, device)

    # Outside Colab (e.g. in a Hugging Face Space) this may need to become:
    # YouTube(youtube_url, use_po_token=True, use_oauth=True, allow_oauth_cache=True)
    yt = YouTube(youtube_url)

    audio_stream = yt.streams.filter(only_audio=True).first()
    if not audio_stream:
      raise ValueError("No audio stream found for the video.")

    # Whisper receives the stream URL directly, so nothing is written to disk here.
    transcription = model.transcribe(audio_stream.url)
    # Keep the lyrics split into segments (use transcription["text"] for one string).
    lyrics = [segment["text"] for segment in transcription["segments"]]

    return {
      "title": yt.title,
      "author": yt.author,
      "lyrics": lyrics,
    }

  except Exception as e:
    # Everything is surfaced as ValueError so callers handle a single type;
    # `from e` keeps the original traceback for debugging.
    raise ValueError(f"Error processing YouTube URL {youtube_url}: {e}") from e


# Song Database

In [9]:
import faiss
import numpy as np
from sklearn.preprocessing import normalize
from sentence_transformers import SentenceTransformer
# from data_URL_handling import get_details_from_youtube_url

class SongDatabase:
    """
    In-memory song catalogue that can be searched by lyric similarity.

    Each song is represented by one L2-normalized sentence embedding of its
    lyrics, stored in a FAISS inner-product index. Row `i` of the index
    corresponds to `song_data[i]`.
    """

    def __init__(self):
        self.index = None  # FAISS index, created lazily when the first song is added
        self.song_data = []  # List to store song metadata (title, author, lyrics)
        self.model = SentenceTransformer("all-MiniLM-L6-v2")

    @staticmethod
    def _lyrics_to_text(lyrics):
        """Collapse lyrics (a string or a list of segment strings) into one string."""
        if isinstance(lyrics, str):
            return lyrics.strip()
        return " ".join(
            segment.strip() for segment in lyrics if segment and segment.strip()
        )

    def add_song(self, url_yt):
        """
        Adds a song to the database by extracting its lyrics and embeddings.
        Args:
            url_yt (str): The YouTube URL of the song.
        Returns:
            bool: True if the song was added successfully.
        Raises:
            ValueError: If the lyrics cannot be extracted, the song is already
                stored, or the embedding cannot be computed. Failures raise
                instead of returning False so the Gradio app can show them.
        """
        # Extract lyrics from song url
        song = get_details_from_youtube_url(url_yt)

        if not song or not song["lyrics"]:
            raise ValueError("There was an error trying to extract the data from the URL")

        if self.song_exists(song['title'], song['author']):
            raise ValueError("The song already exists")

        lyrics = song['lyrics']

        # The lyrics arrive as a list of segments. They are joined so that the
        # encoder receives a genuine list of str: a nested list is not what
        # `extract_embeddings` documents, and sentence-transformers appears to
        # treat a non-string item as a text pair, using only its first two
        # elements (verify against the installed version).
        lyrics_text = self._lyrics_to_text(lyrics)
        if not lyrics_text:
            raise ValueError("There was an error trying to extract the data from the URL")

        embeddings = self.extract_embeddings([lyrics_text])

        # Initialize FAISS index if not already created.
        # IndexFlatIP scores by inner product (cosine similarity for normalized
        # vectors) and does not support direct removal of entries.
        if self.index is None:
            self.index = faiss.IndexFlatIP(embeddings.shape[1])

        # Add to index and store song details; both must stay in the same order.
        self.index.add(embeddings)
        self.song_data.append({
            'title': song['title'],
            'author': song['author'],
            'lyrics': lyrics
        })

        return True

    def song_exists(self, title, author):
        """
        Checks if a song exists in the database by its title and author.
        Args:
            title (str): The title of the song to check.
            author (str): The author of the song to check.
        Returns:
            bool: True if the song exists, False otherwise.
        """
        return any(
            song["title"] == title and song['author'] == author
            for song in self.song_data
        )

    def extract_embeddings(self, texts):
        """
        Extracts normalized embeddings for the given texts.
        Args:
            texts (list of str): List of texts to encode.
        Returns:
            np.ndarray: L2-normalized embeddings, one row per text.
        Raises:
            ValueError: If encoding or normalization fails (original error chained).
        """
        try:
            embeddings = self.model.encode(texts, convert_to_numpy=True)
            return normalize(embeddings, norm="l2")
        except Exception as e:
            # Raise instead of returning the message: a returned string would be
            # mistaken for embeddings by callers and fail later on `.shape`.
            raise ValueError(f"Error extracting embeddings: {e}") from e

    def search_covers(self, query_url, top_k):
        """
        Searches for the most similar songs in the database to the song in the query URL.
        Args:
            query_url (str): The YouTube URL of the query song.
            top_k (int): Number of top similar results to retrieve
                (between 1 and the number of stored songs).
        Returns:
            list of dict: List of dictionaries with song title, author, and
            similarity score formatted as a percentage string such as "87.3%".
        Raises:
            ValueError: If the database is empty, `top_k` is out of range, or
                the query lyrics/embedding cannot be obtained.
        """
        if self.index is None or not self.song_data:
            raise ValueError("The database is empty. Please add songs before searching.")

        # Validate the cheap argument before the expensive download + transcription.
        if top_k < 1 or top_k > len(self.song_data):
            raise ValueError(
                f"Invalid 'top_k' value: {top_k}. Must be between 1 and {len(self.song_data)}."
            )

        query_details = get_details_from_youtube_url(query_url)
        if not query_details or not query_details["lyrics"]:
            raise ValueError("There was an error trying to extract the data from the URL")

        query_text = self._lyrics_to_text(query_details["lyrics"])
        if not query_text:
            raise ValueError("There was an error trying to extract the data from the URL")

        query_embedding = self.extract_embeddings([query_text])

        scores, indices = self.index.search(query_embedding, top_k)

        results = []
        for score, idx in zip(scores[0], indices[0]):
            # FAISS pads missing neighbours with -1, which must not be used as
            # a (valid) negative Python index.
            if 0 <= idx < len(self.song_data):
                song = self.song_data[idx]
                similarity = score * 100
                results.append({
                    "title": song["title"],
                    "author": song["author"],
                    "similarity": f"{similarity:.1f}%"
                })
        return results


# The Interface

app.py


In [14]:
import gradio as gr
# from db_management import SongDatabase

# Database instance shared by the Gradio callbacks defined below.
# It is created once per execution of this cell and keeps its songs in memory.
db = SongDatabase()

# Functions for Gradio
def add_song(url):
  """
  Gradio callback: add the song at `url` to the shared database.

  Args:
      url (str): YouTube URL typed by the user.
  Returns:
      tuple: (newline-separated "title - author" list of all stored songs,
      empty string used to clear the URL input box).
  Raises:
      gr.Error: If the song could not be added; shown to the user by Gradio.
  """
  try:
    success = db.add_song(url)
  except Exception as e:
    raise gr.Error(f"❌ Failed to add song from URL '{url}'. Error: {e}") from e

  # Checked outside the `try` so this error is not caught and re-wrapped by
  # the handler above.
  if not success:
    raise gr.Error(f"❌ Failed to add song from URL '{url}'. Please check the URL.")

  stored_songs = [f"{song['title']} - {song['author']}" for song in db.song_data]
  gr.Info(f"✅ Song added successfully from '{url}'!")
  return "\n".join(stored_songs), ""  # "" clears the URL input box

# Similarity (in percent) above which a stored song is flagged as a likely cover.
COVER_SIMILARITY_THRESHOLD = 30


def search_covers(query_url, top_k):
  """
  Gradio callback: list the stored songs most similar to the query video.

  Args:
      query_url (str): YouTube URL of the song to look up.
      top_k (int | float): Number of results requested (converted with int()).
  Returns:
      tuple: (content for the results textbox, empty string used to clear the
      query URL box). When there are results the first item is a gr.Textbox
      whose label is the "title - author" of the query video.
  Raises:
      gr.Error: If the search fails; shown to the user by Gradio.
  """
  try:
    results = db.search_covers(query_url, int(top_k))
    if not results:
      # Plain text: the output component is a Textbox, which would display
      # HTML markup literally.
      return "No similar songs found.", ""

    formatted_results = []
    for position, res in enumerate(results, start=1):
      # Prefix each line with a check mark or a cross to highlight likely covers.
      similarity = float(res["similarity"].strip('%'))
      status = "✅" if similarity > COVER_SIMILARITY_THRESHOLD else "❌"
      formatted_results.append(
          f"{status} {position}. {res['title']} - {res['author']} (Similarity: {res['similarity']})"
      )

    # Fetch the query details once, outside the loop, rather than once per
    # result. NOTE(review): this still transcribes the query a second time
    # (db.search_covers already did); only title and author are used here.
    song = get_details_from_youtube_url(query_url)
    song_info = f"{song['title']} - {song['author']}"
    return gr.Textbox(label=song_info, value="\n".join(formatted_results)), ""

  except Exception as e:
    raise gr.Error(f"❌ Error during search: {e}") from e

# Gradio interface: a two-column page. The left column adds songs to the
# database, the right column searches it for songs similar to a query video.
with gr.Blocks(theme=gr.themes.Soft()) as app:
  gr.Markdown("# 🎵 **Cover Searching App**")

  with gr.Row():
    # Left column: add a song by URL and show everything stored so far.
    with gr.Column():
      gr.Markdown("### 🎼 **Add Songs to the Album**")
      song_url_input = gr.Textbox(label="YouTube URL", placeholder="Enter the song's YouTube URL...")
      add_button = gr.Button("➕ Add Song")
      stored_songs_output = gr.Textbox(label="Stored Songs", lines=10, interactive=False)
      # add_song returns (stored-songs text, ""): the second output targets the
      # URL box itself, so the empty string clears it after a successful add.
      add_button.click(add_song, inputs=song_url_input, outputs=[stored_songs_output, song_url_input])

    # Right column: query URL plus the number of results to show.
    with gr.Column():
      gr.Markdown("### 🔍 **Search for Similar Covers**")
      with gr.Row():
        query_url_input = gr.Textbox(label="Search Cover URL", placeholder="Enter a YouTube URL to search...")
        # precision=0 and minimum=1 are set so the field holds a whole number >= 1.
        top_k_input = gr.Number(label="Top Results to Show", value=1, precision=0,minimum=1)

      search_button = gr.Button("🔍 Search")
      # Earlier alternative output component, kept for reference:
      # search_output = gr.HTML(label="Results")
      search_output = gr.Textbox(label="Song Title and Artist", lines=10, interactive=False)
      # search_covers returns (results, ""): the results fill `search_output`
      # and the empty string clears the query URL box.
      search_button.click(search_covers, inputs=[query_url_input, top_k_input], outputs=[search_output, query_url_input])

# Start the app. Per the output recorded below, Colab enables `share=True`
# automatically and suggests `debug=True` in launch() to see errors in the notebook.
app.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e440c449180faa6f9c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


https://youtu.be/Y0ORhLyJWuc


https://youtu.be/TWX0SAh3T1I