In [11]:
import yt_dlp
import os

FFMPEG_PATH = r"C\:Users\SABIO\ffmpeg\bin\ffmpeg.exe" 

# Define the songs download folder
download_path = "songs"
os.makedirs(download_path, exist_ok=True)

def download_song(song_name):
    """Download a song from YouTube and force MP3 conversion"""
    ydl_opts = {
        'format': 'bestaudio/best',
        'default_search': 'ytsearch1',
        'noplaylist': True,
        'outtmpl': f'{download_path}/%(title)s.%(ext)s',
        'ffmpeg_location': FFMPEG_PATH, 
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'quiet': False  # Set to True to hide logs
    }
    
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([song_name])

    print(f"✅ Downloaded and converted: {song_name}")

# Example songs
songs = ["Michael Jackson Speed Demon"]

for song in songs:
    try:
        download_song(song)
    except Exception as e:
        print("Expected Error")



[generic] Extracting URL: Michael Jackson Speed Demon
[youtube:search] Extracting URL: ytsearch1:Michael Jackson Speed Demon
[download] Downloading playlist: Michael Jackson Speed Demon
[youtube:search] query "Michael Jackson Speed Demon": Downloading web client config
[youtube:search] query "Michael Jackson Speed Demon" page 1: Downloading API JSON
[youtube:search] Playlist Michael Jackson Speed Demon: Downloading 1 items of 1
[download] Downloading item 1 of 1
[youtube] Extracting URL: https://www.youtube.com/watch?v=l039y9FaIjc
[youtube] l039y9FaIjc: Downloading webpage
[youtube] l039y9FaIjc: Downloading tv client config
[youtube] l039y9FaIjc: Downloading player 5ae7d525
[youtube] l039y9FaIjc: Downloading tv player API JSON
[youtube] l039y9FaIjc: Downloading ios player API JSON
[youtube] l039y9FaIjc: Downloading m3u8 information
[info] l039y9FaIjc: Downloading 1 format(s): 251
[download] Destination: songs\Michael Jackson - Speed Demon (Official Video).webm
[download] 100% of    9.7

ERROR: Postprocessing: ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location


Expected Error


In [1]:
import os
import subprocess

FFMPEG_PATH = r"C:\Users\SABIO\ffmpeg\bin\ffmpeg.exe" 

# Define input (where your WEBM files are) and output folders
input_folder = "songs"  # Update this if needed
output_folder = "songs_mp3"
os.makedirs(output_folder, exist_ok=True)  # Create output folder if it doesn't exist

def convert_webm_to_mp3():
    """Convert all .webm files in a folder to .mp3"""
    for file in os.listdir(input_folder):
        if file.endswith(".webm"):
            input_path = os.path.join(input_folder, file)
            output_path = os.path.join(output_folder, file.replace(".webm", ".mp3"))
            
            # Use full path to FFmpeg
            command = [FFMPEG_PATH, "-i", input_path, "-vn", "-ab", "192k", output_path]
            
            # Run FFmpeg command
            try:
                subprocess.run(command, check=True)
                print(f"✅ Converted: {file} -> {output_path}")
            except FileNotFoundError:
                print(f"❌ Error: FFmpeg not found at {FFMPEG_PATH}")
                break
            except subprocess.CalledProcessError as e:
                print(f"❌ FFmpeg Error: {e}")
                break

convert_webm_to_mp3()

✅ Converted: KORDHELL - MURDER PLOT.webm -> songs_mp3\KORDHELL - MURDER PLOT.mp3
✅ Converted: Michael Jackson - Billie Jean (Official Video).webm -> songs_mp3\Michael Jackson - Billie Jean (Official Video).mp3
✅ Converted: Michael Jackson - Speed Demon (Official Video).webm -> songs_mp3\Michael Jackson - Speed Demon (Official Video).mp3
✅ Converted: overdrive.webm -> songs_mp3\overdrive.mp3


In [2]:
import librosa
import librosa.display
import numpy as np
import os
import matplotlib.pyplot as plt

# Define folder containing MP3 files
input_folder = "songs_mp3"  
output_folder = "fingerprints"
os.makedirs(output_folder, exist_ok=True)  # Create output folder if not exists

def generate_fingerprint(file_path):
    """Extracts a fingerprint (spectrogram peaks) from an audio file"""
    try:
        # Load audio file
        y, sr = librosa.load(file_path, sr=44100)
        
        # Compute Spectrogram
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        
        # Get peaks in the spectrogram (Shazam-like fingerprinting)
        peaks = np.argwhere(D > np.percentile(D, 95))  # Extract top 5% peaks
        
        # Convert peaks into a unique fingerprint
        fingerprint = peaks.tolist()  # Convert to list format
        
        # Save fingerprint as a NumPy file
        fingerprint_file = os.path.join(output_folder, os.path.basename(file_path).replace(".mp3", ".npy"))
        np.save(fingerprint_file, fingerprint)
        
        print(f"✅ Fingerprint created for {os.path.basename(file_path)}")

        plt.figure(figsize=(10, 6))
        librosa.display.specshow(D, sr=sr, x_axis="time", y_axis="log")
        plt.colorbar(format="%+2.0f dB")
        plt.title(f"Spectrogram - {os.path.basename(file_path)}")
        plt.savefig(f"{file_path.replace('.mp3', '.png')}")
        plt.close()

    except Exception as e:
        print(f"⚠️ Error processing {file_path}: {e}")

# Process all MP3 files in the folder
for file in os.listdir(input_folder):
    if file.endswith(".mp3"):
        file_path = os.path.join(input_folder, file)
        generate_fingerprint(file_path)

✅ Fingerprint created for KORDHELL - MURDER PLOT.mp3
✅ Fingerprint created for Michael Jackson - Billie Jean (Official Video).mp3
✅ Fingerprint created for Michael Jackson - Speed Demon (Official Video).mp3
✅ Fingerprint created for overdrive.mp3


In [5]:
import librosa
import numpy as np
import os

# Define folders
songs_folder = "songs_mp3"  # Full song database
fingerprints_folder = "fingerprints"  # Precomputed fingerprints

def generate_snippet_fingerprint(file_path, start_time=10, duration=5):
    """Extracts a fingerprint from a short audio snippet"""
    try:
        # Load only a small snippet of the audio file
        y, sr = librosa.load(file_path, sr=44100, offset=start_time, duration=duration)
        
        # Compute spectrogram
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        
        # Get peaks (top 5% of intensities)
        peaks = np.argwhere(D > np.percentile(D, 95))
        
        return peaks  # Return the fingerprint

    except Exception as e:
        print(f"⚠️ Error processing snippet: {e}")
        return None

def find_closest_match(snippet_fingerprint):
    """Find the song with the closest matching fingerprint"""
    best_match = None
    best_score = float('inf')  # Lower score means better match

    for fingerprint_file in os.listdir(fingerprints_folder):
        if fingerprint_file.endswith(".npy"):
            song_fingerprint = np.load(os.path.join(fingerprints_folder, fingerprint_file))
            
            # Compare snippet fingerprint to full song fingerprint
            difference = len(set(map(tuple, snippet_fingerprint)) - set(map(tuple, song_fingerprint)))

            if difference < best_score:  # Lower difference means better match
                best_score = difference
                best_match = fingerprint_file.replace(".npy", "")

    return best_match if best_match else "No match found"

# Test with a snippet from a song
test_song = os.path.join(songs_folder, "Michael Jackson - Speed Demon (Official Video).mp3")  # Change this to an actual song in your database
snippet_fingerprint = generate_snippet_fingerprint(test_song)

if snippet_fingerprint is not None:
    match = find_closest_match(snippet_fingerprint)
    print(f"🔍 Closest match: {match}")

🔍 Closest match: Michael Jackson - Speed Demon (Official Video)
