In [1]:
import os
import subprocess
import random
import librosa

# Folder setup
input_folder = "songs_mp3"
clip_folder = "clip_mp3"

os.makedirs(clip_folder, exist_ok=True)

def extract_clip(mp3_path, duration=10):
    """Extract a random clip from a full MP3 file."""
    try:
        # Load audio to get total duration
        y, sr = librosa.load(mp3_path, sr=22050)
        total_duration = librosa.get_duration(y=y, sr=sr)

        # Pick random start time
        start_time = 0
        if total_duration > duration:
            start_time = random.uniform(0, total_duration - duration)

        # Output path
        base_name = os.path.splitext(os.path.basename(mp3_path))[0]
        clip_name = f"clip_{base_name}.mp3"
        clip_path = os.path.join(clip_folder, clip_name)

        # FFmpeg command
        cmd = [
            "ffmpeg",
            "-y",                # Overwrite if exists
            "-ss", str(start_time),
            "-t", str(duration),
            "-i", mp3_path,
            "-acodec", "copy",
            clip_path
        ]
        subprocess.run(cmd, check=True)
        print(f"🎯 Clip saved: {clip_path}")

    except Exception as e:
        print(f"❌ Error processing {mp3_path}: {e}")

# Iterate through each MP3 in input folder
for file in os.listdir(input_folder):
    if file.endswith(".mp3"):
        full_path = os.path.join(input_folder, file)
        extract_clip(full_path, duration=10)

🎯 Clip saved: clip_mp3\clip_Billie Eilish - BIRDS OF A FEATHER (Official Music Video).mp3
🎯 Clip saved: clip_mp3\clip_Chappell Roan - Pink Pony Club (Official Music Video).mp3
🎯 Clip saved: clip_mp3\clip_Kendrick Lamar - luther (Official Audio).mp3
🎯 Clip saved: clip_mp3\clip_Kendrick Lamar - Not Like Us.mp3
🎯 Clip saved: clip_mp3\clip_Kendrick Lamar - tv off (Official Audio).mp3
🎯 Clip saved: clip_mp3\clip_Lady Gaga, Bruno Mars - Die With A Smile (Official Music Video).mp3
🎯 Clip saved: clip_mp3\clip_PARTYNEXTDOOR & DRAKE - NOKIA (Official Lyric Video).mp3
🎯 Clip saved: clip_mp3\clip_ROSÉ & Bruno Mars - APT. (Official Music Video).mp3
🎯 Clip saved: clip_mp3\clip_Shaboozey - A Bar Song (Tipsy) [Official Visualizer].mp3
🎯 Clip saved: clip_mp3\clip_Teddy Swims - Lose Control (The Village Sessions).mp3


In [2]:
import librosa
import librosa.display
import numpy as np
import os
import matplotlib.pyplot as plt

# Define folder containing MP3 files
input_folder = "clip_mp3"  
output_folder = "clip_fingerprints"
os.makedirs(output_folder, exist_ok=True)  

def generate_fingerprint(file_path):
    """Extracts a fingerprint (spectrogram peaks) from an audio file"""
    try:
        # Load audio file
        y, sr = librosa.load(file_path, sr=44100)
        
        # Compute Spectrogram
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        
        # Get peaks in the spectrogram (Shazam-like fingerprinting)
        peaks = np.argwhere(D > np.percentile(D, 95))  # Extract top 5% peaks
        
        # Convert peaks into a unique fingerprint
        fingerprint = peaks.tolist()  # Convert to list format
        
        # Save fingerprint as a NumPy file
        fingerprint_file = os.path.join(output_folder, os.path.basename(file_path).replace(".mp3", ".npy"))
        np.save(fingerprint_file, fingerprint)
        
        print(f"Fingerprint created for {os.path.basename(file_path)}")

        plt.figure(figsize=(10, 6))
        librosa.display.specshow(D, sr=sr, x_axis="time", y_axis="log")
        plt.colorbar(format="%+2.0f dB")
        plt.title(f"Spectrogram - {os.path.basename(file_path)}")
        plt.savefig(f"{file_path.replace('.mp3', '.png')}")
        plt.close()

    except Exception as e:
        print(f"Error processing {file_path}: {e}")

# Process all MP3 files in the folder
for file in os.listdir(input_folder):
    if file.endswith(".mp3"):
        file_path = os.path.join(input_folder, file)
        generate_fingerprint(file_path)

Fingerprint created for clip_Billie Eilish - BIRDS OF A FEATHER (Official Music Video).mp3
Fingerprint created for clip_Chappell Roan - Pink Pony Club (Official Music Video).mp3
Fingerprint created for clip_Kendrick Lamar - luther (Official Audio).mp3
Fingerprint created for clip_Kendrick Lamar - Not Like Us.mp3
Fingerprint created for clip_Kendrick Lamar - tv off (Official Audio).mp3
Fingerprint created for clip_Lady Gaga, Bruno Mars - Die With A Smile (Official Music Video).mp3
Fingerprint created for clip_PARTYNEXTDOOR & DRAKE - NOKIA (Official Lyric Video).mp3
Fingerprint created for clip_ROSÉ & Bruno Mars - APT. (Official Music Video).mp3
Fingerprint created for clip_Shaboozey - A Bar Song (Tipsy) [Official Visualizer].mp3
Fingerprint created for clip_Teddy Swims - Lose Control (The Village Sessions).mp3


In [3]:
import os
import numpy as np
import pandas as pd

def flatten_and_export_fingerprints(input_folder="clip_fingerprints", output_csv="clip_flattened_fingerprints.csv"):
    all_records = []

    for file in os.listdir(input_folder):
        if file.endswith(".npy"):
            try:
                npy_path = os.path.join(input_folder, file)
                data = np.load(npy_path)

                if len(data.shape) == 2:
                    for row in data:
                        all_records.append({
                            "filename": file,
                            "col1": int(row[0]),
                            "col2": int(row[1])
                        })
                else:
                    print(f"⚠ Skipped non-2D array in: {file}")
            except Exception as e:
                print(f"❌ Error reading {file}: {e}")

    # Save to CSV
    df = pd.DataFrame(all_records)
    df.to_csv(output_csv, index=False)
    print(f"✅ Flattened CSV saved at: {output_csv}")

# 👉 Run the function
flatten_and_export_fingerprints()

⚠ Skipped non-2D array in: clip_Billie Eilish - BIRDS OF A FEATHER (Official Music Video).npy
✅ Flattened CSV saved at: clip_flattened_fingerprints.csv
