In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import lyricsgenius
import pandas as pd
import re
import time
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:


# ✅ Credentials
from credentials import SPOTIFY_CLIENT_ID, SPOTIFY_CLIENT_SECRET, GENIUS_API_KEY
# ✅ Initialize Genius client
genius = lyricsgenius.Genius(GENIUS_API_KEY, timeout=15, retries=3)

# ✅ Initialize theme classifier
theme_classifier = pipeline("zero-shot-classification", 
                          model="facebook/bart-large-mnli")
CANDIDATE_THEMES = [
    "romance", "sadness", "nature", "friendship",
    "self-love", "meditation", "adventure", "nostalgia",
    "party", "empowerment", "calm", "energy"
]

def clean_lyrics(lyrics):
    """Remove section headers and clean text"""
    return re.sub(r"\[.*?\]", "", lyrics).strip()

def get_lyrics(track_name, artist_name):
    """Get cleaned lyrics using lyricsgenius"""
    try:
        song = genius.search_song(track_name, artist_name)
        return clean_lyrics(song.lyrics) if song else None
    except Exception as e:
        print(f"Error getting lyrics for {track_name}: {str(e)}")
        return None

def analyze_themes(lyrics):
    """Analyze lyrics for themes using zero-shot classification"""
    if not lyrics:  
        return []
    return theme_classifier(lyrics[:1000], CANDIDATE_THEMES)["labels"][:3]

def main():
    # ✅ Load merged CSV file
    df = pd.read_csv("features_user_playlist_rows.csv")  # Ensure the correct file path
    df["lyrics"] = None
    df["themes"] = None

    songs_data = []

    for index, row in df.iterrows():
        track_name = row["track_name"]
        artist_name = row["artist_name"]
        playlist_id = row["playlist_id"]

        print(f"Processing: {track_name} - {artist_name}")

        # ✅ Get lyrics and analyze themes
        lyrics = get_lyrics(track_name, artist_name)
        themes = analyze_themes(lyrics) if lyrics else []

        # ✅ Store results
        songs_data.append({
            "track_id": playlist_id,
            "track_name": track_name,
            "artist_name": artist_name,
            "lyrics": lyrics,
            "themes": themes
        })

        time.sleep(1)  # To avoid API rate limits

    # ✅ Convert to DataFrame and save
    lyrics_df = pd.DataFrame(songs_data)
    lyrics_df.to_csv("merged_Csv_with_lyrics.csv", index=False)
    print(f"Created database with {len(lyrics_df)} songs with lyrics and themes.")

if __name__ == "__main__":
    main()


Device set to use mps:0


Processing: The Girl I Haven't Met - Kudasaibeats
Searching for "The Girl I Haven't Met" by Kudasaibeats...
Specified song does not contain lyrics. Rejecting.
Processing: nan - nan
Searching for "nan" by nan...
Error getting lyrics for nan: 'float' object has no attribute 'translate'
Processing: Wait - M83
Searching for "Wait" by M83...
Done.
Processing: Heat Waves - Glass Animals
Searching for "Heat Waves" by Glass Animals...
Done.
Processing: Anti-Hero - Taylor Swift
Searching for "Anti-Hero" by Taylor Swift...
Done.
Processing: More Than Friends (feat. Meghan Trainor) - Jason Mraz, Meghan Trainor
Searching for "More Than Friends (feat. Meghan Trainor)" by Jason Mraz, Meghan Trainor...
Done.
Processing: Out Of The Woods (Taylor's Version) - Taylor Swift
Searching for "Out Of The Woods (Taylor's Version)" by Taylor Swift...
Done.
Processing: Love Is Madness (feat. Halsey) - Thirty Seconds To Mars, Halsey
Searching for "Love Is Madness (feat. Halsey)" by Thirty Seconds To Mars, Halsey.