In [1]:
import os
import lyricsgenius
from dotenv import load_dotenv
import yaml
import tomllib
import numpy as np
from typing import List
import sys

root = "../../.."

sys.path.append(os.path.abspath(f"{root}/src"))
from utils import get_main_tag, shrink_genius_tag, song_to_csv, artists_list_to_string

load_dotenv()

genius = lyricsgenius.Genius()
public_api = lyricsgenius.PublicAPI()

with open(f"{root}/config.toml", "rb") as f:
    config = tomllib.load(f)

def get_song_tags(song_id: int):
    song = public_api.song(song_id)['song']
    return song['tags']

In [21]:
for genre in ["blues", "electronic", "experimental", "folk", "jazz"]:

    multilabel_file = f"{root}/{config['multilabel_data_dir']}/songs/{genre}.yml"
    song_file = f"{root}/{config['id_data_dir']}/songs.csv"
    index_file = "05-index.txt"

    with open(multilabel_file, "r") as f:
        songs = yaml.safe_load(f)

    start_index = 0
    if os.path.exists(index_file):
        with open(index_file, "r") as f:
                start_index = int(f.read())

    # TODO: Search every id to get the songs language
    csv_strings = []
    for i, song in enumerate(songs[start_index:]):

        if song["id"] == -1:
            continue

        # print(f"Saving song {song['title']} with id {song['id']}")

        language = None
        while not language:
             try:
                language = genius.song(song["id"])["song"]["language"]

                if not language:
                    language = ""
                    break
             except Exception:
                print(f"❌ Failed to get song for id {song['id']}")

        song["language"] = language
        song["artists"] = artists_list_to_string(song["artists"])

        csv_strings.append(song_to_csv(song))

        # File save for every 10th song
        if i % 10 == 0:

            with open(song_file, "a", encoding="utf-8") as f:
                f.writelines(csv_strings)

            with open(index_file, "w") as f:
                f.write(str(start_index + i + 1))

            csv_strings = []

            print(f"✅ Saved {start_index + i + 1}th song for genre {genre}")

    with open(index_file, "w") as f:
        f.write('0')

    print(f"Saved songs from {genre} genre")

✅ Saved 912th song for genre blues
✅ Saved 922th song for genre blues
✅ Saved 932th song for genre blues
✅ Saved 942th song for genre blues
✅ Saved 952th song for genre blues
✅ Saved 962th song for genre blues
✅ Saved 972th song for genre blues
✅ Saved 982th song for genre blues
❌ Failed to get song for id 2949128
✅ Saved 992th song for genre blues
Saved songs from blues genre
✅ Saved 1th song for genre electronic
✅ Saved 11th song for genre electronic
✅ Saved 21th song for genre electronic
✅ Saved 31th song for genre electronic
✅ Saved 41th song for genre electronic
✅ Saved 51th song for genre electronic
✅ Saved 61th song for genre electronic
✅ Saved 71th song for genre electronic
✅ Saved 81th song for genre electronic
✅ Saved 91th song for genre electronic
✅ Saved 101th song for genre electronic
✅ Saved 111th song for genre electronic
✅ Saved 121th song for genre electronic
✅ Saved 131th song for genre electronic
✅ Saved 141th song for genre electronic
✅ Saved 151th song for genre el