In [1]:
from dotenv import load_dotenv
import os
import lyricsgenius
import yaml

In [25]:
# Get credentials for the api calls from the .env file
# Niko has the credentials if you need them
load_dotenv()
DATA_PATH = "../../data"

In [3]:
genius = lyricsgenius.Genius()

In [4]:
# Scraping of all existing secondary tags on genius 
# has been done in src/notebooks/scrape-secondary-tags.ipynb
main_tags = ["country", "pop", "r-b", "rap", "rock"]
with open(DATA_PATH + "/secondary_genres.yaml", "r") as f:
    secondary_tags = yaml.safe_load(f)

main_tags, secondary_tags

(['country', 'pop', 'r-b', 'rap', 'rock'],
 ['abstract-rap',
  'a-cappella',
  'acid-house',
  'acid-jazz',
  'acid-techno',
  'acoustic',
  'adult-alternative',
  'adult-contemporary',
  'afro-arab',
  'afrobeat',
  'afrobeats',
  'afro-hip-hop',
  'afro-house',
  'afro-jazz',
  'afrosoul',
  'afroswing',
  'afro-trap',
  'albanian-folk',
  'album-oriented-rock-aor',
  'algerian-rai',
  'algerian-rap',
  'alternative',
  'alternative-country',
  'alternative-dance',
  'alternative-metal',
  'alternative-pop',
  'alternative-rap',
  'alternative-r-b',
  'alternative-rock',
  'amapiano',
  'ambient',
  'ambient-dub',
  'ambient-pop',
  'americana',
  'american-folk',
  'american-underground',
  'anarcho-punk',
  'anime-lo-fi',
  'anime-rap',
  'anthem',
  'anti-folk',
  'arabic-diss',
  'arabic-instrumental',
  'arabic-pop',
  'arabic-rap',
  'arabic-rock',
  'arrocha',
  'art-pop',
  'art-punk',
  'art-rock',
  'ashiq-music',
  'atmospheric-black-metal',
  'atmospheric-sludge-metal',
 

In [5]:
# Try to find all songs for a specified genre
abstract_rap_songs = genius.tag("abstract-rap")["hits"]

In [6]:
# These are appearently not all songs yet
len(abstract_rap_songs)

20

In [7]:
feel_good_inc = abstract_rap_songs[2]
feel_good_inc

{'url': 'https://genius.com/Gorillaz-feel-good-inc-lyrics',
 'title_with_artists': 'Feel Good Inc. by Gorillaz (Ft. De La Soul)',
 'title': 'Feel Good Inc.',
 'artists': ['Gorillaz'],
 'featured_artists': ['De La Soul']}

In [27]:
# Print the lyrics for a specific song
print(genius.lyrics(song_url=feel_good_inc["url"]))

Timeout: Request timed out:
HTTPSConnectionPool(host='genius.com', port=443): Read timed out. (read timeout=5)

In [None]:
abstract_rap_songs[:2]

[{'url': 'https://genius.com/Childish-gambino-iv-sweatpants-lyrics',
  'title_with_artists': 'IV. Sweatpants by Childish Gambino',
  'title': 'IV. Sweatpants',
  'artists': ['Childish Gambino'],
  'featured_artists': []},
 {'url': 'https://genius.com/Tyler-the-creator-yonkers-lyrics',
  'title_with_artists': 'Yonkers by Tyler, The Creator',
  'title': 'Yonkers',
  'artists': ['Tyler, The Creator'],
  'featured_artists': []},
 {'url': 'https://genius.com/Gorillaz-feel-good-inc-lyrics',
  'title_with_artists': 'Feel Good Inc. by Gorillaz (Ft. De La Soul)',
  'title': 'Feel Good Inc.',
  'artists': ['Gorillaz'],
  'featured_artists': ['De La Soul']}]

In [12]:
# The page feature can be used to find 20 more songs
genius.tag("abstract-rap", page=2)["hits"][:2]

[{'url': 'https://genius.com/Chance-the-rapper-acid-rain-lyrics',
  'title_with_artists': 'Acid Rain by Chance the Rapper',
  'title': 'Acid Rain',
  'artists': ['Chance the Rapper'],
  'featured_artists': []},
 {'url': 'https://genius.com/A-ap-rocky-phoenix-lyrics',
  'title_with_artists': 'Phoenix by A$AP Rocky',
  'title': 'Phoenix',
  'artists': ['A$AP Rocky'],
  'featured_artists': []}]

In [26]:
# Download 1000 songs for the every genre
for genre in ["pop", "rock", "r-b", "country", "rap", "blues", "jazz", "experimental", "folk", "classical-music", "electronic"]:

    # Iterate through pages of songs in genius
    titles = []
    lyrics = []
    all_songs = []
    for i in range(1, 51):
        pop_songs = genius.tag(genre, page=i)["hits"]
        for song in pop_songs:
            title = song["title_with_artists"]
            all_songs.append([title, song["url"]])
            print(f"Saved '{title}'")

    with open(f"{DATA_PATH}/songs/{genre}.yml", "w") as f:
        yaml.safe_dump(all_songs, f)
    print(f"Succsefffully saved {genre} songs")

Saved 'Time To Say Goodbye by Sarah Brightman'
Saved 'We'll Meet Again by Vera Lynn'
Saved 'Lacrimosa by Wolfgang Amadeus Mozart'
Saved 'FACTS by Tom MacDonald (Ft. Ben Shapiro)'
Saved 'Yeh Vaada Raha by Asha Bhosle (Ft. Kishore Kumar)'
Saved 'La Donna E Mobile by Luciano Pavarotti'
Saved 'All of the Lights (Interlude) by Kanye West'
Saved 'Shatter Me by Lindsey Stirling (Ft. Lzzy Hale)'
Saved 'Message to Harry Manback by Tool'
Saved 'Wife by Mitski'
Saved 'Fall on Me by Andrea Bocelli (Ft. Matteo Bocelli)'
Saved 'Ode to Joy by Ludwig van Beethoven'
Saved 'For The Beauty Of The Earth by John Rutter'
Saved 'Ave Maria - Ellens dritter Gesang (Ellens Gesang III, D. 839, Op. 52, No. 6, 1825) by Franz Schubert'
Saved 'E più ti penso by Andrea Bocelli & Ariana Grande'
Saved 'Pearl Diver by Mitski'
Saved 'Baker Street by Gerry Rafferty'
Saved 'Après Moi by Regina Spektor'
Saved 'Kishore Kumar - Aa Chal Ke Tujhe (Hindi Version) by Genius India'
Saved 'Kishore Kumar - Chingari Koi Bhadke (Hindi