# Genius French Rap Lyrics Scraper

## Import the necessary libraries

In [None]:
from bs4 import BeautifulSoup
import requests
import re
import os

## Genius API Token

In [None]:
# Read the token from the GENIUS_API_TOKEN environment variable when it is set,
# so the secret does not have to be pasted into the notebook; otherwise keep the
# placeholder, which has to be replaced by a real Genius API access token.
token = os.environ.get("GENIUS_API_TOKEN") or "GENIUS API TOKEN"

## Artist Object Scraping

### Function

In [None]:
# artist object scraping from Genius API
def rapper_info(rapper, page, TOKEN = token):
    """Request one page of Genius search results for a rapper.

    Args:
        rapper: Search term, sent as the ``q`` query parameter.
        page: Number of the result page to request (50 results per page).
        TOKEN: Genius API token placed in the ``Authorization: Bearer`` header.

    Returns:
        The ``requests.Response`` object as returned by ``requests.get``;
        the status code is not checked here.
    """
    search_url = 'https://api.genius.com/search'
    headers = {'Authorization': 'Bearer ' + TOKEN}
    # A GET request carries its arguments in the query string, so everything
    # goes through `params` (which also URL-encodes the values) instead of
    # being sent as a request body via `data`.
    params = {'q': rapper, 'per_page': 50, 'page': page}
    response = requests.get(search_url, params=params, headers=headers, timeout=5)
    return response

### Test

In [None]:
print(rapper_info('Alpha Wann', 10))

<Response [200]>


## Song URL Scraping

### Function

In [None]:
# song URL scraping from rapper object
def song_url(rapper, nb_songs, TOKEN = token):
    """Collect up to ``nb_songs`` Genius song URLs for a rapper.

    Search result pages are requested one after another through
    ``rapper_info``. A hit is kept when the rapper's name appears
    (case-insensitively) in the name of the hit's primary artist.

    Args:
        rapper: Name of the rapper to search for.
        nb_songs: Maximum number of song URLs to return.
        TOKEN: Genius API token forwarded to ``rapper_info``.

    Returns:
        A list of at most ``nb_songs`` song URLs (possibly shorter or empty).

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    songs = []

    # nothing to collect: report it without spending an API request
    if nb_songs <= 0:
        print('Found {} songs of {}'.format(len(songs), rapper))
        return songs

    wanted = rapper.lower()
    page = 1

    while True:
        response = rapper_info(rapper, page, TOKEN)
        # fail with a clear HTTP error instead of a KeyError on the payload
        response.raise_for_status()
        payload = response.json()

        # keeping the song objects whose primary artist matches the rapper
        song_info = [
            hit for hit in payload['response']['hits']
            if wanted in hit['result']['primary_artist']['name'].lower()
        ]

        # scraping songs url from songs objects, never exceeding nb_songs
        remaining = nb_songs - len(songs)
        songs.extend(hit['result']['url'] for hit in song_info[:remaining])

        # stopping the loop if we have enough songs for this rapper
        if len(songs) >= nb_songs:
            break

        # stopping the loop if we don't have enough songs of this rapper:
        # fewer than 5 matching hits on a page is treated as the end of results
        if len(song_info) < 5:
            break

        page += 1

    # display the songs url that we have found
    print('Found {} songs of {}'.format(len(songs), rapper))
    return songs

### Test

In [None]:
song_url('Alpha Wann', 10)

Found 10 songs of Alpha Wann


['https://genius.com/Nekfeu-and-alpha-wann-monsieur-sable-lyrics',
 'https://genius.com/Nekfeu-and-alpha-wann-du-sexe-oppose-lyrics',
 'https://genius.com/Alpha-wann-le-piege-lyrics',
 'https://genius.com/Alpha-wann-philly-flingo-lyrics',
 'https://genius.com/Alpha-wann-stupefiant-et-noir-lyrics',
 'https://genius.com/Alpha-wann-ca-va-ensemble-lyrics',
 'https://genius.com/Alpha-wann-aaa-lyrics',
 'https://genius.com/Alpha-wann-ny-a-fond-lyrics',
 'https://genius.com/Alpha-wann-pistolet-rose-2-lyrics',
 'https://genius.com/Alpha-wann-cascade-remix-lyrics']

## Lyrics Scraping

### Function

In [None]:
# scraping lyrics from songs url
def song_lyrics(url):
    """Download a Genius song page and return its cleaned lyrics.

    The text of every element matching ``div[class^="Lyrics__Container"]``
    or ``.lyrics`` is extracted, bracketed or parenthesised annotations that
    sit on a single line are removed, and empty lines are dropped.

    Args:
        url: URL of the song page.

    Returns:
        The lyrics as one string whose lines are joined with ``os.linesep``,
        or an empty string when the page could not be fetched or no lyrics
        element was found.
    """
    lyrics = "" # so that the program does not stop if a scraping error occurs

    # a timeout keeps one unresponsive page from blocking the whole run
    try:
        page = requests.get(url, timeout=10)
    except requests.RequestException:
        print("An error occurred while scraping lyrics")
        return lyrics

    soup = BeautifulSoup(page.content, 'html.parser')

    # scraping lyrics using BeautifulSoup function; a page can hold several
    # lyrics containers, so all of them are collected, not only the first
    containers = soup.select('div[class^="Lyrics__Container"], .lyrics')
    if containers:
        lyrics = '\n'.join(
            container.get_text(strip=True, separator='\n')
            for container in containers
        )
    else:
        print("An error occurred while scraping lyrics")

    # removing identifiers of chorus and verse
    lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics)

    # removing empty lines
    lyrics = os.linesep.join([s for s in lyrics.splitlines() if s])

    return lyrics

### Test

In [None]:
print(song_lyrics('https://genius.com/Nekfeu-and-alpha-wann-monsieur-sable-lyrics'))

Portez-moi du rêve... Marchand d'sable...
Balance un autre joint, balance une autre putain d'pépite
Les keufs aiment matraquer les prolos, ressentent l'invincibilité
C'est pas attaquer les homos qui t'rendra ta virilité
Ho non, poto va tâter des lolos, molo
T'amasses des vrais bobos sous projo
J'suis ni robot ni hologramme, arrête tes photos
On connaît la chienneté,
allez, viendez dans ma bande
L'étendard flambe, on a des temps d'avance
Même indépendamment
Importez-moi c'rêve magique, est-ce la zik qui me berce ?
Reste addict s'il te plaît, j'déteste les statistiques de merde
Hé, statique si je crée des verses
Magiques qui te prennent de court
Jamais pris de C, j'reste à l'abri des problèmes de bourges
Oublie ta peine, l'odeur de crack sur la gazinière
J'bousille le game avec le Phaal, on est quasi frères
J'compte pas céder à leur chantage odieux
On croit aux vœux, j'maudis le temps qui file
Qui rend si triste quand on embrasse nos vieux
" Hé, Nekfeu, on compte sur toi, fais ça bien ! 

## Lyrics to file

### Function

In [None]:
# writing the lyrics scraped on a text file
def lyrics_to_file(rapper, nb_songs, TOKEN = token):
    """Scrape a rapper's lyrics and write them to a text file.

    The file is ``french_rap_lyrics/<rapper in lower case>.txt``, encoded in
    UTF-8; an existing file with that name is overwritten.

    Args:
        rapper: Name of the rapper, used for the search and the file name.
        nb_songs: Maximum number of songs to scrape.
        TOKEN: Genius API token forwarded to ``song_url``.
    """
    # getting the songs url first, so a failed search does not leave an
    # empty file behind
    urls = song_url(rapper, nb_songs, TOKEN)

    # creating the output folder if it does not exist yet
    os.makedirs('french_rap_lyrics', exist_ok=True)
    path = 'french_rap_lyrics/' + rapper.lower() + '.txt'

    # creating a text file with the name of the rapper
    with open(path, 'wb') as f:
        for url in urls:
            # getting the songs lyrics
            lyrics = song_lyrics(url)
            if not lyrics:
                continue
            # writing the songs lyrics on the text file; the trailing line
            # separator keeps the last line of a song from being glued to
            # the first line of the next one
            f.write((lyrics + os.linesep).encode("utf8"))

    # counting and displaying the number of lines written
    with open(path, 'rb') as f:
        nb_lines = sum(1 for _ in f)
    # reporting the number of songs actually found, not the number requested
    print('Wrote {} lines of lyrics in the text file from {} songs of {}'.format(nb_lines, len(urls), rapper))

### Test

In [None]:
lyrics_to_file('Alpha Wann', 10)

Found 10 songs of Alpha Wann
Wrote 515 lines of lyrics in the text file from 10 songs of Alpha Wann


## Final Scraping

In [None]:
# storing the names of rapper present in the file on a list
# (one name per line; the file is closed as soon as it has been read)
with open("rappers.txt", "r") as rappers_file:
    content = rappers_file.read()
rappers_list = content.splitlines()
# blank lines are skipped: an empty name is a substring of every artist name,
# so it would match every search hit in song_url
rappers_list = [name.rstrip() for name in rappers_list if name.strip()]
print(len(rappers_list))

100


In [None]:
# writing the lyrics file of each rapper
# (lyrics_to_file is called once per name in rappers_list, asking for up to
# 100 songs each)
for rapper in rappers_list:
    lyrics_to_file(rapper, 100)

Found 100 songs of Alkpote
Wrote 5367 lines of lyrics in the text file from 100 songs of Alkpote
Found 100 songs of Alonzo
Wrote 4450 lines of lyrics in the text file from 100 songs of Alonzo
Found 100 songs of Booba
Wrote 4747 lines of lyrics in the text file from 100 songs of Booba
Found 100 songs of Da Uzi
Wrote 4317 lines of lyrics in the text file from 100 songs of Da Uzi
Found 100 songs of Dinos
Wrote 3937 lines of lyrics in the text file from 100 songs of Dinos
Found 28 songs of Doums
Wrote 1105 lines of lyrics in the text file from 100 songs of Doums
Found 100 songs of Dosseh
Wrote 4694 lines of lyrics in the text file from 100 songs of Dosseh
Found 100 songs of Freeze Corleone
Wrote 4776 lines of lyrics in the text file from 100 songs of Freeze Corleone
Found 26 songs of SEB (FRA)
Wrote 1119 lines of lyrics in the text file from 100 songs of SEB (FRA)
Found 26 songs of Gringe
Wrote 1132 lines of lyrics in the text file from 100 songs of Gringe
Found 100 songs of Seth Gueko
Wro

In [None]:
!zip -r /content/french_rap_lyrics.zip /content/french_rap_lyrics

  adding: content/french_rap_lyrics/ (stored 0%)
  adding: content/french_rap_lyrics/niska.txt (deflated 65%)
  adding: content/french_rap_lyrics/dinos.txt (deflated 65%)
  adding: content/french_rap_lyrics/spoke orkestra.txt (deflated 59%)
  adding: content/french_rap_lyrics/gringe.txt (deflated 61%)
  adding: content/french_rap_lyrics/naps.txt (deflated 67%)
  adding: content/french_rap_lyrics/kekra.txt (deflated 71%)
  adding: content/french_rap_lyrics/lorenzo.txt (deflated 62%)
  adding: content/french_rap_lyrics/seb (fra).txt (deflated 62%)
  adding: content/french_rap_lyrics/koba lad.txt (deflated 65%)
  adding: content/french_rap_lyrics/alpha wann.txt (deflated 60%)
  adding: content/french_rap_lyrics/mhd.txt (deflated 65%)
  adding: content/french_rap_lyrics/alkpote.txt (deflated 62%)
  adding: content/french_rap_lyrics/bigflo & oli.txt (deflated 62%)
  adding: content/french_rap_lyrics/kaaris.txt (deflated 65%)
  adding: content/french_rap_lyrics/plk.txt (deflated 66%)
  addin

In [None]:
from google.colab import files
files.download('/content/french_rap_lyrics.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>