In [2]:
!pip install lyricsgenius



In [3]:
!pip install googletrans



In [4]:
import numpy as np
import pandas as pd
import json
import time
import lyricsgenius
import  multiprocessing
import warnings
import random

from spotifyclient import SpotifyAPI
from googletrans import Translator


In [5]:
warnings.filterwarnings('ignore')
random.seed(5)

In [6]:
#Get the client id and client secret from SpotifyAPI
client_id = '116037a141a04c628cdadaf11310f7d8'
client_secret = 'f505511e896d4b8fb4cd27c4e771f9f6'

genius = lyricsgenius.Genius('p2U70gp0xq5TWV10R8iJXI5uz4DjNseRup1FA63S8-3ZgQBOMyCneDzCew89BXVb')

spotify = SpotifyAPI(client_id, client_secret)

In [7]:
genius.verbose = False # Turn off status messages
genius.remove_section_headers = True # Remove section headers (e.g. [Chorus]) from lyrics when searching
genius.excluded_terms = ["(Remix)", "(Live)"]

In [8]:
def preprocess_df(df,query):

  """ This function is used to preprocess the given 
      dataframe

      Arguments :
        1) df(DataFrame) : The dataframe to be preprocessed
        2) query(str)    : The type of genre

      Returns : 
        The preprocessed dataframe

  """
  df.drop_duplicates(keep='first',inplace=True)
  df.reset_index(drop=True,inplace=True)
  df.rename(columns={'artist':'Artist','song':'Song'},inplace=True)
  print('Number of songs returned by the Spotify API of genre {}\
  is {}'.format(query.upper(),len(df)))

  return df

In [9]:
def get_song_name(query,search_type):

  """ This function is used to return the song titles 
      and also the artist names correspoding to each 
      query and type.

      Arguments:

        1) query(str) : The type of genre.
        2) search_type(str)  : The form in which the 
                               songs are returned.

      Returns:

        A list of dictionaries containing the song name
        and it's corresponding artists.

  """
  play_data = spotify.search(query = query, search_type = search_type)
  list_of_songs = []

  for i in range(len(play_data['playlists']['items'])):
    track_data = spotify.get_playlist_tracks(play_data['playlists']['items']
                                             [i]['id'])

    for j in range(len(track_data['items'])-1):     
      songs_and_artists={}
      try:
        songs_and_artists['artist'] = track_data['items'][j+1]['track'] \
                                                ['artists'][0]['name']
      except:
        songs_and_artists['artist'] = 'None' 
      try:
        songs_and_artists['song']=track_data['items'][j+1]['track']['name']
      except:
        songs_and_artists['song'] = 'None'

      list_of_songs.append(songs_and_artists)      

  final_list = [i for i in list_of_songs if not (i['artist'] == 'None' or 
                                                 i['song'] == 'None')]
  df_final = pd.DataFrame(final_list)
  
  return preprocess_df(df_final,query) 

In [10]:
#def genius_api(artist_name,song_name):

  """ This function is used to return the lyrics of a song 
      using the GeniusAPI.

      Arguments :

        1) artist_name(str) : Name of the artist.
        2) song_name(str)   : Name of the song.

     Returns :

       The lyrics of the song

  """
  #try:
   # song = genius.search_song(song_name,artist_name,get_full_info=True)
    #lyrics = song.lyrics
  #except:
   # lyrics = np.nan

  #return lyrics

In [11]:
def genius_api(artist_name,song_name):

  """ This function is used to return the lyrics of a song 
      using the GeniusAPI.

      Arguments :

        1) artist_name(str) : Name of the artist.
        2) song_name(str)   : Name of the song.

      Returns :

        The lyrics of the song

  """
  song = genius.search_song(song_name,
                            artist_name,
                            get_full_info=False)

  try:
    lyrics = song.lyrics
  except AttributeError:
    i = 0
    while song is None:
      if i == 5:
        return np.nan
        break
      song = genius.search_song(song_name,
                                artist_name,
                                get_full_info=False)
      i+=1
      if song is not None:
        lyrics= song.lyrics
        break

  return lyrics

In [12]:
def get_song_lyrics(df,p=50):

  """ This function is used to get the song lyrics using 
      multiprocessing.

      Arguments:
        
        1) df(DataFrame) : The dataset to do multiprocessing on.
        2) p(int)        : The number of processes.

      Returns :

        The dataframe which contains the lyrics.

  """
  #with multiprocessing.dummy.Pool(processes=p) as pool:
  pool = multiprocessing.Pool(50)
  df['Lyrics'] = pool.starmap(genius_api,
                              zip(df['Artist'],df['Song']))
  pool.terminate()
  pool.join()

  return df

In [None]:
#Getting the song names of genre 'Love'
df_title_love = get_song_name('love','playlist')

q=love&type=playlist&limit=50
Number of songs returned by the Spotify API of genre LOVE  is 3021


In [None]:
df_title_love.tail()

Unnamed: 0,Artist,Song
3016,Skylar Belice,Bad for You
3017,The Script,Breakeven
3018,Josephina,Dope Girl
3019,Sam Smith,Too Good At Goodbyes
3020,Ananya Birla,Let There Be Love


In [None]:
df_title_love.isnull().sum()

Artist    0
Song      0
dtype: int64

In [None]:
#Getting the song lyrics corresponding to the song name and artist name
df_love = get_song_lyrics(df_title_love)

In [None]:
df_love.isnull().sum()

Artist      0
Song        0
Lyrics    527
dtype: int64

In [None]:
df_love.dropna(axis=0,inplace=True)

In [None]:
df_love.head()

Unnamed: 0,Artist,Song,Lyrics
1,Dan + Shay,I Should Probably Go To Bed,All of my friends finally convinced me to get ...
2,Lost Frequencies,Don't Leave Me Now,Scene 08: Timber & The Owls\nScene 09: The TV ...
4,Joel Corry,Head & Heart (feat. MNEK) - Acoustic,"Oh my god, oh my god, this feeling's just begu..."
5,MisterWives,3 small words,Your watch broke\nAnd I hope that it stopped t...
6,Emily Burns,"I Love You, You're The Worst",Do you string me along for the thrill?\nKnock ...


In [None]:
df_love.to_csv('love_genre.csv')

In [13]:
#Getting the song names of genre 'Sad'
df_title_sad = get_song_name('sad','playlist')

q=sad&type=playlist&limit=50
Number of songs returned by the Spotify API of genre SAD  is 2855


In [14]:
df_title_sad.head(34)

Unnamed: 0,Artist,Song
0,NF,Let You Down
1,Russ,Losin Control
2,Drake,Doing It Wrong
3,Khalid,Coaster
4,Logic,1-800-273-8255
5,James Arthur,Say You Won't Let Go
6,The Killers,Just Another Girl
7,Khalid,Reasons
8,The Weeknd,Die For You
9,G-Eazy,Think About You (feat. Quiñ)


In [16]:
#Getting the song lyrics corresponding to the song name and artist name
df_sad = get_song_lyrics(df_title_sad)

In [17]:
df_sad.head(24)

Unnamed: 0,Artist,Song,Lyrics
0,NF,Let You Down,Feels like we're on the edge right now\nI wish...
1,Russ,Losin Control,"She's falling in love now, losing control now\..."
2,Drake,Doing It Wrong,When a good thing goes bad it's not the end of...
3,Khalid,Coaster,"As time passes, I feel so low\nSearchin' for p..."
4,Logic,1-800-273-8255,"I've been on the low, I been taking my time\nI..."
5,James Arthur,Say You Won't Let Go,I met you in the dark\nYou lit me up\nYou made...
6,The Killers,Just Another Girl,Step out into the Indian dust\nI can feel the ...
7,Khalid,Reasons,Evade my intellect\nFeed into my introspect\nW...
8,The Weeknd,Die For You,I'm findin' ways to articulate the feeling I'm...
9,G-Eazy,Think About You (feat. Quiñ),'Cause I do\n'Cause I do\n'Cause I do\nKeep wr...


In [19]:
df_sad.isnull().sum()

Artist      0
Song        0
Lyrics    555
dtype: int64

In [20]:
df_sad.to_csv('sad_genre.csv')

In [None]:
import json

with open('workout.json', 'w') as json_file:
  json.dump(workout, json_file)

In [None]:
import lyricsgenius
genius = lyricsgenius.Genius('p2U70gp0xq5TWV10R8iJXI5uz4DjNseRup1FA63S8-3ZgQBOMyCneDzCew89BXVb')

In [None]:
pop = pd.DataFrame({'Artist':['Harry Styles','Bruno Mars','ffgf'],'Song':['Sign of the Times','Talking to the Moon','fdsd']})