### Initial Setup

In [1]:
# gdrive module
# Colab-only: attach the user's Google Drive at /content/gdrive.
from google.colab import drive
drive.mount(mountpoint='/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
# module installs
!pip install spotipy langdetect



In [3]:
# imports
import json
import os
import re
import urllib.parse
import urllib.request

import langdetect as ld
import librosa
import lyricsgenius
import numpy as np
import pandas as pd
import requests
import spotipy
from bs4 import BeautifulSoup
from spotipy.oauth2 import SpotifyClientCredentials

In [5]:
# spotify api creds
# Read the credentials from environment variables so real secrets never have to
# be typed into (and saved with) the notebook. The empty-string fallback keeps
# the previous placeholder values when the variables are not set.
SPOTIFY_CLIENT_ID = os.environ.get('SPOTIFY_CLIENT_ID', '')
SPOTIFY_CLIENT_SECRET = os.environ.get('SPOTIFY_CLIENT_SECRET', '')

In [6]:
# function to get spotify token
def get_spotify_token(client_id, client_secret):
    """Request an access token from Spotify using the client-credentials grant.

    Args:
        client_id: Spotify application client id.
        client_secret: Spotify application client secret.

    Returns:
        The value of the ``access_token`` field in the token endpoint's JSON
        response.

    Raises:
        requests.HTTPError: if the token endpoint answers with an error status
            (for example when the credentials are wrong or empty).
        requests.RequestException: on connection failure or timeout.
        KeyError: if a successful response contains no ``access_token`` field.
    """
    auth_url = 'https://accounts.spotify.com/api/token'
    auth_response = requests.post(
        auth_url,
        data={
            'grant_type': 'client_credentials',
            'client_id': client_id,
            'client_secret': client_secret,
        },
        # Without a timeout a stalled connection would block the cell forever.
        timeout=10,
    )
    # Surface rejected credentials as an HTTP error instead of a confusing
    # KeyError on the missing 'access_token' field further down.
    auth_response.raise_for_status()
    return auth_response.json()['access_token']

In [7]:
# store the spotify token
# Fetched once here; the value is reused when the spotipy client is created.
spotify_token = get_spotify_token(
    client_id=SPOTIFY_CLIENT_ID,
    client_secret=SPOTIFY_CLIENT_SECRET,
)

In [8]:
# authorise spotipy module
# Authenticate through spotipy's client-credentials manager rather than a fixed
# bearer token: the manager obtains the token itself and requests a new one
# once it has expired, so API calls keep working in long notebook sessions.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=SPOTIFY_CLIENT_ID,
        client_secret=SPOTIFY_CLIENT_SECRET,
    )
)

In [9]:
# track details function
def get_track_details(spotify_id):
    """Look up a track with the module-level ``sp`` client.

    Returns a ``(track name, name of the first listed artist)`` tuple.
    """
    track_info = sp.track(spotify_id)
    title = track_info['name']
    lead_artist_name = track_info['artists'][0]['name']
    return title, lead_artist_name

---

### Lyrics - Free API (lyrics.ovh; the returned text starts with a French "Paroles de la chanson … par …" header line)

In [10]:
# helper to relabel the header line that precedes the lyrics
def _relabel_lyrics_header(lyrics):
    """Rewrite a leading 'Paroles de la chanson <title> par <artist>' line.

    Only the first line is touched, and only when it starts with that prefix;
    the lyrics body is returned unchanged. The rewritten line has the form
    'Song - <title> , Artist - <artist>' with the same spacing the notebook
    printed before.
    """
    prefix = 'Paroles de la chanson'
    header, newline, body = lyrics.partition('\n')
    if not header.startswith(prefix):
        return lyrics

    remainder = header[len(prefix):]
    # Split on the LAST ' par ' so a title that itself contains ' par ' stays
    # intact. NOTE(review): assumes the artist name does not contain ' par '.
    title_part, separator, artist_part = remainder.rpartition(' par ')
    if separator:
        header = 'Song - ' + title_part + ' , Artist -  ' + artist_part
    else:
        header = 'Song - ' + remainder
    return header + newline + body


# function to get the lyrics from spotify id
def get_lyrics_by_spotify_id(spotify_id):
    """Fetch the lyrics of a Spotify track from the lyrics.ovh API.

    The track's title and first artist are resolved with ``get_track_details``
    and used to build the lyrics.ovh request.

    Args:
        spotify_id: Track identifier accepted by ``get_track_details``.

    Returns:
        The lyrics text with its header line relabelled, or an empty string
        when the response JSON carries no ``lyrics`` entry.

    Raises:
        requests.RequestException: on connection failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    song_title, artist = get_track_details(spotify_id)

    # Percent-encode each path segment; safe='' also encodes '/', so names such
    # as "AC/DC" or titles containing '?' or '#' cannot break the URL path.
    url = 'https://api.lyrics.ovh/v1/{}/{}'.format(
        urllib.parse.quote(str(artist), safe=''),
        urllib.parse.quote(str(song_title), safe=''),
    )

    # fetch lyrics
    response = requests.get(url, timeout=30)
    json_data = response.json()

    # A missing 'lyrics' key yields '' so callers can test the result for
    # truthiness instead of having to handle a KeyError.
    lyrics = json_data.get('lyrics') or ''

    return _relabel_lyrics_header(lyrics)

In [11]:
# example usage
spotify_id = 'https://open.spotify.com/track/1pXrR5Y9OgcIV2JEAl2lCB'

try:
    lyrics = get_lyrics_by_spotify_id(spotify_id)
except Exception as err:
    # Catch Exception instead of using a bare except so a kernel interrupt
    # still stops the cell, and show the cause rather than hiding it.
    print('Error retrieving lyrics. Please try again later.')
    print(f'Reason: {err!r}')
else:
    if lyrics:
        print(lyrics.lower())
    else:
        print('No lyrics found')

song -  i kissed a girl , artist -  katy perry
this was never the way i planned
not my intention
i got so brave, drink in hand
lost my discretion
it's not what i'm used to
just wanna try you on
i'm curious for you
caught my attention

i kissed a girl and i liked it
the taste of her cherry chapstick
i kissed a girl just to try it
i hope my boyfriend don't mind it
it felt so wrong, it felt so right
don't mean i'm in love tonight
i kissed a girl and i liked it

i liked it

no, i don't even know your name
it doesn't matter
you're my experimental game
just human nature
it's not what good girls do
not how they should behave
my head gets so confused
hard to obey

i kissed a girl and i liked it
the taste of her cherry chapstick
i kissed a girl just to try it
i hope my boyfriend don't mind it
it felt so wrong, it felt so right
don't mean i'm in love tonight
i kissed a girl and i liked it

i liked it

us girls, we are so magical
soft skin, red lips, so kissable
hard to resist, so touchable
too 

---

### Feature Selection & Engineering

In [12]:
# language detection
# langdetect's algorithm is randomised; fixing the seed makes the reported
# probabilities reproducible between runs.
ld.DetectorFactory.seed = 0
print(ld.detect_langs(lyrics.lower()))

[en:0.9999995056967628]


In [20]:
# function to get the track's features
def get_track_features(spotify_id, include_lyrics=False):
    """Collect audio features and track metadata into a one-row DataFrame.

    Uses the module-level ``sp`` client for three lookups: the track, its audio
    features, and the first listed artist (for genres).

    Args:
        spotify_id: Track identifier passed to ``sp.track`` and
            ``sp.audio_features``.
        include_lyrics: When True, add a ``lyrics`` column filled by
            ``get_lyrics_by_spotify_id``. Defaults to False, which produces the
            same columns as before this parameter existed.

    Returns:
        pandas.DataFrame with a single row. Audio-feature columns are None
        when no audio features are available for the track.
    """
    track = sp.track(spotify_id)

    # audio_features() returns a list; the list or its first entry may be None
    # when no features are available. Fall back to {} so the lookups below
    # give None instead of raising AttributeError.
    audio_features_list = sp.audio_features(spotify_id) or [None]
    audio_features = audio_features_list[0] or {}

    audio_feature_keys = (
        "acousticness",
        "danceability",
        "energy",
        "instrumentalness",
        "key",
        "liveness",
        "loudness",
        "mode",
        "speechiness",
        "tempo",
        "time_signature",
        "valence",
    )
    features = {key: audio_features.get(key) for key in audio_feature_keys}

    features.update({
        "artists": [artist['name'] for artist in track['artists']],
        "album_name": track['album']['name'],
        "album_type": track['album']['album_type'],
        "release_date": track['album']['release_date'],
        "popularity": track['popularity'],
        "duration_ms": track['duration_ms'],
        "explicit": track['explicit'],
        # Genres are taken from the first listed artist only.
        "genres": sp.artist(track['artists'][0]['id']).get('genres', []),
    })

    # optional: add lyrics as a column in the dataframe
    if include_lyrics:
        features["lyrics"] = get_lyrics_by_spotify_id(spotify_id)

    return pd.DataFrame([features])

In [23]:
# Build the feature row for the example track and show it as the cell output.
spotify_id = 'https://open.spotify.com/track/1pXrR5Y9OgcIV2JEAl2lCB'
features = get_track_features(spotify_id=spotify_id)
features

Unnamed: 0,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,artists,album_name,album_type,release_date,popularity,duration_ms,explicit,genres
0,0.00226,0.702,0.76,0,5,0.132,-3.174,1,0.0684,129.996,4,0.696,[Katy Perry],One Of The Boys,album,2008-06-17,69,179640,False,[pop]
