# Dev 3: Pipeline calling the Spotify API

This notebook implements the following:

- Spotipy credentials to connect to the Spotify API
- An API call to retrieve an artist
- A string-similarity check between the retrieved artist name and the query


In [256]:
import base64
import datetime
import os
import re
import sys

import numpy as np
import pandas as pd
import psycopg2
import requests
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials #To access authorised Spotify data

In [269]:
# Connect to PostgreSQL.
# The password is taken from the PGPASSWORD environment variable so it does not
# have to live in the notebook; the previous local-dev value is kept as a fallback.
conn = psycopg2.connect(
    dbname="spotify",
    user="postgres",
    password=os.environ.get("PGPASSWORD", "pg"),
)
# Cursor reused by the cells below to run SQL statements
cursor = conn.cursor()

In [281]:
# List the (schema, table) pairs that currently exist in the "public" schema
s = (
    "SELECT table_schema, table_name "
    "FROM information_schema.tables "
    "where table_schema = 'public'"
)
cursor.execute(s)
list_tables = cursor.fetchall()

In [282]:
# Display the (schema, table) rows fetched from information_schema.tables
list_tables

[('public', 'master_artist'), ('public', 'artist_song')]

In [278]:
# Create the schema only when the "public" schema has no tables yet.
# NOTE(review): no conn.commit() is issued in this cell — confirm the
# transaction is committed elsewhere, otherwise the tables may not persist.
if not list_tables:

    # Create Artist Tables: one row per artist (id -> name)
    name_Table = "master_artist"
    sqlCreateTable_MasterArtist = "create table " + name_Table + " (artist_id varchar(25), artist_name varchar(50));"
    cursor.execute(sqlCreateTable_MasterArtist)

    # Create Table Artist - Song: one row per (artist, song) pair
    name_Table = "artist_song"
    cols = " (artist_id varchar(25), song_name varchar(100), song_id varchar(25), popularity SMALLINT, duration INT);"
    # The assignment operator was missing on this line, which made the whole
    # cell a SyntaxError.
    sqlCreateTable_ArtistSong = "create table " + name_Table + cols
    cursor.execute(sqlCreateTable_ArtistSong)

In [280]:
# Create the artist_song table on its own.
# "if not exists" keeps this cell safe to re-run: a plain "create table"
# raises an error when the table is already present.
name_Table = "artist_song"
cols = " (artist_id varchar(25), song_name varchar(100), song_id varchar(25), popularity SMALLINT, duration INT);"
sqlCreateTable_ArtistSong = "create table if not exists " + name_Table + cols
cursor.execute(sqlCreateTable_ArtistSong)

In [169]:
def levenshtein(s1, s2):
    """
    Normalized Levenshtein (edit) distance between two strings.

    The raw edit distance (minimum number of single-character insertions,
    deletions and substitutions) is divided by the length of the longer
    string, so the result is a distance in [0, 1]: the closer to 0, the
    more similar the strings are.

    Parameters
    ----------
    s1, s2 : str
        Strings to compare. The order does not matter; the longer one is
        always used as the reference.

    Returns
    -------
    float
        0.0 for identical strings (including two empty strings), 1.0 when
        every character of the longer string has to be edited.
    """
    # Make s1 the longer string so the normalization uses the max length
    if len(s1) < len(s2):
        s1, s2 = s2, s1

    # Both strings are empty: identical, and avoids a division by zero below
    if len(s1) == 0:
        return 0.0

    # Classic two-row dynamic programme: previous_row[j] holds the distance
    # between the processed prefix of s1 and the first j characters of s2.
    # An empty s2 needs no special case: the loop then yields len(s1), which
    # normalizes to 1.0 (previously the raw, un-normalized length was returned).
    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1] / float(len(s1))

In [2]:
# Spotify API credentials are read from the environment instead of being
# committed in the notebook. Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET
# before starting the kernel; both are None when the variable is missing.
# NOTE(review): credentials that were previously stored in this cell should
# be treated as exposed and rotated.
client_id = os.environ.get("SPOTIPY_CLIENT_ID")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET")

In [142]:
# Accumulators filled by the cells below:
#   dict_artists  -> artist id mapped to a list of song-property dicts
#   master_artist -> artist id mapped to the artist's name
dict_artists, master_artist = dict(), dict()

In [3]:
# Build the credentials manager from the client id / secret defined above
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)

# Spotify object used for every API call in this notebook
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

In [253]:
# Search Spotify for the requested artist and keep the first artist credited
# on the first track that comes back.
name_artist_query = "Karol G"
name = '{' + name_artist_query + '}'
result = sp.search(name) #search query

# Fail with a clear message when the search returns nothing, instead of an
# opaque IndexError from indexing [0] on an empty list.
search_items = result["tracks"]["items"]
if not search_items:
    raise ValueError("No Spotify results for query: " + name_artist_query)

# First credited artist of the first returned track
first_artist = search_items[0]["artists"][0]
uri = first_artist["uri"]
artist_id = first_artist["id"]
name_artist = first_artist["name"]

In [None]:
# Flag artist names that would not fit in master_artist.artist_name, which is
# declared as varchar(50). A name of exactly 50 characters still fits, so only
# longer names are flagged.
# TODO: replace this placeholder print with real handling.
if len(name_artist) > 50:
    print("CONTINUE!") # CHANGE!

In [254]:
# Sanity check: compare the query with the artist name Spotify returned
# (case-insensitively) and complain when the normalized distance exceeds 0.3
query_lowered = name_artist_query.lower()
found_lowered = name_artist.lower()
distance_levensh = levenshtein(query_lowered, found_lowered)
if 0.3 < distance_levensh:
    print("ERROR! Artist name not found") # CHANGE!

### Master Table - Artist

In [247]:
# Register the queried artist (id -> name) in the master table
master_artist.update({artist_id: name_artist})

In [248]:
# Display the artist id -> artist name mapping collected so far
master_artist

{'2WX2uTcsvV5OnS0inACecP': 'Birdy',
 '1Cs0zKBU1kc0i8ypK3B9ai': 'David Guetta',
 '07FfkbljNIdl45Ijlh1aXS': 'RHODES',
 '20gsENnposVs2I4rQ5kvrf': 'Sam Feldt',
 '1l2ekx5skC4gJH8djERwh1': 'Don Diablo',
 '73jBynjsVtofjRpdpRAJGk': 'Dimitri Vegas & Like Mike',
 '4VMYDCV2IEDYJArk749S6m': 'Daddy Yankee',
 '3wtMPMvPtiFylbnNXF6CAj': 'Afro Bros',
 '1GDbiv3spRmZ1XdM1jQbT7': 'Natti Natasha',
 '2HkAI0YrEcgoR8QdaURqhO': 'Dimitri Vegas',
 '4pwXiI7Z5ZStkgKowZyoKi': 'Like Mike',
 '64M6ah0SkkRsnPGtGiRAbb': 'Bebe Rexha',
 '1vyhD5VmyZ7KMfW5gqLgo5': 'J Balvin',
 '4mHAu7NX2UNsnGXjviBD9e': 'Brooks',
 '00TKPo9MxwZ0j4ooveIxWZ': 'Loote',
 '5y2Xq6xcjJb2jVM54GHK3t': 'John Legend',
 '5WUlDfRSoLAfcVSX1WnrxN': 'Sia',
 '0z4gvV4rjIZ9wHck67ucSV': 'Akon',
 '4D75GcNG95ebPtNvoNVXhz': 'Afrojack',
 '0hCNtLu0JehylgoiP8L4Gh': 'Nicki Minaj',
 '60d24wfXkVzDSfLS6hyCjZ': 'Martin Garrix',
 '5KKpBU5eC2tJDzf0wmlRp2': 'RAYE',
 '1HY2Jd0NmPuamShAr6KMms': 'Lady Gaga'}

# Top Tracks

In [249]:
# Fetch the top tracks of the artist identified by `uri`.
# NOTE(review): no country/market is passed, so spotipy's default applies — confirm that is intended.
top_tracks = sp.artist_top_tracks(uri)

### Top 10 Songs for Artist Table

In [250]:
# Build the per-artist song lists from the top tracks of the queried artist.
#
# For every track:
#   * its properties are stored under the queried artist, and
#   * every other credited ("featuring") artist is added to master_artist and
#     gets the same song in their own list.
#
# The queried artist's list is rebuilt from scratch, and featured artists are
# deduplicated by song_id, so re-running this cell (or querying another artist
# that shares a track) does not append the same song twice.

# Dictionary for each artist
dict_artists[artist_id] = []

# For each track (a missing or empty "tracks" entry simply yields no iterations)
for track in top_tracks.get("tracks", []):

    # SONG - Properties, with neutral defaults when a field is absent
    dict_track = {
        "name": track.get("name", ""),
        "duration": track.get("duration_ms", 0),
        "popularity": track.get("popularity", 0),
        "song_id": track.get("id", ""),
    }

    # Collect the ids of the artists featuring on the song
    set_featuring_artists = set()
    for art_dict in track.get("artists", []):

        # Skip incomplete artist entries
        if "name" not in art_dict or "id" not in art_dict:
            continue

        id_feat_artist = art_dict["id"]
        name_feat_artist = art_dict["name"]

        # If the artist is different from the queried one, remember it and
        # add it to the master of artists
        if id_feat_artist != artist_id:
            set_featuring_artists.add(id_feat_artist)
            master_artist[id_feat_artist] = name_feat_artist

    # Write the dictionary of song properties for the main artist
    dict_artists[artist_id].append(dict_track)

    # Write the dictionary of song properties for each of the feat. artists
    for i_id_feat_artist in set_featuring_artists:

        # If that artist is not yet in the dictionary of artists, add it
        feat_tracks = dict_artists.setdefault(i_id_feat_artist, [])

        # Only append songs this artist does not already have; songs without
        # an id cannot be compared, so they are always appended.
        song_id = dict_track["song_id"]
        already_stored = bool(song_id) and any(
            stored.get("song_id") == song_id for stored in feat_tracks
        )
        if not already_stored:
            feat_tracks.append(dict_track)

In [251]:
# Display the artist id -> list of song-property dicts built above
dict_artists

{'1Cs0zKBU1kc0i8ypK3B9ai': [{'name': 'Titanium (feat. Sia)',
   'duration': 245040,
   'popularity': 66,
   'song_id': '0lHAMNU8RGiIObScrsRgmP'},
  {'name': 'Instagram',
   'duration': 184558,
   'popularity': 78,
   'song_id': '0U6bQIAh6MCGo1xjbIIx2S'},
  {'name': 'Say My Name',
   'duration': 198946,
   'popularity': 66,
   'song_id': '3MoV1UsAJmz64LHqyiRMp0'},
  {'name': "Better When You're Gone",
   'duration': 192402,
   'popularity': 74,
   'song_id': '4Tvw0lweq9l2JPQKFbpbBQ'},
  {'name': 'Conversations in the Dark - John Legend vs. David Guetta',
   'duration': 210236,
   'popularity': 73,
   'song_id': '20d27F17AZOxTJOEHAVK2Y'},
  {'name': 'Flames',
   'duration': 195000,
   'popularity': 54,
   'song_id': '3tTYxjYYcXFqFXA0AZ1Fav'},
  {'name': 'Sexy Bitch (feat. Akon)',
   'duration': 195853,
   'popularity': 61,
   'song_id': '0uXO2GrNiIb1xHT9LUdxZE'},
  {'name': 'Hey Mama (feat. Nicki Minaj, Bebe Rexha & Afrojack)',
   'duration': 192560,
   'popularity': 61,
   'song_id': '5

In [252]:
# Display the master table again, now including the featured artists added by the top-tracks loop
master_artist

{'2WX2uTcsvV5OnS0inACecP': 'Birdy',
 '1Cs0zKBU1kc0i8ypK3B9ai': 'David Guetta',
 '07FfkbljNIdl45Ijlh1aXS': 'RHODES',
 '20gsENnposVs2I4rQ5kvrf': 'Sam Feldt',
 '1l2ekx5skC4gJH8djERwh1': 'Don Diablo',
 '73jBynjsVtofjRpdpRAJGk': 'Dimitri Vegas & Like Mike',
 '4VMYDCV2IEDYJArk749S6m': 'Daddy Yankee',
 '3wtMPMvPtiFylbnNXF6CAj': 'Afro Bros',
 '1GDbiv3spRmZ1XdM1jQbT7': 'Natti Natasha',
 '2HkAI0YrEcgoR8QdaURqhO': 'Dimitri Vegas',
 '4pwXiI7Z5ZStkgKowZyoKi': 'Like Mike',
 '64M6ah0SkkRsnPGtGiRAbb': 'Bebe Rexha',
 '1vyhD5VmyZ7KMfW5gqLgo5': 'J Balvin',
 '4mHAu7NX2UNsnGXjviBD9e': 'Brooks',
 '00TKPo9MxwZ0j4ooveIxWZ': 'Loote',
 '5y2Xq6xcjJb2jVM54GHK3t': 'John Legend',
 '5WUlDfRSoLAfcVSX1WnrxN': 'Sia',
 '0z4gvV4rjIZ9wHck67ucSV': 'Akon',
 '4D75GcNG95ebPtNvoNVXhz': 'Afrojack',
 '0hCNtLu0JehylgoiP8L4Gh': 'Nicki Minaj',
 '60d24wfXkVzDSfLS6hyCjZ': 'Martin Garrix',
 '5KKpBU5eC2tJDzf0wmlRp2': 'RAYE',
 '1HY2Jd0NmPuamShAr6KMms': 'Lady Gaga',
 '66CXWjxzNUsdJxJ2JdwvnR': 'Ariana Grande',
 '41MozSoPIsD1dJM0CLPjZF': '

In [None]:
# Keeping Your Head Up - Don Diablo Remix; Radio Edit'