## Authentication to Spotify API

### Imports

In [41]:
import pandas as pd
import requests
import base64
import json
import csv
import os
from dotenv import load_dotenv

### Request for access token

In [42]:
load_dotenv()

True

In [43]:
client_id = os.getenv("CLIENT_ID")
client_secret = os.getenv("CLIENT_SECRET")

In [44]:
auth_string = client_id + ":" + client_secret
auth_bytes = auth_string.encode("utf-8")
auth_base64 = str(base64.b64encode(auth_bytes), "utf-8")

In [45]:
url = "https://accounts.spotify.com/api/token"
headers = {
    "Authorization": "Basic " + auth_base64,
    "Content-type": "application/x-www-form-urlencoded"
}
data = {"grant_type": "client_credentials"}

In [46]:
result = requests.post(url, headers=headers, data=data)
result

<Response [200]>

In [47]:
json_result = json.loads(result.content)
token = json_result["access_token"]
token

'BQD0uX5Bdinsm5kOcVx7waJoejbJuHGhKd4KoC6Uw1I2g6dyiHNhukVLohFXUzw4-P8ixk7FJqtO7aCLUsR9-7JTv3BtVDaETZVHJ5RAam3eqOznqLI'

In [48]:
def get_auth_header(token):
    return {"Authorization": "Bearer " + token}
# get_auth_header(token)

### Searching for artist

In [49]:
def search_artist(token, artist_name):
    search_url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)
    query = f"?q={artist_name}&type=artist&limit=2"
    
    query_url = search_url + query
    result = requests.get(query_url, headers=headers)
    json_result = json.loads(result.content)
    
    return json_result


In [9]:
# search_artist(token, "Kanye West")

### Getting the id

In [50]:
def get_artist_id(token, artist_name):
    search_url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)
    query = f"?q={artist_name}&type=artist&limit=2"
    
    query_url = search_url + query
    result = requests.get(query_url, headers=headers)
    json_result = json.loads(result.content)["artists"]["items"]
    
    artist_id = json_result[0]
    
    if len(json_result) == 0:
        return f"No artists with this name"
    
    return artist_id["id"]

In [51]:
get_artist_id(token, "Kanye West")

'5K4W6rqBFWDnAN6FQUkS6x'

### Getting the artist name

In [52]:
def get_artist_name(token, id):
    search_url = f"https://api.spotify.com/v1/artists/{id}"
    headers = get_auth_header(token)
    
    result = requests.get(search_url, headers=headers)
    json_result = json.loads(result.content)["name"]
    
    return json_result

In [53]:
test = get_artist_name(token, "5K4W6rqBFWDnAN6FQUkS6x")
test

'Kanye West'

### Getting the related artists

In [54]:
def get_related_artists(token, id):
    artists_url = f"https://api.spotify.com/v1/artists/{id}/related-artists"
    headers = get_auth_header(token)
    
    result = requests.get(artists_url, headers=headers)
    json_result = json.loads(result.content)["artists"]
    
    return json_result

In [55]:
artists = get_related_artists(token, "3nFkdlSjzX9mRTtwJOzDYB")
artists
# len(artists)

[{'external_urls': {'spotify': 'https://open.spotify.com/artist/20qISvAhX20dpIbOOzGK3q'},
  'followers': {'href': None, 'total': 3485919},
  'genres': ['conscious hip hop',
   'east coast hip hop',
   'gangster rap',
   'hardcore hip hop',
   'hip hop',
   'queens hip hop',
   'rap'],
  'href': 'https://api.spotify.com/v1/artists/20qISvAhX20dpIbOOzGK3q',
  'id': '20qISvAhX20dpIbOOzGK3q',
  'images': [{'height': 640,
    'url': 'https://i.scdn.co/image/ab6761610000e5eb153198caeef9e3bda92f9285',
    'width': 640},
   {'height': 320,
    'url': 'https://i.scdn.co/image/ab67616100005174153198caeef9e3bda92f9285',
    'width': 320},
   {'height': 160,
    'url': 'https://i.scdn.co/image/ab6761610000f178153198caeef9e3bda92f9285',
    'width': 160}],
  'name': 'Nas',
  'popularity': 71,
  'type': 'artist',
  'uri': 'spotify:artist:20qISvAhX20dpIbOOzGK3q'},
 {'external_urls': {'spotify': 'https://open.spotify.com/artist/0ONHkAv9pCAFxb0zJwDNTy'},
  'followers': {'href': None, 'total': 2878435},


### Automating the process

In [56]:
related_artist = {}
related_artist["main_artist"] = []
related_artist["id"] = []
related_artist["name"] = []
related_artist["popularity"] = []
related_artist["genres"] = []

In [57]:
def collect_related_artists(token, artist_name):
    related_artist["main_artist"].append(artist_name)
    main_artist_id = get_artist_id(token, artist_name)
    artists = get_related_artists(token, main_artist_id)
    
    for i in range(len(artists)):
        related_artist["id"].append(artists[i]["id"])
        related_artist["name"].append(artists[i]["name"])
        related_artist["popularity"].append(artists[i]["popularity"])
        related_artist["genres"].append(artists[i]["genres"])
    
    data_trash = related_artist.copy()
    aux = 0
    while (len(data_trash["id"]) != 0):
        related_artist["main_artist"].append(get_artist_name(token, artists[aux]["id"]))
        # print(aux)
        side_artists = get_related_artists(token, artists[aux]["id"])
        
        for i in range(len(side_artists)):
            related_artist["id"].append(side_artists[i]["id"])
            related_artist["name"].append(side_artists[i]["name"])
            related_artist["popularity"].append(side_artists[i]["popularity"])
            related_artist["genres"].append(side_artists[i]["genres"])
        
        data_trash["id"].pop(0)
        aux += 1
        if aux == 20:
            return related_artist

In [13]:
# data_rap = collect_related_artists(token, "Ludacris")
# data_pop = collect_related_artists(token, "Steven Tyler")

In [14]:
# data_pop

In [58]:
def generating_csv(file_name, dictionary):
    with open(f"../data/{file_name}.csv", mode="w", newline="") as csv_file:
        csv_writer = csv.writer(csv_file)
        
        keys = dictionary.keys()
        
        csv_writer.writerow(keys)
        
        for i in range(len(dictionary["main_artist"])):
            line = [dictionary[key][i] for key in keys]
            csv_writer.writerow(line)
        
        return "ok"

In [90]:
# generating_csv("pop_artist", data_pop)

'ok'

In [59]:
def generating_data(file_name):
    initial_data = pd.read_csv("../data/rappers.csv")
    
    with open(f"../data/{file_name}.csv", mode="w", newline="") as csv_file:
        writer = csv.writer(csv_file)
        
        headers = ["main_artist", "id", "name", "popularity", "genres", "relations"]
        
        writer.writerow(headers)
        
        for i in range(len(initial_data)):
            # print(len(initial_data))
            # print(initial_data["name"])
            relations = get_related_artists(token, get_artist_id(token, initial_data["main_artist"][i]))
            relations_data = [] 
            
            for relation in relations:
                # print(relation["genres"])
                artists_data = {"id": relation["id"], 
                                 "name": relation["name"], 
                                 "genres": ", ".join(relation["genres"])
                }
                
                relations_data.append(artists_data)
            
            artists_fields = [
                initial_data["main_artist"][i],
                initial_data["id"][i],
                initial_data["name"][i],
                initial_data["popularity"][i],
                initial_data["genres"][i],
                relations_data,
            ]
            writer.writerow(artists_fields)
        
        print(f"Users information saved to {file_name}")

In [82]:
def generating_data(file_name, token, initial_artist_name):
    with open(f"../data/{file_name}.csv", mode="w", newline="") as csv_file:
        writer = csv.writer(csv_file)
        
        headers = ["main_artist", "id", "name", "popularity", "genres", "relations"]
        writer.writerow(headers)
        
        # Helper function for depth-first search
        def dfs(main_artist_name, current_artist_name, depth):
            if depth == 0:
                return  # Limiting depth to avoid infinite recursion
            
            current_artist_id = get_artist_id(token, current_artist_name)
            relations = get_related_artists(token, current_artist_id)
            
            for relation in relations:
                artist_data = {
                    "id": relation["id"],
                    "name": relation["name"],
                    "genres": ", ".join(relation["genres"])
                }
                
                # Write data to CSV
                writer.writerow([main_artist_name, artist_data["id"], artist_data["name"],
                                 relation["popularity"], artist_data["genres"], depth])
                
                # Recursively call the function for each related artist
                dfs(relation["name"], main_artist_name, depth - 1)
        
        # Start the depth-first search from the initial artist
        dfs(initial_artist_name, initial_artist_name, depth=3)  # Set your desired depth
        
        print(f"Users information saved to {file_name}")

In [61]:
relations = get_related_artists(token, get_artist_id(token, "Kanye West"))

relations

[{'external_urls': {'spotify': 'https://open.spotify.com/artist/3nFkdlSjzX9mRTtwJOzDYB'},
  'followers': {'href': None, 'total': 8719769},
  'genres': ['east coast hip hop',
   'gangster rap',
   'hip hop',
   'pop rap',
   'rap'],
  'href': 'https://api.spotify.com/v1/artists/3nFkdlSjzX9mRTtwJOzDYB',
  'id': '3nFkdlSjzX9mRTtwJOzDYB',
  'images': [{'height': 640,
    'url': 'https://i.scdn.co/image/ab6761610000e5ebc75afcd5a9027f60eaebb5e4',
    'width': 640},
   {'height': 320,
    'url': 'https://i.scdn.co/image/ab67616100005174c75afcd5a9027f60eaebb5e4',
    'width': 320},
   {'height': 160,
    'url': 'https://i.scdn.co/image/ab6761610000f178c75afcd5a9027f60eaebb5e4',
    'width': 160}],
  'name': 'JAY-Z',
  'popularity': 81,
  'type': 'artist',
  'uri': 'spotify:artist:3nFkdlSjzX9mRTtwJOzDYB'},
 {'external_urls': {'spotify': 'https://open.spotify.com/artist/0fA0VVWsXO9YnASrzqfmYu'},
  'followers': {'href': None, 'total': 6736533},
  'genres': ['hip hop', 'ohio hip hop', 'pop rap', '

In [83]:
generating_data("rapper_world", token, "Kanye West")

KeyboardInterrupt: 