In [1]:
import os
import json
import base64
import sys
from requests import get, post
import pandas as pd
from dotenv import load_dotenv
import time

In [2]:
# Run python-dotenv's loader, then read the two API credentials that
# get_token() uses from the process environment (None when a variable is unset).
load_dotenv()

client_id = os.environ.get('CLIENT_ID')
client_secret = os.environ.get('CLIENT_SECRET')

In [16]:
def get_token():
    """Request an access token from the Spotify accounts service.

    Uses the client-credentials grant: the module-level ``client_id`` and
    ``client_secret`` are joined as ``id:secret``, base64-encoded and sent in
    an HTTP Basic ``Authorization`` header.

    Returns:
        str: the ``access_token`` field of the JSON response.

    Raises:
        RuntimeError: if either credential is unset/empty, or if the token
            endpoint answers with a status other than 200.
    """
    if not client_id or not client_secret:
        # The credentials are None when the environment variables are missing;
        # fail with a clear message instead of a TypeError from the
        # concatenation below.
        raise RuntimeError('CLIENT_ID and CLIENT_SECRET must be set in the environment')

    auth_string = client_id + ':' + client_secret
    auth_base64 = base64.b64encode(auth_string.encode('utf-8')).decode('utf-8')

    url = 'https://accounts.spotify.com/api/token'  # POST request URL

    headers = {  # POST request headers
        "Authorization": "Basic " + auth_base64,
        "Content-Type": "application/x-www-form-urlencoded",
    }

    data = {'grant_type': 'client_credentials'}  # POST request body

    result = post(url, headers=headers, data=data)
    if result.status_code != 200:
        # Surface the real failure rather than a KeyError on 'access_token'.
        raise RuntimeError(
            f'Token request failed with status {result.status_code}: {result.content!r}'
        )

    return json.loads(result.content)['access_token']

def get_auth_header(token):
    """Build the request-header dict that carries `token` as a Bearer credential."""
    bearer_value = 'Bearer ' + token
    return dict(Authorization=bearer_value)

def get_artist_pop(token, query):
    """Search for an artist by name and return the popularity of the top hit.

    Args:
        token: access token, forwarded to ``get_auth_header``.
        query: free-text artist name to search for.

    Returns:
        The ``popularity`` value of the first artist in the search results, or
        ``None`` when the search returns no artists or the response status is
        anything other than 200.
    """
    url = 'https://api.spotify.com/v1/search'
    # Pass the search terms via `params` so that names containing '&', '#',
    # '?' etc. are percent-encoded instead of corrupting the query string.
    # The parameter names are the ones the original URL used.
    params = {'query': query, 'type': 'artist', 'limit': 1}
    result = get(url, params=params, headers=get_auth_header(token))
    print(result)

    if result.status_code == 200:
        items = json.loads(result.content)['artists']['items']
        if not items:
            # A successful search can still match nothing.
            return None
        return items[0]['popularity']

    if result.status_code == 429:
        # Print the error payload returned with the 429 so it shows up in the
        # cell output.
        print(json.loads(result.content))
    return None

In [6]:
# Build an artist -> popularity lookup for every distinct primary artist in the CSV.
df = pd.read_csv('/Users/colemak/Documents/spotifymodel/notebook/spotify.csv')
df = df.dropna()
# The 'artists' field may hold several ';'-separated names; keep only the first.
df['artist'] = df['artists'].str.split(';').str[0]
artist_columns = df['artist'].unique()
length = len(artist_columns)
artist_pop_dict = {}
token = None
for i, artist in enumerate(artist_columns):
    if i % 5000 == 0:
        # Get a token before the first lookup and a fresh one every 5000 lookups.
        token = get_token()
    if i > 0 and i % 350 == 0:
        # Pause after every 350 lookups (presumably to stay under the API rate
        # limit). i == 0 is skipped: nothing has been requested yet, so the
        # original 30 s wait before the first call was wasted time.
        time.sleep(30)
    # `artist` is already the first ';'-separated name, so no further split is needed.
    popularity = get_artist_pop(token, artist)
    artist_pop_dict[artist] = popularity
    print(f'{i}/{length} | {artist}:{popularity} length of dict:{len(artist_pop_dict)}')

<Response [200]>
0/17648 | Gen Hoshino:62 length of dict:1
<Response [200]>
1/17648 | Ben Woodward:45 length of dict:2
<Response [200]>
2/17648 | Ingrid Michaelson:58 length of dict:3
<Response [200]>
3/17648 | Kina Grannis:60 length of dict:4
<Response [200]>
4/17648 | Chord Overstreet:62 length of dict:5
<Response [200]>
5/17648 | Tyrone Wells:47 length of dict:6
<Response [200]>
6/17648 | A Great Big World:59 length of dict:7
<Response [200]>
7/17648 | Jason Mraz:72 length of dict:8
<Response [200]>
8/17648 | Ross Copperman:43 length of dict:9
<Response [200]>
9/17648 | Zack Tabudlo:68 length of dict:10
<Response [200]>
10/17648 | Dan Berk:44 length of dict:11
<Response [200]>
11/17648 | Anna Hamilton:40 length of dict:12
<Response [200]>
12/17648 | Landon Pigg:39 length of dict:13
<Response [200]>
13/17648 | Andrew Foy:44 length of dict:14
<Response [200]>
14/17648 | Boyce Avenue:69 length of dict:15
<Response [200]>
15/17648 | Brandi Carlile:64 length of dict:16
<Response [200]>
1

In [4]:
import pickle
import dill
import os
def save_object(file_path, obj):
    """Serialize ``obj`` to ``file_path`` with dill, creating parent directories.

    Args:
        file_path: destination path of the serialized file.
        obj: the object to serialize.
    """
    dir_path = os.path.dirname(file_path)

    # dirname is '' for a bare filename and os.makedirs('') raises
    # FileNotFoundError, so only create directories when there is one to create.
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)

    with open(file_path, "wb") as file_obj:
        dill.dump(obj, file_obj)

In [8]:
# Persist the artist -> popularity mapping to disk.
save_object(
    file_path='/Users/colemak/Documents/spotifymodel/pop_dict.pkl',
    obj=artist_pop_dict,
)

In [5]:
def get_song_artist(token, query):
    """Search for a track and return the ids of the top hit and its first artist.

    Args:
        token: access token, forwarded to ``get_auth_header``.
        query: free-text search string.

    Returns:
        A ``(song_id, artist_id)`` tuple for the first track in the search
        results, or ``None`` when the search returns no tracks or the response
        status is anything other than 200.
    """
    url = 'https://api.spotify.com/v1/search'
    # Pass the search terms via `params` so that special characters in the
    # query are percent-encoded instead of corrupting the query string.
    # The parameter names are the ones the original URL used.
    params = {'query': query, 'type': 'track', 'limit': 1}
    result = get(url, params=params, headers=get_auth_header(token))
    print(result)

    if result.status_code == 200:
        items = json.loads(result.content)['tracks']['items']
        if not items:
            # A successful search can still match nothing.
            return None
        top_track = items[0]
        song_id = top_track['id']
        print(song_id)
        artist_id = top_track['artists'][0]['id']
        return (song_id, artist_id)

    if result.status_code == 429:
        # Print the error payload returned with the 429 so it shows up in the
        # cell output.
        print(json.loads(result.content))
    return None

In [9]:
import random

TARGET_SONGS = 4000          # stop once this many tracks have been collected
MAX_CONSECUTIVE_MISSES = 10  # give up if this many lookups in a row return nothing

seen = []    # word indices already tried, in the order they were tried
song = []    # collected track ids
artist = []  # first-artist id of each collected track (parallel to `song`)

# Read the word list up front so the file is closed before the HTTP calls start.
with open("/Users/colemak/Downloads/parts of speech word files/nouns/91K nouns.txt") as f:
    words = f.read().splitlines()

token = get_token()
consecutive_misses = 0
# Visit every word index at most once, in random order. Sampling from
# range(len(words)) replaces the hard-coded upper bound and the linear
# `index not in seen` scan, and guarantees the loop ends if the words run out.
for index in random.sample(range(len(words)), len(words)):
    if len(song) >= TARGET_SONGS:
        break
    seen.append(index)

    found = get_song_artist(token, words[index])
    if found is None:
        # get_song_artist returns None on a non-200 response; unpacking that
        # would raise TypeError. Skip the word, but stop if failures pile up
        # (e.g. sustained rate limiting) rather than hammering the API.
        consecutive_misses += 1
        if consecutive_misses >= MAX_CONSECUTIVE_MISSES:
            raise RuntimeError(
                f'{consecutive_misses} consecutive lookups failed; '
                f'collected {len(song)} songs so far'
            )
        continue

    consecutive_misses = 0
    song_id, artist_id = found
    song.append(song_id)
    artist.append(artist_id)


<Response [200]>
5irD4qrnLiBy8kTEClSijS
<Response [200]>
7c6V4Q16adigPMZFYGrF4a
<Response [200]>
3MEYFivt6bilQ9q9mFWZ4g
<Response [200]>
14BdvepKLCYtKggBNPkQPT
<Response [200]>
3LSXgPxkWHi016kAl69Z3E
<Response [200]>
222GG3s6l0rYiQZ3xZ80t1
<Response [200]>
6DnpDW37IkQ0XESOVZkmbf
<Response [200]>
50GEUKDdAKgevDkdPOJYhI
<Response [200]>
2yegzgMP1c7ADsFNnnkY3c
<Response [200]>
4akPmp6IWf02FdXXXJi3lX
<Response [200]>
2bcYE9xNvZItntKltFT78I
<Response [200]>
1c9GpL8As18XxGOkHvsx6d
<Response [200]>
20CykrDG0I3r18U4kX7xR1
<Response [200]>
0ACqqECMKkDvhrLF76TMvt
<Response [200]>
03r3IXlCC1mKbd5yOOSwvE
<Response [200]>
3LRJbFT9rKoKv4aW7PuBJC
<Response [200]>
6qrjcIJHlsTJEw8N74SRgR
<Response [200]>
5Y35SjAfXjjG0sFQ3KOxmm
<Response [200]>
22F23R3CYWiaqzX53GlTlG
<Response [200]>
00vR63trn1CzygcjgAFaiM
<Response [200]>
3uEkceJfp5iru2wOIIGVGp
<Response [200]>
32XeS4nfrsBbnnzKEGdI9w
<Response [200]>
0zhd73eS96OGIXtNkq9Xml
<Response [200]>
15Z5FtwsEjRrXT472XkWg1
<Response [200]>
23DHUWJ7iEieNPMPKvjzBV


In [8]:
# NOTE(review): `song_artist` is not assigned in any cell visible in this
# notebook export; presumably it comes from a deleted or out-of-order cell.
# Confirm it still exists after Restart Kernel -> Run All.
song_artist

{'5AEG4VrpRyx0U0rDoIIA63': '58fzPwxNFw1Ae500uB038l',
 '5LO52l2eBZu48A1TxlgxQp': '3CqN2uM47Pvkf22AoeLxBE',
 '6XO4Fxl4lMf6oTJyVn1tBK': '1rFYGSmWGWiCQmczq2bIXD',
 '1RACM2jYpkl10eothXLA37': '6DbqS0X8cSFOPGsvyze2yh',
 '6ZcELgkD2FEhBflWrY5MwZ': '79S80ZWgVhIPMCHuvl6SkA',
 '1Wsr7giAA9iTER7UZV1qhU': '0FI0kxP0BWurTz8cB8BBug',
 '1kHyejS87fzvj0SDvKVo8N': '7pICtPXy11dc7D82jsF1Xy',
 '1PtKvMDKE9cH1fsd5l1rqU': '7A2h1ch3YkBJIjt6ksQ0eX',
 '5b3XJ1pjrHO5JtY2PcTjnI': '181bsRPaVXVlUKXrxwZfHK',
 '65tjxUQBfaQzWXvIvhxIkH': '2bkTfSAn3OtR879neCk2zA',
 '1c1RDcw67cstVNu7GnhUHC': '2A5gn5f8HXgPKf03a7NN43',
 '5OyC7E3gQFQfMJGm2pHbZf': '5iZtmHKffMQOBHCGBz9De8',
 '5LRuS3lFXQQhG5M26XYFFJ': '6gWoT1cy6qryXUg31HB8nM',
 '2bgl6miORpyE4ql0gl7KHR': '2QJGwjXfe7IOoBFcrpzOTt',
 '1fnjrCo792W0EIbZnbCom8': '0YHGksl6HGKFQuAyDRf6U3',
 '03GzVCudmRXHKOaRijQtHN': '50y3awmB8dw3Wh4PIh2On5',
 '7qVHpz4zkA5TYTefBKrhR6': '3hiYKNf8gJkrqjdRPVLnTC',
 '4YQfX5QoyL1weIUpLQY4Bx': '36pk438TUUMdX59j1zbvTu',
 '5db1JyfVxSII8Lx9iP7Mdq': '5rbCgd1fk3T6xYBhvP

In [12]:
# Persist the collected list of artist ids to disk.
save_object(
    file_path='/Users/colemak/Documents/spotifymodel/artist_list.pkl',
    obj=artist,
)

In [10]:
# Display how many track ids have been collected in `song`.
len(song)

4000

In [28]:
# Take the first 25 collected artist ids and join them into a single
# comma-separated string; the last line displays it.
xs = artist[:25]
artist_batch = ','.join(map(str, xs))
artist_batch

'4SCWiQbJCMTHK737aNUqBJ,4KACX9SaKjjSY3qP8XNKPs,64KEffDW9EtZ1y2vBYgq8T,3oSrYPZKpQLmhcmmZwmlQ4,6i7yNNoe0JhRCJkGnPwiXU,1gxaoyvX9r7305viSXGNc8,3UZhsPT78qv7V6VLBQAR69,4kKg9V4Yr24kz8vW6PZVPu,4doI4YfDQzgjm3pjI3zv4E,3zmfs9cQwzJl575W1ZYXeT,6WqIGGs6DLxbSqsnRTOuWO,5JNniEeEIeUYduLBuv857h,3iTsJGG39nMg9YiolUgLMQ,58MvwVvhEgFNTI7sHjkG7u,7a97eiOoGb1icBAmhnpwqY,7oPftvlwr6VrsViSDV7fJY,3EuCzrQF9DA3xUw6ZCOsBF,7tYKF4w9nC0nq9CsPZTHyP,4lph4e07jjlKkioG1bQx95,3tJoFztHeIJkJWMrx0td2f,1ftpYph651Il5aufimlG9v,6FQqZYVfTNQ1pCqfkwVFEa,6OukdlNSOR1SoGNF7v0d2C,6i39nMg9jmGqa2c1GPo9EG,126FigDBtqwS2YsOYMTPQe'

In [31]:
def get_artist_genre(token, query):
    """Fetch several artists by id, print each one's genres, and return them.

    Args:
        token: access token, forwarded to ``get_auth_header``.
        query: comma-separated artist ids, sent as the ``ids`` parameter.

    Returns:
        On a 200 response, a list with one entry per element of the response's
        ``artists`` array: that artist's list of genres (an empty list when
        none are given). ``None`` for any other response status.
    """
    url = 'https://api.spotify.com/v1/artists'
    result = get(url, params={'ids': query}, headers=get_auth_header(token))
    print(result)

    if result.status_code == 200:
        genres_per_artist = []
        for entry in json.loads(result.content)['artists']:
            # Defensive: tolerate a null entry or a missing 'genres' key
            # (the API may return null for unknown ids - TODO confirm).
            genre = (entry or {}).get('genres') or []
            if genre:
                print(genre)
            else:
                print('no genre')
            genres_per_artist.append(genre)
        # Return the data as well as printing it, so callers can use it.
        return genres_per_artist

    if result.status_code == 429:
        # Print the error payload returned with the 429 so it shows up in the
        # cell output.
        print(json.loads(result.content))
    return None

In [32]:
# Get a fresh access token, then look up genres for the batch of artist ids.
token = get_token()
get_artist_genre(token=token, query=artist_batch)

<Response [200]>
['calming instrumental', 'organic ambient']
no genre
['brostep', 'edm', 'pop', 'progressive electro house']
no genre
no genre
no genre
no genre
no genre
no genre
['indie pop', 'pov: indie']
no genre
no genre
['madchester', 'new wave', 'permanent wave', 'solo wave']
no genre
no genre
['modern rock', 'permanent wave', 'punk', 'rock']
no genre
['pop', 'r&b', 'rap']
['alternative hip hop', 'boom bap', 'queens hip hop']
['memphis hip hop', 'rap', 'southern hip hop', 'tennessee hip hop', 'trap']
no genre
['alternative dance', 'garage rock', 'indie rock', 'indietronica', 'modern alternative rock', 'modern rock', 'neo-synthpop', 'oxford indie', 'shimmer pop']
no genre
no genre
['dayton indie', 'neon pop punk', 'pop punk', 'screamo']
