In [3]:
from dotenv import load_dotenv
from requests import post, get
import os
import base64
import json
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import random
from pprint import pprint
import pandas as pd

API Keys & Environment Variables

In [4]:
# Load credentials from a local .env file (if present) into the process environment.
load_dotenv()
client_id = os.getenv("CLIENT_ID")
client_secret = os.getenv("CLIENT_SECRET")

# Fail fast with an actionable message: assigning None into os.environ would
# otherwise raise an opaque "str expected, not NoneType" TypeError.
if not client_id or not client_secret:
    raise RuntimeError(
        "CLIENT_ID and CLIENT_SECRET must be set in the environment or in a .env file"
    )

# Re-export under the SPOTIPY_* names so the spotipy client can find them
# (see the spotipy client-credentials documentation).
os.environ["SPOTIPY_CLIENT_ID"] = client_id
os.environ["SPOTIPY_CLIENT_SECRET"] = client_secret


Manual Spotify API requests (not used)

In [5]:
def get_token():
    """Request an app-level access token via the client-credentials flow.

    Reads the module-level ``client_id`` and ``client_secret``, sends them as
    HTTP Basic credentials to the Spotify accounts service and returns the
    ``access_token`` field of the JSON response.

    Returns:
        str: the bearer token.

    Raises:
        requests.HTTPError: if the token endpoint answers with a 4xx/5xx status
            (e.g. wrong credentials), instead of a confusing ``KeyError``.
        requests.RequestException: on network failure or timeout.
    """
    auth_string = client_id + ':' + client_secret
    auth_bytes = auth_string.encode('utf-8')
    auth_base64 = str(base64.b64encode(auth_bytes), 'utf-8')

    url = "https://accounts.spotify.com/api/token"
    headers = {
        "Authorization": "Basic " + auth_base64,
        "Content-Type": "application/x-www-form-urlencoded"
    }
    data = {"grant_type" : "client_credentials"}

    # A timeout keeps the notebook cell from hanging forever on a dead connection.
    result = post(url, headers=headers, data=data, timeout=10)
    # Surface authentication/server errors explicitly before touching the payload.
    result.raise_for_status()
    return result.json()['access_token']

In [6]:
def get_auth_header(token):
    """Build the request-header dict that carries ``token`` as a bearer credential."""
    bearer_value = "Bearer " + token
    return dict(Authorization=bearer_value)

In [7]:
def search_for_track(token, query):
    """Search the Spotify catalogue for tracks matching ``query``.

    Args:
        token: bearer token, e.g. as returned by ``get_token()``.
        query: Spotify search expression, for example
            ``'track:"praise god" year:2012-2019'``.

    Returns:
        dict: the decoded JSON response (also printed for quick inspection).
        Previously the function only printed the payload and returned ``None``.
    """
    url = 'https://api.spotify.com/v1/search'
    headers = get_auth_header(token)
    # Use the caller's query (it used to be overwritten by a hard-coded debug
    # string) and let requests URL-encode spaces, quotes and colons.
    params = {"q": query, "type": "track", "limit": 50}

    result = get(url, headers=headers, params=params, timeout=10)
    json_result = result.json()
    print(json_result)
    return json_result



Random Song Search Helper Functions

In [8]:

def get_random_search():
    """Return a random one-letter wildcard search term.

    The result is either ``'<letter>%'`` or ``'%<letter>%'``, where the letter
    is drawn uniformly from a-z and the form is chosen by a coin flip.
    """
    # Draw the letter first, then flip the coin, so the sequence of random
    # calls is: choice, randint.
    letter = random.choice('abcdefghijklmnopqrstuvwxyz')

    # 0 -> wildcard only after the letter; 1 -> wildcard on both sides.
    leading_wildcard = '' if random.randint(0, 1) == 0 else '%'
    return leading_wildcard + letter + '%'


Setup Auth & Auto-refresh token

In [9]:
# Build the credentials manager with no explicit arguments; presumably it picks up
# the SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment variables exported
# earlier in this notebook -- confirm against the spotipy documentation.
auth_manager = SpotifyClientCredentials()
# Shared client used by the search helpers below.
sp = spotipy.Spotify(auth_manager=auth_manager)

In [10]:
# Example lookup: fetch the first US-market album hit for "graduation".
example_query = dict(q='album:"graduation"', type='album', market='US', limit=1, offset=0)
album = sp.search(**example_query)

Random Song Function (spotipy, spotify api)

In [8]:
def get_random_song() -> tuple:
    """Pick a random track from a random album released between 2016 and 2021.

    A random wildcard term, release year and result offset are combined into a
    Spotify album search (US market, ``tag:hipster``); one track of the matching
    album is then drawn at random.

    Returns:
        tuple: ``(genres, artist, song)`` where ``genres`` is the genre list of
        the album's first artist (may be empty), ``artist`` is that artist's
        name and ``song`` is the track title.

    Raises:
        IndexError: if the search yields no album at the chosen offset, or the
            album exposes no tracks. Callers that retry on failure can treat
            this as an ordinary miss.
    """
    q = get_random_search()
    offset = random.randint(0, 999)
    year = str(2016 + random.randint(0, 5))

    # get bottom 10% popularity album using 'hipster' tag
    search_result = sp.search(
        q='album:"' + q + '" year:' + year + " tag:hipster",
        limit=1,
        offset=offset,
        type='album',
        market='US'
    )
    albums = search_result['albums']['items']
    if not albums:
        # Random offsets frequently point past the last result; say so explicitly
        # instead of failing with a bare "list index out of range".
        raise IndexError(
            f"no album found for q={q!r}, year={year}, offset={offset}"
        )
    album = albums[0]
    lead_artist = album['artists'][0]
    artist = lead_artist['name']

    # get genre of album artist
    genres = sp.artist(lead_artist['id'])['genres']

    # Now get a random song from the album
    total_tracks = album['total_tracks']
    if total_tracks < 1:
        raise IndexError(f"album {album['id']!r} reports no tracks")
    track_page = sp.album_tracks(
        album_id=album['id'],
        limit=1,
        offset=random.randint(0, total_tracks - 1),
        market='US'
    )
    tracks = track_page['items']
    if not tracks:
        raise IndexError(f"album {album['id']!r} returned no tracks for market US")
    song = tracks[0]['name']

    return (genres, artist, song)



In [9]:
# One-off call to try out get_random_song(); unpacks its three return values.
genres, artist, song = get_random_song()


Save 3,000 songs to CSV using pandas (to match the ~3,000 Billboard songs)

In [9]:
song_limit = 3000
# Give up after this many back-to-back unexpected failures (bad credentials,
# network outage, unwritable CSV path, ...) instead of looping forever.
max_consecutive_errors = 20

relative_csv_path = "../data/random_2016-2021_song_artist_genres.csv"
expected_columns = ['song', 'artist', 'genres']
# Resume from an existing file when there is one; otherwise start empty so the
# very first run does not crash on a missing CSV.
if os.path.exists(relative_csv_path):
    df = pd.read_csv(relative_csv_path)
else:
    df = pd.DataFrame(columns=expected_columns)
assert list(df.columns) == expected_columns

i = 0
consecutive_errors = 0
while df.shape[0] < song_limit:
    try:
        genres, artist, song = get_random_song()
        # Songs whose artist has no genre information are skipped.
        if genres:
            row = pd.Series(data=[song, artist, genres], index=df.columns)
            df = pd.concat([df, row.to_frame().T], axis=0, ignore_index=True)
            # Checkpoint after every accepted song so an interrupted run can resume.
            df.to_csv(relative_csv_path, index=False)
        consecutive_errors = 0
    except LookupError as e:
        # Expected miss: the random query/offset matched nothing. Just retry.
        print("ERROR: ", e)
    except Exception as e:
        print("ERROR: ", e)
        consecutive_errors += 1
        if consecutive_errors >= max_consecutive_errors:
            raise RuntimeError(
                f"aborting after {consecutive_errors} consecutive failures"
            ) from e
    i+=1
    print(i, df.shape[0])
