In [1]:
from dotenv import load_dotenv
from requests import post, get
import os
import base64
import json
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import random
from pprint import pprint
import pandas as pd

API Keys & Environment Variables

In [2]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()
client_id = os.getenv("CLIENT_ID")
client_secret = os.getenv("CLIENT_SECRET")

# Fail early with a readable message: assigning None into os.environ below
# would otherwise raise an opaque "str expected, not NoneType" TypeError.
if not client_id or not client_secret:
    raise RuntimeError(
        "CLIENT_ID and CLIENT_SECRET must be set in the environment or in a .env file"
    )

# Re-export under the SPOTIPY_* names so the credentials manager created
# later (with no explicit arguments) can find them.
os.environ["SPOTIPY_CLIENT_ID"] = client_id
os.environ["SPOTIPY_CLIENT_SECRET"] = client_secret


In [3]:
# Confirm both credentials were loaded without echoing the secret values into
# the saved notebook. Any secret that has ever been displayed in a shared
# notebook should be treated as compromised and rotated.
bool(client_id), bool(client_secret)

(True, True)

Manual Spotify API requests (not used)

In [4]:
def get_token():
    """Request an access token using the client-credentials grant.

    Reads the module-level ``client_id`` and ``client_secret``, sends them as
    HTTP Basic credentials to the Spotify accounts token endpoint and returns
    the ``access_token`` field of the JSON reply.

    Returns:
        The value of ``access_token`` from the token response.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status
            (for example when the credentials are rejected).
        KeyError: if a successful reply unexpectedly lacks ``access_token``.
    """
    auth_string = client_id + ':' + client_secret
    auth_bytes = auth_string.encode('utf-8')
    auth_base64 = str(base64.b64encode(auth_bytes), 'utf-8')

    url = "https://accounts.spotify.com/api/token"
    headers = {
        "Authorization": "Basic " + auth_base64,
        "Content-Type": "application/x-www-form-urlencoded"
    }
    data = {"grant_type" : "client_credentials"}

    # A timeout keeps a stalled connection from hanging the notebook forever.
    result = post(url, headers=headers, data=data, timeout=10)
    # Surface auth/network failures as an HTTP error instead of a confusing
    # KeyError on the missing 'access_token' field.
    result.raise_for_status()
    json_result = result.json()
    return json_result['access_token']

In [5]:
def get_auth_header(token):
    """Build the request-header dict that carries ``token`` as a bearer token."""
    bearer_value = "Bearer " + token
    return dict(Authorization=bearer_value)

In [6]:
def search_for_track(token, query, limit=1, market='US'):
    """Search the Spotify catalogue for tracks matching ``query``.

    Args:
        token: Access token, as returned by ``get_token``.
        query: Search expression sent as the ``q`` parameter, e.g.
            ``'track:"praise god" year:2012-2019'``.
        limit: Maximum number of tracks to request.
        market: Market code to restrict results to; ``None`` omits the
            parameter from the request.

    Returns:
        The decoded JSON response (also printed, for use in the notebook).

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    url = 'https://api.spotify.com/v1/search'
    headers = get_auth_header(token)
    # Passing the query through `params` lets requests URL-encode the quotes,
    # spaces and colons; the caller's `query` is no longer overwritten by a
    # hard-coded string.
    params = {"q": query, "type": "track", "limit": limit, "market": market}

    result = get(url, headers=headers, params=params, timeout=10)
    result.raise_for_status()
    json_result = result.json()
    print(json_result)
    return json_result



In [7]:
def get_artist(token, artist_id):
    """Fetch a single artist from the Spotify Web API.

    Args:
        token: Access token, as returned by ``get_token``.
        artist_id: Spotify ID of the artist to look up.

    Returns:
        The decoded JSON response. The response object and the JSON are also
        printed so the cell shows them.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    url = 'https://api.spotify.com/v1/artists/'
    headers = get_auth_header(token)

    query_url = url + artist_id
    result = get(query_url, headers=headers, timeout=10)
    # Print first so the status code is visible even when the check below raises.
    print(result)
    result.raise_for_status()
    json_result = result.json()
    print(json_result)
    # Returned so that `result = get_artist(...)` holds the data, not None.
    return json_result


In [15]:
# Obtain an access token through the manual client-credentials helper.
token = get_token()


In [17]:
# Look up one artist by ID; get_artist prints the HTTP response and the decoded JSON.
result = get_artist(token, '1vl7m6FDEl2YUKnyUuCzrT')

<Response [200]>
{'external_urls': {'spotify': 'https://open.spotify.com/artist/1vl7m6FDEl2YUKnyUuCzrT'}, 'followers': {'href': None, 'total': 2170}, 'genres': [], 'href': 'https://api.spotify.com/v1/artists/1vl7m6FDEl2YUKnyUuCzrT', 'id': '1vl7m6FDEl2YUKnyUuCzrT', 'images': [{'height': 640, 'url': 'https://i.scdn.co/image/ab6761610000e5ebac0e056fc79e5e02f3427f65', 'width': 640}, {'height': 320, 'url': 'https://i.scdn.co/image/ab67616100005174ac0e056fc79e5e02f3427f65', 'width': 320}, {'height': 160, 'url': 'https://i.scdn.co/image/ab6761610000f178ac0e056fc79e5e02f3427f65', 'width': 160}], 'name': 'TJ Boyce', 'popularity': 14, 'type': 'artist', 'uri': 'spotify:artist:1vl7m6FDEl2YUKnyUuCzrT'}


Random Song Search Helper Functions

In [10]:

def get_random_search():
    """Return a random wildcard search pattern built around one lowercase letter.

    The result is either ``"<letter>%"`` or ``"%<letter>%"``, each chosen
    with equal probability.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    letter = random.choice(alphabet)

    # Coin flip: 0 puts the wildcard only after the letter, 1 wraps the letter in wildcards.
    leading_wildcard = '' if random.randint(0, 1) == 0 else '%'
    return leading_wildcard + letter + '%'


Set Up Auth & Auto-Refresh Token

In [11]:
# No credentials are passed explicitly; presumably SpotifyClientCredentials picks up the
# SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment variables set earlier — confirm in the spotipy docs.
auth_manager = SpotifyClientCredentials()
# Shared client used by the search / artist / album_tracks calls in the cells below.
sp = spotipy.Spotify(auth_manager=auth_manager)

Random Song Function (spotipy, Spotify API)

In [12]:
def get_random_song() -> tuple:
    """Pick a random song from a random low-popularity 2016-2021 album.

    Searches albums with a random wildcard title pattern, a random year in
    2016-2021 and a random result offset, then picks one random track from
    the single album returned and looks up the genres of its first artist.

    Returns:
        A ``(genres, artist, song)`` tuple: the first album artist's genre
        list (possibly empty), that artist's name, and the track name.

    Raises:
        IndexError: if the search returns no album at the chosen offset, or
            the album yields no track for the US market.
    """
    q = get_random_search()
    offset = random.randint(0, 999)
    year = str(2016 + random.randint(0, 5))

    # get bottom 10% popularity album using 'hipster' tag
    search_result = sp.search(
        q='album:"' + q + '" year:' + year + " tag:hipster",
        limit=1,
        offset=offset,
        type='album',
        market='US'
    )
    albums = search_result['albums']['items']
    if not albums:
        # Same exception type the bare [0] lookup raised, but with enough
        # context to see which random draw came back empty.
        raise IndexError(
            f"no album found for query {q!r}, year {year}, offset {offset}"
        )
    album = albums[0]
    lead_artist = album['artists'][0]
    artist = lead_artist['name']

    # get genre of album artist
    genres = sp.artist(lead_artist['id'])['genres']

    # Now get a random song from the album
    track = sp.album_tracks(
        album_id=album['id'],
        limit=1,
        offset=random.randint(0, album['total_tracks'] - 1),
        market='US'
    )
    if not track['items']:
        raise IndexError(f"no track returned for album {album['id']!r}")
    song = track['items'][0]['name']

    return (genres, artist, song)



In [13]:
# Stand-alone run of the album-search step, kept to inspect the raw response.
q = get_random_search()
offset = random.randint(0, 999)
year = str(2016 + random.randint(0, 5))

# The 'hipster' tag restricts results to albums in the bottom 10% of popularity.
album = sp.search(
    q=f'album:"{q}" year:{year} tag:hipster',
    type='album',
    market='US',
    limit=1,
    offset=offset,
)
# Name of the first artist credited on the first (only) album returned.
artist = album['albums']['items'][0]['artists'][0]['name']
print(artist)

TJ Boyce


In [14]:
# ID of the first artist on the album returned by the search above.
a_id = album['albums']['items'][0]['artists'][0]['id']

In [16]:
# Display the artist ID.
a_id

'1vl7m6FDEl2YUKnyUuCzrT'

In [18]:
# NOTE: despite its name, `genres` holds the full sp.artist response here;
# the genre list itself sits under its 'genres' key.
genres = sp.artist(artist_id=a_id)


In [19]:
# An empty list means no genres are listed for this artist.
print(genres['genres'])

[]


Save 3000 songs to CSV using pandas (since there are ~3000 Billboard songs)

In [20]:
song_limit = 3000
# Abort instead of retrying forever when lookups keep failing
# (bad credentials, network outage, rate limiting).
max_consecutive_errors = 20

relative_csv_path = "../data/random_2016-2021_song_artist_genres.csv"
# The CSV must already exist with exactly these columns; the run resumes
# from whatever rows it already holds.
df = pd.read_csv(relative_csv_path)
assert list(df.columns) == ['song', 'artist', 'genres']

i = 0  # total attempts, successful or not
consecutive_errors = 0
while df.shape[0] < song_limit:
    try:
        genres, artist, song = get_random_song()
    except Exception as e:
        # Best effort: a single failed random draw (e.g. empty search result)
        # is logged and skipped, but an unbroken run of failures stops the loop.
        print("ERROR: ", e)
        consecutive_errors += 1
        if consecutive_errors >= max_consecutive_errors:
            raise RuntimeError(
                f"giving up after {consecutive_errors} consecutive failed lookups"
            ) from e
    else:
        consecutive_errors = 0
        # Songs whose artist has no genres are skipped.
        if genres:
            new_row = pd.DataFrame(
                [{'song': song, 'artist': artist, 'genres': genres}],
                columns=df.columns,
            )
            df = pd.concat([df, new_row], axis=0, ignore_index=True)
            # Checkpoint after every new row so an interrupted run keeps its
            # progress; write errors are no longer swallowed by the except above.
            df.to_csv(relative_csv_path, index=False)
    i += 1
    print(i, df.shape[0])


1 2464
2 2464
3 2465
4 2465
5 2465
6 2465
7 2465
8 2466
9 2467
10 2468
11 2469
12 2469
13 2469
14 2470
15 2470
16 2471
17 2471
18 2471
19 2472
20 2472
21 2472
22 2473
23 2473
24 2473
25 2474
26 2474
27 2474
28 2474
29 2474
30 2474
31 2475
32 2475
33 2476
34 2476
35 2477
36 2477
37 2477
38 2477
39 2478
40 2478
41 2479
42 2480
43 2480
44 2481
45 2481
46 2481
47 2481
48 2482
49 2482
50 2482
51 2482
52 2483
53 2484
54 2484
55 2485
56 2485
57 2485
58 2486
59 2487
60 2487
61 2487
62 2487
63 2488
64 2488
65 2488
66 2489
67 2490
68 2490
69 2491
70 2491
71 2491
72 2491
73 2491
74 2491
75 2491
76 2491
77 2491
78 2491
79 2491
80 2492
81 2492
82 2492
83 2492
84 2493
85 2494
86 2494
87 2495
88 2495
89 2496
90 2497
91 2497
92 2498
93 2499
94 2499
95 2499
96 2499
97 2500
98 2501
99 2501
100 2501
101 2501
102 2501
103 2501
104 2502
105 2502
106 2502
107 2502
108 2503
109 2503
110 2503
111 2503
112 2503
113 2503
114 2504
115 2504
116 2504
117 2504
118 2504
119 2505
120 2505
121 2506
122 2506
123 2506
1