In [31]:
import os
import random

import numpy as np
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from tqdm import tqdm
# Spotify API credentials are read from the environment instead of being
# committed with the notebook. Export SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET before starting the kernel. Either value is None when
# its variable is unset.
SPOTIPY_CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID')
secret = os.environ.get('SPOTIPY_CLIENT_SECRET')

# Genre seeds passed to the recommendations endpoint, one request per genre.
genres_to_collect = ('alt-rock', 'classical', 'country',
                     'edm', 'heavy-metal', 'hip-hop',
                     'latin')

In [32]:
def create_credentials_obj():
    """
    Build a spotipy client authenticated with the client-credentials flow.

    The client id and secret are taken from the module-level
    SPOTIPY_CLIENT_ID and secret values.

    return: a spotipy.Spotify instance
    """
    return spotipy.Spotify(
        client_credentials_manager=SpotifyClientCredentials(
            client_id=SPOTIPY_CLIENT_ID,
            client_secret=secret,
        )
    )

In [33]:
def collect_seed_tracks(sp, genre_list, num_tracks_per_genre):
    """
    Collect recommended tracks for every genre in genre_list.

    sp: client object exposing
        recommendations(seed_genres=..., limit=..., market=...)
    genre_list: re-iterable collection of genre seed names (it is walked once
        per request batch, so a one-shot generator is not suitable)
    num_tracks_per_genre: how many tracks to request per genre. Requests are
        issued in batches of at most 100 tracks; a final smaller batch covers
        any remainder, so 120 is requested as 100 + 20. Values <= 0 issue no
        requests.

    return: a dataframe with columns artist_name, artist_id, track_name,
        track_id, genre. A track id is kept only the first time it is seen,
        so a track is never assigned to more than one genre and the frame may
        hold fewer rows than were requested.
    """
    columns = ['artist_name', 'artist_id', 'track_name', 'track_id', 'genre']
    max_per_request = 100  # per-request cap, unchanged from the earlier logic

    requested = max(int(num_tracks_per_genre), 0)
    full_batches, remainder = divmod(requested, max_per_request)
    batch_sizes = [max_per_request] * full_batches
    if remainder:
        batch_sizes.append(remainder)

    seen_track_ids = set()  # O(1) duplicate check across all genres
    rows = []
    for batch_size in tqdm(batch_sizes):
        for genre in genre_list:
            recs = sp.recommendations(seed_genres=[genre], limit=batch_size, market='US')
            for track in recs['tracks']:
                track_id = track['id']
                # only assign a track if it has not been seen before;
                # this avoids having tracks assigned to multiple genres
                if track_id in seen_track_ids:
                    continue
                seen_track_ids.add(track_id)
                lead_artist = track['artists'][0]
                rows.append([lead_artist['name'], lead_artist['id'],
                             track['name'], track_id, genre])

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=columns)


In [34]:
def collect_track_attributes(sp, artist_track_df, batch_size=100,
                             checkpoint_path='iterative_features.csv',
                             output_path='7_unique_genre_attributes.csv'):
    """
    Fetch audio features for every track and merge them with the track info.

    sp: client object exposing audio_features(list_of_track_ids)
    artist_track_df: dataframe containing a track_id column
    batch_size: maximum number of track ids sent per audio_features call
        (default 100). Must be at least 1.
    checkpoint_path: csv rewritten after every batch with the features
        collected so far, so partial progress survives a failure midway.
        Pass None to skip checkpointing.
    output_path: csv the merged result is written to. Pass None to skip.

    return: inner merge on track_id of the collected features (the 'id'
        column renamed to 'track_id') with artist_track_df. Tracks for which
        the client returned no features (None entries) are dropped.

    raises: ValueError if batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    track_ids = artist_track_df['track_id'].tolist()
    feature_rows = []
    # Start with an empty frame that already has the merge key, so an empty
    # input (or all-None responses) still merges cleanly.
    track_features_df = pd.DataFrame(columns=['track_id'])

    # Fixed-size slices guarantee no request exceeds batch_size and work for
    # any number of tracks, including fewer than one full batch.
    for start in tqdm(range(0, len(track_ids), batch_size)):
        batch_ids = track_ids[start:start + batch_size]
        batch_features = sp.audio_features(batch_ids) or []
        # skip None placeholders returned for tracks without features
        feature_rows.extend(f for f in batch_features if f is not None)
        if feature_rows:
            track_features_df = pd.DataFrame(feature_rows).rename(columns={'id': 'track_id'})
        # save the csv in case it gets stuck along the way.
        if checkpoint_path is not None:
            track_features_df.to_csv(checkpoint_path)

    result_df = pd.merge(track_features_df, artist_track_df, how='inner', on='track_id')
    if output_path is not None:
        result_df.to_csv(output_path)
    return result_df

In [35]:
# Run the full collection pipeline end to end.

# step 1: build the spotipy client
sp_creds = create_credentials_obj()

# step 2: gather seed tracks, requesting 10000 per genre
artist_track_df = collect_seed_tracks(
    sp=sp_creds,
    genre_list=genres_to_collect,
    num_tracks_per_genre=10000,
)

# step 3: fetch audio features and merge them with the track/artist info
feature_artist_track_df = collect_track_attributes(
    sp=sp_creds,
    artist_track_df=artist_track_df,
)




100%|██████████| 100/100 [05:34<00:00,  3.34s/it]
100%|██████████| 45/45 [00:11<00:00,  4.07it/s]


In [36]:
# Print the total number of merged rows first, then leave the per-genre counts
# as the cell's final expression: a notebook only renders the value of the
# last expression, so the table must come last to be displayed.
print(len(feature_artist_track_df))
feature_artist_track_df.groupby('genre').count()

Unnamed: 0_level_0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,type,track_id,uri,track_href,analysis_url,duration_ms,time_signature,artist_name,artist_id,track_name
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
alt-rock,500,500,500,500,500,500,500,500,500,500,...,500,500,500,500,500,500,500,500,500,500
classical,517,517,517,517,517,517,517,517,517,517,...,517,517,517,517,517,517,517,517,517,517
country,836,836,836,836,836,836,836,836,836,836,...,836,836,836,836,836,836,836,836,836,836
edm,105,105,105,105,105,105,105,105,105,105,...,105,105,105,105,105,105,105,105,105,105
heavy-metal,598,598,598,598,598,598,598,598,598,598,...,598,598,598,598,598,598,598,598,598,598
hip-hop,860,860,860,860,860,860,860,860,860,860,...,860,860,860,860,860,860,860,860,860,860
latin,867,867,867,867,867,867,867,867,867,867,...,867,867,867,867,867,867,867,867,867,867
