In [None]:
#setting up spotipy
import os
import sys

import spotipy
import spotipy.util as util

# OAuth scope requested from Spotify for this session.
scope = 'user-library-read'

# The Spotify username is read from the first command-line argument.
# NOTE(review): inside a Jupyter kernel sys.argv usually holds the kernel
# launcher's own arguments (e.g. '-f'), so verify the value of `username`
# before relying on it.
if len(sys.argv) > 1:
    username = sys.argv[1]
else:
    print ("Usage: %s username" % (sys.argv[0],))
    sys.exit()

# App credentials are read from the environment so that no secret is stored in
# the notebook. A missing variable raises KeyError immediately instead of
# failing later during the OAuth handshake.
client_id = os.environ['SPOTIPY_CLIENT_ID']
client_secret = os.environ['SPOTIPY_CLIENT_SECRET']
redirect_uri = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://localhost/')

token = util.prompt_for_user_token(username, scope, client_id=client_id,
                                   client_secret=client_secret, redirect_uri=redirect_uri)

In [None]:
# Display the username that the setup cell read from sys.argv
# (presumably a sanity check before authenticating).
username

In [None]:
# Build the Spotify API client, authenticated with the user token in `token`.
# The helper functions below use this module-level `sp` for every request.
sp = spotipy.Spotify(auth=token)

In [None]:
import pandas as pd
import numpy as np

# Load the working set of songs; a later cell reads its 'Spotify_Track_ID' column.
sample = pd.read_csv('sample.csv')

In [None]:
# Keys read from each Spotify audio-features record, in the order the values
# are returned by get_features_with_id.
feature_names = [
    'duration_ms',
    'key',
    'mode',
    'time_signature',
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'loudness',
    'speechiness',
    'valence',
    'tempo',
]

In [None]:
def get_features_with_id(id):
    """Return the audio-feature values of one track, ordered like ``feature_names``.

    Parameters
    ----------
    id : str
        Spotify track id passed to ``sp.audio_features``.

    Returns
    -------
    list
        One value per entry of ``feature_names``. When Spotify has no
        audio-features record for the id (empty response or a ``None`` entry),
        every value is NaN so the caller still gets a row of the right width.
    """
    results = sp.audio_features(id)
    track = results[0] if results else None

    # Unknown ids come back as a null entry; indexing it would raise TypeError.
    if track is None:
        return [float('nan')] * len(feature_names)

    return [track[feature] for feature in feature_names]
    

In [None]:
# Extend the frame with one column per audio feature, placed after the
# columns it already has; the values are filled in by a later cell.
column_names = sample.columns.tolist() + feature_names
sample = sample.reindex(columns=column_names)

In [None]:
# One Spotify request per row: each track id is expanded into a Series of
# feature values, and the resulting frame is written into the feature columns.
sample[feature_names] = sample['Spotify_Track_ID'].apply(
    lambda track_id: pd.Series(get_features_with_id(track_id))
)

In [None]:
# NOTE(review): this cell repeats the column-extension step, but the reindex
# result is only displayed, not assigned, so `sample` itself is unchanged.
# `column_names` is rebuilt from the current columns and extended with
# feature_names again - confirm whether this cell is still needed.
column_names = list(sample.columns)
column_names.extend(feature_names)
sample.reindex(columns = column_names)

In [None]:
# Request Spotify's audio analysis for one hard-coded track id and keep the
# response in `analysis`.
analysis = sp.audio_analysis('2XkuSbp5say8nZW8g6156Z')

In [None]:
# Load the weekly Billboard chart data and preview the first rows.
# Later cells read its 'Song' and 'Performer' columns.
billboard_weekly = pd.read_csv('billboard_weekly.csv')
billboard_weekly.head()

In [None]:
def get_track_id(track_name = "", artist = ""):
    """Return the Spotify id of the first search hit for a song.

    Parameters
    ----------
    track_name : str
        Song title. Non-string values (e.g. NaN from a missing CSV cell) are
        treated as an empty string.
    artist : str
        Performer credit. Everything from the word 'Featuring' onwards is
        dropped so only the lead artist is searched. Non-string values are
        treated as an empty string.

    Returns
    -------
    str
        The id of the first track returned by ``sp.search``, or the string
        ``'null'`` when the query is empty or the search has no results.
    """
    # Missing cells arrive as float NaN; string operations on them would raise.
    if not isinstance(track_name, str):
        track_name = ""
    if not isinstance(artist, str):
        artist = ""

    # 'featuring' problem: search with the lead artist only
    if 'Featuring' in artist:
        artist = artist.split('Featuring')[0]

    # strip() removes the space left behind by the split above
    q = (track_name + ' ' + artist).strip()
    if not q:
        return 'null'

    # Only the first hit is used, so a single result is requested.
    search_results = sp.search(q, limit=1)

    items = search_results['tracks']['items']
    if not items:
        return 'null'
    return items[0]['id']

In [None]:
%%timeit
billboard_weekly['sp_id'] = billboard_weekly.apply(lambda row: get_track_id(row['Song'], row['Performer']), axis=1)

In [None]:
# parallel computing:
from multiprocessing import Pool

def get_sp_id(df):
    """Return a copy of ``df`` with an ``sp_id`` column of Spotify track ids.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'Song' and 'Performer' columns; may be empty.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows plus ``sp_id``, the result of
        ``get_track_id(song, performer)`` for each row. The input frame is
        not modified, so passing a slice of a larger frame is safe.
    """
    # Iterating the two columns works for an empty frame as well, whereas a
    # row-wise apply on an empty frame does not yield a column of values.
    sp_ids = [get_track_id(song, performer)
              for song, performer in zip(df['Song'], df['Performer'])]
    return df.assign(sp_id=sp_ids)

def parallelize_dataframe(df, func, start_time, n_cores=4):
    """Apply ``func`` to row-wise pieces of ``df`` in worker processes.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to process.
    func : callable
        Takes a DataFrame piece and returns a DataFrame. It must be picklable
        when more than one piece is produced, because it is sent to workers.
    start_time : any
        Unused; kept so existing positional calls keep working.
    n_cores : int, default 4
        Number of pieces and of worker processes.

    Returns
    -------
    pandas.DataFrame
        The concatenation of ``func`` applied to each piece, in row order.
    """
    # Split row positions instead of the frame itself: the numpy split helpers
    # rely on DataFrame.swapaxes, which pandas has deprecated.
    position_groups = np.array_split(np.arange(len(df)), n_cores)
    df_split = [df.iloc[positions[0]:positions[-1] + 1]
                for positions in position_groups if len(positions)]

    # Nothing to distribute: run in-process and skip the pool entirely.
    if not df_split:
        return func(df)
    if len(df_split) == 1:
        return func(df_split[0])

    pool = Pool(n_cores)
    try:
        results = pool.map(func, df_split)
    finally:
        # Always release the worker processes, even when a worker raised.
        pool.close()
        pool.join()
    return pd.concat(results)

In [None]:
%%timeit
billboard_weekly_with_id = parallelize_dataframe(billboard_weekly, get_sp_id, 4)

In [None]:
def index_marks(nrows, chunk_size):
    """Return the row positions at which a table of ``nrows`` rows is cut.

    Parameters
    ----------
    nrows : int
        Total number of rows.
    chunk_size : int
        Maximum number of rows per chunk; must be positive.

    Returns
    -------
    range
        Cut positions ``chunk_size, 2 * chunk_size, ...`` strictly below
        ``nrows``. No cut is placed at ``nrows`` itself, so a table whose
        length is an exact multiple of ``chunk_size`` does not get an empty
        trailing chunk.

    Raises
    ------
    ValueError
        If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    return range(chunk_size, nrows, chunk_size)

def split(dfm, chunk_size):
    """Cut ``dfm`` into consecutive row chunks of at most ``chunk_size`` rows.

    Parameters
    ----------
    dfm : pandas.DataFrame
        Frame to cut.
    chunk_size : int
        Maximum number of rows per chunk, forwarded to ``index_marks``.

    Returns
    -------
    list of pandas.DataFrame
        The chunks in row order; each keeps its original index labels.

    Prints the total row count as a progress aid.
    """
    nrows = dfm.shape[0]
    indices = index_marks(nrows, chunk_size)
    print(nrows)

    # Slice with iloc instead of np.split: the numpy helper relies on
    # DataFrame.swapaxes, which pandas has deprecated.
    bounds = [0] + list(indices) + [nrows]
    return [dfm.iloc[start:stop] for start, stop in zip(bounds[:-1], bounds[1:])]

# Cut the chart data into pieces of 10000 rows so they can be processed one
# at a time.
chunks = split(billboard_weekly, 10000)

In [None]:
# Sanity check of the chunking: print the shape and row index of every chunk.
for c in chunks:
    # c.index shows which rows of the original frame ended up in this chunk
    print("Shape: {}; {}".format(c.shape, c.index))


In [None]:
# Look up Spotify ids chunk by chunk. A fresh token and client are created
# before each chunk (presumably because a token can expire during a long run).
# App credentials are read from the environment so no secret is stored here.
client_id = os.environ['SPOTIPY_CLIENT_ID']
client_secret = os.environ['SPOTIPY_CLIENT_SECRET']
redirect_uri = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://localhost/')

processed_chunks = []
for count, c in enumerate(chunks, start=1):
    print('Chunk #: ' + str(count))
    # An empty chunk has nothing to look up; skip the re-authentication too.
    if c.empty:
        continue
    token = util.prompt_for_user_token(username, scope, client_id=client_id,
                                       client_secret=client_secret, redirect_uri=redirect_uri)
    sp = spotipy.Spotify(auth=token)
    # Keep every processed chunk: rebinding the loop variable alone would
    # discard the result as soon as the next iteration starts.
    processed_chunks.append(parallelize_dataframe(c, get_sp_id, start_time=None, n_cores=4))

# Reassemble the processed chunks into one frame, in the original row order.
billboard_weekly_with_id = pd.concat(processed_chunks)

In [None]:
# Draw 5000 random rows with a fixed random_state so the draw is repeatable.
# This rebinds `sample`, replacing whatever frame the name held before.
sample = billboard_weekly.sample(n=5000, random_state=2019)

In [None]:
%%time
sample = parallelize_dataframe(sample, get_sp_id, 4)

In [None]:
# Preview the first rows of the sample after the Spotify id lookup.
sample.head()

In [None]:
# Persist the sample with its Spotify ids. to_csv is called with its
# defaults, so the row index is written as the first column.
sample.to_csv('billboard_weekly_sample.csv')

In [None]:
# Rows for which no Spotify track was found: get_track_id returns the string
# 'null' when the search has no results.
sample[sample['sp_id'] == 'null']

In [None]:
# Manual search to inspect the raw response for one title/artist query
# (presumably one of the rows that came back as 'null' - TODO confirm).
sp.search('Oochie Wally QB Finest')

In [None]:
# Manual search to inspect the raw response for a second title/artist query
# (presumably another row that came back as 'null' - TODO confirm).
sp.search('Don\'t Set Me Free Ray Charles')