In [None]:
# !pip install spotipy
# !pip install pandas
# !pip install scipy
# !pip install ipywidgets
# !pip install voila
# !jupyter serverextension enable --sys-prefix voila 
# !pip install scikit-learn
# #!pip install python-graphviz
# !pip install graphviz
# !pip install pydot
# !python --version
# !pip install python-dotenv

In [None]:
# Load the "dotenv" IPython extension and run its line magic.
# NOTE(review): presumably this exports the variables of a local .env file into the
# environment; the credentials used below are read separately via dotenv_values — confirm
# whether both mechanisms are needed.
%load_ext dotenv
%dotenv

In [None]:
import spotipy
import pandas as pd 
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
import numpy as np 
import json
import time

from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

from matplotlib import pyplot as plt
import seaborn as sns

from scipy import misc

from sklearn.metrics import accuracy_score
from PIL import Image as PImage
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import KMeans
from sklearn.ensemble import GradientBoostingClassifier
 

from sklearn.metrics import accuracy_score

import ipywidgets as widgets
from ipywidgets import HBox, VBox, Layout, HTML

from dotenv import dotenv_values

In [None]:
# Key/value settings read from the local ".env" file; the client-setup cell looks up
# the "CID" and "SECRET" entries in this mapping.
config = dotenv_values(".env")

In [None]:
# Build the module-level Spotify client `sp` from the client id / secret stored in `config`.
# A missing "CID" or "SECRET" key raises KeyError here.
client_credentials_manager = SpotifyClientCredentials(client_id=config["CID"], client_secret=config["SECRET"]) 
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [None]:
def fetch_playlist_tracks(sp, playlistsid):
    """Download every track of one playlist into a DataFrame.

    Parameters
    ----------
    sp : object
        Client exposing ``playlist_tracks(playlist_id, fields=..., limit=...,
        offset=..., market=...)`` and returning a dict with ``'items'`` and
        ``'next'`` keys.
    playlistsid : str
        Playlist identifier, passed through unchanged to ``sp.playlist_tracks``.

    Returns
    -------
    pandas.DataFrame
        One row per usable track with the columns ``track_id``, ``track_name``,
        ``track_popularity``, ``track_artist``, ``track_artist_id`` and
        ``track_url``. Only the first listed artist is recorded.

    Notes
    -----
    Playlist entries whose ``'track'`` value is missing/``None`` or whose track
    has no ``'id'`` are skipped: the former used to crash this function and the
    latter cannot be used for any id-based lookup afterwards.
    """
    page_size = 100
    offset = 0
    items = []
    # Page through the playlist until the API reports that there is no next page.
    while True:
        content = sp.playlist_tracks(playlistsid, fields=None, limit=page_size, offset=offset, market=None)
        items += content['items']
        if content['next'] is None:
            break
        offset += page_size

    columns = ["track_id", "track_name", "track_popularity", "track_artist", "track_artist_id", "track_url"]
    rows = []
    for item in items:
        track = item.get('track')
        if not track or track.get('id') is None:
            # Nothing usable in this entry (no track object or no track id).
            continue
        artists = track.get('artists') or [{}]
        first_artist = artists[0]
        rows.append({
            "track_id": track['id'],
            "track_name": track['name'],
            "track_popularity": track['popularity'],
            "track_artist": first_artist.get('name'),
            "track_artist_id": first_artist.get('id'),
            "track_url": (track.get('external_urls') or {}).get('spotify'),
        })

    # Passing `columns` keeps the schema stable even for an empty playlist.
    df_playlists_tracks = pd.DataFrame(rows, columns=columns)

    return df_playlists_tracks


In [None]:
def get_track_df(playlist_ids):
    """Fetch the tracks of several playlists and stack them into one DataFrame.

    Each id in ``playlist_ids`` is downloaded with ``fetch_playlist_tracks``
    using the module-level ``sp`` client; the per-playlist frames are then
    concatenated with a fresh 0..n-1 index.
    """
    per_playlist = [fetch_playlist_tracks(sp, playlist_id) for playlist_id in playlist_ids]
    return pd.concat(per_playlist, ignore_index=True)

In [None]:
def get_track_features(tracks_df, target):
    """Fetch audio features for every track in ``tracks_df`` and label them.

    Parameters
    ----------
    tracks_df : pandas.DataFrame
        Must provide ``track_id`` and ``track_popularity`` columns.
    target : int
        Label stored in the ``target`` column of every row (1/0 - good/bad).

    Returns
    -------
    pandas.DataFrame
        One row per track for which features were returned, holding the
        feature fields plus ``trackPopularity`` and ``target``. Rows containing
        any missing value are dropped. The index of each row is the position of
        the track in ``tracks_df``, so callers can align the result with the
        input even when some tracks are missing.

    Notes
    -----
    Uses the module-level ``sp`` client and requests features in batches of 50
    ids. A ``None`` entry in a batch (no features for that track) is skipped
    instead of crashing on item assignment.
    """
    batch_size = 50
    tracks = tracks_df.track_id.to_list()
    track_pops = tracks_df.track_popularity.to_list()

    rows = []
    positions = []
    for start in range(0, len(tracks), batch_size):
        audio_features = sp.audio_features(tracks[start:start + batch_size])
        for offset, track in enumerate(audio_features):
            if track is None:
                # No features for this track: leave a gap at its position.
                continue
            position = start + offset
            row = dict(track)  # copy so the client's response object is not mutated
            row['trackPopularity'] = track_pops[position]
            row['target'] = target  # 1/0 - good/bad
            rows.append(row)
            positions.append(position)

    df = pd.DataFrame(rows, index=positions)

    return df.dropna()

In [None]:
# Getting track info and track features from lists of good and bad playlist IDs
def compile_good_bad_data(good_playlist_ids, bad_playlist_ids):
    """Build the labelled feature frames for the liked and disliked playlists.

    Returns a two-element list: the feature frame of the good playlists
    (target 1) followed by the feature frame of the bad playlists (target 0).
    """
    labelled_sources = ((good_playlist_ids, 1), (bad_playlist_ids, 0))
    # Good playlists are fetched and featurised first, then the bad ones.
    return [
        get_track_features(get_track_df(playlist_ids), label)
        for playlist_ids, label in labelled_sources
    ]

In [None]:
# Func to generate graphs 

def get_graphs(trainingData):
    """Plot liked vs. disliked histograms for six audio features.

    Parameters
    ----------
    trainingData : pandas.DataFrame
        Must contain a ``target`` column (1 = like, 0 = dislike) and the
        columns ``tempo``, ``key``, ``danceability``, ``loudness``,
        ``speechiness`` and ``energy``.

    Returns
    -------
    matplotlib.figure.Figure
        A 12x12 inch figure whose first six cells of a 3x3 grid each hold the
        overlaid "Like" and "Dislike" histograms of one feature.

    Notes
    -----
    Every panel is created once and both histograms are drawn on that explicit
    axes object. The previous version called ``add_subplot`` twice per panel
    and relied on the implicit current axes.
    NOTE(review): whether the second call stacked a new axes on top depends on
    the matplotlib version — confirm against the ``add_subplot`` docs.
    """
    # (grid position, column, title, x label or None, y label)
    panels = [
        (331, 'tempo', 'Tempo Distribution', None, '# of Songs'),
        (332, 'key', 'Key Distribution', None, 'Count'),
        (333, 'danceability', 'Danceability Distribution', None, '# of Songs'),
        (334, 'loudness', 'Loudness Distribution', None, '# of Songs'),
        (335, 'speechiness', 'Speechiness Distribution', None, 'Count'),
        (336, 'energy', 'Energy Distribution', 'Energy', 'Count'),
    ]

    liked = trainingData[trainingData['target'] == 1]
    disliked = trainingData[trainingData['target'] == 0]

    fig2 = plt.figure(figsize=(12, 12))
    alp = 0.7  # translucent bars so the two overlaid histograms stay readable

    for position, column, title, xlabel, ylabel in panels:
        ax = fig2.add_subplot(position)
        if xlabel is not None:
            ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        liked[column].hist(ax=ax, alpha=alp, bins=30, label='Like')
        disliked[column].hist(ax=ax, alpha=alp, bins=30, label='Dislike')
        ax.legend(loc='upper right')

    return fig2



In [None]:
def training_data(good_playlist_ids, bad_playlist_ids, features):
    """Assemble the labelled data set, plot it and split it for training.

    The good/bad feature frames are concatenated, handed to ``get_graphs`` and
    split 80/20 with ``train_test_split``.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test, graphs)`` where the ``x_*`` frames
        hold the ``features`` columns, the ``y_*`` series hold ``target`` and
        ``graphs`` is whatever ``get_graphs`` returned.
    """
    labelled_frames = compile_good_bad_data(good_playlist_ids, bad_playlist_ids)
    trainingData = pd.concat(labelled_frames, ignore_index=True)
    graphs = get_graphs(trainingData)

    train, test = train_test_split(trainingData, test_size=0.2)

    return train[features], train["target"], test[features], test["target"], graphs

In [None]:
def choose_best_model(x_train, y_train, x_test, y_test):
    """Fit several classifiers and return the one with the best test accuracy.

    Parameters
    ----------
    x_train, y_train : training features and labels passed to ``fit``.
    x_test, y_test : held-out features and labels used to score each model
        with ``accuracy_score``.

    Returns
    -------
    The fitted candidate with the highest accuracy; on ties the earlier
    candidate in the list wins. A model is always returned, even when every
    candidate scores 0.

    Notes
    -----
    Only classifiers are candidates. ``KMeans`` was removed from the list: its
    ``fit`` ignores the labels, its cluster numbers are not class labels, and
    it offers no ``predict_proba``, which ``predict_results`` calls on the
    chosen model.
    """
    models = [
        DecisionTreeClassifier(min_samples_split=100),
        KNeighborsClassifier(3),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        AdaBoostClassifier(n_estimators=100),
        GaussianNB(),
        GradientBoostingClassifier(n_estimators=100, learning_rate=.1, max_depth=1, random_state=0),
    ]

    best_model = None
    best_score = -1.0  # below any real accuracy so the first model is always accepted
    for model in models:
        model.fit(x_train, y_train)
        predicted = model.predict(x_test)
        score = accuracy_score(y_test, predicted) * 100

        if score > best_score:
            best_model, best_score = model, score

    return best_model

In [None]:
# Getting track info and track features from a playlist to recommend songs from (e.g. my Daily Mix 1)

def get_reccomendation_data(sp, test_playlist_ids, features):
    """Build the candidate-track table (track info plus feature columns).

    Parameters
    ----------
    sp : unused here; kept for interface compatibility (``get_track_df`` is
        called without it).
    test_playlist_ids : playlist ids to draw candidate tracks from.
    features : list of feature column names to attach to each track.

    Returns
    -------
    pandas.DataFrame
        Track info columns plus the ``features`` columns, restricted to tracks
        that have a value for every feature and re-indexed 0..n-1.

    Notes
    -----
    ``get_track_features`` may return fewer rows than tracks. The index-aligned
    assignment then leaves NaN in the feature columns of those tracks; they are
    dropped here so the frame can be passed straight to a model.
    """
    test_df = get_track_df(test_playlist_ids)
    test_df_feats = get_track_features(test_df, 1)
    test_df[features] = test_df_feats[features]

    # Remove tracks without complete features and restore a positional index.
    test_df = test_df.dropna(subset=features).reset_index(drop=True)

    return test_df

In [None]:
def predict_results(model, test_df, features):
    """Predict labels and class probabilities for the candidate tracks.

    Parameters
    ----------
    model : fitted estimator offering ``predict`` and ``predict_proba``.
    test_df : pandas.DataFrame containing the ``features`` columns.
    features : list of column names fed to the model.

    Returns
    -------
    tuple
        ``(pred, prob_df)`` where ``pred`` is the output of ``model.predict``
        and ``prob_df`` is a DataFrame of ``model.predict_proba`` with the two
        flat columns ``'Dislike'`` and ``'Like'``.
    """
    inputs = test_df[features]
    pred = model.predict(inputs)
    prob = model.predict_proba(inputs)

    prob_df = pd.DataFrame(prob)
    # A flat list of names; a nested list would build a one-level MultiIndex instead.
    prob_df.columns = ['Dislike', 'Like']
    return pred, prob_df

def make_recommendations(pred, prob_df, test_df):
    """Select the tracks worth recommending.

    A track is kept when it is predicted as liked (``prediction == 1``) and its
    like-probability (last column of ``prob_df``) is above the mean
    like-probability of all candidates.

    Parameters
    ----------
    pred : sequence of predicted labels, one per row of ``test_df``.
    prob_df : pandas.DataFrame of class probabilities, same row order.
    test_df : pandas.DataFrame with ``track_name``, ``track_artist`` and
        ``track_url`` columns, same row order.

    Returns
    -------
    pandas.DataFrame
        Columns ``0..3``: like-probability in percent (one decimal), track
        name, artist and URL. The four columns exist even when no track
        qualifies.

    Notes
    -----
    Rows are matched by position, so ``test_df`` does not need a 0..n-1 index.
    """
    like_prob = prob_df.iloc[:, -1]
    mean = like_prob.mean()

    data = []
    for i, prediction in enumerate(pred):
        prob = like_prob.iloc[i]
        if prediction == 1 and prob > mean:
            track = test_df.iloc[i]
            data.append([round(prob * 100, 1), track['track_name'], track['track_artist'], track['track_url']])

    # Explicit column labels keep the shape stable for an empty result.
    df = pd.DataFrame(data, columns=[0, 1, 2, 3])

    return df

In [None]:

def make_clickable(val):
    """Wrap ``val`` in an HTML anchor whose visible text is "Link"."""
    anchor = f'<a href="{val}">Link</a>'
    return anchor

def run_model(good_playlist_ids, bad_playlist_ids, search_playlist_ids):
    """Run the whole pipeline: fetch data, train, predict and rank recommendations.

    Parameters
    ----------
    good_playlist_ids, bad_playlist_ids : playlist ids used as liked /
        disliked training examples.
    search_playlist_ids : playlist ids whose tracks are scored.

    Returns
    -------
    tuple
        ``(recc_df, graphs)``. ``recc_df`` has the columns 'Like Probability',
        'Track Name', 'Artist' and 'Track URL', sorted by probability in
        descending order with a 0..n-1 index; it is empty (but has the same
        columns) when nothing is recommended. ``graphs`` is the figure
        returned by ``training_data``.

    Notes
    -----
    Side effects: prints progress messages and sets the pandas display options
    ``display.max_rows`` / ``display.max_columns`` to ``None`` globally.
    """
    # Feature columns used both for training and for scoring.
    features = ["tempo","danceability", "loudness", "valence", "energy", "instrumentalness", "acousticness", "key", "speechiness", "trackPopularity"]
    headers = ['Like Probability', 'Track Name', 'Artist', 'Track URL']

    test_df = get_reccomendation_data(sp, search_playlist_ids, features)
    print('Got test df')

    # Training data and model
    x_train, y_train, x_test, y_test, graphs = training_data(good_playlist_ids, bad_playlist_ids, features)
    model = choose_best_model(x_train, y_train, x_test, y_test)
    print('got trained model')

    pred, prob_df = predict_results(model, test_df, features)
    print('made predictions')

    # Make recommendations
    recc_df = make_recommendations(pred, prob_df, test_df)
    print('made recc df')

    if recc_df.empty:
        # Nothing qualified: sorting by column 0 would fail on a column-less frame.
        recc_df = pd.DataFrame(columns=headers)
    else:
        recc_df = recc_df.sort_values(by=[0], ascending=False, na_position='first')
        recc_df.columns = headers
        recc_df = recc_df.reset_index(drop=True)

    pd.set_option("display.max_rows", None, "display.max_columns", None)

    return recc_df, graphs

In [None]:
def url_to_id(urls):
    """Extract the trailing identifier from each playlist URL.

    Parameters
    ----------
    urls : iterable of str
        Entries of five characters or fewer (after trimming whitespace) are
        ignored.

    Returns
    -------
    list of str
        For each remaining entry, the text after the last ``/`` once any
        ``?query`` / ``#fragment`` suffix and trailing slashes are removed.
        Entries without a slash are returned trimmed but otherwise unchanged.
    """
    ids = []
    for url in urls:
        url = url.strip()
        if len(url) > 5:
            # Drop "?si=..." style suffixes and trailing slashes before splitting.
            path = url.split('?', 1)[0].split('#', 1)[0].rstrip('/')
            ids.append(path.split('/')[-1])

    return ids

In [None]:
# DataFrame styling: a bare list of CSS declarations (no selector, no braces).
# btn_click_run wraps this text as '<style>table {...}</style>' before handing it
# to HTML_with_style.
# NOTE(review): height (1675px) and width (700px) are fixed values — confirm they
# suit result tables of any length.

my_style = """background-color: rgba(0, 0, 0, 0);
border-bottom-color: rgb(0, 0, 0);
border-bottom-style: none;
border-bottom-width: 0px;
border-collapse: collapse;
border-image-outset: 0px;
border-image-repeat: stretch;
border-image-slice: 100%;
border-image-source: none;
border-image-width: 1;
border-left-color: rgb(0, 0, 0);
border-left-style: none;
border-left-width: 0px;
border-right-color: rgb(0, 0, 0);
border-right-style: none;
border-right-width: 0px;
border-top-color: rgb(0, 0, 0);
border-top-style: none;
border-top-width: 0px;
box-sizing: border-box;
color: rgb(0, 0, 0);
display: table;
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 12px;
height: 1675px;
line-height: 20px;
margin-left: 0px;
margin-right: 0px;
margin-top: 12px;
table-layout: fixed;
text-size-adjust: 100%;
width: 700px;
-webkit-border-horizontal-spacing: 0px;
-webkit-border-vertical-spacing: 0px;
-webkit-tap-highlight-color: rgba(0, 0, 0, 0);"""

def HTML_with_style(df, style=None, random_id=None):
    """Render ``df`` as an HTML table with CSS scoped to that one table.

    Parameters
    ----------
    df : pandas.DataFrame
        Rendered with ``to_html(escape=False)``, so cell contents are emitted
        as raw HTML.
    style : str, optional
        CSS rules, optionally wrapped in ``<style>`` tags. Every selector is
        scoped to the rendered table: a selector starting with ``table`` has
        that leading ``table`` (and any ``#id`` attached to it) replaced by
        ``table#<random_id>``; any other selector is prefixed with
        ``table#<random_id> ``. When omitted, the table text is coloured blue.
    random_id : str, optional
        Id given to the ``<table>`` element; generated randomly when omitted.

    Returns
    -------
    IPython.display.HTML
        The ``<style>`` block followed by the table markup.

    Notes
    -----
    Only the selector of each ``selector { declarations }`` rule is rewritten.
    Declarations are left untouched, so rules may span several lines and may
    contain values such as ``display: table``. Nested blocks (e.g. ``@media``)
    are not understood.
    """
    # Local import on purpose: the notebook also imports an unrelated `HTML`
    # from ipywidgets at module level.
    from IPython.display import HTML
    import numpy as np
    import re

    df_html = df.to_html(escape=False)

    if random_id is None:
        random_id = 'id%d' % np.random.randint(1000000)

    scoped_table = 'table#%s' % random_id

    def _scope_selector(selector):
        # Point one selector at the table carrying `random_id`.
        selector = selector.strip()
        if re.match(r'table', selector):
            return re.sub(r'^table(#\S+)?', lambda _m: scoped_table, selector)
        return scoped_table + ' ' + selector

    def _scope_rule(match):
        # Rewrite the (possibly comma-separated) selector list of one CSS rule.
        selectors = [_scope_selector(part) for part in match.group(1).split(',') if part.strip()]
        return '%s {%s}\n' % (', '.join(selectors), match.group(2))

    if style is None:
        style = """
        <style>
            table#{random_id} {{color: blue}}
        </style>
        """.format(random_id=random_id)
    else:
        css = re.sub(r'</?style>', '', style).strip()
        scoped_css = re.sub(r'([^{}]+)\{([^{}]*)\}', _scope_rule, css)
        style = '<style>\n' + scoped_css + '</style>'

    # Tag only the outer table; cells may themselves contain raw HTML.
    df_html = re.sub(r'<table', lambda _m: '<table id=%s ' % random_id, df_html, count=1)

    return HTML(style + df_html)


In [None]:
def btn_click_run(change):
    """Submit-button callback: run the model and render graphs and recommendations.

    Reads the playlist URLs from the module-level text widgets
    (``left_items``, ``right_items``, ``search_items``), runs ``run_model`` and
    writes the results into the module-level output widgets (``graph_pl``,
    ``feat_pl``, ``out_pl2``). ``out_pl`` shows a "Running..." label while the
    work is in progress.

    Parameters
    ----------
    change : value supplied by the widget framework when the button is
        clicked; not used.

    Notes
    -----
    * If any of the three playlist groups is empty, a hint is shown and the
      model is not run.
    * If the pipeline raises, the error text is shown in ``out_pl2`` and the
      exception is re-raised so it is not swallowed.
    * The "Running..." label is cleared in every case.
    """
    # Start from a clean slate in every output area.
    for area in (out_pl2, out_pl, graph_pl, feat_pl):
        area.clear_output()

    with out_pl:
        display(widgets.Label('Running...'))

    try:
        # The first entry of each *_items list is the heading widget, hence [1:].
        good_list = [i.value for i in left_items[1:] if i.value != '']
        bad_list = [i.value for i in right_items[1:] if i.value != '']
        search_list = [i.value for i in search_items[1:] if i.value != '']

        good_playlist_ids = url_to_id(good_list)
        bad_playlist_ids = url_to_id(bad_list)
        search_playlist_ids = url_to_id(search_list)

        if not (good_playlist_ids and bad_playlist_ids and search_playlist_ids):
            with out_pl2:
                display(widgets.Label('Please enter at least one good, one bad and one search playlist URL.'))
            return

        df_out, graphs_out = run_model(good_playlist_ids, bad_playlist_ids, search_playlist_ids)

        with graph_pl:
            display(graphs_out)
        with feat_pl:
            display(feat_box)

        # Turn the raw URL column into clickable links.
        df_out['Track URL'] = '<a href=' + df_out['Track URL'] + '><div>' + 'Link' + '</div></a>'

        out_pl2.clear_output()
        with out_pl2:
            df = HTML_with_style(df_out, '<style>table {{{}}}</style>'.format(my_style))
            display(df)
    except Exception as err:
        # Surface the failure in the UI, then let it propagate as before.
        out_pl2.clear_output()
        with out_pl2:
            display(widgets.Label('Something went wrong: {}'.format(err)))
        raise
    finally:
        out_pl.clear_output()

    

In [None]:
# Text inputs for the playlist URLs: four "good" and four "bad" slots, the first of
# each marked as required in its placeholder text.
good1, good2, good3, good4 = [
    widgets.Text(placeholder=hint, description='', disabled=False)
    for hint in (
        'Good Playlist URL 1 (Required)',
        'Good Playlist URL 2',
        'Good Playlist URL 3',
        'Good Playlist URL 4',
    )
]

bad1, bad2, bad3, bad4 = [
    widgets.Text(placeholder=hint, description='', disabled=False)
    for hint in (
        'Bad Playlist URL 1 (Required)',
        'Bad Playlist URL 2',
        'Bad Playlist URL 3',
        'Bad Playlist URL 4',
    )
]



In [None]:

# Shared layouts. `top_items_layout` is not referenced anywhere else in the visible cells.
top_items_layout = Layout(width='auto')

# NOTE(review): CSS flex-flow accepts 'column', not 'columns' — confirm the intended value
# (changing it would alter how the boxes using this layout are arranged).
top_box_layout = Layout(display='flex',flex_flow='columns', justify_content='center')

# Feature legend, left column (shown under the graphs via feat_pl).
left1 = widgets.HTML(value='<b>Tempo:</b> Speed of track measured in BPM')
left2 = widgets.HTML(value='<b>Key:</b> Track key in Pitch Standard notation')
left3 = widgets.HTML(value='<b>Danceability:</b> 0.0 = Lowest, 1.0 = Highest')
left_feat_items = [left1, left2, left3]
left_feat_box = VBox(left_feat_items)

# Feature legend, right column.
# NOTE(review): the user-facing strings below contain the misspellings "decible" and "intenisty".
right1 = widgets.HTML(value='<b>Loudness:</b> Average decible (dB) of track')
right2 = widgets.HTML(value='<b>Speechiness:</b> Scale of vocal presence in track')
right3 = widgets.HTML(value='<b>Energy:</b> Measure of track intenisty')
right_feat_items = [right1, right2, right3]


right_feat_box = VBox(right_feat_items)

# Both legend columns side by side; displayed by btn_click_run inside feat_pl.
feat_box = HBox([left_feat_box,widgets.Label(' '), right_feat_box], layout=top_box_layout)

# Search-playlist input. btn_click_run reads search_items[1:], i.e. it skips the heading widget.
search_playlist = widgets.Text(placeholder='Enter a playlist URL', description='', disabled=False)
search_items = [widgets.HTML(value='<b>Search Playlist:</b>'),search_playlist]
search_box = VBox(search_items)
  
# Submit button; the empty Label above it acts as a spacer.
submit_btn = widgets.Button(description='Submit',button_style='info')
submit_box = VBox([widgets.Label(''),submit_btn])#, layout=top_box_layout)
search_submit = HBox([search_box,widgets.Label(''),submit_box])#,layout=top_box_layout)

# Output areas written to by btn_click_run:
#   out_pl   - transient "Running..." status
#   graph_pl - feature histograms
#   out_pl2  - recommendation table
#   feat_pl  - feature legend (feat_box)
out_pl = widgets.Output()
graph_pl = widgets.Output()
out_pl2 = widgets.Output()
feat_pl = widgets.Output()

# Static instructions shown to the left of the input fields.
inp_item0 = widgets.HTML(value='<b>Model Inputs:</b>')
inp_item1 = widgets.HTML(value=' 1.) Paste link(s) to public Spotify playlists you enjoy')
inp_item2 = widgets.HTML(value=" 2.) Paste link(s) to public Spotify playlists you don't enjoy")
inp_item3 = widgets.HTML(value=" 3.) Link a playlist to find songs from (ex. discover weekly)")
inp_item4 = widgets.HTML(value=" 4.) Submit")

# The leading empty Label is a vertical spacer.
inp_items = [widgets.Label(''),inp_item0,inp_item1,inp_item2,inp_item3,inp_item4]
inp_box = VBox(inp_items)

# Static description of what the model produces, shown to the right of the input fields.
# NOTE(review): "reccomendations" is misspelled in the user-facing string below.
out_item0 = widgets.HTML(value='<b>Model Outputs:</b>')
out_item1 = widgets.HTML(value=' - The model will analyze the features of the songs')
out_item2 = widgets.HTML(value=" - It will learn what you like and dislike")
out_item3 = widgets.HTML(value=" - Graph the individual features")
out_item4 = widgets.HTML(value=" - Provide reccomendations from the search playlist")

out_items = [widgets.Label(''),out_item0,out_item1,out_item2,out_item3,out_item4]
out_box = VBox(out_items)


# Good / bad playlist columns. The first element of each list is a heading widget;
# btn_click_run reads the URL fields from index 1 onwards.
left_items = [widgets.HTML(value='<b>Good Playlists:</b>'), good1, good2, good3, good4]
right_items = [widgets.HTML(value='<b>Bad Playlists:</b>'), bad1, bad2, bad3, bad4]

left_side = VBox(left_items, layout=top_box_layout)
right_side = VBox(right_items, layout=top_box_layout)

good_bad_box = HBox([left_side, widgets.Label(' '), right_side])

# Middle column: playlist inputs above the search field and submit button.
top_items = [widgets.Label(''), good_bad_box, widgets.Label(''), search_submit]
master_top1 = VBox(top_items)

# Top row: instructions | inputs | output description (empty Labels are spacers).
master_top = HBox([inp_box,widgets.Label(''), master_top1,widgets.Label(''),widgets.Label(''), out_box], layout=top_box_layout)

# Bottom row: graphs with their legend on the left, recommendation table on the right.
graph_feat_box = VBox([graph_pl, feat_pl])

master_bottom = HBox([graph_feat_box,widgets.Label(''),out_pl2])

master = VBox([master_top, widgets.Label(''),master_bottom])

# Render the UI and register the click handler on the submit button.
display(master)
submit_btn.on_click(btn_click_run)
