In [1]:
import pandas as pd
import numpy as np
import random
import altair as alt
import plotly.graph_objects as go
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import networkx as nx
import networkx.algorithms.community as nx_comm
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import plotly.express as px
import pyvis
from pyvis import network as net
from copy import deepcopy
import time
import csv
from sklearn.neighbors import NearestNeighbors

In [2]:
def get_SP(ID, Secret, Name):
    CLIENT_ID = ID
    CLIENT_SECRET = Secret
    my_username = Name

    # instantiating the client.  This 'sp' version of the client is used repeatedly below
    # source: Max Hilsdorf (https://towardsdatascience.com/how-to-create-large-music-datasets-using-spotipy-40e7242cc6a6)
    client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager, status_retries=3, backoff_factor=0.3)
    
    return sp

In [3]:
sp = get_SP("09f53c0a70fc4818ab7438fb2dff64dd", "fcfcaea7cc9a42f697da7ce6d4680d3d", "Fejiro Anigboro")
sp1 = get_SP("922db1c93c90406fb8608e2729577cf4", "8561aa0a985f470f91193d912a494287", "my_spotify_name")
sp2 = get_SP("79724bfb388b451e9d80af9972ed118f", "4c1784725f7a468aa97127ee54d63209", "rami")
sp3 = get_SP("d7e20fdf6b044b8c83c6ef74045bd89d", "bb863640b3844f3a84289182ce5a7c01", "Idan")
sp4 = get_SP("6ca97a1822a849458f3c105d5f83e3c2", "bb57f33f51da4321b14e0f5309abf7ca", "my_spotify_F2")
all_sps = [sp1, sp2, sp3, sp4]

In [4]:
def filter_info(playlist_id, username="Any user"):
    r = sp.user_playlist_tracks(username, playlist_id)
    t = r['items']
    

    info_list = []
    
    
    while r['next']:
        r = sp.next(r)
        t.extend(r['items'])
    for s in t: 
        album_type = s['track']['album']['album_type'] 
        song_id = s["track"]["id"]
        song_name = s["track"]["name"]
        #print(song_name)
        
        artist_name = s["track"]["artists"][0]["name"]
        #print(artist_name)
        
        # this gives the id of an artist
        artist_id = s["track"]["artists"][0]["id"] # use href if I need to get the full api call for this -- split by "/" thne get [-1] element
        #print(artist_id) 
        
        popularity = s["track"]["popularity"]
        
        #print(s)
        
        try:
            # getting the url of the album
            album_id = s['track']['album']['id']
            #print(album_id) # split by same condiiton above
        except:
            album_id = None
           
        
        info_list.append({"song_name" : song_name, "song_id": song_id, "artist" : artist_name, "artist_id": artist_id, "album_type": album_type, "album_id": album_id, "popularity": popularity})
        
    
    return pd.DataFrame(info_list)

In [5]:
test_pl = filter_info("7mVCbkMcSXApmQ08F9uUi5")

In [6]:
def get_artist_top_tracks(artist_id, sp):
    top_hits = sp.artist_top_tracks(artist_id, country='US')
    info_list = []
    
    for track in top_hits['tracks']:
        song_id = track["id"]
        song_name = track["name"]
        artist_name = track["artists"][0]["name"]
        popularity = track["popularity"]
        cover_img = track['album']['images'][1]['url']
        track_preview = track["preview_url"]
        
        info_list.append({"song_name" : song_name, "song_id" : song_id, "artist" : artist_name, "popularity": popularity, "track_preview": track_preview, "track_img": cover_img})
    
    return pd.DataFrame(info_list)

In [7]:
gatt = get_artist_top_tracks("4ETSs924pXMzjIeD6E9b4u", sp)

In [8]:
def csv_writer(info):
    with open('Song_data.csv', 'a', newline='') as file:
        # Step 4: Using csv.writer to write the list to the CSV file
        writer = csv.writer(file)
        writer.writerow(info) # Use writerow for single list

In [9]:
df = pd.read_csv("data/song_info_final.csv")
selected_columns = ['id',
                    'song_name',
                    'danceability',
                     'energy',
                     'key',
                     'loudness',
                     'mode',
                     'speechiness',
                     'acousticness',
                     'instrumentalness',
                     'liveness',
                     'valence',
                     'tempo'
                                         ]

filtered_df = df[selected_columns]

In [34]:
def get_audio_features_slowly(track_info, sp): #replace this(sp) -- since using jhub, can leave this for now.
    track_dict_list = []
    tracker = 0
    for track in track_info:
        tracker += 1
        try:
            audio_features_temp = all_sps[tracker%len(all_sps)].audio_features(track)
            track_dict_list.append(audio_features_temp)
        except Exception as e:
            print(e, track)

        print(tracker)
    
    track_info_df = pd.DataFrame()
    
    for track in track_dict_list:
        track_dict_df = pd.DataFrame(track[0], [0])
        track_info_df = pd.concat([track_info_df, track_dict_df], ignore_index=True)
        
    return get_our_recommended_songs(track_info_df)

In [31]:
def get_our_recommended_songs(track_df):
    matrix_cols = [  'danceability',
                     'energy',
                     'key',
                     'loudness',
                     'mode',
                     'speechiness',
                     'acousticness',
                     'instrumentalness',
                     'liveness',
                     'valence',
                     'tempo'
                                         ]
    our_cols = selected_columns
    our_cols.remove('song_name')
    user_track_df = user_track_df[our_cols]
    user_track_matrix = user_track_df[matrix_cols].to_numpy()
    user_playlist_avg = np.mean(user_track_matrix, axis=0)
    
    database_tracks = pd.read_csv("data/song_info_final.csv")
    database_matrix = database_tracks[matrix_cols].to_numpy()

    return track_df

In [32]:
def userInput(playlist_id):
    playlist_songs = filter_info(playlist_id)
    playlist_songs = playlist_songs.drop_duplicates(subset=['artist_id'])

    top_tracks_artists = []
    
    artist_ids = set(list(playlist_songs["artist_id"]))
    album_ids = set(list(playlist_songs["album_id"]))
    track_ids = set(list(playlist_songs['song_id']))
    
 
    top_hits = pd.DataFrame(columns=["song_name", "song_id", "artist", "popularity"])
    all_album_tracks = pd.DataFrame() 
    
    for artist_id in artist_ids:
        top_tracks_artists.append(pd.DataFrame(get_artist_top_tracks(artist_id, sp)))
    
    for df in top_tracks_artists:
        df.dropna(inplace=True)
        df.sort_values(by='popularity', inplace=True)
        top_hits = pd.concat([top_hits, df.head(2)], ignore_index=True)
        
    top_hits = top_hits.drop_duplicates().sort_values(by='popularity', ignore_index=True)
    
    for album_id in album_ids:
        all_album_tracks = pd.concat([all_album_tracks, get_album_tracks(album_id)], ignore_index=True)
    
    
    all_album_tracks.dropna(inplace=True)
    all_album_tracks = all_album_tracks.drop_duplicates(ignore_index=True)
    
    track_data = get_audio_features_slowly(track_ids, sp2)
    
    return top_hits, all_album_tracks, track_data

In [33]:
user = userInput("7mVCbkMcSXApmQ08F9uUi5")

1
2
3
4
5
6
7
8
9
10
11


UnboundLocalError: local variable 'user_track_df' referenced before assignment

In [None]:
user[2]

NameError: name 'user' is not defined

In [21]:
def get_album_tracks(album_id):
    album_info = sp.album(album_id=album_id)
    album_track_info = album_info['tracks']['items']
    track_img = album_info['images'][1]['url']
    album_df = pd.DataFrame(columns=["song_name", "song_id", "track_preview", "track_img"])
    index = 0
    
    for track in album_track_info:
        df = pd.DataFrame({"song_name": track["name"] , "song_id": track["id"], "track_preview": track['preview_url'], "track_img": track_img}, [index])
        album_df = pd.concat([album_df, df], ignore_index=True)
        index += 1
        
    return album_df

In [22]:
gat = get_album_tracks("6P1sBa0T1fRooA0UTAQfOu")

In [23]:
gat

Unnamed: 0,song_name,song_id,track_preview,track_img
0,My Love,5p0ietGkLNEqx1Z7ijkw5g,https://p.scdn.co/mp3-preview/08a6e25e893d1b3d...,https://i.scdn.co/image/ab67616d00001e0215f19e...
1,What Makes a Man,3W2QKq6ks7QInnM9j4pOBt,https://p.scdn.co/mp3-preview/14002d28bb45df54...,https://i.scdn.co/image/ab67616d00001e0215f19e...
2,I Lay My Love on You - Remix,0V82wcNlunw76nvvmPL9tk,https://p.scdn.co/mp3-preview/2ef349405c09ef13...,https://i.scdn.co/image/ab67616d00001e0215f19e...
3,Against All Odds (Take A Look at Me Now) (feat...,0Ac0Ge47UpkVceZyPin7IP,https://p.scdn.co/mp3-preview/53930a96590ebd7d...,https://i.scdn.co/image/ab67616d00001e0215f19e...
4,When You're Looking Like That - Single Remix,5t3oszlshIPTzpAwcCMqgw,https://p.scdn.co/mp3-preview/aa18d47a224bd9ce...,https://i.scdn.co/image/ab67616d00001e0215f19e...
5,Close,491wygUI74zXsPjtl28sRL,https://p.scdn.co/mp3-preview/6a6cb8b812539b79...,https://i.scdn.co/image/ab67616d00001e0215f19e...
6,Somebody Needs You,5vFxnu2jkxn8vTA185oH10,https://p.scdn.co/mp3-preview/8ebd84c95ffbcd1c...,https://i.scdn.co/image/ab67616d00001e0215f19e...
7,Angels Wings,5FfyTkLyYtIIw26hVTcTyl,https://p.scdn.co/mp3-preview/89af7a6c027cf3c4...,https://i.scdn.co/image/ab67616d00001e0215f19e...
8,Soledad,1vYZywcjdbPUnsEP43it0q,https://p.scdn.co/mp3-preview/92375e6aa9a8756f...,https://i.scdn.co/image/ab67616d00001e0215f19e...
9,Puzzle of My Heart,3rn07BEDuQLxbpKsStMido,https://p.scdn.co/mp3-preview/55f144d3bde42553...,https://i.scdn.co/image/ab67616d00001e0215f19e...


In [None]:
sp.recommendations(seed_tracks = ['0ofbQMrRDsUaVKq2mGLEAb'], limit=5, country="US")['tracks'][0]

{'album': {'album_type': 'SINGLE',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/7CajNmpbOovFoOoasH2HaY'},
    'href': 'https://api.spotify.com/v1/artists/7CajNmpbOovFoOoasH2HaY',
    'id': '7CajNmpbOovFoOoasH2HaY',
    'name': 'Calvin Harris',
    'type': 'artist',
    'uri': 'spotify:artist:7CajNmpbOovFoOoasH2HaY'},
   {'external_urls': {'spotify': 'https://open.spotify.com/artist/2wY79sveU1sp5g7SokKOiI'},
    'href': 'https://api.spotify.com/v1/artists/2wY79sveU1sp5g7SokKOiI',
    'id': '2wY79sveU1sp5g7SokKOiI',
    'name': 'Sam Smith',
    'type': 'artist',
    'uri': 'spotify:artist:2wY79sveU1sp5g7SokKOiI'},
   {'external_urls': {'spotify': 'https://open.spotify.com/artist/3KedxarmBCyFBevnqQHy3P'},
    'href': 'https://api.spotify.com/v1/artists/3KedxarmBCyFBevnqQHy3P',
    'id': '3KedxarmBCyFBevnqQHy3P',
    'name': 'Jessie Reyez',
    'type': 'artist',
    'uri': 'spotify:artist:3KedxarmBCyFBevnqQHy3P'}],
  'external_urls': {'spotify': 'https://ope

In [None]:
spr = sp.recommendations(seed_tracks = list(df['id'])[:5], limit=5, country="US")['tracks'][0]
print(spr.keys())
print(spr['name'])
print(spr['id'])
print(spr['external_urls']['spotify'])
print(spr['album']['images'][0]['url'])

dict_keys(['album', 'artists', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'is_playable', 'linked_from', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])
7 Years
5kqIPrATaCc2LqxVWzQGbk
https://open.spotify.com/track/5kqIPrATaCc2LqxVWzQGbk
https://i.scdn.co/image/ab67616d0000b2739c2ba50154a64c3c898ec7e2


In [None]:
matrix_cols = [  'danceability',
                 'energy',
                 'key',
                 'loudness',
                 'mode',
                 'speechiness',
                 'acousticness',
                 'instrumentalness',
                 'liveness',
                 'valence',
                 'tempo'
                                     ]

filtered_df_matrix = filtered_df[matrix_cols].to_numpy()

In [None]:
filtered_df_matrix[1]

array([ 6.8700e-01,  8.4500e-01,  7.0000e+00, -4.3700e+00,  1.0000e+00,
        5.7600e-02,  1.0000e-01,  0.0000e+00,  4.5200e-02,  8.0900e-01,
        8.7972e+01])

In [None]:
neigh = NearestNeighbors(n_neighbors=3)
neigh.fit(filtered_df_matrix)
neigh.kneighbors([[ 6.150e-01,  7.790e-01,  2.000e+00, -6.454e+00,  1.000e+00,
        1.350e-01,  6.650e-02,  0.000e+00,  1.550e-01,  4.530e-01,
        1.600e+02], [ 6.8700e-01,  8.4500e-01,  7.0000e+00, -4.3700e+00,  1.0000e+00,
        5.7600e-02,  1.0000e-01,  0.0000e+00,  4.5200e-02,  8.0900e-01,
        8.7972e+01] , [ 6.8700e-01,  8.4500e-01,  7.0000e+00, -4.3700e+00,  1.0000e+00,
        5.7600e-02,  1.0000e-01,  0.0000e+00,  4.5200e-02,  8.0900e-01,
        8.7972e+01]], return_distance=False)

array([[   0,  582, 3945],
       [   1, 6244, 4079],
       [   1, 6244, 4079]], dtype=int64)

In [None]:
filtered_df_matrix[582]

array([ 6.13000e-01,  7.64000e-01,  2.00000e+00, -6.50900e+00,
        1.00000e+00,  1.36000e-01,  5.27000e-02,  0.00000e+00,
        1.97000e-01,  4.17000e-01,  1.60015e+02])

In [None]:
filtered_df.iloc[0], filtered_df.iloc[582], filtered_df.iloc[3945]

(id                  7zgqtptZvhf8GEmdsM2vp2
 song_name                 ...Ready For It?
 danceability                         0.615
 energy                               0.779
 key                                      2
 loudness                            -6.454
 mode                                     1
 speechiness                          0.135
 acousticness                        0.0665
 instrumentalness                       0.0
 liveness                             0.155
 valence                              0.453
 tempo                                160.0
 Name: 0, dtype: object,
 id                  2yLa0QULdQr0qAIvVwN6B5
 song_name                 ...Ready For It?
 danceability                         0.613
 energy                               0.764
 key                                      2
 loudness                            -6.509
 mode                                     1
 speechiness                          0.136
 acousticness                        0.0527
 instru