In [178]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [179]:
# Directory the notebook is running from; the data folder is resolved relative to it.
# os.getcwd() replaces the `!pwd` shell escape so the cell does not depend on IPython
# shell magics or on a `pwd` command being available on the host.
root_path = os.getcwd()

In [180]:
# Name of the playlist slice to load, and the directory (under root_path) that holds it
filename = 'mpd.slice.0-999.json'
filepath = f"{root_path}/playlist_data/sampledata/"

In [181]:
# Full location of the playlist file: directory prefix followed by the file name
fpath_name = filepath + filename

In [182]:
# Load the playlist slice into a dict.
# The encoding is given explicitly because the file contains non-ASCII text
# (artist and album names), and the platform default encoding is not always UTF-8.
with open(fpath_name, encoding="utf-8") as data_file:
    data = json.load(data_file)

In [183]:
# # Explore fields in json file
# print("File keys: ", list(data.keys()))
# print("info: ",data[list(data.keys())[0]])
# #print(f"{list(data.keys())[1]} : ",data[list(data.keys())[1]])
# print("playlists: ",list(data['playlists'][0].keys()))
# print("tracks: ", list(data['playlists'][0]['tracks'][0].keys()))

In [184]:
# Collect every field name that appears on any playlist, in first-seen order.
# Not all playlists carry the same fields, so the union is taken over the whole
# list instead of comparing each playlist only with its predecessor (which left
# `keys` unbound whenever the first playlist already had the most fields).
keys = list(dict.fromkeys(
    field
    for playlist in data['playlists']
    for field in playlist
))

In [185]:
# Playlist-level fields to carry as metadata: everything except 'tracks'.
# Filtering (instead of list.remove) keeps the cell safe to re-run: a second
# execution no longer raises ValueError because 'tracks' is already gone.
keys = [key for key in keys if key != 'tracks']
print(" Playlist keys: ", keys)

 Playlist keys:  ['name', 'collaborative', 'pid', 'modified_at', 'num_tracks', 'num_albums', 'num_followers', 'num_edits', 'duration_ms', 'num_artists', 'description']


In [186]:
# Flatten the playlists into one row per track.
# Track fields get a 'track_' prefix; the playlist fields listed in `keys` are
# repeated on every row with a 'playlist_' prefix. errors='ignore' tolerates
# playlists that lack one of the requested metadata fields.
music_df = pd.json_normalize(
    data['playlists'],
    record_path=['tracks'],
    record_prefix='track_',
    meta=keys,
    meta_prefix='playlist_',
    errors='ignore',
)
music_df.head()

Unnamed: 0,track_pos,track_artist_name,track_track_uri,track_artist_uri,track_track_name,track_album_uri,track_duration_ms,track_album_name,playlist_name,playlist_collaborative,playlist_pid,playlist_modified_at,playlist_num_tracks,playlist_num_albums,playlist_num_followers,playlist_num_edits,playlist_duration_ms,playlist_num_artists,playlist_description
0,0,Missy Elliott,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,Throwbacks,False,0,1493424000,52,47,1,6,11532414,37,
1,1,Britney Spears,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Toxic,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,198800,In The Zone,Throwbacks,False,0,1493424000,52,47,1,6,11532414,37,
2,2,Beyoncé,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,spotify:artist:6vWDO969PvNqNYHIOW5v0m,Crazy In Love,spotify:album:25hVFAxTlDvXbx2X2QkUkE,235933,Dangerously In Love (Alben für die Ewigkeit),Throwbacks,False,0,1493424000,52,47,1,6,11532414,37,
3,3,Justin Timberlake,spotify:track:1AWQoqb9bSvzTjaLralEkT,spotify:artist:31TPClRtHm23RisEBtV3X7,Rock Your Body,spotify:album:6QPkyl04rXwTGlGlcYaRoW,267266,Justified,Throwbacks,False,0,1493424000,52,47,1,6,11532414,37,
4,4,Shaggy,spotify:track:1lzr43nnXAijIGYnCT8M8H,spotify:artist:5EvFsr3kj42KNv97ZEnqij,It Wasn't Me,spotify:album:6NmFmPX56pcLBOFMhIiKvF,227600,Hot Shot,Throwbacks,False,0,1493424000,52,47,1,6,11532414,37,


In [187]:
# Show which columns are available on the rows of the playlist with pid 0
pid_zero_rows = music_df.loc[music_df['playlist_pid'] == 0]
pid_zero_rows.columns

Index(['track_pos', 'track_artist_name', 'track_track_uri', 'track_artist_uri',
       'track_track_name', 'track_album_uri', 'track_duration_ms',
       'track_album_name', 'playlist_name', 'playlist_collaborative',
       'playlist_pid', 'playlist_modified_at', 'playlist_num_tracks',
       'playlist_num_albums', 'playlist_num_followers', 'playlist_num_edits',
       'playlist_duration_ms', 'playlist_num_artists', 'playlist_description'],
      dtype='object')

In [188]:
# Ids of the playlists whose description field is not missing
has_description = music_df['playlist_description'].notna()
playlists_id_with_description = music_df.loc[has_description, 'playlist_pid'].unique()
print(playlists_id_with_description)

[94 102 320 329 339 353 354 370 475 491 522 526 548 626 741 767 778 818
 871 925]


In [189]:
# Keep only the track rows that belong to a playlist with a description
in_described_playlist = music_df['playlist_pid'].isin(playlists_id_with_description)
subset_df = music_df.loc[in_described_playlist]
print(subset_df['playlist_pid'].unique())

[94 102 320 329 339 353 354 370 475 491 522 526 548 626 741 767 778 818
 871 925]


In [207]:
# Size of the working subset and how much of it is repeated.
# Distinct (artist, track) pairs are counted with drop_duplicates on the two
# columns; this avoids building a joined string per row in Python and does not
# raise when a name is missing.
song_pairs = subset_df[['track_artist_name', 'track_track_name']]
num_records = len(subset_df)
num_unique_records = len(song_pairs.drop_duplicates())
print('Number of table records: ', num_records)
print('Number of unique artist, song pairs: ', num_unique_records )
print('Number of repeated songs: ', num_records - num_unique_records )
print('Number of playlists: ', subset_df['playlist_pid'].nunique())
print('Number of unique artists: ', subset_df['track_artist_name'].nunique())

Number of table records:  1543
Number of unique artist, song pairs:  1486
Number of repeated songs:  57
Number of playlists:  20
Number of unique artists:  754


In [196]:
# Fetch Spotify audio features and popularity metadata for playlist tracks
def get_features(df, max_records=100, client=None):
    """Collect audio features and popularity metadata for the tracks in ``df``.

    For each selected row the client is asked for the track's audio features
    and popularity, the artist's genres and popularity, and the album's
    popularity.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``track_track_uri``, ``track_artist_uri`` and
        ``track_album_uri``.
    max_records : int or None, default 100
        Number of leading rows of ``df`` to process. ``None`` processes every
        row (four requests are issued per row).
    client : optional
        Object exposing ``audio_features``, ``track``, ``artist`` and ``album``
        with the call signatures used below. Defaults to the notebook-level
        ``spotify`` client, looked up when the function is called.

    Returns
    -------
    pandas.DataFrame
        One row per processed track, indexed (index name ``'index'``) by the
        row label the track had in ``df``. Columns are the audio-feature fields
        followed by ``song_popularity``, ``artist_genre``,
        ``artist_popularity`` and ``album_popularity``. An empty selection
        returns an empty frame holding only those four columns.
    """
    if client is None:
        # Fall back to the notebook-level client created below this function.
        client = spotify
    selected_rows = df if max_records is None else df.iloc[:max_records]

    extra_columns = ['song_popularity', 'artist_genre', 'artist_popularity', 'album_popularity']
    indexes = []
    records = []
    for row_index, row in selected_rows.iterrows():
        track_uri = row['track_track_uri']

        # A single audio-features request per track; its keys become the columns.
        feature_list = client.audio_features(tracks=track_uri)
        features = feature_list[0] if feature_list else None
        # A missing entry yields a row that only carries the URI, rather than
        # failing with AttributeError on None.
        record = dict(features) if features else {'uri': track_uri}

        artist_results = client.artist(row['track_artist_uri'])
        album_results = client.album(row['track_album_uri'])
        record['song_popularity'] = client.track(track_uri)['popularity']
        record['artist_genre'] = artist_results['genres']
        record['artist_popularity'] = artist_results['popularity']
        record['album_popularity'] = album_results['popularity']

        indexes.append(row_index)
        records.append(record)

    features_df = pd.DataFrame(records, index=pd.Index(indexes, name='index'))
    # Keep the audio-feature columns first and the derived columns last, also
    # when no rows were processed or the first track had no audio features.
    audio_columns = [col for col in features_df.columns if col not in extra_columns]
    return features_df.reindex(columns=audio_columns + extra_columns)

# SpotifyClientCredentials is constructed without arguments, so no credentials
# are hard-coded here; they have to be supplied through spotipy's own
# configuration (see the spotipy documentation).
spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

In [170]:
# Fetch the Spotify features for the subset and drop the bookkeeping fields
# of the audio-features payload that are not needed downstream.
audio_sp_df = get_features(subset_df).drop(columns=['type', 'id', 'track_href', 'analysis_url'])
audio_sp_df.index.name = None
audio_sp_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,uri,duration_ms,time_signature,song_popularity,artist_genre,artist_popularity,album_popularity
5431,0.546,0.311,1,-10.396,1,0.0288,0.8180,0.000017,0.1070,0.246,147.464,spotify:track:13HVjjWUZFaWilh2QUJKsP,260533,4,71,"[neo mellow, pop, uk pop]",71,68
5432,0.448,0.431,4,-11.475,0,0.0456,0.8840,0.000009,0.1000,0.240,173.728,spotify:track:4jpwHDjemKbmNy0pyWDHpr,233973,3,0,"[folk-pop, neo mellow]",70,0
5433,0.627,0.699,10,-4.504,1,0.0244,0.1740,0.000000,0.0789,0.535,94.003,spotify:track:1a1i2IwXnUN0PjllVEhA4B,261427,4,34,"[celtic rock, pop]",73,33
5434,0.742,0.479,10,-5.830,1,0.0551,0.2900,0.000000,0.3910,0.346,119.951,spotify:track:7129iqBafaphfc3WPCGC0L,237300,4,53,"[danish pop, scandipop]",70,53
5435,0.343,0.241,0,-10.980,1,0.0367,0.8080,0.000000,0.1070,0.169,184.232,spotify:track:6hsP7b83RjEY51ENfo1ZOc,233240,3,36,[idol],27,33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5526,0.515,0.867,11,-5.438,1,0.1060,0.0456,0.000000,0.1380,0.523,129.961,spotify:track:3Vmbt5Yw39WqaIg4eRoOmk,236547,4,47,"[neon pop punk, pop punk, vegas indie]",56,59
5527,0.510,0.760,0,-6.995,1,0.0411,0.1480,0.011600,0.0706,0.550,118.829,spotify:track:5YtxOL4iUchhynLL9nEBwQ,170787,4,0,"[classic rock, glam rock, rock]",85,0
5528,0.692,0.651,9,-8.267,1,0.0324,0.2920,0.002410,0.1050,0.706,97.923,spotify:track:0bRXwKfigvpKZUurwqAlEh,254560,4,80,"[funk, quiet storm, soul]",72,72
5529,0.502,0.156,0,-9.155,1,0.0556,0.8830,0.000000,0.0867,0.352,164.191,spotify:track:0golpXYIyNhpxz9GH1MMTR,224880,4,14,[post-teen pop],54,17


In [198]:
# Columns available as candidate parameters for the model
audio_sp_df.columns

Index(['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'uri', 'duration_ms', 'time_signature', 'song_popularity',
       'artist_genre', 'artist_popularity', 'album_popularity'],
      dtype='object')

In [171]:
# Attach the Spotify features to the playlist rows they were fetched for.
# audio_sp_df is indexed by the row labels of subset_df, so the join is done on
# that shared index. Joining on the track URI instead would duplicate rows
# whenever the same track appears more than once in the selected rows.
check_df = (
    subset_df.iloc[0:100]
    .merge(audio_sp_df, how='left', left_index=True, right_index=True)
    .reset_index(drop=True)
    # Drop identifiers and playlist bookkeeping columns not used in the checks below.
    .drop(columns=[
        'track_pos', 'uri', 'mode',
        'playlist_duration_ms', 'playlist_num_albums', 'playlist_num_artists',
        'track_artist_uri', 'track_album_uri', 'track_duration_ms',
        'playlist_num_followers', 'playlist_num_edits', 'playlist_collaborative',
        'playlist_modified_at', 'playlist_num_tracks',
    ])
)
check_df

Unnamed: 0,track_artist_name,track_track_uri,track_track_name,track_album_name,playlist_name,playlist_pid,playlist_description,danceability,energy,key,...,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,song_popularity,artist_genre,artist_popularity,album_popularity
0,James Bay,spotify:track:13HVjjWUZFaWilh2QUJKsP,Let It Go,Chaos And The Calm,relax,94,chilllll out,0.546,0.311,1,...,0.000017,0.1070,0.246,147.464,260533,4,71,"[neo mellow, pop, uk pop]",71,68
1,Passenger,spotify:track:4jpwHDjemKbmNy0pyWDHpr,All the Little Lights,All The Little Lights,relax,94,chilllll out,0.448,0.431,4,...,0.000009,0.1000,0.240,173.728,233973,3,0,"[folk-pop, neo mellow]",70,0
2,The Script,spotify:track:1a1i2IwXnUN0PjllVEhA4B,Breakeven,The Script,relax,94,chilllll out,0.627,0.699,10,...,0.000000,0.0789,0.535,94.003,261427,4,34,"[celtic rock, pop]",73,33
3,Lukas Graham,spotify:track:7129iqBafaphfc3WPCGC0L,7 Years,Lukas Graham (Blue Album),relax,94,chilllll out,0.742,0.479,10,...,0.000000,0.3910,0.346,119.951,237300,4,53,"[danish pop, scandipop]",70,53
4,Jason Castro,spotify:track:6hsP7b83RjEY51ENfo1ZOc,Hallelujah,Jason Castro,relax,94,chilllll out,0.343,0.241,0,...,0.000000,0.1070,0.169,184.232,233240,3,36,[idol],27,33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,The Cab,spotify:track:3Vmbt5Yw39WqaIg4eRoOmk,Endlessly,Symphony Soldier,relax,94,chilllll out,0.515,0.867,11,...,0.000000,0.1380,0.523,129.961,236547,4,47,"[neon pop punk, pop punk, vegas indie]",56,59
96,Queen,spotify:track:5YtxOL4iUchhynLL9nEBwQ,You're My Best Friend - Remastered 2011,A Night At The Opera,relax,94,chilllll out,0.510,0.760,0,...,0.011600,0.0706,0.550,118.829,170787,4,0,"[classic rock, glam rock, rock]",85,0
97,Bill Withers,spotify:track:0bRXwKfigvpKZUurwqAlEh,Lovely Day,Menagerie,relax,94,chilllll out,0.692,0.651,9,...,0.002410,0.1050,0.706,97.923,254560,4,80,"[funk, quiet storm, soul]",72,72
98,Grace VanderWaal,spotify:track:0golpXYIyNhpxz9GH1MMTR,Beautiful Thing,Perfectly Imperfect (Japan Version),relax,94,chilllll out,0.502,0.156,0,...,0.000000,0.0867,0.352,164.191,224880,4,14,[post-teen pop],54,17


In [176]:
# Summary statistics of check_df as produced by the merge
check_df.describe()

Unnamed: 0,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,song_popularity,artist_popularity,album_popularity
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.54484,0.498211,5.09,-8.25196,0.051032,0.411161,0.011082,0.138284,0.379152,124.19823,225042.67,3.92,58.23,65.22,59.34
std,0.117165,0.21346,3.530802,3.872903,0.044053,0.318725,0.084354,0.087923,0.177338,35.150118,41958.409532,0.485757,20.829202,17.799433,19.695264
min,0.209,0.0262,0.0,-27.691,0.0243,0.00186,0.0,0.0368,0.0797,63.05,116532.0,1.0,3.0,5.0,1.0
25%,0.46475,0.323,2.0,-10.126,0.02915,0.118,0.0,0.09525,0.24925,93.7605,195005.5,4.0,44.75,56.0,51.25
50%,0.543,0.504,5.0,-7.499,0.03505,0.296,0.0,0.112,0.355,123.4075,227619.5,4.0,64.5,68.5,63.0
75%,0.62825,0.66575,7.25,-5.62125,0.050325,0.74525,1.8e-05,0.15175,0.49175,147.87,245570.0,4.0,73.25,76.0,73.0
max,0.824,0.928,11.0,-3.325,0.328,0.964,0.829,0.686,0.955,205.903,376827.0,5.0,91.0,100.0,90.0


In [199]:
# Where album or song popularity is 0, substitute the artist's popularity
for popularity_col in ('album_popularity', 'song_popularity'):
    is_zero = check_df[popularity_col] == 0
    check_df[popularity_col] = np.where(is_zero, check_df['artist_popularity'], check_df[popularity_col])
check_df

Unnamed: 0,track_artist_name,track_track_uri,track_track_name,track_album_name,playlist_name,playlist_pid,playlist_description,danceability,energy,key,...,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,song_popularity,artist_genre,artist_popularity,album_popularity
0,James Bay,spotify:track:13HVjjWUZFaWilh2QUJKsP,Let It Go,Chaos And The Calm,relax,94,chilllll out,0.546,0.311,1,...,0.000017,0.1070,0.246,147.464,260533,4,71,"[neo mellow, pop, uk pop]",71,68
1,Passenger,spotify:track:4jpwHDjemKbmNy0pyWDHpr,All the Little Lights,All The Little Lights,relax,94,chilllll out,0.448,0.431,4,...,0.000009,0.1000,0.240,173.728,233973,3,70,"[folk-pop, neo mellow]",70,70
2,The Script,spotify:track:1a1i2IwXnUN0PjllVEhA4B,Breakeven,The Script,relax,94,chilllll out,0.627,0.699,10,...,0.000000,0.0789,0.535,94.003,261427,4,34,"[celtic rock, pop]",73,33
3,Lukas Graham,spotify:track:7129iqBafaphfc3WPCGC0L,7 Years,Lukas Graham (Blue Album),relax,94,chilllll out,0.742,0.479,10,...,0.000000,0.3910,0.346,119.951,237300,4,53,"[danish pop, scandipop]",70,53
4,Jason Castro,spotify:track:6hsP7b83RjEY51ENfo1ZOc,Hallelujah,Jason Castro,relax,94,chilllll out,0.343,0.241,0,...,0.000000,0.1070,0.169,184.232,233240,3,36,[idol],27,33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,The Cab,spotify:track:3Vmbt5Yw39WqaIg4eRoOmk,Endlessly,Symphony Soldier,relax,94,chilllll out,0.515,0.867,11,...,0.000000,0.1380,0.523,129.961,236547,4,47,"[neon pop punk, pop punk, vegas indie]",56,59
96,Queen,spotify:track:5YtxOL4iUchhynLL9nEBwQ,You're My Best Friend - Remastered 2011,A Night At The Opera,relax,94,chilllll out,0.510,0.760,0,...,0.011600,0.0706,0.550,118.829,170787,4,85,"[classic rock, glam rock, rock]",85,85
97,Bill Withers,spotify:track:0bRXwKfigvpKZUurwqAlEh,Lovely Day,Menagerie,relax,94,chilllll out,0.692,0.651,9,...,0.002410,0.1050,0.706,97.923,254560,4,80,"[funk, quiet storm, soul]",72,72
98,Grace VanderWaal,spotify:track:0golpXYIyNhpxz9GH1MMTR,Beautiful Thing,Perfectly Imperfect (Japan Version),relax,94,chilllll out,0.502,0.156,0,...,0.000000,0.0867,0.352,164.191,224880,4,14,[post-teen pop],54,17


In [177]:
# Summary statistics of check_df; in notebook order this follows the zero-popularity replacement
check_df.describe()

Unnamed: 0,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,song_popularity,artist_popularity,album_popularity
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.54484,0.498211,5.09,-8.25196,0.051032,0.411161,0.011082,0.138284,0.379152,124.19823,225042.67,3.92,58.23,65.22,59.34
std,0.117165,0.21346,3.530802,3.872903,0.044053,0.318725,0.084354,0.087923,0.177338,35.150118,41958.409532,0.485757,20.829202,17.799433,19.695264
min,0.209,0.0262,0.0,-27.691,0.0243,0.00186,0.0,0.0368,0.0797,63.05,116532.0,1.0,3.0,5.0,1.0
25%,0.46475,0.323,2.0,-10.126,0.02915,0.118,0.0,0.09525,0.24925,93.7605,195005.5,4.0,44.75,56.0,51.25
50%,0.543,0.504,5.0,-7.499,0.03505,0.296,0.0,0.112,0.355,123.4075,227619.5,4.0,64.5,68.5,63.0
75%,0.62825,0.66575,7.25,-5.62125,0.050325,0.74525,1.8e-05,0.15175,0.49175,147.87,245570.0,4.0,73.25,76.0,73.0
max,0.824,0.928,11.0,-3.325,0.328,0.964,0.829,0.686,0.955,205.903,376827.0,5.0,91.0,100.0,90.0


In [206]:
# Walk-through: pull the Spotify metadata for one track of the subset
spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

Row = 5
sample_row = subset_df.iloc[Row]

# URIs identifying the track, its artist and its album
track_uri = sample_row['track_track_uri']
artist_uri = sample_row['track_artist_uri']
album_uri = sample_row['track_album_uri']

print('ARTIST: ', sample_row['track_artist_name'])
print('ALBUM: ', sample_row['track_album_name'])
print('SONG: ', sample_row['track_track_name'])

# Track-level data: popularity and audio features
track_popularity = spotify.track(track_uri)['popularity']
print('track_popularity: ', track_popularity)
track_results = spotify.audio_features(tracks=track_uri)
print('audio_features: ', track_results[0])

# Artist-level data: genres and popularity
artist_results = spotify.artist(artist_uri)
print('artist_genre: ', artist_results['genres'])
print('artist_popularity: ', artist_results['popularity'])
# Fetched for inspection only; the result is not printed
artist_albums_results = spotify.artist_albums(artist_uri)

# Album-level data: popularity and the fields the album payload offers
album_results = spotify.album(album_uri)
print('album_popularity: ',album_results['popularity'])
print('album_keys: ', album_results.keys())

ARTIST:  Ron Pope
ALBUM:  The Bedroom Demos
SONG:  A Drop In The Ocean
track_popularity:  67
audio_features:  {'danceability': 0.447, 'energy': 0.393, 'key': 5, 'loudness': -8.65, 'mode': 1, 'speechiness': 0.038, 'acousticness': 0.785, 'instrumentalness': 0, 'liveness': 0.28, 'valence': 0.564, 'tempo': 73.139, 'type': 'audio_features', 'id': '5JDcQAztvZTIkrWoZihgvC', 'uri': 'spotify:track:5JDcQAztvZTIkrWoZihgvC', 'track_href': 'https://api.spotify.com/v1/tracks/5JDcQAztvZTIkrWoZihgvC', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/5JDcQAztvZTIkrWoZihgvC', 'duration_ms': 220239, 'time_signature': 3}
artist_genre:  ['neo mellow', 'piano rock', 'viral pop']
artist_popularity:  53
album_popularity:  59
album_keys:  dict_keys(['album_type', 'artists', 'available_markets', 'copyrights', 'external_ids', 'external_urls', 'genres', 'href', 'id', 'images', 'label', 'name', 'popularity', 'release_date', 'release_date_precision', 'total_tracks', 'tracks', 'type', 'uri'])


In [None]:
# Columns the following cells focus on
important_features = [
    'track_artist_name',
    'track_track_name',
    'playlist_name',
    'playlist_duration_ms',
    'playlist_description',
]

In [None]:
# Number of rows per (playlist_name, track_artist_name) pair, as a flat table
# with the count in an 'artist_count' column.
# NOTE(review): playlists are grouped by name, not by pid; playlists sharing a
# name would be counted together — confirm that names are unique in this subset.
playlist_artist_pairs = subset_df[['playlist_name','track_artist_name']]
pairs_by_playlist = playlist_artist_pairs.groupby(['playlist_name'], sort=False)
artist_count_df = pairs_by_playlist.value_counts(sort=False).reset_index(name="artist_count")

In [None]:
# Display the per-playlist artist counts
artist_count_df

In [None]:
# Playlist name and artist name for every track row of the subset
subset_df.loc[:, ['playlist_name', 'track_artist_name']]

In [None]:
# Column dtypes and non-null counts of the per-playlist artist counts.
# artist_count_df already holds this exact groupby/value_counts result, so it
# is inspected directly instead of being recomputed.
artist_count_df.info()

In [None]:
# Attach each row's artist_count by matching on playlist name and artist name.
# Note that this rebinds music_df: from here on it holds the described-playlist
# subset plus the count column, not the full track table.
music_df = subset_df.merge(
    artist_count_df,
    how='left',
    on=['playlist_name','track_artist_name'],
)

In [None]:
# Display the merged frame: subset rows with their artist_count column
music_df

In [None]:
# Playlist-by-artist matrix of track counts; pairs that never occur are filled with 0
pair_sizes = subset_df.groupby(['playlist_name','track_artist_name']).size()
pair_sizes.unstack(fill_value=0)

In [None]:
# One 'artist,.track' string per row, built by joining the two name columns
music_df[['track_artist_name','track_track_name']].apply(',.'.join, axis=1)

In [None]:
# Track the artist count alongside the other selected columns.
# Guarded so that re-running the cell does not append the name a second time.
if "artist_count" not in important_features:
    important_features.append("artist_count")

In [None]:
# Restrict the merged frame to the selected columns
music_df.loc[:, important_features]

In [None]:
# Share of a playlist's tracks contributed by the row's artist
music_df['artist_fraction'] = music_df['artist_count'].div(music_df['playlist_num_tracks'])
music_df

In [None]:
# Playlist-by-artist count matrix (same computation as an earlier cell)
tracks_per_pair = subset_df.groupby(['playlist_name','track_artist_name']).size()
tracks_per_pair.unstack(fill_value=0)

In [None]:
# NOTE(review): `Ntracks` is not defined in any visible cell, so this cell raised
# NameError on a fresh kernel. 20 mirrors the literal used by a later cell that
# filters on playlist_num_tracks — confirm the intended value.
Ntracks = 20

# How often each 'artist,.track' pair occurs in playlists that have Ntracks tracks
music_df[music_df['playlist_num_tracks'] == Ntracks][['track_artist_name','track_track_name']].apply(lambda x: ',.'.join(x),axis=1).value_counts()

In [None]:
# Occurrences of each 'artist,.track' pair in playlists that have Ntracks tracks
rows_with_ntracks = music_df[music_df['playlist_num_tracks'] == Ntracks]
vc = rows_with_ntracks[['track_artist_name','track_track_name']].apply(',.'.join, axis=1).value_counts()
# Surplus occurrences: total count of the repeated pairs minus one per distinct pair
repeated_pairs = vc[vc > 1]
repeated_pairs.sum() - len(repeated_pairs)

In [None]:
# for i in df[['track_artist_name','track_track_name']].apply(lambda x: ',.'.join(x),axis=1).unique():
# #     print(i.split(',.'))
#     if len(i.split(',.')) == 2:
#         print(i)

In [None]:
# Sorted distinct track names from playlists that have Ntracks tracks
names_with_ntracks = music_df.loc[music_df['playlist_num_tracks'] == Ntracks, 'track_track_name']
np.unique(names_with_ntracks)

In [None]:
# Replace the selected columns with a playlist-size oriented set
important_features = [
    'track_artist_name',
    'track_track_name',
    'playlist_name',
    'playlist_num_tracks',
    'playlist_num_albums',
    'playlist_duration_ms',
    'playlist_num_artists',
]

In [None]:
# Rows of the playlists that have exactly 20 tracks, limited to the descriptive columns
has_twenty_tracks = music_df['playlist_num_tracks'] == 20
music_df.loc[has_twenty_tracks, [
    'track_artist_name',
    'track_track_name',
    'playlist_name',
    'playlist_num_tracks',
    'playlist_num_albums',
    'playlist_duration_ms',
    'playlist_num_artists',
    'playlist_description',
]]

In [None]:
# Per playlist (among those with Ntracks tracks): how many tracks each artist contributes
ntracks_rows = music_df.loc[music_df['playlist_num_tracks'] == Ntracks, important_features]
ntracks_rows.groupby('playlist_name')['track_artist_name'].value_counts()