## Libraries & APIs

In [1]:
# Spotify API
with open('api_id.txt') as file:
    client_id = file.read()
    
with open('api_s.txt') as file:
    client_secret = file.read()

In [7]:
# import usual libraries
import numpy as np
import pandas as pd

In [8]:
# to see entire output
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

In [None]:
# # plot libraries
# import matplotlib.pyplot as plt
# import seaborn as sns

In [9]:
# spotipy & co libraries
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials

In [11]:
#Initialize SpotiPy with user credentials

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id, client_secret))

In [19]:
import pprint

## Pulling Results

In [85]:
# making a list of playlist names

playlist_names = ["top 50 colombia", "top 50 USA", "top 50 Germany", "top 50 Poland", "top 50 Indonesia", 
                  "top 50 Spain", "top 50 Nigeria", "top 50 Ghana", "top 50: Jamaica", "top 50 Netherlands",
                  "top 50 South Korea", "top 50 Greece"]

In [86]:
# pulling song data
def playlist_search(playlist_names):
    results = []
    for name in playlist_names:
        playlist_id = sp.search(q=name, limit=1, market="DE", type='playlist')['playlists']['items'][0]['id']
        results.append(sp.playlist(playlist_id))
    return results

In [87]:
results = playlist_search(playlist_names)

In [99]:
# defining a function

def spotify_extractor(results):
    song_id = []
    song_names = []
    artist_names = []

    for result in results:
        for item in result['tracks']['items']:
            song_id.append(item['track']['id'])
            song_names.append(item['track']['name'])
            artist_names.append(item['track']['artists'][0]['name'])

    return pd.DataFrame({'song_id': song_id, 'song_name': song_names, 'artist_name': artist_names})

In [100]:
spotify_df = spotify_extractor(results)

In [104]:
# getting song features

feature_list = ["danceability","energy","loudness","speechiness","acousticness", "instrumentalness","liveness","valence","tempo","duration_ms"]

def add_song_features(df, feature_list):
    feats = []

    for s_id in df['song_id']:
        feats.append(sp.audio_features(s_id)[0])

    feats_df = pd.DataFrame(feats)
    return pd.concat([df, feats_df[feature_list]], axis=1)

In [105]:
spotify_df = add_song_features(spotify_df, feature_list)

In [125]:
spotify_df.head(5) 

Unnamed: 0,song_id,song_name,artist_name,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,2btNsI4OvcVl7SAHQQDHFB,Mirame,Blessd,0.717,0.656,-4.449,0.0797,0.141,3e-05,0.0661,0.695,175.956,157453
1,6WatFBLVB0x077xWeoVc2k,Si Antes Te Hubiera Conocido,KAROL G,0.924,0.668,-6.795,0.0469,0.446,0.000594,0.0678,0.787,128.027,195824
2,7bywjHOc0wSjGGbj04XbVi,LUNA,Feid,0.774,0.86,-2.888,0.13,0.131,0.0,0.116,0.446,100.019,196800
3,5QjmUqgpPQgXgg4606DqZF,UWAIE,Kapo,0.705,0.783,-4.783,0.0403,0.138,0.0,0.0984,0.454,103.001,172427
4,13BDiikG6y5o5cQTK0HpW6,Soltera - W Sound 01,W Sound,0.734,0.578,-4.147,0.295,0.155,0.000242,0.113,0.88,199.997,142022


In [None]:
name = song_name
id = song_id
drop artist name

## Combining our dataframes

In [122]:
paola_df = pd.read_csv("/Users/sashacrowe/codebase-ih/wk9_project/all_track_features.csv") 

In [126]:
paola_df.rename(columns={'name':'song_name', 'id':'song_id'}, inplace = True)

In [127]:
paola_df.head(5)

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,song_id,duration_ms,song_name
0,0.717,0.656,-4.449,0.0797,0.141,3e-05,0.0661,0.695,175.956,2btNsI4OvcVl7SAHQQDHFB,157453,Mirame
1,0.924,0.668,-6.795,0.0469,0.446,0.000594,0.0678,0.787,128.027,6WatFBLVB0x077xWeoVc2k,195824,Si Antes Te Hubiera Conocido
2,0.774,0.86,-2.888,0.13,0.131,0.0,0.116,0.446,100.019,7bywjHOc0wSjGGbj04XbVi,196800,LUNA
3,0.705,0.783,-4.783,0.0403,0.138,0.0,0.0984,0.454,103.001,5QjmUqgpPQgXgg4606DqZF,172427,UWAIE
4,0.734,0.578,-4.147,0.295,0.155,0.000242,0.113,0.88,199.997,13BDiikG6y5o5cQTK0HpW6,142022,Soltera - W Sound 01


In [130]:
spotify_df = pd.concat([spotify_df, paola_df], ignore_index=True)

In [131]:
spotify_df.shape

(1180, 13)

### Checking for Duplicates

**pre-dropping duplicates**

In [132]:
spotify_df.drop_duplicates().shape

(1074, 13)

In [133]:
spotify_df.duplicated().sum()

106

In [134]:
spotify_df = spotify_df.drop_duplicates()

**after dropping dupicates**

In [135]:
spotify_df.duplicated().sum()

0

In [136]:
spotify_df.shape

(1074, 13)

In [138]:
spotify_df = spotify_df.drop('artist_name', axis=1)

In [141]:
spotify_df = spotify_df.reset_index(drop=True)

In [142]:
spotify_df

Unnamed: 0,song_id,song_name,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,2btNsI4OvcVl7SAHQQDHFB,Mirame,0.717,0.656,-4.449,0.0797,0.141,3e-05,0.0661,0.695,175.956,157453
1,6WatFBLVB0x077xWeoVc2k,Si Antes Te Hubiera Conocido,0.924,0.668,-6.795,0.0469,0.446,0.000594,0.0678,0.787,128.027,195824
2,7bywjHOc0wSjGGbj04XbVi,LUNA,0.774,0.86,-2.888,0.13,0.131,0.0,0.116,0.446,100.019,196800
3,5QjmUqgpPQgXgg4606DqZF,UWAIE,0.705,0.783,-4.783,0.0403,0.138,0.0,0.0984,0.454,103.001,172427
4,13BDiikG6y5o5cQTK0HpW6,Soltera - W Sound 01,0.734,0.578,-4.147,0.295,0.155,0.000242,0.113,0.88,199.997,142022
5,08pbRBstuvH7ArRxeb8fwT,Hasta Aquí Llegué,0.641,0.657,-8.338,0.218,0.538,0.0,0.106,0.411,100.016,188887
6,2gmd7lY3QSzLh0xIY4cRvi,SE ME OLVIDA,0.786,0.707,-4.386,0.0635,0.0399,0.0,0.121,0.691,82.045,225373
7,281YJmnThveWjsWVic9Ym6,Las Piponas (Remix),0.73,0.587,-4.978,0.083,0.0223,0.0,0.14,0.109,119.656,244000
8,0Sr7ssScx54yxdM2ols7nW,Q U E V A S H A C E R H O Y ?,0.56,0.681,-6.304,0.244,0.103,0.0,0.106,0.388,155.079,223320
9,63pLfjK6FvcYJYMGwtHjd6,SORRY 4 THAT MUCH,0.819,0.675,-4.281,0.0727,0.0318,0.0,0.289,0.602,92.99,207747


## Exporting our complete df

In [143]:
spotify_df.to_csv('paola_sasha_spotify_df.csv', index=False)