# >> [Spotipy Documentation](https://spotipy.readthedocs.io/en/2.13.0/#)


## I. Install Spotipy library

`pip install spotipy`

[See more](https://pypi.org/project/spotipy/)

In [1]:
import os
from datetime import datetime

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

## II. Quick start (API access)

 * Register on the [Spotify website](https://developer.spotify.com/dashboard/) dedicated to developers.
 * Select **"Create an App"**
 * Register your information
 * Get your **CLIENT_ID** and **CLIENT_SECRET**

In [2]:
# Read the Spotify API credentials from the environment when they are set, so
# real secrets never have to be typed into (and shared with) the notebook.
# The placeholder strings remain as a fallback for a quick local edit.
client_id = os.environ.get('SPOTIPY_CLIENT_ID', 'your client_id')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET', 'your client_secret')

# Build the API client used by every later cell (bound to the name `sp`).
client_credentials_manager = SpotifyClientCredentials(client_id = client_id, client_secret = client_secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

# III. Parameters to access Spotify playlists

## a) "Top 50" most-played songs playlists, one per country

 * [Charts regional](https://open.spotify.com/genre/charts-regional)

In [3]:
# Playlist owner name passed to the API together with each chart playlist ID below.
top_username = 'spotifycharts'
# Playlist IDs of the per-country "Top 50" charts; the tracks found in these
# playlists are the ones labelled as hits later in the notebook.
top_playlist_ids = ['37i9dQZEVXbJiZcmkrIHGU','37i9dQZEVXbMMy2roB9myp','37i9dQZEVXbJPcfkRz0wJ0','37i9dQZEVXbKNHh6NIXu36',
                    '37i9dQZEVXbJNSeeHswcKB','37i9dQZEVXbJqfMFK4d691','37i9dQZEVXbMXbN3EUUhlg','37i9dQZEVXbNfM2w2mq1B8',
                    '37i9dQZEVXbKj23U1GF4IR','37i9dQZEVXbL0GavIqMTeb','37i9dQZEVXbOa2lmxNORXQ','37i9dQZEVXbMZAjGMynsQX',
                    '37i9dQZEVXbL3J0k32lWnN','37i9dQZEVXbLxoIml4MYkT','37i9dQZEVXbJlM6nvL1nD1','37i9dQZEVXbKIVTPX9a2Sb',
                    '37i9dQZEVXbNFJfN1Vw8d9','37i9dQZEVXbLRQDuF5jeBp','37i9dQZEVXbLesry2Qw2xS','37i9dQZEVXbNBz9cRCSFkY',
                    '37i9dQZEVXbMxcczTSoGwZ','37i9dQZEVXbIPWwFssbupI','37i9dQZEVXbJqdarpmTJDL','37i9dQZEVXbLy5tBFyQvd4',
                    '37i9dQZEVXbKCF6dqVpDkS','37i9dQZEVXbJp9wcIM9Eo5','37i9dQZEVXbLwpL8TjsxOG','37i9dQZEVXbNHwMxAkvmF8',
                    '37i9dQZEVXbLZ52XmnySJg','37i9dQZEVXbObFQZ3JLcXt','37i9dQZEVXbKM896FDX8L1','37i9dQZEVXbKMzVsSGQ49S',
                    '37i9dQZEVXbJ6IpvItkve3','37i9dQZEVXbIQnj7RRhdSX','37i9dQZEVXbKXQ4mDTEBXq','37i9dQZEVXbJWuzDrTxbKS',
                    '37i9dQZEVXbMx56Rdq5lwc','37i9dQZEVXbKGcyg6TFGx6','37i9dQZEVXbJlfUljuZExa','37i9dQZEVXbO3qyFxbkOE1',
                    '37i9dQZEVXbISk8kxnzfCq','37i9dQZEVXbJvfa0Yxg7E7','37i9dQZEVXbM8SIrkERIYl','37i9dQZEVXbKypXHVwk1f0',
                    '37i9dQZEVXbNOUPGj7tW6T','37i9dQZEVXbJfdy5b0KP7W','37i9dQZEVXbN6itCcaL3Tt','37i9dQZEVXbKyJS56d1pgi',
                    '37i9dQZEVXbLnolsZ8PSNw','37i9dQZEVXbKAbrMR8uuf7','37i9dQZEVXbNZbJ6TZelCq','37i9dQZEVXbL8l7ra5vVdB',
                    '37i9dQZEVXbK4gjvS1FjPY','37i9dQZEVXbLoATJ81JYXz','37i9dQZEVXbJiyhoAPEfMK','37i9dQZEVXbMnz8KIWsvf9',
                    '37i9dQZEVXbMnZEatlMSiu','37i9dQZEVXbIP3c3fqVrJY','37i9dQZEVXbIVYVBNw9D5K','37i9dQZEVXbKkidEfWYRuD',
                    '37i9dQZEVXbMJJi3wgRbAy','37i9dQZEVXbLdGSmz6xilI','37i9dQZEVXbMDoHDwVN2tF','37i9dQZEVXbMH2jvi6jvjk']

## b) One randomly chosen playlist for each musical genre

**Playlists:** Esquenta Sertanejo, Funk Hits, Pagodeira, Lacradorxs, RAPública, Sucessos Gospel,
Divina MPB, Indiespensável, Mansión Reggaetón, Nação Reggae, R&B Brasil, Kpop Daebak,
Jazz Classics, Arab X, African Heat, Tokyo Super Hits!, Desi Hits, Blues Classics,
Classic Punk, Kickass Metal, Roots Rising, Rock Leve, Pop Up, Electro Heat

In [4]:
# Playlist owner name passed to the API for the genre playlists and the prediction playlist.
username = 'Spotify'
# Playlist IDs of the genre playlists listed above; their tracks that do not
# also appear in the Top 50 charts are used as the non-hit examples.
playlist_ids = ['37i9dQZF1DXdSjVZQzv2tl','37i9dQZF1DWTkIwO2HDifB','37i9dQZF1DWVjNkaVEkL3X','37i9dQZF1DWZylNQMXNhNe',
                '37i9dQZF1DWSIvbYYt1Dvi','37i9dQZF1DX0OEZC3cbQmU','37i9dQZF1DWTWoEMrRMHZ8','37i9dQZF1DWXx6OpWBkqVv',
                '37i9dQZF1DWZjqjZMudx9T','37i9dQZF1DWTMlvEhEJDRB','37i9dQZF1DX6EQe4DjJNfI','37i9dQZF1DX9tPFwDMOaN1',
                '37i9dQZF1DXbITWG1ZJKYt','37i9dQZF1DWWkrGNlIHxPl','37i9dQZF1DWYkaDif7Ztbp','37i9dQZF1DXafb0IuPwJyF',
                '37i9dQZF1DWTwzVdyRpXm1','37i9dQZF1DXd9rSDyQguIk','37i9dQZF1DX3LDIBRoaCDQ','37i9dQZF1DWTcqUzwhNmKv',
                '37i9dQZF1DWYV7OOaGhoH0','37i9dQZF1DX4vCk1GJH7zl','37i9dQZF1DX6aTaZa0K6VA','37i9dQZF1DX2R0a3scWaq6']

## c) Playlist to predict


**Playlist:** "Novidades da semana" (username: Spotify)

In [5]:
# Playlist(s) whose tracks are exported separately, without a success label, for prediction.
predict_playlist_ids = ['37i9dQZF1DX34KNiFQJNVm']

# IV. Collecting data


## a) Get track IDs of hits and non hits

In [6]:
def get_playlists_id_tracks(username, playlist_id, sp, ids):
    """Append the track IDs of one playlist to ``ids``.

    The playlist is read page by page (100 items per request) through
    ``sp.user_playlist_tracks`` until a response reports no ``next`` page.

    Args:
        username: Playlist owner name, forwarded to the API call.
        playlist_id: ID of the playlist to read.
        sp: Client object exposing ``user_playlist_tracks``.
        ids: List receiving the track IDs; it is extended in place.

    Returns:
        None. The result is delivered through ``ids``.
    """
    page_size = 100
    offset = 0
    songs = []
    while True:
        content_playlist = sp.user_playlist_tracks(username, playlist_id, fields=None,
                                                   limit=page_size, offset=offset)
        songs += content_playlist['items']
        if content_playlist['next'] is None:
            break
        offset += page_size

    for item in songs:
        track = item.get('track')
        # Skip entries that carry no track object or no track ID: the later
        # steps look every collected ID up through the API, so a None ID
        # would only fail there.
        if track is None or track.get('id') is None:
            continue
        ids.append(track['id'])
    
def remove_duplicates(ids):
    """Return the unique IDs of ``ids`` in first-seen order.

    The counts before and after de-duplication are printed. ``dict.fromkeys``
    is used instead of ``set`` so the result order is deterministic, which
    keeps the row order of the generated dataset the same from run to run.

    Args:
        ids: List of hashable track IDs, possibly with repetitions.

    Returns:
        A new list with each ID kept once, ordered by first occurrence.
    """
    print("Total number of songs: ", len(ids))
    final_ids = list(dict.fromkeys(ids))
    print("Number of song after removing duplicates: ", len(final_ids))
    return final_ids

# Get top (hit) track ids
print("Top 50 songs by country")
top_ids = []
for playlist_id in top_playlist_ids:
    get_playlists_id_tracks(top_username, playlist_id, sp, top_ids)
top_ids = remove_duplicates(top_ids)

# Get genre track ids
print("\nRandom playlist songs by genre")
genre_ids = []
for playlist_id in playlist_ids:
    get_playlists_id_tracks(username, playlist_id, sp, genre_ids)
genre_ids = remove_duplicates(genre_ids)

# Get only non hit track ids.
# Membership is tested against a set so every lookup is constant time instead
# of a scan over the whole list of hit IDs; the order of genre_ids is kept.
top_id_lookup = set(top_ids)
non_hit_ids = [x for x in genre_ids if x not in top_id_lookup]
print("\nNumber of random playlist songs by genre that are not in the Top 50 by country: ", len(non_hit_ids))

# Get predict track ids
print("\nPredict playlist songs")
predict_ids = []
for playlist_id in predict_playlist_ids:
    get_playlists_id_tracks(username, playlist_id, sp, predict_ids)
predict_ids = remove_duplicates(predict_ids)

Top 50 songs by country
Total number of songs:  3200
Number of song after removing duplicates:  1251

Random playlist songs by genre
Total number of songs:  1635
Number of song after removing duplicates:  1621

Number of random playlist songs by genre that are not in the Top 50 by country:  1509

Predict playlist songs
Total number of songs:  60
Number of song after removing duplicates:  60


## b) Get track information


**Track information:** Name of the song, artist and popularity.

**Audio features:** Relevant features from [Audio Features Object](https://developer.spotify.com/documentation/web-api/reference/tracks/get-audio-features/)

In [7]:
def get_track_info(track_ids, track_name, artist_name, popularity):
    """Append name, first artist and popularity of each track to the output lists.

    Tracks are requested in batches through ``sp.tracks`` (the notebook-level
    client) instead of one request per track, mirroring the batching already
    done for audio features. Exactly one value is appended to each output
    list per requested ID, so the three lists stay aligned with ``track_ids``.

    Args:
        track_ids: Track IDs to look up.
        track_name: List receiving the track names (extended in place).
        artist_name: List receiving the name of the first listed artist.
        popularity: List receiving the popularity values.

    Returns:
        None. Results are delivered through the three output lists.
    """
    # The Web API accepts at most 50 IDs per "several tracks" request.
    batch_size = 50
    track_ids = list(track_ids)
    for start in range(0, len(track_ids), batch_size):
        batch = track_ids[start:start + batch_size]
        for info in sp.tracks(batch)['tracks']:
            if info is None:
                # The API answers null for an ID it cannot resolve; keep a
                # placeholder so positions still match track_ids.
                track_name.append(None)
                artist_name.append(None)
                popularity.append(None)
                continue
            track_name.append(info['name'])
            artist_name.append(info['artists'][0]['name'])
            popularity.append(info['popularity'])

def get_track_features(track_ids, features_list):
    """Append one row of audio features per track to ``features_list``.

    Features are requested from ``sp.audio_features`` (the notebook-level
    client) in batches of 50 IDs. Each appended row holds, in this order:
    energy, liveness, tempo, speechiness, acousticness, instrumentalness,
    time_signature, danceability, key, duration_ms, loudness, valence, mode.

    When the API returns ``None`` for a track, a row of ``None`` values is
    appended instead of failing, so rows stay aligned with ``track_ids``.

    Args:
        track_ids: List of track IDs to look up.
        features_list: List receiving the feature rows (extended in place).

    Returns:
        None. Results are delivered through ``features_list``.
    """
    feature_keys = ('energy', 'liveness', 'tempo', 'speechiness',
                    'acousticness', 'instrumentalness', 'time_signature',
                    'danceability', 'key', 'duration_ms',
                    'loudness', 'valence', 'mode')
    batch_size = 50
    audio_features = []
    for start in range(0, len(track_ids), batch_size):
        audio_features += sp.audio_features(track_ids[start:start + batch_size])
    for features in audio_features:
        if features is None:
            # No audio analysis available for this ID: keep a placeholder row.
            features_list.append([None] * len(feature_keys))
        else:
            features_list.append([features[key] for key in feature_keys])

# Hit tracks: names, artists, popularity and audio features
top_track_name, top_artist_name, top_popularity, top_features_list = [], [], [], []
get_track_info(top_ids, top_track_name, top_artist_name, top_popularity)
get_track_features(top_ids, top_features_list)

# Non-hit tracks: same information, stored under the unprefixed names
track_name, artist_name, popularity, features_list = [], [], [], []
get_track_info(non_hit_ids, track_name, artist_name, popularity)
get_track_features(non_hit_ids, features_list)

# Tracks to predict: same information, stored under the predict_* names
predict_track_name, predict_artist_name, predict_popularity, predict_features_list = [], [], [], []
get_track_info(predict_ids, predict_track_name, predict_artist_name, predict_popularity)
get_track_features(predict_ids, predict_features_list)

## c) DataFrame builder

In [8]:
def build_dataframe(features_list, track_name, artist_name, popularity, top, predict):
    """Assemble a DataFrame from audio-feature rows and track metadata.

    Args:
        features_list: Rows of 13 audio-feature values, in the column order
            listed in ``feature_columns`` below.
        track_name: Track names, one per row.
        artist_name: Artist names, one per row.
        popularity: Popularity values, one per row.
        top: Truthy when the rows are hit songs; selects the label value.
        predict: Truthy when the rows are songs to predict; no label column
            is added in that case and ``top`` is ignored.

    Returns:
        A DataFrame with the feature columns, then ``artist``, ``track_name``
        and ``popularity``, plus a ``success`` column (1 = success,
        0 = non success) unless ``predict`` is truthy.
    """
    feature_columns = [
        'energy', 'liveness', 'tempo', 'speechiness', 'acousticness',
        'instrumentalness', 'time_signature', 'danceability', 'key',
        'duration_ms', 'loudness', 'valence', 'mode',
    ]
    frame = pd.DataFrame(features_list, columns=feature_columns)
    frame['artist'] = artist_name
    frame['track_name'] = track_name
    frame['popularity'] = popularity

    # Songs to predict carry no class label.
    if predict:
        return frame

    # Class column: 1 for success, 0 for non success.
    label = 1 if top else 0
    frame['success'] = [label] * len(features_list)
    return frame

# Build dataframe of hit songs
top_df = build_dataframe(top_features_list, top_track_name, top_artist_name, top_popularity, True, False)

# Build dataframe of non hit songs
df = build_dataframe(features_list, track_name, artist_name, popularity, False, False)

# Concatenate the two dataframes.
# ignore_index=True renumbers the rows 0..n-1; without it both inputs keep
# their own 0-based index and final_df would contain duplicate index labels.
final_df = pd.concat([top_df, df], ignore_index=True)

# Build dataframe of songs to predict
predict_df = build_dataframe(predict_features_list, predict_track_name, predict_artist_name, predict_popularity, False, True)

## d) Export to csv

In [9]:
# Date stamp (month, day, year without separators) used in the output file names
now = datetime.now()
date = now.strftime("%m%d%Y")

# File-name prefixes paired by position with the dataframes to export
name = ['spotifyAnalysis', 'predictSpotifyAnalysis']
dataframe = [final_df, predict_df]

# Write each dataframe to "<prefix>-<date>.csv" without the index column
for file_prefix, frame in zip(name, dataframe):
    frame.to_csv('{}-{}.csv'.format(file_prefix, date), index=False)