In [325]:
import requests

import os

# Spotify app credentials. They are read from the SPOTIFY_CLIENT_ID and
# SPOTIFY_CLIENT_SECRET environment variables so real secrets never have to be
# saved inside the notebook; the placeholder strings remain as a fallback for
# anyone who prefers to paste the values in by hand.
CLIENT_ID = os.environ.get('SPOTIFY_CLIENT_ID', 'Put your CLIENT_ID')
CLIENT_SECRET = os.environ.get('SPOTIFY_CLIENT_SECRET', 'Put your CLIENT_SECRET')

### Getting the top 200 songs by year from 1960 to 2023 from the billboard package.

In [267]:
# Billboard chart names to collect; each name is also stored as the song's category.
chart_names = [
    'country-songs',
    'hot-rock-songs',
    'r-b-hip-hop-songs',
    'dance-electronic-songs',
    'latin-songs',
    'jazz-songs',
]

In [None]:
import billboard

# Parallel accumulators: position i of each list describes the same chart entry.
song_title, artists, category = [], [], []

# One chart snapshot is requested per year, always dated 12 September.
snapshot_dates = [f'{year}-09-12' for year in range(1960, 2024)]

for chart_name in chart_names:
    for snapshot_date in snapshot_dates:
        for entry in billboard.ChartData(name=chart_name, date=snapshot_date):
            song_title.append(entry.title)
            artists.append(entry.artist)
            category.append(chart_name)


In [318]:
# The three parallel lists must have the same length.
tuple(len(column) for column in (song_title, artists, category))

(18554, 18554, 18554)

In [319]:
# Peek at the first two entries of each parallel list.
tuple(column[:2] for column in (song_title, artists, category))

(['Alabam', "Please Help Me, I'm Falling"],
 ['Cowboy Copas', 'Hank Locklin'],
 ['country-songs', 'country-songs'])

In [320]:
# Compare the number of collected titles with the number of distinct titles.
total_titles = len(song_title)
distinct_titles = len(set(song_title))
total_titles, distinct_titles

(18554, 11947)

### Remove duplicate songs.

In [321]:
def remove_duplicates(song_names, artist_names, category_names):
    """
    Drop repeated (song, artist, category) entries, keeping first occurrences.

    Parameters:
    - song_names: Song titles.
    - artist_names: Artists, index-aligned with song_names.
    - category_names: Categories, index-aligned with song_names.

    Returns:
    A tuple of three lists (songs, artists, categories) in first-seen order.
    The category is part of the identity, so the same song by the same artist
    is kept once per category it appears in.
    """
    # dict.fromkeys keeps only the first occurrence of each key and preserves
    # insertion order, which is exactly the de-duplication needed here.
    unique_tracks = dict.fromkeys(zip(song_names, artist_names, category_names))

    unique_songs = [song for song, _, _ in unique_tracks]
    unique_artists = [artist for _, artist, _ in unique_tracks]
    unique_category = [kind for _, _, kind in unique_tracks]

    return unique_songs, unique_artists, unique_category

# De-duplicate the collected chart entries; the three results stay index-aligned.
filtered_songs, filtered_artists, filtered_category = remove_duplicates(
    song_names=song_title,
    artist_names=artists,
    category_names=category,
)

In [322]:
# The de-duplicated lists must still have matching lengths.
tuple(len(column) for column in (filtered_songs, filtered_artists, filtered_category))

(12864, 12864, 12864)

### Functions to Call Spotify APIs.


In [276]:
def get_track_id(song_name, access_token, artist_name=None):
    """
    Search Spotify for a track and return the ID of the first result.

    Parameters:
    - song_name: Track title, used as the free-text part of the search query.
    - access_token: Your Spotify access token (sent as a Bearer token).
    - artist_name: Optional artist; when given, an `artist:` filter is
      appended to the query.

    Returns:
    The ID of the first search result, or None when the search has no results.

    Raises:
    requests.HTTPError when Spotify answers with an error status (for example
    an expired token or rate limiting), so that a failed request is never
    mistaken for "track not found".
    """
    base_url = "https://api.spotify.com/v1/search"
    headers = {
        "Authorization": f"Bearer {access_token}"
    }
    query = song_name
    if artist_name:
        query += f" artist:{artist_name}"
    params = {
        "q": query,
        "type": "track",
        "limit": 1  # Only the first result is used
    }
    # A timeout keeps one stalled connection from hanging a long batch run.
    response = requests.get(base_url, headers=headers, params=params, timeout=30)
    # Error payloads have no "tracks" key and would otherwise look like "no match".
    response.raise_for_status()
    data = response.json()

    # Tolerate a missing or null "tracks"/"items" entry in the payload.
    tracks = (data.get("tracks") or {}).get("items") or []
    if tracks:
        return tracks[0].get("id")
    return None
    
def get_access_token():
    """
    Request an access token with the client-credentials grant.

    Posts the module-level CLIENT_ID and CLIENT_SECRET to Spotify's token
    endpoint.

    Returns:
    The access token string from the response.

    Raises:
    requests.HTTPError when the token endpoint rejects the request (for
    example wrong credentials), instead of failing later with a KeyError on
    the missing 'access_token' field.
    """
    auth_url = 'https://accounts.spotify.com/api/token'
    auth_response = requests.post(
        auth_url,
        data={
            'grant_type': 'client_credentials',
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
        },
        timeout=30,  # do not hang forever on a stalled connection
    )
    auth_response.raise_for_status()
    auth_response_data = auth_response.json()
    return auth_response_data['access_token']


def get_audio_features_for_multiple_tracks(track_ids, access_token):
    """
    Retrieve audio features for multiple tracks using the Spotify API.

    Parameters:
    - track_ids: An iterable of track ID strings, sent in a single request.
    - access_token: Your Spotify access token.

    Returns:
    The "audio_features" list from the response, or an empty list when no IDs
    are given. Individual entries may be None (presumably for IDs Spotify has
    no features for; confirm against the API reference).

    Raises:
    requests.HTTPError when Spotify answers with an error status. Returning
    an empty list for a failed batch would silently shift every later result
    for callers that pair results with their inputs by position.
    """
    ids_param = ",".join(track_ids)
    if not ids_param:
        # Nothing to look up; skip the request instead of sending an empty query.
        return []

    base_url = "https://api.spotify.com/v1/audio-features"
    headers = {
        "Authorization": f"Bearer {access_token}"
    }
    params = {
        "ids": ids_param
    }
    response = requests.get(base_url, headers=headers, params=params, timeout=30)
    response.raise_for_status()
    return response.json().get("audio_features", [])

def get_track_details(track_ids, access_token):
    """
    Retrieve details for multiple tracks using the Spotify API.

    Parameters:
    - track_ids: An iterable of track ID strings, sent in a single request.
    - access_token: Your Spotify access token.

    Returns:
    The "tracks" list from the response, or an empty list when no IDs are
    given.

    Raises:
    requests.HTTPError when Spotify answers with an error status. Returning
    an empty list for a failed batch would silently shift every later result
    for callers that pair results with their inputs by position.
    """
    ids_param = ",".join(track_ids)  # Comma-separated string expected by the endpoint
    if not ids_param:
        # Nothing to look up; skip the request instead of sending an empty query.
        return []

    base_url = "https://api.spotify.com/v1/tracks"
    headers = {
        "Authorization": f"Bearer {access_token}"
    }
    params = {
        "ids": ids_param
    }

    response = requests.get(base_url, headers=headers, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()
    return data.get("tracks", [])

In [323]:
# Accumulator for (track id, chart name) pairs; filled by the lookup loop below.
track_ids = list()

### Obtain the ids of the tracks.

In [None]:
access_token = get_access_token()

# Look up one Spotify track ID per (song, artist) pair and keep it paired with
# the chart the song came from. The result of get_track_id is stored as-is, so
# unresolved songs appear with an ID of None and are filtered out later.
# The loop variable is named track_id (not id) so the builtin id() stays
# usable in the rest of the notebook.
for song, artist, chart in zip(filtered_songs, filtered_artists, filtered_category):
    track_id = get_track_id(song_name=song, artist_name=artist, access_token=access_token)
    track_ids.append((track_id, chart))


In [330]:
# Number of lookups recorded so far, including entries whose ID is None.
len(track_ids)

12864

In [342]:
# Keep only the (track id, chart name) pairs whose ID is not None.
all_ids = list(filter(lambda pair: pair[0] is not None, track_ids))
all_ids[:5]

[('5eURshOQPzQWBEBCJivKsi', 'country-songs'),
 ('3eTYeAOll7LaINGjv9Fnvi', 'country-songs'),
 ('62WC0WPprLsyw3hpTZAeBJ', 'country-songs'),
 ('4EUL6WeHpGltSOBSQwDWNA', 'country-songs'),
 ('5MXe6dKcR6iO33GWzVW8cr', 'country-songs')]

### Get the audio features from the API. For more info: https://developer.spotify.com/documentation/web-api/reference/get-audio-features

In [None]:
# Per-batch results: audio_features[k] and categories[k] describe the same batch.
audio_features, categories = [], []
access_token = get_access_token()

# Walk all_ids in slices of 100 pairs and request each slice in one call.
for start in range(0, len(all_ids), 100):
    batch = all_ids[start:start + 100]
    batch_ids = [pair[0] for pair in batch]
    batch_categories = [pair[1] for pair in batch]
    audio_features.append(
        get_audio_features_for_multiple_tracks(batch_ids, access_token=access_token)
    )
    categories.append(batch_categories)

### Get track details (name, artists, and release date).

In [None]:
# Per-batch results; each element is the list returned for one slice of 50 IDs.
# Reuses the access_token obtained in the audio-features cell.
track_details = []
for start in range(0, len(all_ids), 50):
    batch_ids = [pair[0] for pair in all_ids[start:start + 50]]
    track_details.append(get_track_details(batch_ids, access_token=access_token))

In [385]:
from joblib import dump
# Save the per-batch API results to disk; the next cell reloads these files.
# The value shown under the cell is the return value of the last dump() call.
dump(audio_features, 'audio_features.joblib')
dump(track_details, 'track_details.joblib')
dump(categories, 'categories.joblib')

['categories.joblib']

In [4]:
from joblib import load

# Restore the three saved per-batch result lists, in the order they were saved.
audio_features, track_details, categories = (
    load(path)
    for path in ('audio_features.joblib', 'track_details.joblib', 'categories.joblib')
)

In [5]:
# Flatten the per-batch lists of audio features into one flat list.
final_features = [feature for batch in audio_features for feature in batch]

In [6]:
# Flatten the per-batch lists of track details into one flat list.
final_details = [detail for batch in track_details for detail in batch]

In [7]:
# Flatten the per-batch lists of chart names into one flat list.
final_categories = [chart for batch in categories for chart in batch]

In [8]:
def extract_track_and_artist(track_details):
    """
    Extract track names, artist names and release dates from track details.

    Parameters:
    - track_details: A list of track details (as retrieved from the Spotify API).
      An entry may be None or lack an "album"/"artists" field.

    Returns:
    A list of dictionaries, one per input entry and in the same order. Each
    dictionary has 'track_name', 'artist_names' and 'release_date' as keys.
    A missing track yields a placeholder (None name, empty artist list, None
    date) rather than being dropped, so the output stays index-aligned with
    the input.
    """
    result = []

    for track in track_details:
        if not track:
            # Keep the slot so position i still corresponds to input i.
            result.append({
                "track_name": None,
                "artist_names": [],
                "release_date": None,
            })
            continue

        # "album" may be absent or null; fall back to an empty mapping.
        album = track.get("album") or {}
        artist_names = [artist["name"] for artist in track.get("artists") or []]
        result.append({
            "track_name": track.get("name"),
            "artist_names": artist_names,
            "release_date": album.get("release_date"),
        })

    return result

In [9]:
# Reduce each flattened track record to its name, artists and release date.
detailss = extract_track_and_artist(final_details)

### Create the DataFrame.

In [10]:
import pandas as pd
import warnings
# NOTE(review): this blanket suppression was presumably added to hide the
# DataFrame.append deprecation warning; it is kept so later cells behave the
# same, but consider removing it.
warnings.filterwarnings('ignore')

# Build one record per track by merging its audio features with its name,
# artists, release date and chart. The three inputs are paired by position.
# Records are collected in a list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration.
records = []
for audio, detail, chart in zip(final_features, detailss, final_categories):
    if audio is None:
        # No audio features for this track; skip it.
        continue
    # Copy instead of mutating the cached feature dict so the cell can be re-run.
    records.append({
        **audio,
        'track_name': detail['track_name'],
        'artists_names': detail['artist_names'],
        'release_date': detail['release_date'],
        'chart_name': chart,
    })

df = pd.DataFrame(records)

In [394]:
# Write the assembled table to CSV without the row index.
output_path = 'music_v2.csv'
df.to_csv(output_path, index=False)

In [11]:
# Show the first five rows of the assembled table.
df.head(n=5)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,id,uri,track_href,analysis_url,duration_ms,time_signature,track_name,artists_names,release_date,chart_name
0,0.759,0.675,2,-10.281,1,0.0268,0.291,0.000165,0.146,0.961,...,5eURshOQPzQWBEBCJivKsi,spotify:track:5eURshOQPzQWBEBCJivKsi,https://api.spotify.com/v1/tracks/5eURshOQPzQW...,https://api.spotify.com/v1/audio-analysis/5eUR...,141347,4,Alabam,[Cowboy Copas],2005,country-songs
1,0.557,0.388,7,-9.464,1,0.0259,0.731,0.0,0.306,0.504,...,3eTYeAOll7LaINGjv9Fnvi,spotify:track:3eTYeAOll7LaINGjv9Fnvi,https://api.spotify.com/v1/tracks/3eTYeAOll7La...,https://api.spotify.com/v1/audio-analysis/3eTY...,144867,4,Please Help Me I'm Falling,[Hank Locklin],1996-01-01,country-songs
2,0.589,0.447,4,-11.458,1,0.0303,0.5,0.000209,0.23,0.903,...,62WC0WPprLsyw3hpTZAeBJ,spotify:track:62WC0WPprLsyw3hpTZAeBJ,https://api.spotify.com/v1/tracks/62WC0WPprLsy...,https://api.spotify.com/v1/audio-analysis/62WC...,161232,4,Anymore,[Roy Drusky],1978-01-01,country-songs
3,0.466,0.143,7,-17.569,1,0.0312,0.903,0.00319,0.107,0.283,...,4EUL6WeHpGltSOBSQwDWNA,spotify:track:4EUL6WeHpGltSOBSQwDWNA,https://api.spotify.com/v1/tracks/4EUL6WeHpGlt...,https://api.spotify.com/v1/audio-analysis/4EUL...,135200,4,I'm Gettin' Better,[Jim Reeves],1997-02-14,country-songs
4,0.617,0.197,9,-12.145,1,0.0267,0.777,0.0,0.133,0.384,...,5MXe6dKcR6iO33GWzVW8cr,spotify:track:5MXe6dKcR6iO33GWzVW8cr,https://api.spotify.com/v1/tracks/5MXe6dKcR6iO...,https://api.spotify.com/v1/audio-analysis/5MXe...,165267,4,(I Can't Help You) I'm Falling Too,[Skeeter Davis],1994-05-23,country-songs
