In [0]:
import os
import time
from pprint import pprint
from typing import Optional, Dict

import pandas as pd
import requests

# Last.fm credentials and endpoint used by every request in this notebook.
#
# SECURITY: prefer supplying the key through the LASTFM_API_KEY environment
# variable (or a secret store) instead of the notebook source. The literal is
# kept only as a fallback so existing runs keep working; it is readable by
# anyone who can open this notebook, so it should be rotated and then removed.
API_KEY = os.environ.get("LASTFM_API_KEY", "3cb8a7d2719bb8f379dd52d7c86ff3e3")
BASE = "https://ws.audioscrobbler.com/2.0/"



In [0]:
def get_track_info(
    api_key: str,
    artist: str,
    track: str,
    timeout: int = 30
) -> Dict[str, object]:
    """
    Query Last.fm ``track.getInfo`` and return a flat summary of the match.

    Parameters
    ----------
    api_key : str
        Your Last.fm API key.
    artist : str
        Artist name (free text).
    track : str
        Track title (free text).
    timeout : int
        Request timeout in seconds.

    Returns
    -------
    dict
        {
            "track_name": str,
            "artist_name": str,
            "track_mbid": Optional[str],
            "artist_mbid": Optional[str],
            "duration": value reported by the API, or None when absent/empty,
            "toptags": list of tag names, or None when there are no tags
        }

    Raises
    ------
    requests.HTTPError
        If the HTTP request fails.
    KeyError
        If the response has no "track" payload (for example an API-level
        error object), or the payload lacks the track/artist name.
    """

    params = {
        "method": "track.getInfo",
        "api_key": api_key,
        "artist": artist,
        "track": track,
        "autocorrect": 1,  # improves canonical matching
        "format": "json",
    }

    response = requests.get(BASE, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    # A payload without "track" cannot be summarised. Raise the documented
    # KeyError, but carry whatever message the payload holds so the caller can
    # see why the lookup failed instead of a bare KeyError('track').
    if not isinstance(data, dict) or "track" not in data:
        detail = data.get("message", data) if isinstance(data, dict) else data
        raise KeyError(
            f"Last.fm response has no 'track' entry for {artist!r} / {track!r}: {detail!r}"
        )

    track_data = data["track"]
    artist_data = track_data["artist"]

    # "toptags" is optional and not guaranteed to be {"tag": [...]}: tolerate a
    # missing key, a non-dict placeholder, and a single tag given as a dict.
    toptags_node = track_data.get("toptags")
    tag_entries = toptags_node.get("tag") if isinstance(toptags_node, dict) else None
    if isinstance(tag_entries, dict):
        tag_entries = [tag_entries]
    elif not isinstance(tag_entries, list):
        tag_entries = []
    tag_names = [
        entry["name"]
        for entry in tag_entries
        if isinstance(entry, dict) and entry.get("name")
    ]

    return {
        "track_name": track_data["name"],
        "artist_name": artist_data["name"],
        # Empty strings are normalised to None so "missing" has one spelling.
        "track_mbid": track_data.get("mbid") or None,
        "artist_mbid": artist_data.get("mbid") or None,
        "duration": track_data.get("duration") or None,
        "toptags": tag_names or None,
    }

In [0]:

# Smoke-test the helper against one known track before running it over a whole chart.
info = get_track_info(API_KEY, "Major Lazer", "Cold Water")

# Plain-text dump of the returned summary dict.
print(info)

In [0]:
# Spot-check one field of the lookup result; as the cell's last expression,
# its value is rendered as the cell output.
info.get("track_name")

In [0]:
# Load the chart CSV to be enriched with Last.fm metadata (the path points at
# the 2016 file in the Aria_bronze folder).
df = pd.read_csv(
    filepath_or_buffer="/Workspace/AUS vs US Music Taste/AUS-vs-US-Music-Taste-Data-Project/Data/Bronze/Aria_bronze/Aria_2016.csv"
)

# Render the raw frame so the input can be inspected before enrichment.
display(df)

In [0]:
import time



# One Last.fm lookup per chart row. `enriched_rows` ends up with exactly one
# record per row of `df` (in the same order) so the two can be joined
# positionally afterwards. Failed lookups are recorded in `failed_lookups`
# rather than being dropped silently.
enriched_rows = []
failed_lookups = []

for _, row in df.iterrows():
    try:
        info = get_track_info(
            api_key=API_KEY,
            artist=row["artist"],
            track=row["song_title"]
        )
    except Exception as exc:
        # Hard-fail protection - don't kill the whole run. The failure is
        # reported and remembered so all-None rows can be traced to a cause.
        failed_artist = row.get("artist")
        failed_track = row.get("song_title")
        failed_lookups.append(
            {"artist": failed_artist, "song_title": failed_track, "error": repr(exc)}
        )
        print(f"Last.fm lookup failed for {failed_artist!r} - {failed_track!r}: {exc!r}")
        info = {
            "track_name": None,
            "artist_name": None,
            "track_mbid": None,
            "artist_mbid": None,
            "duration": None,
            "toptags": None,
        }

    enriched_rows.append(info)

    time.sleep(0.2)  # IMPORTANT: be nice to the API

print(f"{len(failed_lookups)} of {len(enriched_rows)} lookups failed")

In [0]:
# Turn the per-row lookup records into a frame (one row per record).
enriched_df = pd.DataFrame(data=enriched_rows)

# Place the Last.fm columns next to the original chart columns. The chart's
# index is reset so both frames share a 0..n-1 index and align row by row.
final_df = pd.concat(
    objs=[df.reset_index(drop=True), enriched_df],
    axis="columns",
)

In [0]:
# Inspect the Last.fm-derived columns on their own.
display(enriched_df)

In [0]:
# Inspect the original chart rows side by side with the Last.fm-derived columns.
display(final_df)