<a href="https://colab.research.google.com/github/NatashaKamami/Music-Recommendation-System/blob/main/lastFM_scraper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import time

import pandas as pd
import requests

# Last.fm API key.
# The key can be supplied through the LASTFM_API_KEY environment variable so it
# does not have to be edited in the notebook; the original key remains the
# fallback so existing runs keep working.
# NOTE(review): this fallback key is committed in a public notebook — consider
# rotating it and relying on the environment variable only.
API_KEY = os.environ.get("LASTFM_API_KEY") or "ea3bde756fd4e8be6fde7c56702b71b6"

# Use HTTPS so the API key (sent as a query parameter) is not transmitted in
# clear text.
BASE_URL = "https://ws.audioscrobbler.com/2.0/"


In [None]:
def _to_int(value):
    """Convert a count value to an int, returning 0 when it is not a plain number.

    Accepts digit strings as well as ints; anything else (None, empty,
    negative, non-numeric text) is treated as zero.
    """
    text = str(value).strip()
    # isascii() guards against Unicode "digits" that isdigit() accepts but
    # int() cannot parse.
    return int(text) if text.isascii() and text.isdigit() else 0


def _extract_genre(track_info):
    """Join the names of the first three top tags, or return "Unknown"."""
    toptags = track_info.get("toptags")
    tags = toptags.get("tag") if isinstance(toptags, dict) else None

    # Defensive normalisation: tolerate a lone tag object or a missing /
    # non-list value instead of raising and discarding the whole result.
    if isinstance(tags, dict):
        tags = [tags]
    elif not isinstance(tags, list):
        tags = []

    names = [
        str(tag["name"])
        for tag in tags[:3]
        if isinstance(tag, dict) and tag.get("name")
    ]
    return ", ".join(names) if names else "Unknown"


# Function to fetch song details from Last.fm
def get_song_info(artist, track, timeout=10):
    """Look up a track with Last.fm's ``track.getInfo`` method.

    Args:
        artist: Artist name sent as the ``artist`` query parameter.
        track: Track title sent as the ``track`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        A tuple ``(genre, listeners, plays, score)`` where ``genre`` is the
        first three tag names joined by ", " (or "Unknown"), ``listeners`` and
        ``plays`` are ints, and ``score`` is plays per listener (0 when there
        are no listeners). If the request fails, the body is not valid JSON,
        or the response contains no "track" entry, ``("Unknown", 0, 0, 0)`` is
        returned; request/decoding failures are also printed.
    """
    params = {
        "method": "track.getInfo",
        "api_key": API_KEY,
        "artist": artist,
        "track": track,
        "format": "json",
    }

    # Only the network call and JSON decoding are guarded: parsing below is
    # written so that unexpected field shapes degrade per field instead of
    # wiping out the whole result.
    try:
        response = requests.get(BASE_URL, params=params, timeout=timeout)
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error fetching info for {track} by {artist}: {e}")
        return "Unknown", 0, 0, 0

    track_info = data.get("track") if isinstance(data, dict) else None
    if not isinstance(track_info, dict):
        return "Unknown", 0, 0, 0

    # Extract genre (top 3 tags)
    genre = _extract_genre(track_info)

    # Extract listener and play counts as ints
    listeners = _to_int(track_info.get("listeners", "0"))
    plays = _to_int(track_info.get("playcount", "0"))

    # Compute recommendation score (plays per listener); avoid division by zero
    score = plays / listeners if listeners > 0 else 0

    return genre, listeners, plays, score

In [None]:
# Load CSV file
df = pd.read_csv("music_data.csv")

# Row positions of the batch handled in this run (end is exclusive).
BATCH_START, BATCH_END = 2111, 3310

# Take an explicit copy of the batch: the later column assignments then operate
# on an independent frame rather than a slice of the full dataset, which avoids
# pandas' SettingWithCopyWarning.
df = df.iloc[BATCH_START:BATCH_END].copy()

In [None]:
# Create the result columns, each filled with its default value
for column, default in (
    ("genre", ""),
    ("listeners", 0),
    ("plays", 0),
    ("replay_score", 0.0),
):
    df[column] = default

# Look up every song in turn and write the results back into its row
for index, artist, track in zip(df.index, df["artist"], df["name"]):
    # Query Last.fm for this artist/track pair
    genre, listeners, plays, replay_score = get_song_info(artist, track)

    # Write each fetched value into the matching column of the current row
    fetched = {
        "genre": genre,
        "listeners": listeners,
        "plays": plays,
        "replay_score": replay_score,
    }
    for column, value in fetched.items():
        df.at[index, column] = value

    print(f"Processed {index+1}: {track} by {artist} → Genre: {genre}, Listeners: {listeners}, Plays: {plays}, Score: {replay_score:.2f}")

    # Pause between requests to stay under Last.fm's rate limit
    time.sleep(1)

Processed 2112: Gettin' You Home by Chris Young → Genre: country, chris young, Love, Listeners: 130965, Plays: 510022, Score: 3.89
Processed 2113: Coke Bottle Body by Coi Leray → Genre: Unknown, Listeners: 4558, Plays: 13487, Score: 2.96
Processed 2114: Dance Grenade by SmukDJ → Genre: Unknown, Listeners: 337, Plays: 1002, Score: 2.97
Processed 2115: Fallen Demon by Yung Lean → Genre: alternative hip-hop, cloud rap, Listeners: 64733, Plays: 558694, Score: 8.63
Processed 2116: Drop by Eric Bellinger → Genre: Unknown, Listeners: 2360, Plays: 6772, Score: 2.87
Processed 2117: Kwama by Bien → Genre: Unknown, Listeners: 399, Plays: 2304, Score: 5.77
Processed 2118: Kuu Kuu by Willy Paul → Genre: Unknown, Listeners: 170, Plays: 879, Score: 5.17
Processed 2119: Can't Let You Go (The Sha La Song) by Dave Koz → Genre: beautiful, saxophone, relaxing, Listeners: 4959, Plays: 18787, Score: 3.79
Processed 2120: My Day by Tarrus Riley → Genre: Unknown, Listeners: 1102, Plays: 4295, Score: 3.90
Proce

In [None]:
# Write the enriched rows to a new CSV (without the index column) and confirm
output_csv = "music_data_with_extra_info5.csv"
confirmation = "File saved as 'music_data_with_extra_info5.csv'."

df.to_csv(output_csv, index=False)
print(confirmation)

File saved as 'music_data_with_extra_info5.csv'.
