# Artists Similar to the Artists I Like (via the Last.fm API)

In [2]:
import pandas as pd

# Load the two files
df_liked = pd.read_csv('data/liked.csv')
df_liked_albums = pd.read_csv('data/liked_albums.csv')

# Keep only the first entry of each comma-separated 'Artist Name(s)' value.
# Surrounding whitespace is stripped so the same name is not counted twice
# (this matches the extraction done in export_similar_artists).
df_liked['Artist'] = df_liked['Artist Name(s)'].str.split(',').str[0].str.strip()
df_liked_albums['Artist'] = df_liked_albums['Artist Name(s)'].str.split(',').str[0].str.strip()

# Combine the unique artists from both datasets.
# Rows without an artist name (NaN) are dropped so they are not counted as an artist.
unique_artists = pd.concat([df_liked['Artist'], df_liked_albums['Artist']]).dropna().unique()

# Output the number of unique artists
print(f"Number of unique artists: {len(unique_artists)}")


Number of unique artists: 2126


In [None]:
import csv
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from time import sleep
from typing import List, Optional

import pandas as pd
import requests

class LastFMAPI:
    """Small client for the Last.fm web API's ``artist.getsimilar`` method.

    Every request is preceded by a fixed pause of ``rate_limit_delay`` seconds
    as a simple client-side rate limit.
    """

    def __init__(self, api_key: str, username: str, rate_limit_delay: float = 0.25,
                 timeout: float = 10.0):
        """Store the connection settings.

        Args:
            api_key: Last.fm API key sent with every request.
            username: Last.fm user name. It is stored on the instance but not
                used by any method of this class.
            rate_limit_delay: Seconds to sleep before each request.
            timeout: Seconds to wait for the server before giving up on a
                request. Without a timeout a stalled connection would block
                the whole export indefinitely.
        """
        self.api_key = api_key
        self.username = username
        # HTTPS so that the API key is not transmitted in clear text.
        self.base_url = "https://ws.audioscrobbler.com/2.0/"
        self.rate_limit_delay = rate_limit_delay
        self.timeout = timeout

    def _make_request(self, params: dict) -> Optional[dict]:
        """Send a GET request to the API and return the decoded JSON body.

        Args:
            params: Query parameters for the request.

        Returns:
            The decoded JSON payload, or ``None`` if the request failed or the
            response body was not valid JSON. Failures are printed, not raised.
        """
        sleep(self.rate_limit_delay)
        try:
            response = requests.get(self.base_url, params=params, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        # ValueError covers JSON decoding errors, which are not a
        # RequestException in every version of requests.
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"API request failed for {params.get('artist')}: {e}")
            return None

    def get_similar_artists(self, artist_name: str, limit: int = 5) -> List[str]:
        """Return the names of artists Last.fm lists as similar to ``artist_name``.

        Args:
            artist_name: Artist to look up.
            limit: Maximum number of similar artists to request.

        Returns:
            A list of artist names. The list is empty when the request failed,
            when the API reported an error, or when no similar artists were
            returned.
        """
        params = {
            'method': 'artist.getsimilar',
            'artist': artist_name,
            'api_key': self.api_key,
            'format': 'json',
            'limit': limit
        }
        data = self._make_request(params)
        if not isinstance(data, dict):
            return []

        if 'error' in data:
            # Presumably Last.fm reports failures as {"error": <code>, "message": <text>};
            # surface them instead of silently returning nothing.
            print(f"Last.fm API error for {artist_name}: {data.get('message', data['error'])}")
            return []

        similar = data.get('similarartists')
        if not isinstance(similar, dict):
            return []

        artists = similar.get('artist', [])
        # A single result may arrive as one object instead of a one-element
        # list; normalise so the comprehension below handles both shapes.
        if isinstance(artists, dict):
            artists = [artists]
        return [entry['name'] for entry in artists
                if isinstance(entry, dict) and 'name' in entry]

def export_similar_artists(api_key: str, username: str, output_file: str = 'data/liked_artists_only_similar.csv',
                           liked_file: str = 'data/liked.csv',
                           liked_albums_file: str = 'data/liked_albums.csv'):
    """Look up similar artists for every liked artist and append them to a CSV.

    The first artist of each 'Artist Name(s)' entry is collected from both
    input files. Artists already present in ``output_file`` are skipped, so
    the function can be re-run to process only newly liked artists.

    Args:
        api_key: Last.fm API key passed to ``LastFMAPI``.
        username: Last.fm user name passed to ``LastFMAPI``.
        output_file: CSV with the columns 'Artist' and 'Similar Artists'.
            It is created if missing, otherwise new rows are appended.
        liked_file: CSV of liked tracks with an 'Artist Name(s)' column.
        liked_albums_file: CSV of liked albums with an 'Artist Name(s)' column.

    Returns:
        None. Progress is printed and results are written to ``output_file``.
    """
    unique_artists = _read_primary_artists(liked_file) | _read_primary_artists(liked_albums_file)

    # A zero-byte output file cannot be parsed and has no header row, so it is
    # treated the same as a missing file.
    has_existing_output = os.path.exists(output_file) and os.path.getsize(output_file) > 0

    existing_artists = set()
    if has_existing_output:
        existing_df = pd.read_csv(output_file)
        existing_artists = set(existing_df['Artist'].dropna().astype(str).str.strip())

    # Sorted so that the processing order is the same on every run.
    new_artists = sorted(unique_artists - existing_artists)

    if not new_artists:
        print("No new artists found. Skipping API calls.")
        return

    if len(new_artists) <= 30:
        print("New artists to process:")
        for artist in new_artists:
            print(f"- {artist}")
    else:
        print(f"Processing {len(new_artists)} new artists...")

    api = LastFMAPI(api_key, username)

    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    mode = 'a' if has_existing_output else 'w'
    with open(output_file, mode, newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['Artist', 'Similar Artists'])
        if mode == 'w':
            writer.writeheader()

        for processed_count, artist in enumerate(new_artists, start=1):
            # NOTE: get_similar_artists returns [] on failure as well as for
            # "no results", so a failed lookup is stored as an empty row and
            # will not be retried on a later run.
            similar_artists = api.get_similar_artists(artist)
            writer.writerow({
                'Artist': artist,
                'Similar Artists': ', '.join(similar_artists)
            })
            # Push each row to the OS right away so finished work is not left
            # sitting in the write buffer during this long-running loop.
            csvfile.flush()

            if len(new_artists) > 30 and processed_count % 50 == 0:
                print(f"Processed {processed_count} artists...")

    print(f"Complete! Added {len(new_artists)} new artists to the database.")


def _read_primary_artists(csv_path: str) -> set:
    """Return the stripped, non-empty first artist of every 'Artist Name(s)' value in ``csv_path``."""
    names = pd.read_csv(csv_path)['Artist Name(s)']
    # Missing values are dropped first: a NaN would otherwise be sent to the
    # API and written out as an artist.
    primary = names.dropna().astype(str).str.split(',').str[0].str.strip()
    return set(primary[primary != ''])

if __name__ == "__main__":
    # The API key is read from the environment so that it is not stored in the
    # notebook, where it would be shared with anyone who can read the file.
    # NOTE(review): a key that was ever committed in this notebook should be
    # treated as exposed and replaced.
    API_KEY = os.environ.get('LASTFM_API_KEY')
    if not API_KEY:
        raise RuntimeError("Set the LASTFM_API_KEY environment variable before running this cell.")
    # The user name is not a secret; the environment variable is an optional override.
    USERNAME = os.environ.get('LASTFM_USERNAME', 'Strusz_Music')
    export_similar_artists(API_KEY, USERNAME)


Phase 2: Finding similar artists for 2126 unique artists...
API request failed: Expecting value: line 1 column 1 (char 0)
