## Testing the NMF Similar Artist API

In [16]:
import csv
import requests
from typing import Dict, Optional, Set, List
from time import sleep
from datetime import datetime
import pandas as pd
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
import os 

# Set up logging: root logger at INFO level, each record rendered as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

class LastFMAPI:
    """Small client for the Last.fm web-service endpoint (JSON responses).

    Every request is preceded by a pause of ``rate_limit_delay`` seconds.
    The pause is doubled (capped at ``max_delay``) each time the server
    answers with HTTP 429.
    """

    def __init__(self, api_key: str, username: str, timeout: float = 10.0):
        """Store credentials and request settings.

        Args:
            api_key: API key sent with every request.
            username: Account name; stored on the instance.
            timeout: Seconds to wait for the server before a request is
                abandoned, so a stalled connection cannot block forever.
        """
        self.api_key = api_key
        self.username = username
        self.base_url = "http://ws.audioscrobbler.com/2.0/"
        self.rate_limit_delay = 0.25
        self.max_delay = 5.0  # Maximum delay in seconds
        self.timeout = timeout

    def _make_request(self, params: Dict) -> Optional[Dict]:
        """Issue a GET request against ``base_url`` and return the decoded JSON.

        Args:
            params: Query-string parameters for the request.

        Returns:
            The parsed JSON body, or ``None`` if the request failed (the
            failure is logged rather than raised).
        """
        sleep(self.rate_limit_delay)
        try:
            response = requests.get(self.base_url, params=params, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            logging.error(f"API request failed: {e}")
            # Take the response from the exception: when the failure happened
            # before any response arrived (connection error, timeout) there
            # is no local ``response`` to inspect.
            failed_response = getattr(e, 'response', None)
            # Compare against None explicitly: a requests Response with an
            # error status evaluates as falsy.
            if failed_response is not None and failed_response.status_code == 429:
                logging.warning("Rate limit hit, increasing delay")
                self.rate_limit_delay = min(self.rate_limit_delay * 2, self.max_delay)
            return None

    def get_similar_artists(self, artist_name: str, limit: int = 5) -> List[str]:
        """Return the names of artists similar to ``artist_name``.

        Args:
            artist_name: Artist to look up.
            limit: Maximum number of similar artists to request.

        Returns:
            A list of artist names; empty when the request failed or the
            response carries no ``similarartists`` section.
        """
        params = {
            'method': 'artist.getsimilar',
            'artist': artist_name,
            'api_key': self.api_key,
            'format': 'json',
            'limit': limit
        }
        data = self._make_request(params)
        if not data or 'similarartists' not in data:
            return []

        artists = data['similarartists'].get('artist', [])
        # Defensive: accept a single artist delivered as a bare object
        # instead of a one-element list.
        if isinstance(artists, dict):
            artists = [artists]
        return [artist['name'] for artist in artists]

def collect_primary_artists(df_nmf: pd.DataFrame) -> Set[str]:
    """Return the distinct ``'Artist Name(s)'`` entries, one taken per album.

    Rows are grouped on ``'Album Name'`` and the first ``'Artist Name(s)'``
    entry of each group is kept. Values are returned unmodified, so a
    multi-artist string stays a single element of the set.
    """
    first_artist_by_album = df_nmf.groupby('Album Name')['Artist Name(s)'].first()
    return set(first_artist_by_album)

def collect_similar_artists_for_nmf(df_nmf: pd.DataFrame, api: LastFMAPI) -> Dict[str, List[str]]:
    """Look up similar artists for every primary artist found in ``df_nmf``.

    Lookups run concurrently on a pool of five worker threads. An artist whose
    lookup raises is logged and left out of the result.

    Returns:
        Mapping of artist name to the list returned by
        ``api.get_similar_artists`` for that artist.
    """
    names = collect_primary_artists(df_nmf)
    results = {}

    # Fan the lookups out over a thread pool and gather them as they finish.
    with ThreadPoolExecutor(max_workers=5) as pool:
        pending = {}
        for name in names:
            pending[pool.submit(api.get_similar_artists, name)] = name

        for finished in as_completed(pending):
            name = pending[finished]
            try:
                results[name] = finished.result()
                logging.info(f"Processed similar artists for {name}")
            except Exception as exc:
                logging.error(f"Error processing {name}: {exc}")

    return results

def export_similar_artists(api_key: str, username: str, df_nmf: pd.DataFrame, output_file: str = 'data/nmf_artist_adjacent.csv'):
    """Fetch similar artists for the playlist and write them to a CSV file.

    The file has two columns: ``Artist`` and ``Similar Artists``. The second
    column holds all similar artists for the row joined by ``', '``.

    Args:
        api_key: API key handed to ``LastFMAPI``.
        username: Username handed to ``LastFMAPI``.
        df_nmf: Playlist data passed to ``collect_similar_artists_for_nmf``.
        output_file: Destination path; missing parent directories are created.

    Any error raised while writing is logged, not propagated.
    """
    api = LastFMAPI(api_key, username)
    similar_artists_dict = collect_similar_artists_for_nmf(df_nmf, api)

    try:
        parent_dir = os.path.dirname(output_file)
        if parent_dir:  # '' means the current directory; nothing to create
            os.makedirs(parent_dir, exist_ok=True)

        with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=['Artist', 'Similar Artists'])
            writer.writeheader()

            for artist, similar_artists in similar_artists_dict.items():
                # Inside the joined cell, names that themselves contain a comma
                # are wrapped in quotes so they stay distinguishable from the
                # ', ' separator between names.
                similar_artists_names = ', '.join(
                    f'"{similar}"' if ',' in similar else similar
                    for similar in similar_artists
                )

                # The artist name is written as-is: the csv writer already
                # quotes fields containing commas, so adding quotes by hand
                # would store literal quote characters in the data.
                writer.writerow({
                    'Artist': artist,
                    'Similar Artists': similar_artists_names
                })

        logging.info(f"\nExport complete! Processed {len(similar_artists_dict)} artists.")

    except Exception as e:
        logging.error(f"Fatal error during export: {e}")

# Example usage
if __name__ == "__main__":
    # Credentials are read from the environment so that the API key is not
    # stored in the source file.
    # NOTE(review): a key was previously hard-coded here; treat it as exposed
    # and rotate it.
    API_KEY = os.environ.get('LASTFM_API_KEY', '')
    USERNAME = os.environ.get('LASTFM_USERNAME', 'your_lastfm_username')

    if not API_KEY or not USERNAME:
        raise ValueError("Please provide a valid Last.fm API key and username.")

    # Load the NMF playlist data from 'data/nmf.csv'
    try:
        df_nmf = pd.read_csv('data/nmf.csv')  # Ensure this path is correct
    except FileNotFoundError:
        logging.error("The file 'data/nmf.csv' was not found. Please check the path.")
        # SystemExit is raised directly: the ``exit`` helper is injected by
        # the ``site`` module and is not guaranteed to exist.
        raise SystemExit(1)

    # Export similar artists
    export_similar_artists(API_KEY, USERNAME, df_nmf)


2025-02-09 13:19:31,577 - INFO - Processed similar artists for Rats On Rafts
2025-02-09 13:19:31,582 - INFO - Processed similar artists for Adwaith
2025-02-09 13:19:31,607 - INFO - Processed similar artists for Swept Away Original Broadway Cast,The Avett Brothers
2025-02-09 13:19:31,608 - INFO - Processed similar artists for Krept & Konan
2025-02-09 13:19:31,652 - INFO - Processed similar artists for Shannon Wright
2025-02-09 13:19:33,888 - INFO - Processed similar artists for The Bird Calls
2025-02-09 13:19:33,892 - INFO - Processed similar artists for NATTI NATASHA
2025-02-09 13:19:33,953 - INFO - Processed similar artists for Biig Piig
2025-02-09 13:19:33,958 - INFO - Processed similar artists for Tony Allen,La BOA
2025-02-09 13:19:33,978 - INFO - Processed similar artists for Diffrent
2025-02-09 13:19:36,214 - INFO - Processed similar artists for Zelooperz,Real Bad Man
2025-02-09 13:19:36,237 - INFO - Processed similar artists for The Ocelots
2025-02-09 13:19:36,280 - INFO - Proces