# In [2]:
import csv
import requests
import os
import time

# Deezer track-lookup endpoint; the ISRC is appended directly after the colon.
deezer_api_url = 'https://api.deezer.com/track/isrc:'

# CSV locations (relative paths, so they resolve against the working directory):
#   spotify_data_path - input file, read for its 'external_ids.isrc' column
#   deezer_data_path  - output file that the Deezer track rows are appended to
spotify_data_path, deezer_data_path = (
    '../../Data/Processed/spotify_data.csv',
    '../../Data/Raw/deezer_data_v.csv',
)

# Create the folder that will hold the Deezer CSV, including any missing
# parent folders; exist_ok=True makes this a no-op when it is already there.
deezer_output_dir = os.path.dirname(deezer_data_path)
os.makedirs(deezer_output_dir, exist_ok=True)

# Read the ISRC column from the Spotify data file.
#
# newline='' is what the csv module expects for files it reads, so that line
# breaks embedded inside quoted fields are parsed correctly.
#
# csv.DictReader gives every row all of the header's keys, so the membership
# test below is either true for every row or false for every row. The result
# is therefore one entry per data row, in file order (blank or missing values
# included), or an empty list when the file has no 'external_ids.isrc' column.
# Keeping one entry per row matters because the download loop further down
# slices this list by position.
with open(spotify_data_path, mode='r', encoding='utf-8', newline='') as spotify_file:
    reader = csv.DictReader(spotify_file)
    isrcs = [row['external_ids.isrc'] for row in reader if 'external_ids.isrc' in row]

# Query the Deezer API for each ISRC and append one CSV row per track found.
#
# The output file is opened in append mode, so the script can be re-run with a
# higher `resume_from` to continue where a previous run stopped. Because of
# that, the header is written only while the file is still empty; writing it
# unconditionally would drop a stray header row into the middle of the data on
# every resumed run.
resume_from = 87000   # position in `isrcs` to start from (presumably where an earlier run stopped)
request_timeout = 10  # seconds; without a timeout requests.get() can block indefinitely

with open(deezer_data_path, mode='a+', encoding='utf-8', newline='') as deezer_file:
    fieldnames = ['id', 'title', 'artist', 'album', 'duration', 'link', 'rank', 'release_date', 'explicit_lyrics', 'isrc']
    writer = csv.DictWriter(deezer_file, fieldnames=fieldnames)
    # The file exists at this point (append mode creates it), so its size tells
    # us whether a header has to be written.
    if os.path.getsize(deezer_data_path) == 0:
        writer.writeheader()

    for isrc in isrcs[resume_from:]:
        if not isrc:
            # Blank or missing ISRC in the input: there is nothing to look up,
            # and concatenating None to the URL would raise TypeError.
            continue

        try:
            response = requests.get(deezer_api_url + isrc, timeout=request_timeout)
            data = response.json() if response.status_code == 200 else None
        except (requests.RequestException, ValueError) as exc:
            # Network failure, timeout, or a body that is not valid JSON.
            # Log it and carry on so one bad lookup does not abort a long run.
            print(f"Failed to fetch data for ISRC: {isrc} ({exc})")
            data = None
        else:
            # Deezer reports failed lookups (e.g. unknown ISRC, quota exceeded)
            # with HTTP 200 and an {"error": ...} body, so the status code
            # alone is not enough to decide that a track was returned.
            if not isinstance(data, dict) or 'error' in data:
                print(f"Failed to fetch data for ISRC: {isrc}")
                data = None

        if data is not None:
            # `or {}` also covers an explicit null for the nested objects,
            # which dict.get's default would not.
            artist = data.get('artist') or {}
            album = data.get('album') or {}
            writer.writerow({
                'id': data.get('id'),
                'title': data.get('title'),
                'artist': artist.get('name'),
                'album': album.get('title'),
                'duration': data.get('duration'),
                'link': data.get('link'),
                'rank': data.get('rank'),
                'release_date': data.get('release_date'),
                'explicit_lyrics': data.get('explicit_lyrics'),
                'isrc': data.get('isrc'),
            })

        # Pause between requests to stay under the API's rate limit.
        time.sleep(0.1)