# In [1]:
import os
import json
import time
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

# Input CSV of tracks and the file the raw API responses are appended to.
file_path = 'docs/tracks_features.csv'
raw_responses_file_path = 'docs/raw_responses.json'

# Spotify API credentials.
# Prefer the SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment variables so
# real secrets do not have to be committed to the source; the placeholders are
# kept as a fallback (also used when a variable is set but empty).
client_id = os.environ.get('SPOTIPY_CLIENT_ID') or 'your_client_id'
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET') or 'your_client_secret'

def authenticate_spotify():
    """Return a spotipy client authenticated with the module-level credentials.

    Uses the client-credentials manager built from ``client_id`` and
    ``client_secret``.
    """
    return spotipy.Spotify(
        client_credentials_manager=SpotifyClientCredentials(
            client_id=client_id,
            client_secret=client_secret,
        )
    )

def save_raw_responses(raw_responses):
    """Write ``raw_responses`` as one JSON document to the raw-responses file.

    If the file already exists the document is appended after a newline, so
    successive calls produce newline-separated JSON documents; otherwise the
    file is created and the document is written as its first content.
    """
    print("Saving raw responses...")
    file_already_exists = os.path.exists(raw_responses_file_path)
    open_mode = 'a' if file_already_exists else 'w'
    with open(raw_responses_file_path, open_mode) as out:
        if file_already_exists:
            # Separate this document from the previous one.
            out.write('\n')
        json.dump(raw_responses, out)
        print("Raw responses saved.")


def _merge_raw_response(merged, document):
    """Fold one decoded JSON document into ``merged`` (a dict keyed by track ID)."""
    if not isinstance(document, dict):
        print(f"Skipping unexpected raw response of type {type(document).__name__}.")
        return
    batch = document.get('tracks')
    if isinstance(batch, list):
        # Presumably the payload of sp.tracks(...): {"tracks": [track-or-null, ...]}
        # (see Spotify's "Get Several Tracks") -- index each track by its ID.
        for track in batch:
            if isinstance(track, dict) and track.get('id') is not None:
                merged[track['id']] = track
    else:
        # Any other object is merged key by key, which keeps the result of the
        # previous single-document behaviour for such files.
        merged.update(document)


def load_raw_responses(path=None):
    """Load every raw response saved so far, keyed by track ID.

    The raw-responses file holds one JSON document per saved batch, separated
    by newlines, so it cannot be read with a single ``json.load`` once more
    than one batch has been saved. Each document is decoded in turn and its
    tracks are indexed by ``id`` so callers can test which IDs are done.

    Args:
        path: File to read; defaults to the module-level
            ``raw_responses_file_path``.

    Returns:
        A dict mapping track ID to its raw track object; empty when the file
        does not exist or contains no documents.

    Raises:
        json.JSONDecodeError: If the file contains text that is not valid JSON.
    """
    if path is None:
        path = raw_responses_file_path
    print("Loading raw responses...")
    if not os.path.exists(path):
        return {}

    with open(path, 'r') as file:
        text = file.read()

    decoder = json.JSONDecoder()
    merged = {}
    position = 0
    length = len(text)
    while True:
        # raw_decode requires the document to start exactly at `position`.
        while position < length and text[position].isspace():
            position += 1
        if position >= length:
            break
        document, position = decoder.raw_decode(text, position)
        _merge_raw_response(merged, document)

    print("Raw responses loaded.")
    return merged

def calculate_wait_time(positive_responses, total_time):
    """Return the average time per positive response, or 0 when there were none."""
    no_positive_responses = positive_responses == 0
    return 0 if no_positive_responses else total_time / positive_responses

def _retry_after_seconds(error, default_wait):
    """Return how long to wait after a 429: the larger of Retry-After and the default."""
    headers = getattr(error, 'headers', None) or {}
    try:
        advertised = float(headers.get('Retry-After'))
    except (TypeError, ValueError, AttributeError):
        # Header missing or not a plain number of seconds.
        return default_wait
    return max(advertised, default_wait)


def process_tracks(sp, tracks, max_retries=5):
    """Fetch and save raw Spotify responses for every track not yet saved.

    Track IDs already present as keys in the loaded raw responses are skipped.
    The remaining IDs are requested in batches of 50 through ``sp.tracks`` and
    each batch response is handed to ``save_raw_responses``.

    A batch that fails with a timeout or an HTTP 429 is retried (after a
    back-off) up to ``max_retries`` times instead of being silently dropped;
    any other error is reported and the batch is skipped. After every 10
    consecutive successful batches the loop pauses for the average duration of
    those requests.

    Args:
        sp: Client exposing ``tracks(list_of_ids)`` (a ``spotipy.Spotify``).
        tracks: DataFrame with an ``id`` column of track IDs.
        max_retries: Maximum attempts per batch for retryable failures.
    """
    batch_size = 50   # IDs sent per sp.tracks call
    retry_wait = 120  # seconds to back off after a timeout or a 429
    attempts_per_batch = max(1, max_retries)

    # load_raw_responses already returns {} when nothing has been saved yet.
    raw_responses = load_raw_responses()
    positive_responses_since_429 = 0
    total_time_since_429 = 0

    pending_tracks = tracks[~tracks['id'].isin(raw_responses.keys())]

    track_ids = pending_tracks['id'].tolist()
    print(f"Processing {len(track_ids)} tracks...")
    for index in range(0, len(track_ids), batch_size):
        batch_track_ids = track_ids[index:index + batch_size]

        for _attempt in range(attempts_per_batch):
            try:
                start_time = time.time()
                print(f"Processing {len(batch_track_ids)} tracks, total: {index + len(batch_track_ids)}/{len(track_ids)}")
                batch_responses = sp.tracks(batch_track_ids)
                print("response received")
                save_raw_responses(batch_responses)
                end_time = time.time()
            except TimeoutError as te:
                print(f"TimeoutError: {te}. Retrying...")
                time.sleep(retry_wait)
                continue  # retry the same batch
            except spotipy.SpotifyException as e:
                if e.http_status != 429:
                    print(f"Error processing tracks: {e}")
                    break  # not retryable: skip this batch
                print("Rate limit exceeded. Waiting before retrying...")
                # The success streak is measured "since the last 429".
                positive_responses_since_429 = 0
                total_time_since_429 = 0
                time.sleep(_retry_after_seconds(e, retry_wait))
                continue  # retry the same batch
            except Exception as e:
                # Best effort: report and move on to the next batch.
                print(f"Error processing tracks: {e}")
                break

            positive_responses_since_429 += 1
            total_time_since_429 += end_time - start_time
            print(f"Processed {len(batch_track_ids)} tracks, total: {index + len(batch_track_ids)}/{len(track_ids)}")

            if positive_responses_since_429 >= 10:
                # Pace the loop by the average request time of the streak. This
                # is kept separate from retry_wait so error back-off stays long.
                wait_time = calculate_wait_time(positive_responses_since_429, total_time_since_429)
                print(f"Rate limit calculated: {positive_responses_since_429} positive responses in a row.")
                print(f"Setting waiting time to approximately {wait_time:.2f} seconds between retries.")
                time.sleep(wait_time)
                positive_responses_since_429 = 0
                total_time_since_429 = 0
            break  # batch done
        else:
            # Every attempt ended in a retryable failure.
            print(f"Giving up on batch starting at index {index} after {attempts_per_batch} attempts.")


# Build the Spotify client from the module-level credentials.
sp = authenticate_spotify()

# Load the track table; process_tracks reads its 'id' column.
tracks_df = pd.read_csv(file_path)

# Request and save the raw API responses for the tracks in the table.
process_tracks(sp, tracks_df)

# Output:
# Processing 1204025 tracks...
# Processing 50 tracks, total: 50/1204025
