In [0]:
import requests
import json
from datetime import datetime  # Import the datetime module


In [0]:
# Notebook parameters: the API token and the list position to resume from.
token = dbutils.widgets.get('token')
artist_id_index = dbutils.widgets.get('artist_id_index')
# The index is converted to int because it is used to slice the ID list later.
artist_id_index = int(artist_id_index)
headers = {"Authorization": token}
# The headers are intentionally NOT displayed: they contain the API credential,
# and anything rendered in a notebook cell may be stored and shared with the run.

# CSV file holding the artist IDs to ingest
csv_file_path = "/mnt/spotifyetlprojectdl/raw/raw-artist-ids/ds_raw_artist_ids.csv"

# Read CSV file into a DataFrame (first row is the header)
df = spark.read.csv(csv_file_path, header=True)

# Display the DataFrame to see its structure
df.show()

# Collect the "ArtistID" column as a plain Python list
artist_ids = df.select("ArtistID").rdd.flatMap(lambda x: x).collect()

# Print each artist ID
for artist_id in artist_ids:
    print(artist_id)

In [0]:

# Folder that receives the exception log (a /dbfs/... path written with open())
exception_destination_folder = "/dbfs/mnt/ingestion-exception"

# Resume position: both counters start at the index passed in via the widget
start_index = current_index = artist_id_index

def raise_custom_exception(error_message, current_index, response_code, retry_after=None):
    """Record an ingestion failure and abort with a JSON-encoded exception.

    Builds a dict describing the failure, passes it to ``save_exception`` and
    then raises an ``Exception`` whose message is that dict serialized as JSON.

    Args:
        error_message: Human-readable description of the failure.
        current_index: Position in the ID list being processed; stored as a string.
        response_code: HTTP status code associated with the failure.
        retry_after: Value of the ``Retry-After`` response header, if any.

    Raises:
        Exception: Always. The message is the JSON payload with the keys
            ``error_message``, ``current_index``, ``response_code``,
            ``retry_after`` and ``timestamp``.
    """
    exception_dict = {
        "error_message": error_message,
        "current_index": str(current_index),
        "response_code": response_code,
        "retry_after": retry_after,
        "timestamp": datetime.now().strftime("%Y%m%d%H%M%S"),
    }

    # Persisting the log is best-effort: an I/O failure here must not replace
    # the structured exception below, which carries the resume information.
    try:
        save_exception(exception_dict)
    except OSError as save_error:
        print("Could not save exception log:", save_error)

    raise Exception(json.dumps(exception_dict))

def save_exception(exception_dict):
    """Write the exception details as JSON to the exception log file.

    The log is written to ``<exception_destination_folder>/album_error_logs.json``.
    The file name is fixed, so each call overwrites the previous log.

    Args:
        exception_dict: JSON-serializable dict describing the failure.
    """
    dbfs_log_file = f"{exception_destination_folder}/album_error_logs.json"
    print("Saving to DBFS:", dbfs_log_file)

    # Mode 'w' truncates any existing log before writing the new one.
    with open(dbfs_log_file, 'w') as json_file:
        json.dump(exception_dict, json_file, indent=2)
        
        
# Fetch the albums/singles of every artist, resuming at start_index.
# current_index always holds the list position of the artist being processed,
# so the index reported in an exception is the one to resume from.
request_timeout_seconds = 30  # avoid hanging forever on a stalled connection

for current_index, artist_id in enumerate(artist_ids[start_index:], start=start_index):
    offset = 0
    limit = 50
    while True:
        search_url = f"https://api.spotify.com/v1/artists/{artist_id}/albums?include_groups=album%2Csingle&limit={limit}&offset={offset}"
        result = requests.get(search_url, headers=headers, timeout=request_timeout_seconds)
        response_code = result.status_code  # Capture the response code
        print(response_code)

        if response_code == 429:
            retry_after = result.headers.get('Retry-After')
            error_message = "Rate limit exceeded."
            raise_custom_exception(error_message, current_index, response_code, retry_after)
        elif response_code == 401:
            raise_custom_exception("Bad or expired token.", current_index, response_code)
        elif response_code != 200:
            # Any other status has no usable page; stop instead of silently
            # skipping the artist or reading a previous artist's response.
            raise_custom_exception("Unexpected response from Spotify API.", current_index, response_code)

        albums = json.loads(result.content)
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        # Save this page of albums to a JSON file in /dbfs/mnt/incoming-albums
        filename = f"/dbfs/mnt/incoming-albums/{artist_id}_{offset}_incomming_albums_{timestamp}.json"
        with open(filename, 'w') as json_file:
            json.dump(albums, json_file, indent=2)

        page_size = len(albums["items"])
        offset += page_size

        # A page shorter than the limit is the last page for this artist.
        if page_size < limit:
            break


dbutils.notebook.exit("Success")
     
        
        
        
