In [2]:
import requests
import json
import os
import time

from api_secrets import JAMENDO_CLIENT

## Guardar Cover Album

In [3]:
# Set your API keys here
JAMENDO_CLIENT_ID = JAMENDO_CLIENT

# Set the number of tracks you want to fetch per request
TRACK_LIMIT = 200

# Create a directory to store the cover art images
os.makedirs('cover_art', exist_ok=True)

# Initialize the dataset and cover art URL counter
dataset = []
album_cover_art_counter = {}

# Fetch tracks from Jamendo API using pagination
page = 1
total_tracks = 0
desired_total_tracks = 1000
offset = 0


In [30]:
while True:
    # Construct the API request URL
    jamendo_url = f'https://api.jamendo.com/v3.0/tracks/?client_id={JAMENDO_CLIENT_ID}&format=json&type=single&limit={TRACK_LIMIT}&offset={total_tracks}'

    # Fetch tracks for the current page
    jamendo_response = requests.get(jamendo_url)
    jamendo_data = jamendo_response.json()

    # Check if there are no more tracks
    if not jamendo_data['results']:
        break

    # Check if the new iteration has any new tracks
    new_ids = set([track['id'] for track in jamendo_data['results']])
    previous_ids = set([track['id'] for track in dataset])
    jamendo_filtered_data = [track for track in jamendo_data['results'] if track['id'] not in previous_ids]

    if len(jamendo_filtered_data) == 0:
        break

    # Iterate through the tracks and fetch cover art
    for track in jamendo_filtered_data:
        # Prepare track data
        track_data = {
            'id': track['id'],
            'name': track['name'],
            'artist': track['artist_name'],
            'releasedate': track['releasedate'],
            'album': track['album_name'],
            'audio_url': track['audio'],
            'audio_download_url': track['audiodownload'],
            'cover_art_url': track["album_image"],
            'audiodownload_allowed': track['audiodownload_allowed']
        }

        # Check if the audio is downloadable and if there is a cover_art_url
        if track_data['audiodownload_allowed']:
            # Check if the cover art URL has already been encountered for more than three songs
            if album_cover_art_counter.get(track_data['album'], 0) == 0:
                # Download the cover art and save it to the 'cover_art' directory
                cover_art_response = requests.get(track_data['cover_art_url'])
                cover_art_file = os.path.join('cover_art', f'{track_data["id"]}.jpg')
                with open(cover_art_file, 'wb') as f:
                    f.write(cover_art_response.content)
                track_data['cover_art_file'] = cover_art_file

                # Increment the cover art URL counter
                album_cover_art_counter[track_data['album']] = album_cover_art_counter.get(track_data['album'], 0) + 1

                # Add the track data to the dataset
                dataset.append(track_data)
                total_tracks += 1

                # Check if the desired dataset size is reached
                if total_tracks >= desired_total_tracks:
                    break

    # Check if the desired dataset size is reached
    if total_tracks >= desired_total_tracks:
        break

    # Increment the page number for the next request
    page += 1
    offset = TRACK_LIMIT*(page - 1) + 1

    # Sleep the call to avoid being disconected
    time.sleep(30)



KeyboardInterrupt: 

In [31]:
print(offset)
print(page)

35001
176


In [32]:
# Save the dataset to a JSON file
with open('dataset.json', 'w') as f:
    json.dump(dataset, f, indent=2)

In [4]:
os.makedirs('music_tracks', exist_ok=True)

## Guardar canciones

In [5]:
def download_song(url, save_path):
    response = requests.get(url)
    if response.status_code == 200:
        with open(save_path, 'wb') as file:
            file.write(response.content)
        print("Download completed successfully.")
    else:
        print("Failed to download the song.")

In [7]:
with open("./dataset.json", "rb") as f:
    dataset = json.load(f)

In [18]:
for track in dataset:
    id = track['id']
    song_url = track['audio_download_url']
    save_location = f"./music_tracks/{id}.mp3"
    download_song(song_url, save_location)
    time.sleep(5)

Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download completed successfully.
Download c