In [2]:
import os
import requests
from zipfile import ZipFile
from tqdm import tqdm  # for progress bar

def download_irmas_dataset(dataset_url, save_path):
    """Download the IRMAS archive into ``save_path`` and extract it there.

    The archive is stored as ``IRMAS.zip`` inside ``save_path``. When that
    file is already present, nothing is downloaded or extracted.

    Args:
        dataset_url: URL of the zip archive to fetch.
        save_path: Directory that receives the archive and its extracted
            contents; it is created if it does not exist.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an HTTP error status.
        OSError: If the archive cannot be written to disk.

    Any exception raised by ``extract_dataset`` propagates unchanged.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_path, exist_ok=True)

    # Define the file path for the downloaded dataset
    dataset_filename = os.path.join(save_path, "IRMAS.zip")

    if os.path.exists(dataset_filename):
        print("IRMAS dataset already exists.")
        return

    print("Downloading IRMAS dataset...")

    # Stream into a sidecar file and rename only on success. Otherwise an
    # interrupted download would leave a truncated IRMAS.zip behind, and the
    # existence check above would treat it as a complete dataset next time.
    partial_filename = dataset_filename + ".part"
    try:
        # The timeout bounds connection setup and each wait for data, not the
        # whole transfer, so large downloads are still allowed to take long.
        with requests.get(dataset_url, stream=True, timeout=30) as response:
            # Fail loudly instead of saving an HTTP error page as the archive.
            response.raise_for_status()

            # A missing header yields 0; tqdm needs None for "unknown size".
            total_bytes = int(response.headers.get("content-length", 0)) or None

            with open(partial_filename, "wb") as file, tqdm(
                desc="Downloading",
                total=total_bytes,
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                # 1 MiB chunks keep per-chunk overhead low for a multi-GB file.
                for data in response.iter_content(chunk_size=1024 * 1024):
                    file.write(data)
                    bar.update(len(data))
    except BaseException:
        # BaseException so that interrupting the notebook cell
        # (KeyboardInterrupt) also cleans up the partial file.
        try:
            os.remove(partial_filename)
        except FileNotFoundError:
            pass
        raise

    os.replace(partial_filename, dataset_filename)
    print("Download complete.")

    # Extract the downloaded dataset
    extract_dataset(dataset_filename, save_path)


def extract_dataset(dataset_filename, extract_path):
    """Unpack every member of the zip archive into ``extract_path``.

    Args:
        dataset_filename: Path of the zip archive to read.
        extract_path: Directory the archive members are extracted into.
    """
    print("Extracting IRMAS dataset...")

    # ZipFile opens in read mode by default.
    with ZipFile(dataset_filename) as archive:
        archive.extractall(path=extract_path)

    print("Extraction complete.")


# IRMAS dataset URL: the IRMAS-TrainingData.zip archive hosted on Zenodo
irmas_dataset_url = "https://zenodo.org/records/1290750/files/IRMAS-TrainingData.zip?download=1"

# Directory that receives the archive and its extracted contents
# (a relative path, so it resolves against the current working directory)
save_directory = "dataset"

# Runs when this cell/module executes: downloads the archive to
# save_directory/IRMAS.zip and extracts it, unless that file already exists
download_irmas_dataset(irmas_dataset_url, save_directory)

Downloading IRMAS dataset...


Downloading: 100%|██████████| 3181049879/3181049879 [03:31<00:00, 15040580.33it/s]


Download complete.
Extracting IRMAS dataset...
Extraction complete.
