# Install libs

In [1]:
# !pip install ...

# Import libs

In [None]:
import torch

# Global variables and preferences

In [None]:
# Fixed integer kept for later cells (presumably a seed for reproducible
# runs; confirm at the use sites).
random_state = 69

# Folder name used as the extraction target for the downloaded archives.
shared_folder = "data"

# Use the GPU when torch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


# Download Data
The download may take 5-7 minutes.

In [None]:
import requests
import zipfile
import os
from urllib.parse import urlencode


def download_and_unzip_yandex_disk(public_url, extract_to_folder, timeout=60):
    """
    Downloads and unzips a file from Yandex.Disk using a public link.

    The archive is streamed to a uniquely named temporary file in the current
    working directory, extracted, and the temporary file is removed afterwards
    even when the download or the extraction fails. Errors are reported with
    ``print`` and are not re-raised.

    :param public_url: Public URL to the file on Yandex.Disk.
    :param extract_to_folder: The folder where the archive will be extracted
        (created if it does not exist).
    :param timeout: Timeout in seconds passed to every ``requests.get`` call,
        so that an unresponsive server cannot block the call indefinitely.
    :return: None.
    """
    # Function-scope import: tempfile is only needed by this function.
    import tempfile

    print(f"Starting to process link: {public_url}")

    # 1. Get the direct download link from the Yandex.Disk API
    base_api_url = "https://cloud-api.yandex.net/v1/disk/public/resources/download?"
    api_url = base_api_url + urlencode(dict(public_key=public_url))

    # Stays None until the temporary file exists, so the cleanup in `finally`
    # knows whether there is anything to delete.
    zip_filename = None

    try:
        response = requests.get(api_url, timeout=timeout)
        response.raise_for_status()  # Check for HTTP errors
        download_url = response.json().get("href")

        if not download_url:
            print(f"Error: Could not get a direct download link for {public_url}")
            return

        print("Direct download link obtained.")

        # 2. Download the file
        print("Downloading file...")
        # A unique name avoids clobbering (or being clobbered by) another
        # download that runs from the same working directory.
        fd, zip_filename = tempfile.mkstemp(
            prefix="temp_download_", suffix=".zip", dir=os.curdir
        )
        with os.fdopen(fd, "wb") as f:
            # stream=True plus iter_content writes the archive chunk by chunk
            # instead of holding the whole file in memory.
            with requests.get(
                download_url, stream=True, timeout=timeout
            ) as download_response:
                download_response.raise_for_status()
                for chunk in download_response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        print(f"File '{zip_filename}' downloaded successfully.")

        # 3. Unzip the archive
        print(f"Unzipping archive to folder '{extract_to_folder}'...")
        os.makedirs(extract_to_folder, exist_ok=True)

        with zipfile.ZipFile(zip_filename, "r") as zip_ref:
            zip_ref.extractall(extract_to_folder)
        print(f"Files successfully unzipped to '{extract_to_folder}'.")

    except requests.exceptions.RequestException as e:
        print(f"A network or API error occurred: {e}")
        print("Tip: Make sure your environment can access 'cloud-api.yandex.net'.")
    except Exception as e:
        # Deliberate best-effort boundary: report the problem, do not crash.
        print(f"An unexpected error occurred: {e}")
    finally:
        # 4. Remove the downloaded zip archive (on success and on failure)
        if zip_filename is not None:
            try:
                os.remove(zip_filename)
                print(f"Temporary file '{zip_filename}' has been removed.")
            except OSError as e:
                print(
                    f"Warning: could not remove temporary file '{zip_filename}': {e}"
                )


# --- MAIN SCRIPT ---
# Archives to fetch, keyed by "train" / "test". Each entry holds the public
# share link and the folder the archive is extracted into.
files_to_download = {
    split: {"url": link, "folder": shared_folder}
    for split, link in (
        ("train", "https://disk.yandex.ru/d/RRXJu9ZtEmSXzQ"),  # standard share link
        ("test", "https://disk.yandex.ru/d/TmbB7BsGzg1dQQ"),  # standard share link
    )
}

# Download and extract every archive in turn, printing a separator after each.
for entry in files_to_download.values():
    download_and_unzip_yandex_disk(entry["url"], entry["folder"])
    print("-" * 20)

print("\nAll operations completed!")

Starting to process link: https://disk.yandex.ru/d/RRXJu9ZtEmSXzQ
Direct download link obtained.
Downloading file...
File 'temp_download.zip' downloaded successfully.
Unzipping archive to folder 'data'...
Files successfully unzipped to 'data'.
Temporary file 'temp_download.zip' has been removed.
--------------------
Starting to process link: https://disk.yandex.ru/d/TmbB7BsGzg1dQQ
Direct download link obtained.
Downloading file...
File 'temp_download.zip' downloaded successfully.
Unzipping archive to folder 'data'...
Files successfully unzipped to 'data'.
Temporary file 'temp_download.zip' has been removed.
--------------------

All operations completed!
