# Generate necessary folder structure and download everything

In [None]:
import os
import requests
from bs4 import BeautifulSoup

# Target URL on Kaggle: the page that is fetched and scanned for <img> tags.
# NOTE(review): this is the dataset's landing page; confirm that its static
# HTML actually contains the image containers the scraper looks for.
URL = "https://www.kaggle.com/datasets/tawsifurrahman/covid19-radiography-database/data"
# Local directory (relative to the working directory) that receives the files.
DOWNLOAD_FOLDER = "COVID-19_Radiography_Dataset"

def create_folder(folder):
    """Create the target folder, including missing parents, if it is absent.

    Args:
        folder: Path of the directory to create.

    Raises:
        FileExistsError: If ``folder`` already exists but is not a directory.
    """
    # exist_ok=True removes the window between "check" and "create" in which
    # another process could create the directory and make makedirs() fail.
    os.makedirs(folder, exist_ok=True)

def get_image_urls(url, timeout=30):
    """Fetch a page and collect the image URLs from its image containers.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list holding the ``src`` value of the first ``<img>`` inside every
        ``<div>`` matched by the class filter ``"sc-jXGfdH ka-dkW"``. The list
        is empty when the request fails or the status code is not 200.
    """
    # Send a browser-like User-Agent instead of the requests default.
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # Without a timeout requests may wait forever on a stalled server.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Report network failures the same way as a bad status: empty result.
        print(f"Fehler beim Abrufen der Webseite: {exc}")
        return []

    if response.status_code != 200:
        print("Fehler beim Abrufen der Webseite")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    image_urls = []

    # Look for the div containers that wrap the images.
    for div in soup.find_all("div", class_="sc-jXGfdH ka-dkW"):
        img_tag = div.find("img")
        src = img_tag.get("src") if img_tag else None
        # Skip containers without an image or with a missing/empty src.
        if src:
            image_urls.append(src)

    return image_urls

def download_images(image_urls, folder, timeout=30):
    """Download every image URL into ``folder``, skipping files already there.

    The file name is the last path segment of the URL with any query string
    removed. A failed download is reported and skipped so that the remaining
    URLs are still processed.

    Args:
        image_urls: Iterable of image URLs to download.
        folder: Existing directory that receives the files.
        timeout: Seconds to wait for the server before giving up on a URL.
    """
    for url in image_urls:
        basename = url.split("/")[-1].split("?")[0]
        if not basename:
            # A URL ending in "/" has no file name; joining it would yield the
            # folder itself and be mistaken for an already downloaded image.
            print(f"Fehler beim Herunterladen: {url}")
            continue
        filename = os.path.join(folder, basename)

        if os.path.exists(filename):
            print(f"Bild existiert bereits: {filename}")
            continue

        print(f"Lade herunter: {filename}")
        # Stream into a temporary sibling file first so an interrupted
        # transfer never leaves a truncated file under the final name.
        partial = filename + ".part"
        try:
            # The context manager releases the connection even when the body
            # is not read (e.g. on a non-200 answer).
            with requests.get(url, stream=True, timeout=timeout) as response:
                if response.status_code != 200:
                    print(f"Fehler beim Herunterladen: {url}")
                    continue
                with open(partial, "wb") as f:
                    for chunk in response.iter_content(8192):
                        f.write(chunk)
            os.replace(partial, filename)
        except (requests.RequestException, OSError):
            print(f"Fehler beim Herunterladen: {url}")
            if os.path.exists(partial):
                os.remove(partial)

# Main flow: make sure the target folder exists, scrape the image URLs from
# the page, then download each image into that folder.
create_folder(DOWNLOAD_FOLDER)
image_urls = get_image_urls(URL)
download_images(image_urls, DOWNLOAD_FOLDER)

print("Fertig!")