In [1]:
import asyncio
import shutil
import zipfile
from pathlib import Path

In [58]:
# Reset the working folders: each one ends up existing and empty.
# `shutil` is imported in the notebook's import cell.
folders = [Path(".") / "data" / "zip", Path(".") / "data" / "unzip"]

for folder in folders:
    # Drop any leftovers from a previous run before recreating the folder.
    if folder.exists():
        shutil.rmtree(folder)
    # parents=True also creates ./data when it is missing, so a fresh checkout
    # does not fail with FileNotFoundError.
    folder.mkdir(parents=True)

In [59]:
# Build several test archives from a single source archive.
# In copy i the first path component of every member gets the suffix "_i"
# (presumably so the copies can be extracted side by side into one folder
# without overwriting each other -- confirm against the intended experiment).
source = Path(".") / "data" / "zip_10MB.zip"
location = Path(".") / "data" / "zip"
n_copies = 10  # number of archives to generate

for i in range(n_copies):
    with zipfile.ZipFile(source, "r") as zin, zipfile.ZipFile(
        location / f"zip_{i}.zip", "w"
    ) as zout:
        for item in zin.infolist():
            # Rename the first path component. Note this is the top-level
            # folder for nested members, but the file name itself for a
            # member stored at the archive root.
            parts = item.filename.split("/")
            if parts[0]:
                parts[0] = f"{parts[0]}_{i}"
            new_item = zipfile.ZipInfo("/".join(parts))
            # Carry over the timestamp and the external attributes (permissions).
            new_item.date_time = item.date_time
            new_item.external_attr = item.external_attr
            if item.is_dir():
                # Directory entries carry no payload.
                zout.writestr(new_item, b"")
            else:
                # A fresh ZipInfo defaults to ZIP_STORED; copy the source
                # member's method so the generated archive stays compressed
                # instead of silently growing to the uncompressed size.
                new_item.compress_type = item.compress_type
                # Read via the ZipInfo object (not the name) so the exact
                # member is used even if the archive has duplicate names.
                zout.writestr(new_item, zin.read(item))

### Process: extract the archives (sequentially, then concurrently)

In [55]:
def unzip_file(zip_path, extract_to):
    """Extract every member of the archive at ``zip_path`` into ``extract_to``.

    The archive is closed even if extraction fails; on success a one-line
    confirmation is printed. Returns ``None``.
    """
    archive = zipfile.ZipFile(zip_path, mode="r")
    try:
        archive.extractall(path=extract_to)
    finally:
        archive.close()
    print(f"Unzipped {zip_path} to {extract_to}")


async def async_unzip(zip_path, extract_to):
    """Run ``unzip_file(zip_path, extract_to)`` in a worker thread and wait for it.

    The blocking extraction is handed to ``asyncio.to_thread`` so the event
    loop stays free while the archive is being unpacked.
    """
    extraction = asyncio.to_thread(unzip_file, zip_path, extract_to)
    await extraction

In [56]:
async def process_zip_files(zip_files, extract_to):
    """Extract all archives in ``zip_files`` into ``extract_to`` concurrently.

    One ``async_unzip`` coroutine is created per archive and all of them are
    awaited together with ``asyncio.gather``.
    """
    pending = [async_unzip(archive, extract_to) for archive in zip_files]
    await asyncio.gather(*pending)

In [61]:
# Baseline run: extract the archives one after another.
# sorted() gives a stable processing order; glob() alone does not promise one.
zip_files = sorted((Path(".") / "data" / "zip").glob("*.zip"))
extract_to = Path(".") / "data" / "unzip"
for file in zip_files:
    # Reuse the helper defined above (same extraction and same message)
    # instead of repeating its body here.
    unzip_file(file, extract_to)

Unzipped data\zip\zip_0.zip to data\unzip
Unzipped data\zip\zip_1.zip to data\unzip
Unzipped data\zip\zip_2.zip to data\unzip
Unzipped data\zip\zip_3.zip to data\unzip
Unzipped data\zip\zip_4.zip to data\unzip
Unzipped data\zip\zip_5.zip to data\unzip
Unzipped data\zip\zip_6.zip to data\unzip
Unzipped data\zip\zip_7.zip to data\unzip
Unzipped data\zip\zip_8.zip to data\unzip
Unzipped data\zip\zip_9.zip to data\unzip


In [62]:
# Extract the same archives again, this time through the asyncio helpers that
# push each extraction onto a worker thread. The recorded output below shows
# that archives finish in a different order than they were listed.
# NOTE: a bare top-level `await` works because this runs as a notebook cell;
# a plain script would need asyncio.run(process_zip_files(...)) instead.
await process_zip_files(zip_files, extract_to)

Unzipped data\zip\zip_8.zip to data\unzip
Unzipped data\zip\zip_2.zip to data\unzip
Unzipped data\zip\zip_6.zip to data\unzip
Unzipped data\zip\zip_1.zip to data\unzip
Unzipped data\zip\zip_7.zip to data\unzip
Unzipped data\zip\zip_9.zip to data\unzip
Unzipped data\zip\zip_0.zip to data\unzip
Unzipped data\zip\zip_3.zip to data\unzip
Unzipped data\zip\zip_5.zip to data\unzip
Unzipped data\zip\zip_4.zip to data\unzip


In [63]:
# Final cleanup: wipe the generated archives and extracted files, leaving both
# working folders present but empty. `shutil` is imported in the import cell.
folders = [Path(".") / "data" / "zip", Path(".") / "data" / "unzip"]

for folder in folders:
    # Remove the folder together with everything the run produced in it.
    if folder.exists():
        shutil.rmtree(folder)
    # parents=True keeps this cell working even if ./data itself is missing.
    folder.mkdir(parents=True)