In [1]:
import zipfile
import os
from tqdm import tqdm
import gc  # Garbage collector

def unzip_with_low_memory(zip_path, extract_to, gc_every=4000):
    """Extract every member of a zip archive one at a time, with a progress bar.

    Members are extracted individually so that one bad entry does not abort
    the run: its error is printed, the member is recorded as failed, and
    extraction continues with the next member.

    Args:
        zip_path: Path to the ``.zip`` archive to read.
        extract_to: Destination directory; created if it does not exist.
        gc_every: Call ``gc.collect()`` each time the loop index is a non-zero
            multiple of this value (default 4000). ``None`` or a value <= 0
            disables the periodic collection.

    Returns:
        None. Progress, per-member errors and the final summary are printed.

    Raises:
        Whatever ``zipfile.ZipFile`` raises when the archive itself cannot be
        opened. Per-member extraction errors are caught and reported instead
        of being raised.
    """
    os.makedirs(extract_to, exist_ok=True)

    # Members that raised during extraction; drives the final summary so the
    # function never claims success after swallowing an error.
    failed = []

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        file_names = zip_ref.namelist()
        total_files = len(file_names)

        print(f"Extracting {total_files} files from {zip_path}...\n")

        for idx, file in enumerate(tqdm(file_names, desc="Unzipping", unit="file")):
            try:
                zip_ref.extract(file, path=extract_to)
            except Exception as e:
                # Best-effort: keep going, but remember what went wrong.
                failed.append(file)
                print(f"❌ Error extracting {file}: {e}")

            # Periodic forced collection; idx 0 is skipped so nothing is
            # collected before any work has been done.
            if gc_every and gc_every > 0 and idx != 0 and idx % gc_every == 0:
                gc.collect()
                print(f"🧹 Cleared RAM at {idx} files")

    if failed:
        print(f"\n⚠️ Extraction finished with errors: {len(failed)} of {total_files} files failed.")
    else:
        print("\n✅ Extraction completed successfully.")

# Archive to unpack and the directory that receives its contents.
zip_path = 'MachineLearningCSV.zip'   # path to the .zip file, e.g. 'data/archive.zip'
extract_to = './'                     # destination folder, e.g. 'data/unzipped'

# Run the extraction with the settings above.
unzip_with_low_memory(zip_path=zip_path, extract_to=extract_to)

Extracting 9 files from MachineLearningCSV.zip...



Unzipping: 100%|████████████████████████████████████████████████████████████████████████| 9/9 [00:03<00:00,  2.71file/s]


✅ Extraction completed successfully.



