In [None]:
import gdown
from tqdm import tqdm
import zipfile
import os

def download_and_extract(url, output, extract=True):
    """
    Download a file from Google Drive and optionally extract it if it's a zip file.

    The download progress bar is drawn by gdown itself (``quiet=False``); the
    extraction step shows a byte-based tqdm progress bar. Zip members are
    extracted into the current working directory, and the zip file is deleted
    once extraction finishes.

    Args:
        url (str): Google Drive URL passed straight to ``gdown.download``.
        output (str): Destination filename for the downloaded file.
        extract (bool): When True and the downloaded file ends with ``.zip``,
            extract it and remove the archive afterwards.

    Raises:
        RuntimeError: If ``gdown.download`` returns ``None`` instead of a path.
    """
    print(f"Downloading {output}...")
    # gdown.download() has no `callback` parameter, so passing one raises
    # TypeError before anything is fetched. With quiet=False gdown already
    # renders its own progress bar, so no outer tqdm wrapper is needed.
    downloaded = gdown.download(url, output, quiet=False)
    if downloaded is None:
        # Some gdown versions report failure by returning None rather than raising.
        raise RuntimeError(f"Download failed for {url}")

    if extract and downloaded.endswith('.zip'):
        print(f"\nExtracting {downloaded}...")
        with zipfile.ZipFile(downloaded, 'r') as zip_ref:
            entries = zip_ref.infolist()
            # Progress is measured in uncompressed bytes so large members
            # advance the bar proportionally.
            total = sum(entry.file_size for entry in entries)
            with tqdm(total=total, unit='B', unit_scale=True, unit_divisor=1024,
                      miniters=1, desc="Extracting") as progress_bar:
                for entry in entries:
                    zip_ref.extract(entry)
                    progress_bar.update(entry.file_size)

        # Remove the zip only after the archive handle has been closed.
        os.remove(downloaded)
        print(f"\n{downloaded} has been extracted and the zip file has been removed.")
    else:
        print(f"\n{downloaded} has been downloaded successfully.")

''' Usage example
url = "https://drive.google.com/uc?id=YOUR_FILE_ID"
output = "dataset.zip"  # Change this to your desired filename

download_and_extract(url, output) '''

In [3]:
import tarfile
import os
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

def extract_member(tar, member, extract_to):
    """
    Unpack one entry of an open tar archive into a target directory.

    Args:
        tar (tarfile.TarFile): An archive that is already open for reading.
        member (tarfile.TarInfo): The entry to unpack.
        extract_to (str): Directory that receives the unpacked entry.

    Returns:
        str: The entry's name as recorded in the archive.
    """
    # The destination directory is passed positionally as TarFile.extract's `path`.
    tar.extract(member, extract_to)
    extracted_name = member.name
    return extracted_name

def extract_tar_gz(tar_gz_path, extract_to, num_threads=4):
    """
    Extracts a .tar.gz file to a specified directory with a progress bar.

    Members are extracted one at a time, in archive order. A single TarFile
    wraps one sequential gzip stream, and ``TarFile.extract`` seeks and reads
    that shared stream, so calling it from several threads at once can
    interleave reads and corrupt the extracted files.

    Args:
        tar_gz_path (str): Path to the .tar.gz file.
        extract_to (str): Directory where the contents should be extracted.
            Created (including parents) if it does not exist.
        num_threads (int): Accepted for backward compatibility only; it is
            no longer used because extraction runs serially.

    Returns:
        None

    Notes:
        Archive-level ``tarfile.TarError`` is caught and reported via print.
        A failure on an individual member is reported and extraction carries
        on with the next member. Other errors raised while opening the
        archive (e.g. a missing file) propagate to the caller.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(extract_to, exist_ok=True)

    try:
        with tarfile.open(tar_gz_path, "r:gz") as tar:
            # Reading the full member list up front gives the progress bar a total.
            members = tar.getmembers()

            with tqdm(total=len(members), desc="Extracting", unit="file") as pbar:
                for member in members:
                    try:
                        # NOTE(review): for untrusted archives consider an
                        # extraction filter (filter="data", available on
                        # newer Python versions) to block path traversal.
                        tar.extract(member, path=extract_to)
                        pbar.update(1)
                    except Exception as e:
                        # Best effort: report the failure and keep going.
                        print(f"Error extracting member: {e}")

        print(f"Extracted {tar_gz_path} to {extract_to}")
    except tarfile.TarError as e:
        print(f"Error extracting {tar_gz_path}: {e}")

# Example usage: unpack the archive (path relative to the current working
# directory) into ./out.
# NOTE(review): this call runs every time the cell executes, re-extracting the
# whole archive.
extract_tar_gz('DDoS-ACK_Fragmentation.tar.gz', './out')


KeyboardInterrupt: 