In [2]:
import gdown
import os
import py7zr
import pandas as pd
from tqdm import tqdm

def download_extract_and_process(file_id, output_dir):
    """
    Download a file from Google Drive, try to unpack it as a 7z archive,
    and renumber the folders found under ``<output_dir>/Datasets``.

    Args:
        file_id: Google Drive file id, interpolated into the download URL.
        output_dir: Directory that receives the download and the extracted
            content; it is created when missing.

    Returns:
        None in every case. Download and extraction failures are reported
        on stdout instead of being raised.
    """
    os.makedirs(output_dir, exist_ok=True)
    url = f"https://drive.google.com/uc?id={file_id}"
    output_file = os.path.join(output_dir, "downloaded_file")

    print("Downloading file...")
    try:
        downloaded = gdown.download(url, output_file, quiet=False)
    except Exception as e:
        print(f"Error downloading file: {e}")
        return None
    if downloaded is None:
        # NOTE(review): some gdown releases appear to signal failure by
        # returning None rather than raising -- treat that as an error too.
        print("Error downloading file: gdown did not return an output path")
        return None

    # Attempt to extract if it's an archive
    try:
        with py7zr.SevenZipFile(output_file, mode='r') as z:
            z.extractall(path=output_dir)
    except Exception as e:
        print(f"Extraction failed or file is not an archive: {e}")
        print("Will process the file directly.")
    else:
        print("File extracted successfully.")
        # Cleanup is kept out of the extraction `try` so that a failure to
        # delete the archive is not misreported as an extraction failure.
        try:
            os.remove(output_file)
        except OSError as e:
            print(f"Could not remove downloaded archive: {e}")

    # Only renumber when the expected folder exists; otherwise
    # rename_folders would raise FileNotFoundError from os.listdir.
    datasets_dir = os.path.join(output_dir, "Datasets")
    if os.path.isdir(datasets_dir):
        rename_folders(datasets_dir)
    else:
        print(f"No 'Datasets' folder found in {output_dir}; skipping folder renaming.")
    return None

def rename_folders(base_dir):
    """
    Rename the sub-folders of ``base_dir`` to sequential numbers ``1..N``.

    Folders are numbered in a deterministic order: names that are already
    decimal numbers come first in numeric order (so running the function
    again on an already numbered directory keeps every folder in place),
    followed by all other names sorted case-insensitively. Regular files in
    ``base_dir`` are left untouched.

    Args:
        base_dir: Directory whose immediate sub-folders are renamed.

    Raises:
        OSError: If ``base_dir`` cannot be listed or a rename fails
            (e.g. FileNotFoundError when ``base_dir`` does not exist).
    """
    import tempfile  # local import: needed only for the staging directory

    def _order(name):
        # Numeric names first (by value), everything else case-insensitively;
        # the raw name is the final tie-breaker to keep the order total.
        if name.isdecimal():
            return (0, int(name), "", name)
        return (1, 0, name.casefold(), name)

    folders = sorted(
        (f for f in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, f))),
        key=_order,
    )

    print(f"Renaming folders in {base_dir}...")
    if folders:
        # Two-phase rename: park every folder in a fresh staging directory
        # first, so a target name such as "1" can never collide with a
        # not-yet-renamed folder that already carries that name.
        staging = tempfile.mkdtemp(prefix=".rename_", dir=base_dir)
        for i, folder in enumerate(folders, start=1):
            os.rename(os.path.join(base_dir, folder), os.path.join(staging, str(i)))
        for i, folder in enumerate(folders, start=1):
            os.rename(os.path.join(staging, str(i)), os.path.join(base_dir, str(i)))
            print(f"Renamed '{folder}' to '{i}'")
        os.rmdir(staging)

    print(f"Renamed {len(folders)} folders.")

# Example usage: the Google Drive file id to fetch and the target directory
file_id = "1ZAYkCRI9vf67OvggoOYl7k22S7fp8Dq9"
output_dir = "data"

# Run the download step, followed by extraction when the file is an archive
download_extract_and_process(file_id=file_id, output_dir=output_dir)

# Folder renaming already happens inside download_extract_and_process (via rename_folders)


Downloading file...


Downloading...
From (original): https://drive.google.com/uc?id=1ZAYkCRI9vf67OvggoOYl7k22S7fp8Dq9
From (redirected): https://drive.google.com/uc?id=1ZAYkCRI9vf67OvggoOYl7k22S7fp8Dq9&confirm=t&uuid=5e5333a3-a7a4-4b28-8aa7-4b0baffb0621
To: e:\LogicInsights\data\downloaded_file
100%|██████████| 1.17G/1.17G [05:06<00:00, 3.83MB/s]


File extracted successfully.
Renaming folders in data/Datasets...
Renamed 'archive' to '1'
Renamed 'CICIoT2023' to '2'
Renamed 'CSV-01-12' to '3'
Renamed 'GeneratedLabelledFlows (1)' to '4'
Renamed 'OneDrive_1_7-17-2024' to '5'
Renamed 5 folders.
