In [1]:
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import os

# ================= USER SETUP ======================
# Edit these three values before running the cell.
ROOT_FOLDER_ID = "1XxJfbcsKiDI4Hzq2PEd_CbAcwLF8iZLO"  # Drive ID of the folder whose tree is scanned and mirrored
OUTPUT_DIR     = "./gdrive_mirror"                    # local directory that receives the downloaded files
MAX_THREADS    = 12                                    # number of worker threads in the download pool
# ====================================================

# Create the output directory up front; exist_ok=True makes re-runs harmless.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Authenticate through PyDrive2's local-webserver OAuth flow (opens a browser),
# then build the Drive client that the functions below use as a module global.
# NOTE(review): whether a saved token is reused on later runs depends on
# PyDrive2 settings that are not visible in this file — confirm before relying on it.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)


def _safe_name(title):
    """Reduce a Drive title to a single, safe local path component."""
    # Drive titles are free-form and may contain path separators. Left as-is
    # they would create stray sub-directories, and a leading separator would
    # make os.path.join() drop the prefix and escape the output directory.
    separators = {"/", os.sep}
    if os.altsep:
        separators.add(os.altsep)

    name = title
    for sep in separators:
        name = name.replace(sep, "_")

    # "", "." and ".." are not usable file names and ".." would climb out of
    # the mirror tree.
    if name in ("", ".", ".."):
        name = "_"
    return name


def walk_folder(folder_id, path=""):
    """Recursively list the files under a Drive folder, preserving structure.

    Args:
        folder_id: Drive ID of the folder to scan.
        path: Relative local path that corresponds to ``folder_id``; the
            empty string for the root of the scan.

    Returns:
        A flat list of ``(file_id, relative_local_path)`` tuples, one per
        non-folder item found anywhere below ``folder_id``. Folders themselves
        are not listed; they only contribute path components.

    Every title is passed through ``_safe_name`` so that each Drive item maps
    to exactly one path component below ``path``.
    """
    query = f"'{folder_id}' in parents and trashed=false"
    items = []

    for item in drive.ListFile({'q': query}).GetList():
        local_path = os.path.join(path, _safe_name(item['title']))

        # If it's a subfolder → recurse deeper
        if item['mimeType'] == "application/vnd.google-apps.folder":
            items.extend(walk_folder(item['id'], local_path))
        else:
            items.append((item['id'], local_path))

    return items


def download_file(file_id, local_path):
    """Download one Drive file into OUTPUT_DIR, skipping it if already present.

    Args:
        file_id: Drive ID of the file to fetch.
        local_path: Destination path relative to ``OUTPUT_DIR``.

    Returns:
        ``"skip"`` if a non-empty file already exists at the destination,
        otherwise ``"done"`` after a successful download.

    Raises:
        Whatever the Drive client raises when the download fails; the
        destination path is left untouched in that case.
    """
    full_path = os.path.join(OUTPUT_DIR, local_path)
    os.makedirs(os.path.dirname(full_path), exist_ok=True)

    # Skip if already downloaded
    if os.path.exists(full_path) and os.path.getsize(full_path) > 0:
        return "skip"

    # Download to a temporary name first: writing straight to full_path would
    # leave a non-empty partial file behind on failure, which the skip check
    # above would then treat as complete on every later run. The file ID keeps
    # the temporary name unique when two Drive items share a title.
    tmp_path = f"{full_path}.{file_id}.part"
    try:
        f = drive.CreateFile({'id': file_id})
        # NOTE(review): Google-native documents presumably need an export
        # mimetype to be downloadable — confirm against the PyDrive2 docs.
        f.GetContentFile(tmp_path)
        os.replace(tmp_path, full_path)
    finally:
        # After a successful replace the temporary no longer exists; this only
        # removes the leftover of a failed download.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return "done"


# ================= RUN ======================
# Scan the Drive tree, download everything in parallel, then report.
print("\n⏳ Scanning Google Drive structure...")
files = walk_folder(ROOT_FOLDER_ID)
print(f"📁 Found {len(files)} files to download\n")

# Parallel download
counts = {"done": 0, "skip": 0}   # tally of statuses returned by download_file
failures = []                     # (local_path, exception) for every failed file

with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
    # Map each future back to its path so failures can be reported by name.
    futures = {executor.submit(download_file, fid, path): path for fid, path in files}

    for future in tqdm(as_completed(futures), total=len(files), desc="Downloading"):
        # result() re-raises the worker's exception; catch it per file so one
        # bad download does not abort the progress loop or hide the others.
        try:
            status = future.result()
        except Exception as exc:
            failures.append((futures[future], exc))
        else:
            counts[status] = counts.get(status, 0) + 1
# ============================================

print(f"\n\n✔ COMPLETE — files saved to:\n{os.path.abspath(OUTPUT_DIR)}\n")
print(f"Downloaded: {counts['done']}  Skipped: {counts['skip']}  Failed: {len(failures)}")
for failed_path, error in failures:
    print(f"  FAILED {failed_path}: {error!r}")

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?client_id=404189088942-ope6hdic3ocscnrf12u1hmraot0s847q.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&access_type=online&response_type=code

Authentication successful.

⏳ Scanning Google Drive structure...
📁 Found 8832 files to download



Downloading: 100%|██████████| 8832/8832 [05:36<00:00, 26.24it/s]



✔ COMPLETE — files saved to:
/Users/jjmurdock/Desktop/MIT/Deep_Learning/gdrive_mirror




