In [None]:
""""
This script downloads the necessary processed datasets and trained models for the lab

"""

from pathlib import Path

# The project root is the parent of the current working directory
# (the notebook is expected to be run from the notebooks folder).
PROJECT_ROOT = Path.cwd().parent
_data_root = PROJECT_ROOT / "data"
local_data_dir = _data_root / "processed"
local_model_dir = _data_root / "models"
local_layer_dir = _data_root / "layers"

# Create every destination folder up front; folders that already exist are
# left as they are.
for _target_dir in (local_data_dir, local_model_dir, local_layer_dir):
    _target_dir.mkdir(parents=True, exist_ok=True)

# Public Google Drive file IDs, keyed by the local filename each one is saved
# under (ensure these match your shared links).
data_files = {
    "feature_engineered_eval.csv": "1fH2TXIyuK_ZlMgT4tjHGGHTdZxv3z6rv",
    "feature_engineered_holdout.csv": "12H0hyDcIXOyrNNTctWxFge72WqWInji9",
    "feature_engineered_train.csv": "10RsZsdGmFzNbFRByyEwnxP1kGxBvRUm6",
}

model_files = {
    "lgbm_model.pkl": "1YzOpqBNc4nDwS04GlUfZDgO1T41wLauT",
    "lgbm_best_model.pkl": "1uuT8Mjoj5QKhm3yV6zByODVWwnlGb3Fi",
}

layer_files = {
    "core-layer.zip": "14JBDCVMfJOr8vydbnEazBh2P4_ZY0s7X",
    "ml-layer.zip": "1l39v2VcZseQMrwGXONdAywh2e2nINgI7",
    "src.zip": "1ttd5YTVSsdvdFaA27toQd5jny2IewYUt",
}

# Simple downloader for public Drive files using gdown
import sys, subprocess
import gdown

# Pair each destination directory with the {filename: Drive file ID} mapping
# that belongs in it, so a single loop handles datasets, models and layers.
# (Previously the loop body was copy-pasted per group and the model group was
# processed twice.)
download_groups = (
    (local_data_dir, data_files),
    (local_model_dir, model_files),
    (local_layer_dir, layer_files),
)

# Filenames whose download did not produce an output path.
failed_downloads = []

for target_dir, drive_files in download_groups:
    for filename, file_id in drive_files.items():
        destination = target_dir / filename
        # Files already on disk are not fetched again.
        if destination.exists():
            print(f"⚠️ Skipping {filename}, already exists at {destination}")
            continue
        url = f"https://drive.google.com/uc?id={file_id}&export=download"
        print(f"⬇️ Downloading {filename} to {destination}")
        # NOTE(review): some gdown releases return None on failure instead of
        # raising; treating None as a failure is harmless on releases that
        # raise. Confirm against the installed gdown version.
        if gdown.download(url, str(destination), quiet=False) is None:
            failed_downloads.append(filename)

if failed_downloads:
    print("❌ Download step finished with failures:", ", ".join(failed_downloads))
else:
    print(
        "✅ Download step complete. Check",
        local_data_dir,
        local_model_dir,
        local_layer_dir,
    )


: 