In [None]:
from pathlib import Path

# Project root is taken to be one level above the current working directory
# (this assumes the kernel is started from inside the notebooks folder).
PROJECT_ROOT = Path.cwd().parent
local_data_dir = PROJECT_ROOT.joinpath("data", "processed")
local_model_dir = PROJECT_ROOT.joinpath("models")

# Create both output folders up front; existing folders are left untouched.
for _folder in (local_data_dir, local_model_dir):
    _folder.mkdir(parents=True, exist_ok=True)

# Local file name -> public Google Drive file ID
# (ensure these match your shared links).
data_files = dict(
    [
        ("train_data.csv", "1fH2TXIyuK_ZlMgT4tjHGGHTdZxv3z6rv"),
        ("validation_data.csv", "12H0hyDcIXOyrNNTctWxFge72WqWInji9"),
        ("test_data.csv", "10RsZsdGmFzNbFRByyEwnxP1kGxBvRUm6"),
    ]
)

# Simple downloader for public Drive files using gdown.
# gdown is a third-party package: if it is not importable, install it with pip
# into the same interpreter that runs this kernel (sys.executable) and retry.
import subprocess
import sys

try:
    import gdown
except ImportError:
    # Only a missing package should trigger an install; any other error raised
    # while importing gdown is a real problem and is allowed to propagate.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "gdown"])
    import gdown

# Download every file listed in data_files into local_data_dir, skipping files
# that are already present so the cell can be re-run cheaply.
failed_downloads = []

for filename, file_id in data_files.items():
    destination = local_data_dir / filename
    if destination.exists():
        print(f"⚠️ Skipping {filename}, already exists at {destination}")
        continue
    url = f"https://drive.google.com/uc?id={file_id}&export=download"
    print(f"⬇️ Downloading {filename} to {destination}")
    # gdown.download returns the output path on success and None when the file
    # could not be retrieved; the result must be checked, otherwise a failed
    # download would be reported as a success below.
    result = gdown.download(url, str(destination), quiet=False)
    if result is None or not destination.exists():
        failed_downloads.append(filename)

if failed_downloads:
    # Stop here so later cells do not run against missing data files.
    raise RuntimeError(
        f"Download failed for: {', '.join(failed_downloads)}. "
        "Check the Google Drive file IDs and sharing permissions."
    )

print('✅ Download step complete. Check', local_data_dir)


Downloading train_data.csv to c:\Users\MATIAS\Desktop\git\paris-aws-cloud-club\labs\aws\regression-end-to-end-ml-project\phase 1\data\processed\train_data.csv


Downloading...
From: https://drive.google.com/uc?id=1fH2TXIyuK_ZlMgT4tjHGGHTdZxv3z6rv&export=download
To: c:\Users\MATIAS\Desktop\git\paris-aws-cloud-club\labs\aws\regression-end-to-end-ml-project\phase 1\data\processed\train_data.csv
100%|██████████| 46.7M/46.7M [00:06<00:00, 6.90MB/s]


Downloading validation_data.csv to c:\Users\MATIAS\Desktop\git\paris-aws-cloud-club\labs\aws\regression-end-to-end-ml-project\phase 1\data\processed\validation_data.csv


Downloading...
From: https://drive.google.com/uc?id=12H0hyDcIXOyrNNTctWxFge72WqWInji9&export=download
To: c:\Users\MATIAS\Desktop\git\paris-aws-cloud-club\labs\aws\regression-end-to-end-ml-project\phase 1\data\processed\validation_data.csv
100%|██████████| 46.8M/46.8M [00:07<00:00, 5.85MB/s]


Downloading test_data.csv to c:\Users\MATIAS\Desktop\git\paris-aws-cloud-club\labs\aws\regression-end-to-end-ml-project\phase 1\data\processed\test_data.csv


Downloading...
From (original): https://drive.google.com/uc?id=10RsZsdGmFzNbFRByyEwnxP1kGxBvRUm6&export=download
From (redirected): https://drive.google.com/uc?id=10RsZsdGmFzNbFRByyEwnxP1kGxBvRUm6&export=download&confirm=t&uuid=db955b47-8b2d-40d4-8f38-f08ea714debf
To: c:\Users\MATIAS\Desktop\git\paris-aws-cloud-club\labs\aws\regression-end-to-end-ml-project\phase 1\data\processed\test_data.csv
100%|██████████| 179M/179M [00:28<00:00, 6.37MB/s] 

Download step complete. Check c:\Users\MATIAS\Desktop\git\paris-aws-cloud-club\labs\aws\regression-end-to-end-ml-project\phase 1\data\processed



