### Install boto3

In [0]:
%pip install boto3

### Set AWS configs

In [0]:
import os

# Export the AWS settings into this process's environment in one call.
# The values below are literal placeholders (each one equals its own variable
# name); substitute real values before running, and avoid committing real
# secrets into the notebook.
# NOTE(review): boto3 documents AWS_DEFAULT_REGION as its region variable —
# confirm that AWS_REGION is actually consumed somewhere.
os.environ.update(
    {
        "AWS_ACCESS_KEY_ID": "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY": "AWS_SECRET_ACCESS_KEY",
        "AWS_REGION": "AWS_REGION",
    }
)

In [0]:
import os
import shutil
import zipfile
import boto3
from datetime import datetime
from botocore.client import Config

# --- CONFIG ---
# Source volume that is scanned for `file_id=` folders, and the S3 destination.
VOLUME_PATH = "/Volumes/processed/detection_data/processed_volume/output_detections_data/"
S3_BUCKET = "testing-pyspark-sairam"
S3_REGION = "eu-north-1"
ZIP_S3_KEY_PREFIX = "detections_zipped"

# Per-run timestamp (second resolution, from datetime.now()) that keeps the
# S3 key and the local scratch paths unique across runs.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"

# Everything derived from the timestamp: S3 object key, local staging
# directory, and local archive path.
ZIP_S3_KEY = ZIP_S3_KEY_PREFIX + "/output_detections_" + timestamp + ".zip"
LOCAL_TMP_DIR = "/tmp/detection_output_" + timestamp
LOCAL_ZIP_PATH = "/tmp/output_detections_" + timestamp + ".zip"

# Pipeline: copy every .parquet/.csv file found under the `file_id=` folders of
# VOLUME_PATH to local disk, zip them, upload the archive to S3 and print a
# pre-signed download URL.
#
# Raises:
#   FileNotFoundError: when no matching data file exists under VOLUME_PATH.
#   Any error raised by dbutils / boto3 propagates unchanged.

# --- Step 1: Prepare Local Folder ---
# Start from an empty staging directory so stale files can never leak into the archive.
if os.path.exists(LOCAL_TMP_DIR):
    shutil.rmtree(LOCAL_TMP_DIR)
os.makedirs(LOCAL_TMP_DIR, exist_ok=True)

try:
    # --- Step 2: Collect All Parquet/CSV Files from Volume ---
    volume_entries = dbutils.fs.ls(VOLUME_PATH)
    data_files = []

    for folder in volume_entries:
        # Only the `file_id=` folders are scanned; anything else is ignored.
        if folder.name.startswith("file_id="):
            for entry in dbutils.fs.ls(folder.path):
                # str.endswith accepts a tuple of suffixes.
                if entry.path.endswith((".parquet", ".csv")):
                    data_files.append(entry)

    print(f"📁 Found {len(data_files)} data files to download.")

    if not data_files:
        # FileNotFoundError is still an Exception, so broad handlers keep working.
        raise FileNotFoundError("❌ No output files found in volume!")

    # --- Step 3: Download to Local ---
    # Each file is stored under a sub-directory named after its source
    # `file_id=` folder. Staging everything flat by basename would let files
    # with the same name in different folders silently overwrite each other.
    # NOTE: the archive layout therefore becomes `file_id=.../<filename>`.
    for data_file in data_files:
        partition_name = os.path.basename(os.path.dirname(data_file.path))
        partition_dir = os.path.join(LOCAL_TMP_DIR, partition_name)
        os.makedirs(partition_dir, exist_ok=True)
        local_path = os.path.join(partition_dir, os.path.basename(data_file.path))
        dbutils.fs.cp(data_file.path, f"file:{local_path}")

    print(f"✅ Downloaded all files to: {LOCAL_TMP_DIR}")

    # --- Step 4: Zip All Files ---
    with zipfile.ZipFile(LOCAL_ZIP_PATH, "w", zipfile.ZIP_DEFLATED) as zipf:
        for root, _, filenames in os.walk(LOCAL_TMP_DIR):
            for filename in filenames:
                abs_path = os.path.join(root, filename)
                # Archive names are relative to the staging directory so the
                # zip does not embed the /tmp prefix.
                arcname = os.path.relpath(abs_path, LOCAL_TMP_DIR)
                zipf.write(abs_path, arcname=arcname)

    print(f"✅ Zipped into: {LOCAL_ZIP_PATH}")

    # --- Step 5: Upload ZIP to S3 ---
    s3 = boto3.client("s3", region_name=S3_REGION, config=Config(signature_version="s3v4"))
    s3.upload_file(LOCAL_ZIP_PATH, S3_BUCKET, ZIP_S3_KEY)
    print(f"🚀 Uploaded to S3: s3://{S3_BUCKET}/{ZIP_S3_KEY}")

    # --- Step 6: Generate Pre-signed URL (1 Hour Validity) ---
    presigned_url = s3.generate_presigned_url(
        "get_object",
        Params={"Bucket": S3_BUCKET, "Key": ZIP_S3_KEY},
        ExpiresIn=3600  # seconds
    )

    print(f"\n📦 Download ZIP (valid 1 hour):\n{presigned_url}")
finally:
    # Always remove the local scratch copies, on success or failure, so
    # repeated runs do not pile up timestamped data under /tmp.
    shutil.rmtree(LOCAL_TMP_DIR, ignore_errors=True)
    if os.path.exists(LOCAL_ZIP_PATH):
        os.remove(LOCAL_ZIP_PATH)
