# Training Environment Setup

## Purpose

This notebook **validates your training environment** and exports variables for use in other notebooks.

**What it does:**
1. Gets your username from the Databricks session
2. Finds your pre-created catalog (`ecommerce_platform_{username}`)
3. Validates that the required schemas and the Volume exist
4. Exports variables: `CATALOG`, `BRONZE_SCHEMA`, `SILVER_SCHEMA`, `GOLD_SCHEMA`, `DATASET_BASE_PATH`

**Prerequisites:**
- The trainer must run `00_pre_config.ipynb` before the training session
- You must be a member of the training group (e.g., `dp_trn_1`)

---

In [0]:
import re

# -----------------------------------------------------------------------------
# CONFIGURATION
# Fixed names used by this notebook to locate the catalog, schemas and Volume.
# -----------------------------------------------------------------------------
CATALOG_PREFIX = "ecommerce_platform"
BRONZE_SCHEMA = "bronze"
SILVER_SCHEMA = "silver"
GOLD_SCHEMA = "gold"
DEFAULT_SCHEMA = "default"
VOLUME_NAME = "datasets"

# -----------------------------------------------------------------------------
# STEP 1: Get Current User
# -----------------------------------------------------------------------------
# Ask the active Spark session who is logged in (single row, single column).
raw_user = spark.sql("SELECT current_user()").first()[0]
print(f"Current user: {raw_user}")

# Derive the catalog suffix from the part of the login before '@':
#   1. every character outside [a-zA-Z0-9] becomes '_', then lowercase
#   2. runs of '_' collapse to a single '_'
#   3. leading/trailing '_' are trimmed
user_slug = re.sub(
    r'_+',
    '_',
    re.sub(r'[^a-zA-Z0-9]', '_', raw_user.split('@')[0]).lower(),
).strip('_')
print(f"User slug: {user_slug}")

## Step 2: Find Your Catalog

In [None]:
# =============================================================================
# STEP 2: Find and Validate Catalog
# =============================================================================
# The per-user catalog name is the shared prefix plus the user slug from Step 1.
CATALOG = f"{CATALOG_PREFIX}_{user_slug}"

# Look the catalog up in SHOW CATALOGS; switch to it when present, otherwise
# list the catalogs sharing the prefix (to help spot a naming mismatch) and fail.
try:
    catalogs = spark.sql("SHOW CATALOGS").collect()
    # The catalog name is read from the first column of each returned row.
    catalog_names = [row[0] for row in catalogs]

    if CATALOG in catalog_names:
        print(f"Catalog found: {CATALOG}")
        spark.sql(f"USE CATALOG {CATALOG}")
    else:
        print(f"ERROR: Catalog '{CATALOG}' not found!")
        # "\n" (not "\\n") so a blank line is printed instead of a literal
        # backslash-n.
        print(f"\nAvailable catalogs matching prefix '{CATALOG_PREFIX}':")
        for c in catalog_names:
            if c.startswith(CATALOG_PREFIX):
                print(f"  - {c}")
        print("\nContact trainer to run 00_pre_config.ipynb")
        raise Exception(f"Catalog '{CATALOG}' not found")

except Exception as e:
    # Print the failure, then re-raise so the cell (and any notebook that
    # %run-s this one) stops here.
    print(f"ERROR: {e}")
    raise

## Step 3: Validate Schemas

In [None]:
# -----------------------------------------------------------------------------
# STEP 3: Validate Schemas Exist
# -----------------------------------------------------------------------------
# Schemas this notebook requires to be present in the catalog.
required_schemas = [BRONZE_SCHEMA, SILVER_SCHEMA, GOLD_SCHEMA, DEFAULT_SCHEMA]

# Schemas actually present; the name is taken from the first column of each row.
schemas = spark.sql(f"SHOW SCHEMAS IN {CATALOG}").collect()
schema_names = [record[0] for record in schemas]

# Required schemas that were not returned by SHOW SCHEMAS, in declaration order.
missing_schemas = list(
    filter(lambda name: name not in schema_names, required_schemas)
)

# Guard clause: report and abort as soon as anything is missing.
if missing_schemas:
    print(f"ERROR: Missing schemas: {missing_schemas}")
    print("Contact trainer to run 00_pre_config.ipynb")
    raise Exception(f"Missing schemas: {missing_schemas}")

print(f"All schemas found: {', '.join(required_schemas)}")

In [None]:
## Step 4: Validate Volume and Dataset

In [None]:
# =============================================================================
# STEP 4: Validate Volume and Dataset Files
# =============================================================================
# Volume path built from the catalog, the default schema and the volume name.
DATASET_BASE_PATH = f"/Volumes/{CATALOG}/{DEFAULT_SCHEMA}/{VOLUME_NAME}"

# Listing the path doubles as the existence/access check: if the listing
# raises, the error is reported and re-raised so the cell fails.
try:
    files = dbutils.fs.ls(DATASET_BASE_PATH)
    print(f"Volume found: {DATASET_BASE_PATH}")
    print("Dataset folders:")
    for entry in files:
        print(f"  - {entry.name}")
except Exception as e:
    print(f"ERROR: Cannot access Volume at {DATASET_BASE_PATH}")
    print(f"Error: {e}")
    # "\n" (not "\\n") so a blank line is printed instead of a literal
    # backslash-n.
    print("\nContact trainer to run 00_pre_config.ipynb")
    raise

## Step 5: Export Variables

These variables are now available in any notebook that includes this one via `%run ../00_setup`.

In [None]:
# -----------------------------------------------------------------------------
# STEP 5: Export Variables (Summary)
# -----------------------------------------------------------------------------
# Print a banner with the resolved values plus two usage examples. The lines
# are joined with newlines and emitted through a single print call; empty
# strings stand for the blank separator lines.
print(
    "\n".join(
        [
            "=" * 60,
            "TRAINING ENVIRONMENT READY",
            "=" * 60,
            "",
            f"User:              {raw_user}",
            f"CATALOG:           {CATALOG}",
            f"BRONZE_SCHEMA:     {BRONZE_SCHEMA}",
            f"SILVER_SCHEMA:     {SILVER_SCHEMA}",
            f"GOLD_SCHEMA:       {GOLD_SCHEMA}",
            f"DATASET_BASE_PATH: {DATASET_BASE_PATH}",
            "",
            "=" * 60,
            "",
            "Example usage in notebooks:",
            f"  spark.read.csv('{DATASET_BASE_PATH}/customers/customers.csv')",
            f"  spark.sql('SELECT * FROM {CATALOG}.{BRONZE_SCHEMA}.customers')",
        ]
    )
)