In [0]:
%run ./Setup-Common

In [0]:
# Databricks AI Training setup (Databricks Free Edition):
# prepares the catalog, schema and volume, then lays out the folders inside the volume.

CATALOG_NAME = "cetpa_training_ws"
SCHEMA_NAME = "training"
VOLUME_NAME = "training_data"

# Identity of the user running this notebook, as reported by Spark SQL
username = spark.sql("SELECT current_user()").collect()[0][0]


def _create_best_effort(statement, success_message):
    """Run one CREATE statement; print the success message, or a 'Note:' line if it raises."""
    try:
        spark.sql(statement)
        print(success_message)
    except Exception as err:
        # Failures are reported but do not stop the cell.
        print(f"Note: {err}")


# Catalog: create when missing, then make it the current catalog
_create_best_effort(
    f"CREATE CATALOG IF NOT EXISTS {CATALOG_NAME}",
    f"✓ Catalog '{CATALOG_NAME}' ready",
)
spark.sql(f"USE CATALOG {CATALOG_NAME}")

# Schema: create when missing, then make it the current schema
_create_best_effort(
    f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}",
    f"✓ Schema '{CATALOG_NAME}.{SCHEMA_NAME}' ready",
)
spark.sql(f"USE SCHEMA {SCHEMA_NAME}")

# Volume: addressed by its fully qualified three-part name
_create_best_effort(
    f"CREATE VOLUME IF NOT EXISTS {CATALOG_NAME}.{SCHEMA_NAME}.{VOLUME_NAME}",
    f"✓ Volume '{CATALOG_NAME}.{SCHEMA_NAME}.{VOLUME_NAME}' ready",
)

# Paths inside the volume
VOLUME_BASE_PATH = "/".join(["/Volumes", CATALOG_NAME, SCHEMA_NAME, VOLUME_NAME])
WORKING_DIR = VOLUME_BASE_PATH + "/working"
DATASETS_DIR = VOLUME_BASE_PATH + "/datasets"
ARXIV_DIR = DATASETS_DIR + "/arxiv"

# Create each folder, in the same order as listed
for _folder in (WORKING_DIR, DATASETS_DIR, ARXIV_DIR, f"{ARXIV_DIR}/arxiv-articles"):
    dbutils.fs.mkdirs(_folder)

# Summary of the resulting configuration
print(
    "\n".join(
        [
            "\n✓ Directory structure created in volume",
            "\nConfiguration:",
            f"  Catalog: {CATALOG_NAME}",
            f"  Schema: {SCHEMA_NAME}",
            f"  Volume: {VOLUME_NAME}",
            f"  Base Path: {VOLUME_BASE_PATH}",
            f"  Working Directory: {WORKING_DIR}",
            f"  Datasets Directory: {DATASETS_DIR}",
        ]
    )
)

In [0]:
# Create a simple DA-like object for compatibility with existing notebooks
class TrainingConfig:
    """Minimal stand-in for the course ``DA`` helper object.

    Attributes:
        username: value of the notebook-level ``username`` at construction time.
        catalog_name: value of the notebook-level ``CATALOG_NAME``.
        schema_name: value of the notebook-level ``SCHEMA_NAME``.
        paths: a ``Paths`` instance exposing ``working_dir``, ``datasets``
            and ``arxiv``.
    """

    class Paths:
        """Holder for the directory paths defined by the configuration cell."""

        def __init__(self):
            # Read at construction time from the notebook-level path constants.
            self.working_dir = WORKING_DIR
            # `datasets` is a plain path string; the arXiv folder is exposed
            # directly on this object as `arxiv`.
            self.datasets = DATASETS_DIR
            self.arxiv = ARXIV_DIR

    def __init__(self):
        self.username = username
        self.catalog_name = CATALOG_NAME
        self.schema_name = SCHEMA_NAME
        self.paths = TrainingConfig.Paths()

    def unique_name(self, sep: str) -> str:
        """Return this instance's username with '@', '.' and '-' replaced by ``sep``.

        Args:
            sep: replacement text for each of the three characters.

        Returns:
            The transformed username string.
        """
        # Use the instance's own username (not the notebook global) so the
        # result always agrees with `self.username`.
        name = self.username
        # Replacements are applied one after another, in this order.
        for char in ("@", ".", "-"):
            name = name.replace(char, sep)
        return name

# Instantiate the configuration object under the name `DA`
DA = TrainingConfig()

# Confirmation line followed by the course content disclaimer
print("\n✓ Training configuration initialized")
print(
    "\nThe examples and models presented in this course are intended solely for demonstration and educational purposes."
    "\nPlease note that the models and prompt examples may sometimes contain offensive, inaccurate, biased, or harmful content."
)