In [0]:
# =============================================================================
# Simple Oracle to Databricks Table Replicator
# =============================================================================

# Oracle connection configuration.
#
# Credentials are taken from the environment so that no secret is stored in
# the source of this notebook/script:
#   ORACLE_USER      - Oracle account name (falls back to the previous default)
#   ORACLE_PASSWORD  - Oracle account password (no default; must be provided)
# NOTE(review): on Databricks a secret scope is usually the better place for
# these values - confirm which mechanism the workspace uses. Any password that
# was ever committed in this file should be rotated.
import os

oracle_config = {
    "jdbc_url": "jdbc:oracle:thin:@//pr03db-scan.vs.csin.cz:1521/DWHP",
    "user": os.environ.get("ORACLE_USER", "ext98174"),
    # Empty string when unset, so the failure surfaces as an Oracle
    # authentication error instead of a None being passed to the JDBC reader.
    "password": os.environ.get("ORACLE_PASSWORD", ""),
    "driver": "oracle.jdbc.driver.OracleDriver",
}

def oracle_to_spark_type(ora_type, data_length=None, data_precision=None, data_scale=None):
    """
    Map an Oracle column data type to a Spark SQL type name.

    Args:
        ora_type: Oracle type name as stored in ALL_TAB_COLUMNS.DATA_TYPE
            (e.g. 'NUMBER', 'VARCHAR2', 'TIMESTAMP(6)'); case-insensitive
        data_length: Column length; accepted for call compatibility, not used
        data_precision: NUMBER precision, or None when not declared
        data_scale: NUMBER scale, or None when not declared

    Returns:
        Spark SQL type name as a string. Types without an explicit mapping
        fall back to 'STRING'.
    """
    ora_type = ora_type.strip().upper()

    if ora_type == 'NUMBER':
        if data_precision and data_scale is not None:
            # Precision/scale may arrive as decimal.Decimal (or float) rather
            # than int when the dictionary view is read over JDBC; formatting
            # those directly would produce DDL like DECIMAL(10.0000000000,2.0000000000).
            precision = int(data_precision)
            scale = int(data_scale)

            # Oracle allows a negative scale and a scale larger than the
            # precision; Spark requires 0 <= scale <= precision <= 38.
            if scale < 0:
                precision, scale = precision - scale, 0
            elif scale > precision:
                precision = scale
            precision = min(precision, 38)
            scale = min(scale, precision)

            if scale == 0:
                # Integer types
                if precision <= 9:
                    return 'INT'
                if precision <= 18:
                    return 'BIGINT'
                return f'DECIMAL({precision},0)'
            # Decimal types
            return f'DECIMAL({precision},{scale})'
        # NUMBER without precision - common for IDs/keys, use BIGINT
        return 'BIGINT'

    # Oracle reports timestamp columns with their fractional precision and
    # time-zone flavour in the type name (e.g. 'TIMESTAMP(6)',
    # 'TIMESTAMP(6) WITH TIME ZONE'), so match on the prefix.
    if ora_type.startswith('TIMESTAMP'):
        return 'TIMESTAMP'

    type_mapping = {
        'VARCHAR2': 'STRING',
        'CHAR': 'STRING',
        'NCHAR': 'STRING',
        'NVARCHAR2': 'STRING',
        'CLOB': 'STRING',
        'DATE': 'TIMESTAMP',
        'BINARY_FLOAT': 'FLOAT',
        'BINARY_DOUBLE': 'DOUBLE',
        'BLOB': 'BINARY',
        'RAW': 'BINARY',
        'LONG RAW': 'BINARY',
    }

    return type_mapping.get(ora_type, 'STRING')


def _read_oracle(dbtable):
    """Load an Oracle table name or parenthesised subquery over JDBC using oracle_config."""
    return (
        spark.read
        .format("jdbc")
        .option("url", oracle_config["jdbc_url"])
        .option("dbtable", dbtable)
        .option("user", oracle_config["user"])
        .option("password", oracle_config["password"])
        .option("driver", oracle_config["driver"])
        .load()
    )


def replicate_oracle_table(ora_schema, ora_table, dbx_schema, include_data=False, overwrite_existing=False):
    """
    Replicate Oracle table structure and optionally data to Databricks

    The target is always `gap_catalog.<dbx_schema>.<ora_table lower-cased>`.
    Progress is reported with print(). Failures while reading from Oracle,
    creating the table, or copying data are caught and printed instead of
    being raised; the function then returns early.

    Args:
        ora_schema: Oracle schema name (e.g., 'DWH_OWNER')
        ora_table: Oracle table name (e.g., 'USER_CONFIG_PARAMETERS')
        dbx_schema: Databricks schema name (e.g., 'dwh_owner')
        include_data: Copy data from Oracle (default: False)
        overwrite_existing: Drop and recreate if table exists (default: False)

    Returns:
        None
    """

    target_catalog = "gap_catalog"
    target_table = f"{target_catalog}.{dbx_schema}.{ora_table.lower()}"

    print(f"=== Replicating {ora_schema}.{ora_table} → {target_table} ===")

    # Create schema if it doesn't exist (best effort: a failure is only reported)
    try:
        spark.sql(f"CREATE SCHEMA IF NOT EXISTS {target_catalog}.{dbx_schema}")
        print(f"✓ Schema {target_catalog}.{dbx_schema} ready")
    except Exception as e:
        print(f"⚠️  Schema creation warning: {e}")

    # Check if table exists: DESCRIBE fails when the table is missing.
    # `except Exception` (not a bare except) so KeyboardInterrupt/SystemExit
    # still propagate.
    try:
        spark.sql(f"DESCRIBE TABLE {target_table}")
        table_exists = True
    except Exception:
        table_exists = False

    if table_exists:
        if overwrite_existing:
            print(f"🗑️  Dropping existing table {target_table}...")
            spark.sql(f"DROP TABLE {target_table}")
            table_exists = False
        else:
            print(f"⚠️  WARNING: Table {target_table} already exists!")
            if not include_data:
                print("   Use overwrite_existing=True to recreate, or include_data=True to load data")
                return

    # Create table structure if needed
    if not table_exists:
        print(f"📊 Getting structure for {ora_schema}.{ora_table}...")

        # Get column information from Oracle
        column_query = f"""
        (
            SELECT 
                column_name,
                data_type,
                data_length,
                data_precision,
                data_scale,
                nullable,
                column_id
            FROM all_tab_columns 
            WHERE owner = '{ora_schema.upper()}' 
              AND table_name = '{ora_table.upper()}'
            ORDER BY column_id
        ) oracle_columns
        """

        try:
            columns_list = _read_oracle(column_query).collect()

            if not columns_list:
                print(f"❌ ERROR: Table {ora_schema}.{ora_table} not found or no access!")
                return

            print(f"✓ Found {len(columns_list)} columns")

        except Exception as e:
            print(f"❌ ERROR getting Oracle table structure: {e}")
            return

        # Build CREATE TABLE statement
        print("🔧 Building Databricks table structure...")
        column_definitions = []

        for col in columns_list:
            col_name = col['COLUMN_NAME'].lower()
            ora_type = col['DATA_TYPE']
            nullable = col['NULLABLE'] == 'Y'

            spark_type = oracle_to_spark_type(
                ora_type, col['DATA_LENGTH'], col['DATA_PRECISION'], col['DATA_SCALE']
            )
            null_constraint = "" if nullable else " NOT NULL"

            # Backtick-quote the identifier so names containing characters such
            # as '$' or '#' do not break the generated DDL.
            quoted_name = "`" + col_name.replace("`", "``") + "`"
            column_definitions.append(f"    {quoted_name} {spark_type}{null_constraint}")

            print(f"  {col_name}: {ora_type} → {spark_type}{null_constraint}")

        # Create the table
        columns_text = ',\n'.join(column_definitions)
        create_table_sql = (
            f"CREATE TABLE {target_table} (\n"
            f"{columns_text}\n"
            f")\n"
            f"USING DELTA\n"
            f"COMMENT 'Replicated from Oracle {ora_schema}.{ora_table}'"
        )

        try:
            spark.sql(create_table_sql)
            print(f"✅ SUCCESS: Table {target_table} created!")
        except Exception as e:
            print(f"❌ ERROR creating table: {e}")
            return

    # Copy data if requested
    if not include_data:
        return

    print(f"📊 Copying data from Oracle...")

    try:
        # Read data from Oracle
        data_df = _read_oracle(f"{ora_schema}.{ora_table}")

        # NOTE: count() is its own Spark action, so the source table is read
        # once here and again by the write below.
        row_count = data_df.count()
        print(f"✓ Found {row_count} rows to copy")

        if row_count > 0:
            # Fix case sensitivity - convert column names to lowercase
            print("🔧 Converting column names to lowercase...")
            for col_name in data_df.columns:
                data_df = data_df.withColumnRenamed(col_name, col_name.lower())

            # table_exists is only still True here when the table pre-existed
            # and overwrite_existing was False (otherwise it was dropped and
            # recreated above), so no extra check on overwrite_existing is needed.
            if table_exists:
                # For existing tables, clear data first to avoid schema conflicts
                print("🧹 Clearing existing table data...")
                spark.sql(f"DELETE FROM {target_table}")

                # Use insertInto for existing tables
                data_df.write \
                    .format("delta") \
                    .mode("append") \
                    .insertInto(target_table)
            else:
                # Use saveAsTable with explicit overwrite for new/overwritten tables.
                # overwriteSchema=true is set, so the DataFrame's schema may
                # replace the one created by the DDL above.
                data_df.write \
                    .format("delta") \
                    .mode("overwrite") \
                    .option("overwriteSchema", "true") \
                    .saveAsTable(target_table)

            print(f"✅ SUCCESS: Loaded {row_count} rows into {target_table}")

            # Show sample data
            print("📋 Sample data:")
            spark.sql(f"SELECT * FROM {target_table} LIMIT 3").show()

            # Show final row count
            final_count = spark.sql(f"SELECT COUNT(*) as cnt FROM {target_table}").collect()[0][0]
            print(f"📊 Final table contains {final_count} rows")

        else:
            print("ℹ️  No data to copy (empty table)")

    except Exception as e:
        print(f"❌ ERROR copying data: {e}")
        print("💡 Try using overwrite_existing=True to recreate the table")

# Usage examples
#
# 1. Create table structure only (no data is copied):
# replicate_oracle_table("DWH_OWNER", "USER_CONFIG_PARAMETERS", "dwh_owner")

# 2. Create table with data. overwrite_existing=True drops an existing target
#    table before it is recreated and loaded:
replicate_oracle_table("ADS_OWNER", "EVENT_STATUS", "ads_owner", include_data=True, overwrite_existing=True)

# 3. Load data without dropping the target. With overwrite_existing=False an
#    existing table is kept; its rows are deleted and reloaded from Oracle:
# replicate_oracle_table("ADS_ETL_OWNER", "DLK_ADS_LOV_RDS_ANALYTICALEVENTSTATUS", "ads_etl_owner", include_data=True, overwrite_existing=False)


