In [1]:
# Cell 1: Load extensions and import libraries
%load_ext autoreload
%autoreload 2

import os
from pyspark.dbutils import DBUtils # For Databricks utilities

In [3]:
# Cell 2: Configure database connection using Databricks secrets
secret_scope = "gustavo_lima_adw"
schema_source = "Sales"

# Read every connection detail from the secret scope (same keys, same order)
# so that no credential is written in the notebook itself.
# NOTE(review): `dbutils` is not defined anywhere in this notebook; presumably
# it is the global injected by the Databricks runtime -- confirm.
mssql_host, mssql_port, mssql_database, username, password = (
    dbutils.secrets.get(secret_scope, secret_key)
    for secret_key in ("mssql_host", "mssql_port", "mssql_database", "username", "password")
)

# Properties handed to the JDBC reader alongside the URL.
# NOTE(review): trustServerCertificate='true' normally means the server
# certificate is accepted without validation -- confirm this is intended
# outside of development environments.
connection_properties = dict(
    user=username,
    password=password,
    driver="com.microsoft.sqlserver.jdbc.SQLServerDriver",
    encrypt="true",
    trustServerCertificate="true",
)

# JDBC URL: host/port first, then ';'-separated connection settings.
jdbc_url = ";".join(
    (
        f"jdbc:sqlserver://{mssql_host}:{mssql_port}",
        f"databaseName={mssql_database}",
        "encrypt=true",
        "trustServerCertificate=true",
    )
)

In [None]:
# Cell 3: Fetch list of tables from the source schema
# The metadata query is wrapped in parentheses and aliased because it is passed
# in the position where the JDBC reader expects a table name.
tables_query = (
    "(SELECT table_name FROM information_schema.tables "
    f"WHERE table_schema = '{schema_source}' AND table_type = 'BASE TABLE') AS tables"
)
tables_df = spark.read.jdbc(
    url=jdbc_url,
    table=tables_query,
    properties=connection_properties,
)

# Bring the (small) metadata result to the driver and keep only the names.
table_rows = tables_df.collect()
tables = [record.table_name for record in table_rows]
print(f"Tables found in schema '{schema_source}': {tables}")

In [None]:
# Cell 4: Extract and save tables to RAW catalog
import time

def table_exists(table_name, namespace="gustavo_lima_raw.schema"):
    """Return whether ``<namespace>.<table_name lower-cased>`` exists in the catalog.

    Args:
        table_name: Table name; it is lower-cased before the lookup.
        namespace: Prefix placed in front of the table name, separated by a
            dot. Defaults to the RAW destination used by this notebook, so
            existing single-argument calls behave exactly as before.

    Returns:
        The result of ``spark.catalog.tableExists`` for the qualified name.
    """
    qualified_name = f"{namespace}.{table_name.lower()}"
    return spark.catalog.tableExists(qualified_name)

def save_table_with_retry(df, target_table, retries=3, delay=5):
    """Write ``df`` as a Delta table in overwrite mode, retrying on failure.

    Args:
        df: Object exposing ``write.format(...).mode(...).saveAsTable(...)``
            (the DataFrames loaded over JDBC in this notebook).
        target_table: Name passed to ``saveAsTable``.
        retries: Total number of write attempts. Must be at least 1.
        delay: Seconds to sleep between a failed attempt and the next one.

    Raises:
        ValueError: If ``retries`` is less than 1. Previously such a call
            skipped the loop entirely and returned without writing anything.
        Exception: Whatever the final write attempt raised, re-raised
            unchanged once all attempts are used up.
    """
    if retries < 1:
        raise ValueError(f"retries must be at least 1, got {retries}")

    for attempt in range(1, retries + 1):
        try:
            df.write.format("delta").mode("overwrite").saveAsTable(target_table)
        except Exception as exc:
            if attempt == retries:
                print(f"Failed to save table '{target_table}' after {retries} attempts.")
                # Bare raise keeps the original traceback intact.
                raise
            # Report the cause so a persistent (non-transient) error is visible
            # before the remaining attempts are spent.
            print(
                f"Error saving table '{target_table}' "
                f"(attempt {attempt}/{retries}): {exc}. "
                f"Retrying in {delay} seconds..."
            )
            time.sleep(delay)
        else:
            print(f"Table '{target_table}' saved to RAW catalog.")
            return

# Extract and save each table
for table in tables:
    print(f"Extraindo {table}...")

    # Bracket-quote both identifiers (doubling any ']') so table names that
    # contain spaces, dots or reserved words are still read correctly.
    quoted_schema = schema_source.replace("]", "]]")
    quoted_table = table.replace("]", "]]")
    source_table = f"[{quoted_schema}].[{quoted_table}]"

    # Reuse the shared connection_properties (user, password, driver and TLS
    # settings) so this read is configured the same way as the table-listing
    # query, instead of passing only user/password.
    df = (
        spark.read.format("jdbc")
        .option("url", jdbc_url)
        .option("dbtable", source_table)
        .options(**connection_properties)
        .load()
    )

    # Destination names are lower-cased; the existence check is informational
    # only, because the save always runs in overwrite mode.
    target_table = f"gustavo_lima_raw.schema.{table.lower()}"
    if table_exists(table.lower()):
        print(f"A tabela '{target_table}' already exists. Overwriting...")

    save_table_with_retry(df, target_table)
