## Validando a SparkSession

In [0]:
spark

## Conectando Azure ADLS Gen2 no Databricks

### Mostrando os pontos de montagem no cluster Databricks

In [0]:
display(dbutils.fs.mounts())

mountPoint,source,encryptionType
/databricks-datasets,databricks-datasets,
/mnt/datalakefbca7dc4e981b9cb/landing-zone,wasbs://landing-zone@datalakefbca7dc4e981b9cb.blob.core.windows.net,
/databricks/mlflow-tracking,databricks/mlflow-tracking,sse-s3
/databricks-results,databricks-results,sse-s3
/mnt/datalakefbca7dc4e981b9cb/gold,wasbs://gold@datalakefbca7dc4e981b9cb.blob.core.windows.net,
/databricks/mlflow-registry,databricks/mlflow-registry,sse-s3
/mnt/datalake7eadf73a479de9f7/landing-zone,wasbs://landing-zone@datalake7eadf73a479de9f7.blob.core.windows.net,
/mnt/datalakefbca7dc4e981b9cb/silver,wasbs://silver@datalakefbca7dc4e981b9cb.blob.core.windows.net,
/mnt/datalakec8dc20f3798c1da9/bronze,wasbs://bronze@datalakec8dc20f3798c1da9.blob.core.windows.net,
/mnt/datalakec8dc20f3798c1da9/landing-zone,wasbs://landing-zone@datalakec8dc20f3798c1da9.blob.core.windows.net,


### Definindo função para montar ADLS com SAS token

In [0]:
storageAccountName = "datalake7eadf73a479de9f7"
sasToken = "sv=2024-11-04&ss=bfqt&srt=sco&sp=rwdlacupyx&se=2025-06-16T03:33:35Z&st=2025-06-15T19:33:35Z&spr=https&sig=fBJfCWK99G%2FGTBY81Y8eNQYEpOxgrzjVSaavlAB0np4%3D"

def mount_adls(blobContainerName):
    try:
        dbutils.fs.mount(
            source = "wasbs://{}@{}.blob.core.windows.net".format(blobContainerName, storageAccountName),
            mount_point = f"/mnt/{storageAccountName}/{blobContainerName}",
            extra_configs = {'fs.azure.sas.' + blobContainerName + '.' + storageAccountName + '.blob.core.windows.net': sasToken}
        )
        print("OK!")
    except Exception as e:
        print("Falha", e)

### Montando containers

In [0]:
mount_adls('landing-zone')
mount_adls('bronze')

Falha An error occurred while calling o1893.mount.
: java.rmi.RemoteException: java.lang.IllegalArgumentException: requirement failed: Directory already mounted: /mnt/datalake7eadf73a479de9f7/landing-zone; nested exception is: 
	java.lang.IllegalArgumentException: requirement failed: Directory already mounted: /mnt/datalake7eadf73a479de9f7/landing-zone
	at com.databricks.backend.daemon.data.client.DbfsClient.send0(DbfsClient.scala:135)
	at com.databricks.backend.daemon.data.client.DbfsClient.sendIdempotent(DbfsClient.scala:69)
	at com.databricks.backend.daemon.dbutils.DBUtilsCore.createOrUpdateMount(DBUtilsCore.scala:1053)
	at com.databricks.backend.daemon.dbutils.DBUtilsCore.$anonfun$mount$1(DBUtilsCore.scala:1079)
	at com.databricks.logging.UsageLogging.$anonfun$recordOperation$1(UsageLogging.scala:560)
	at com.databricks.logging.UsageLogging.executeThunkAndCaptureResultTags$1(UsageLogging.scala:657)
	at com.databricks.logging.UsageLogging.$anonfun$recordOperationWithResultTags$4(Usa

### Processando todos os CSVs da landing-zone

In [0]:
def process_all_csvs():
    landing_zone_path = f"/mnt/{storageAccountName}/landing-zone"
    bronze_path = f"/mnt/{storageAccountName}/bronze"
    
    files = dbutils.fs.ls(landing_zone_path)
    csv_files = [file for file in files if file.name.lower().endswith('.csv')]
    
    print(f"Encontrados {len(csv_files)} arquivos CSV")
    
    dataframes = {}
    
    for file in csv_files:
        try:
            table_name = file.name.replace('.csv', '').lower()
            print(f"Processando: {file.name} -> {table_name}")
            
            df = spark.read.option("inferSchema", "true").option("header", "true").csv(file.path)
            df.write.format('delta').mode('overwrite').save(f"{bronze_path}/{table_name}")
            dataframes[table_name] = df
            
        except Exception as e:
            print(f"Erro em {file.name}: {str(e)}")
    
    return dataframes

In [0]:
dataframes_processados = process_all_csvs()

Encontrados 14 arquivos CSV
Processando: achievement_unlocked.csv -> achievement_unlocked
Processando: achievements.csv -> achievements
Processando: developers.csv -> developers
Processando: dlcs.csv -> dlcs
Processando: game_genders.csv -> game_genders
Processando: game_platforms.csv -> game_platforms
Processando: game_tags.csv -> game_tags
Processando: games.csv -> games
Processando: genders.csv -> genders
Processando: platforms.csv -> platforms
Processando: purchases.csv -> purchases
Processando: reviews.csv -> reviews
Processando: tags.csv -> tags
Processando: users.csv -> users


### Criando tabelas Delta externas

In [0]:
def create_external_tables():
    bronze_path = f"/mnt/{storageAccountName}/bronze"
    
    try:
        bronze_dirs = dbutils.fs.ls(bronze_path)
        
        for dir_info in bronze_dirs:
            if dir_info.isDir():
                table_name = dir_info.name.rstrip('/')
                
                spark.sql(f"DROP TABLE IF EXISTS {table_name}")
                spark.sql(f"CREATE TABLE {table_name} USING DELTA LOCATION '{dir_info.path}'")
                print(f"Tabela externa: {table_name}")
                    
    except Exception as e:
        print(f"Erro: {str(e)}")

In [0]:
create_external_tables()

Tabela externa: achievement_unlocked
Tabela externa: achievements
Tabela externa: developers
Tabela externa: dlcs
Tabela externa: game_genders
Tabela externa: game_platforms
Tabela externa: game_tags
Tabela externa: games
Tabela externa: genders
Tabela externa: platforms
Tabela externa: purchases
Tabela externa: reviews
Tabela externa: tags
Tabela externa: users


### Criando tabelas Delta gerenciadas

In [0]:
def create_managed_tables():
    database_name = "pipeline"
    spark.sql(f"CREATE DATABASE IF NOT EXISTS {database_name}")
    
    bronze_path = f"/mnt/{storageAccountName}/bronze"
    
    try:
        bronze_dirs = dbutils.fs.ls(bronze_path)
        
        for dir_info in bronze_dirs:
            if dir_info.isDir():
                table_name = dir_info.name.rstrip('/')
                
                df = spark.read.format('delta').load(dir_info.path)
                df.write.format('delta').mode('overwrite').saveAsTable(f"{database_name}.{table_name}")
                print(f"Tabela gerenciada: {database_name}.{table_name}")
                    
    except Exception as e:
        print(f"Erro: {str(e)}")

In [0]:
create_managed_tables()

Tabela gerenciada: pipeline.achievement_unlocked
Tabela gerenciada: pipeline.achievements
Tabela gerenciada: pipeline.developers
Tabela gerenciada: pipeline.dlcs
Tabela gerenciada: pipeline.game_genders
Tabela gerenciada: pipeline.game_platforms
Tabela gerenciada: pipeline.game_tags
Tabela gerenciada: pipeline.games
Tabela gerenciada: pipeline.genders
Tabela gerenciada: pipeline.platforms
Tabela gerenciada: pipeline.purchases
Tabela gerenciada: pipeline.reviews
Tabela gerenciada: pipeline.tags
Tabela gerenciada: pipeline.users


### Validando tabelas criadas

In [0]:
print("=== TABELAS GERENCIADAS ===")
spark.sql("SHOW TABLES IN pipeline").show()

=== TABELAS GERENCIADAS ===
+--------+--------------------+-----------+
|database|           tableName|isTemporary|
+--------+--------------------+-----------+
|pipeline|achievement_unlocked|      false|
|pipeline|        achievements|      false|
|pipeline|          developers|      false|
|pipeline|                dlcs|      false|
|pipeline|        game_genders|      false|
|pipeline|      game_platforms|      false|
|pipeline|           game_tags|      false|
|pipeline|               games|      false|
|pipeline|             genders|      false|
|pipeline|           platforms|      false|
|pipeline|           purchases|      false|
|pipeline|             reviews|      false|
|pipeline|                tags|      false|
|pipeline|               users|      false|
+--------+--------------------+-----------+



In [0]:
display(dbutils.fs.ls(f"/mnt/{storageAccountName}/bronze"))

path,name,size,modificationTime
dbfs:/mnt/datalake7eadf73a479de9f7/bronze/achievement_unlocked/,achievement_unlocked/,0,0
dbfs:/mnt/datalake7eadf73a479de9f7/bronze/achievements/,achievements/,0,0
dbfs:/mnt/datalake7eadf73a479de9f7/bronze/developers/,developers/,0,0
dbfs:/mnt/datalake7eadf73a479de9f7/bronze/dlcs/,dlcs/,0,0
dbfs:/mnt/datalake7eadf73a479de9f7/bronze/game_genders/,game_genders/,0,0
dbfs:/mnt/datalake7eadf73a479de9f7/bronze/game_platforms/,game_platforms/,0,0
dbfs:/mnt/datalake7eadf73a479de9f7/bronze/game_tags/,game_tags/,0,0
dbfs:/mnt/datalake7eadf73a479de9f7/bronze/games/,games/,0,0
dbfs:/mnt/datalake7eadf73a479de9f7/bronze/genders/,genders/,0,0
dbfs:/mnt/datalake7eadf73a479de9f7/bronze/platforms/,platforms/,0,0
