In [0]:
container_name = "adf"
storage_account = "storagedatalakehouse"
key = dbutils.secrets.get(scope="senhas-data-lake-house", key="storage-lakehouse")

dbutils.fs.mount(
  source = f"wasbs://{container_name}@{storage_account}.blob.core.windows.net",
  mount_point = "/mnt/adfraw",
  extra_configs = {f"fs.azure.account.key.{storage_account}.blob.core.windows.net":key})

Out[1]: True

In [0]:
%sql
SHOW SCHEMAS

databaseName
dataquality
default
dev
raw
refined
sensitive
work


In [0]:
from datetime import datetime

data = str(datetime.now().date())

print(data)

2023-03-30


In [0]:
%sql
DROP DATABASE bronze cascade

In [0]:
display(dbutils.fs.mounts())

mountPoint,source,encryptionType
/mnt/adfraw,wasbs://adfraw@storagedatalakehouse.blob.core.windows.net,
/mnt/iotdata,wasbs://lakehouse2@storagedatalakehouse.blob.core.windows.net,
/databricks-datasets,databricks-datasets,
/mnt/batch,wasbs://batch@storagedatalakehouse.blob.core.windows.net,
/databricks/mlflow-tracking,databricks/mlflow-tracking,
/databricks-results,databricks-results,
/mnt/raw,wasbs://lakehouse@storagedatalakehouse.blob.core.windows.net/raw,
/databricks/mlflow-registry,databricks/mlflow-registry,
/mnt/adf/posicao_estoque,wasbs://adf@storagedatalakehouse.blob.core.windows.net/posicao_estoque,
/,DatabricksRoot,


In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
posicao_estoque = posicao_estoque.withColumn('DATA', to_date(col('DATA'), 'yyyy-MM-dd')) \
    .withColumn('CMV', regexp_replace(col('CMV'), ',', '.').cast(DoubleType()))

In [0]:
%sql
CREATE DATABASE bronze

In [0]:
posicao_estoque.write \
    .partitionBy(['data_extracao', 'cod_loja', 'cod_produto']) \
    .mode("overwrite") \
    .saveAsTable("bronze.posicao_estoque_teste")

In [0]:
%sql
OPTIMIZE bronze.posicao_estoque_teste

path,metrics
dbfs:/user/hive/warehouse/bronze.db/posicao_estoque_teste,"List(0, 0, List(null, null, 0.0, 0, 0), List(null, null, 0.0, 0, 0), 1000, null, 0, 1000, 1000, true, 0, 0, 1679937841218, 1679937844882, 8, 0, null)"


In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
und_produto = spark.sql("SELECT DISTINCT Material, UndDoItem FROM raw.pedidos_loja_main_raw")

Out[66]: 2110

In [0]:
und_produto.join(und_produto.groupBy('Material')\
          .count().where('count = 1'),
        on=['Material'],
        how='left_anti').display()

Material,UndDoItem
3141479,3000
3141479,6000
3110215,1000
3110215,6000
3140890,2000
3140890,1000
1017344,1000
1017344,6000
3128965,1000
3128965,4000


In [0]:
produtos_und = produtos.join(und_produto, produtos['produto'] == und_produto['cod_produto'], how='left')

In [0]:
%sql
DROP TABLE dev.dados_externos_renda_nominal_work

In [0]:
%sql
DESCRIBE HISTORY dev.dados_externos_renda_nominal_dev

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,2023-03-28T04:59:26.000+0000,3431736339542622,contato@rodolfomoreira.com.br,CREATE OR REPLACE TABLE AS SELECT,"Map(isManaged -> true, description -> null, partitionBy -> [""cod_loja""], properties -> {})",,List(326164700055505),0327-144450-lmwdjngs,,WriteSerializable,False,"Map(numFiles -> 100, numOutputRows -> 100, numOutputBytes -> 270456)",,Databricks-Runtime/11.3.x-scala2.12


In [0]:
%sql
RESTORE TABLE dev.dados_externos_renda_nominal_dev TO VERSION AS OF 0;

In [0]:
dados_externos_renda_nominal_work.write \
    .option("path", "/mnt/dev/dados_externos_renda_nominal_dev") \
    .partitionBy(['cod_loja']) \
    .mode('overwrite') \
    .saveAsTable('dev.dados_externos_renda_nominal_dev')

In [0]:
dbutils.fs.mounts()

Out[2]: [MountInfo(mountPoint='/mnt/adfraw', source='wasbs://adfraw@storagedatalakehouse.blob.core.windows.net', encryptionType=''),
 MountInfo(mountPoint='/mnt/iotdata', source='wasbs://lakehouse2@storagedatalakehouse.blob.core.windows.net', encryptionType=''),
 MountInfo(mountPoint='/databricks-datasets', source='databricks-datasets', encryptionType=''),
 MountInfo(mountPoint='/mnt/work', source='wasbs://lakehouse@storagedatalakehouse.blob.core.windows.net/work', encryptionType=''),
 MountInfo(mountPoint='/mnt/dev', source='wasbs://lakehouse@storagedatalakehouse.blob.core.windows.net/dev', encryptionType=''),
 MountInfo(mountPoint='/mnt/batch', source='wasbs://batch@storagedatalakehouse.blob.core.windows.net', encryptionType=''),
 MountInfo(mountPoint='/databricks/mlflow-tracking', source='databricks/mlflow-tracking', encryptionType=''),
 MountInfo(mountPoint='/databricks-results', source='databricks-results', encryptionType=''),
 MountInfo(mountPoint='/mnt/raw', source='wasbs://lake

In [0]:
%sql
REVOKE ALL PRIVILEGES ON SCHEMA dev FROM `rodolfo.moreira@rodolfomoreira.com.br`

In [0]:
%sql
GRANT SELECT ON SCHEMA work TO `rodolfo.moreira@rodolfomoreira.com.br`

In [0]:
%sql
GRANT USAGE ON DATABASE work TO `rodolfo.moreira@rodolfomoreira.com.br`

In [0]:
%sql
SELECT
  data_extracao,
  cod_loja,
  cod_produto,
  venda_quantidade,
  custo_mercadoria_vendida,
  ROUND((custo_mercadoria_vendida / venda_quantidade),2) as custo_mercadoria_vendida_unitario,
  venda_total,
  ROUND((venda_total / venda_quantidade),2) as venda_unitaria,
  margem,
  ROUND((margem / venda_quantidade),2) as margem_unitaria
FROM work.posicao_estoque_work
WHERE
  custo_mercadoria_vendida > 0
AND
  venda_quantidade > 1
LIMIT 100

data_extracao,cod_loja,cod_produto,venda_quantidade,custo_mercadoria_vendida,custo_mercadoria_vendida_unitario,venda_total,venda_unitaria,margem,margem_unitaria
2023-03-29,94,3116320,2,5.37,2.69,9.98,4.99,3.91,1.96
2023-03-29,61,3109717,2,46.42,23.21,119.98,59.99,40.86,20.43
2023-03-29,15,3129884,12,99.99,8.33,125.88,10.49,14.24,1.19
2023-03-29,84,3128676,2,27.59,13.8,71.98,35.99,25.49,12.75
2023-03-29,79,2987764,2,6.28,3.14,9.98,4.99,1.08,0.54
2023-03-29,14,3129884,49,295.1,6.02,495.51,10.11,24.09,0.49
2023-03-29,57,3105851,2,20.54,10.27,33.58,16.79,3.88,1.94
2023-03-29,35,3129240,15,36.63,2.44,51.61,3.44,10.22,0.68
2023-03-29,106,2987764,6,21.27,3.55,24.95,4.16,1.37,0.23
2023-03-29,23,3129884,31,181.79,5.86,304.99,9.84,-15.45,-0.5


In [0]:
@dlt.expect("valid timestamp", "col(“timestamp”) > '2023-01-01'")

[0;36m  File [0;32m"<command-585402679406961>"[0;36m, line [0;32m1[0m
[0;31m    @dlt.expect("valid timestamp", "col(“timestamp”) > '2023-01-01'")[0m
[0m                                                                     ^[0m
[0;31mSyntaxError[0m[0;31m:[0m unexpected EOF while parsing
