# Tabela Posição Estoque Work

## Imports

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

## Análise Inicial

In [0]:
posicao_estoque = spark.sql("SELECT * FROM raw.posicao_estoque_raw LIMIT 100")
posicao_estoque.display()

DATA,CMV,ESTOQUE_VALOR,ESTOQUE_QTDE,VENDA_VALOR,VENDA_QTDE,MARGEM_FINAL,VENDA_LIQUIDA_FINAL,PRODUTO,LOJA
2023-03-20,1534,0,0,3699,10,1425,2959,3102314,20
2023-03-20,844,1689,20,1999,10,97,1814,3137689,28
2023-03-20,0,1679,20,0,0,0,0,1027257,6
2023-03-20,0,1016,20,0,0,0,0,1027257,89
2023-03-20,0,1348,20,0,0,0,0,1027257,16
2023-03-20,0,1679,20,0,0,0,0,1027257,42
2023-03-20,0,0,0,0,0,0,0,3141816,18
2023-03-20,0,1016,20,0,0,0,0,1027257,48
2023-03-20,0,0,0,0,0,0,0,3141816,45
2023-03-20,0,0,0,0,0,0,0,3141816,60


## Casting

In [0]:
posicao_estoque = posicao_estoque.withColumn('DATA', to_date('DATA', 'yyyy-MM-dd')) \
    .withColumn('CMV', regexp_replace('CMV', ',', '.').cast(DoubleType())) \
    .withColumn('ESTOQUE_VALOR', regexp_replace('ESTOQUE_VALOR', ',', '.').cast(DoubleType())) \
    .withColumn('ESTOQUE_QTDE', regexp_replace('ESTOQUE_QTDE', ',', '.').cast(IntegerType())) \
    .withColumn('VENDA_VALOR', regexp_replace('VENDA_VALOR', ',', '.').cast(DoubleType())) \
    .withColumn('VENDA_QTDE', regexp_replace('VENDA_QTDE', ',', '.').cast(IntegerType())) \
    .withColumn('MARGEM_FINAL', regexp_replace('MARGEM_FINAL', ',', '.').cast(DoubleType())) \
    .withColumn('VENDA_LIQUIDA_FINAL', regexp_replace('VENDA_LIQUIDA_FINAL', ',', '.').cast(DoubleType())) \
    .withColumn('PRODUTO', regexp_replace('PRODUTO', ',', '.').cast(IntegerType())) \
    .withColumn('LOJA', regexp_replace('LOJA', ',', '.').cast(IntegerType()))

## Renomeando e Organizando as Colunas

In [0]:
posicao_estoque = posicao_estoque.withColumnRenamed('DATA', 'data_extracao') \
    .withColumnRenamed('CMV', 'custo_mercadoria_vendida') \
    .withColumnRenamed('ESTOQUE_VALOR', 'estoque_valor') \
    .withColumnRenamed('ESTOQUE_QTDE', 'estoque_quantidade') \
    .withColumnRenamed('VENDA_VALOR', 'venda_total') \
    .withColumnRenamed('VENDA_QTDE', 'venda_quantidade') \
    .withColumnRenamed('MARGEM_FINAL', 'margem') \
    .withColumnRenamed('VENDA_LIQUIDA_FINAL', 'venda_liquida') \
    .withColumnRenamed('PRODUTO', 'cod_produto') \
    .withColumnRenamed('LOJA', 'cod_loja')

In [0]:
posicao_estoque = posicao_estoque[['data_extracao',
 'cod_loja',
 'cod_produto',
 'custo_mercadoria_vendida',
 'estoque_valor',
 'estoque_quantidade',
 'venda_total',
 'venda_quantidade',
 'margem',
 'venda_liquida']]

In [0]:
posicao_estoque.display()

data_extracao,cod_loja,cod_produto,custo_mercadoria_vendida,estoque_valor,estoque_quantidade,venda_total,venda_quantidade,margem,venda_liquida
2023-03-20,20,3102314,15.34,0.0,0,36.99,1,14.25,29.59
2023-03-20,28,3137689,8.44,16.89,2,19.99,1,9.7,18.14
2023-03-20,6,1027257,0.0,16.79,2,0.0,0,0.0,0.0
2023-03-20,89,1027257,0.0,10.16,2,0.0,0,0.0,0.0
2023-03-20,16,1027257,0.0,13.48,2,0.0,0,0.0,0.0
2023-03-20,42,1027257,0.0,16.79,2,0.0,0,0.0,0.0
2023-03-20,18,3141816,0.0,0.0,0,0.0,0,0.0,0.0
2023-03-20,48,1027257,0.0,10.16,2,0.0,0,0.0,0.0
2023-03-20,45,3141816,0.0,0.0,0,0.0,0,0.0,0.0
2023-03-20,60,3141816,0.0,0.0,0,0.0,0,0.0,0.0


## Removendo linhas duplicadas

In [0]:
posicao_estoque = posicao_estoque.coalesce(1).dropDuplicates()

## Salvando Dados

In [0]:
posicao_estoque.write \
    .partitionBy(['data_extracao', 'cod_loja', 'cod_produto']) \
    .mode('overwrite') \
    .saveAsTable('dev.posicao_estoque_main_dev')

## Otimizando

In [0]:
%sql
OPTIMIZE dev.posicao_estoque_main_dev

path,metrics
dbfs:/mnt/dev/posicao_estoque_main_dev,"List(0, 0, List(null, null, 0.0, 0, 0), List(null, null, 0.0, 0, 0), 100, null, 0, 100, 100, true, 0, 0, 1679945246112, 1679945248585, 8, 0, null)"
