# **Fluxo módulo - Metodologia Pessoa Física**

O objetivo deste notebook é:
1. Executar o fluxo completo do módulo.

    Autores: Rodrigo Henrique


## **Preparação de Variáveis e Inicialização da Sessão Spark**

In [None]:
###### ###### IMPORTACAO
###############################################################################
import Spark, Kinit
import os
from dotenv import load_dotenv

from IPython.core.display import HTML
# Inject CSS so that <pre> output keeps its whitespace and is not wrapped.
display(HTML("<style>pre { white-space: pre !important; }</style>"))

###############################################################################
###### PARAMETERS USED TO START THE SPARK SESSION
###############################################################################
HDP_VERSION: int = 3
SESSION_NAME: str = "metodologia_Pessoa_Fisica"
LANGUAGE: str = "python"

###############################################################################
###### CONSTANTE QUE SERVE PARA DEFINIR PARAMETROS A SEREM UTILIZADOS CONFORME 
###### AMBIENTE (MODELAGEM OU SCORAGE/PRODUCAO) NO QUAL O JOB SERA EXECUTADO 
###############################################################################
AMBIENTE = os.environ["AMBIENTE"] 

if AMBIENTE == "MODELAGEM":
    load_dotenv("env.env", override=True)
    !kdestroy
    TABELA_ENTRADA = "CRD_ENTRADA"
    TABELA_SAIDA = "CRD_SAIDA"
    CHECKPOINT_PATH = '/checkpoints_modelo_53"
    INPUT_PATH = "/dados_batch_ancpf"
    ARQ_PATH = "/Lote_PF"
    URL = os.environ["URL"] 
    JCN = os.environ["JCN"] 
    print("Credenciais de Desenvolvimento Carregadas")
else:
    load_dotenv()
    TABELA_ENTRADA = "HIVE.CRD_ENTRADA"
    TABELA_SAIDA = "HIVE.CRD_SAIDA"
    USER = os.environ["KEYTAB"]
    CHECKPOINT_PATH = '/checkpoints_modelo_53"
    INPUT_PATH = "/dados_batch_ancpf"
    ARQ_PATH = "/Lote_PF"
    URL = os.environ["URL"] 
    JCN = os.environ["JCN"] 
    print("Credenciais de Produção Carregadas")
    
# Mapping passed to Spark(...) through its `env` argument.
ENV_VAR = dict(
    TABELA_ENTRADA=TABELA_ENTRADA,
    TABELA_SAIDA=TABELA_SAIDA,
    AMBIENTE=AMBIENTE,
    # Database settings are copied from the process environment under the same names.
    **{
        chave: os.environ[chave]
        for chave in ('DB_USER', 'DB_PASSWORD', 'DB_HOST', 'DB_PORTA', 'DB_DATABASE')
    },
    # The driver is the one entry whose source variable has a different name.
    DB_DRIVER=os.environ['DRIVER'],
    CHECKPOINT_PATH=CHECKPOINT_PATH,
    INPUT_PATH=INPUT_PATH,
    ARQ_PATH=ARQ_PATH,
    URL=URL,
    JCN=JCN,
)

###############################################################################
###### SPARK SESSION CREATION
###############################################################################
spark = Spark(
    session_name=SESSION_NAME,
    driver_memory="16g",
    executor_memory="32g",
    driver_cores=8,
    executor_cores=5,
    username=USER,
    language=LANGUAGE,
    # Use the constant from the parameters section instead of a second literal 3.
    hdp=HDP_VERSION,
    db=True,
    env=ENV_VAR,
    spark_conf={
        "spark.sql.shuffle.partitions": "20",
        "spark.memory.fraction": "0.30",
        # Spark property names are case-sensitive; the previous all-lowercase
        # "memoryoverhead" spelling is not the documented key.
        "spark.executor.memoryOverhead": "16g",
        "spark.sql.sources.partitionOverwriteMode": "dynamic",
        "spark.sql.hive.caseSensitiveInferenceMode": "NEVER_INFER",
        "spark.shuffle.service.enabled": "true",
        "spark.shuffle.service.port": 7447,
        "spark.shuffle.service.name": "spark2_shuffle",
        "spark.shuffle.useOldFetchProtocol": "true",
        "spark.shuffle.sort.bypassMergeThreshold": "40",
        # NOTE(review): this threshold is expressed in bytes, so "128" disables
        # broadcast joins for all but tiny tables — confirm this is intended.
        "spark.sql.autoBroadcastJoinThreshold": "128",
        "spark.sql.parquet.filterPushdown": "true",
        "spark.sql.broadcastTimeout": "120s",
        "spark.sql.ui.retainedExecutions": "10",
        "spark.cleaner.referenceTracking.cleanCheckpoints": "true",
        "spark.executor.extraJavaOptions": "-XX:ReservedCodeCacheSize=2g -XX:+UseG1GC",
        "spark.sql.optimizer.excludedRules": "org.apache.spark.sql.catalyst.optimizer.CollapseProject",
    },
    spark_version=3,
)

## **Importações Necessárias**

In [2]:
%%spark

import shutil
import math
import os
import sys
import json
import pyspark.sql.functions as f
import pyspark.sql.functions as F
from pyspark.sql.window import Window
from pyspark.sql import Row
from typing import List, Dict
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark import StorageLevel

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

## **Definição de Constantes de Parâmetros**

### **Exclusão de Checkpoints anteriores**

In [None]:
import Hdfs

# Best-effort removal of checkpoints left by previous runs. A failure is
# reported instead of silently swallowed, and it does not stop the notebook.
diretorio_checkpoint = os.environ['CHECKPOINT_PATH']

lista = []
cont = 0
try:
    # Listing is inside the try so that a failure here is also reported.
    lista = hdfs.list(diretorio_checkpoint)
    for arquivo in lista:
        # Join with exactly one "/" whether or not the listed name starts with one.
        caminho = diretorio_checkpoint.rstrip('/') + '/' + arquivo.lstrip('/')
        hdfs.delete(caminho, True)
        cont += 1
except Exception as erro:
    print(f'Falha ao excluir checkpoints: {erro!r}')
finally:
    # Report the total once instead of once per deleted entry.
    print(f'Foram excluidos {cont} checkpoints')

### **Definição do diretório de Checkpoint**

In [3]:
%%spark

# Set the Spark checkpoint directory from the CHECKPOINT_PATH environment variable.

spark.sparkContext.setCheckpointDir(os.environ["CHECKPOINT_PATH"])

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

### **Define o dicionário de constantes utilizado nas consultas DB**

In [4]:
%%spark

# Montagem de dicionário Python contendo todos os valores definidos
dicionarioDeConstantes = {
    "TABELA_ENTRADA":os.environ['TABELA_ENTRADA'],
    "URL":os.environ['URL'],
    "JCN":os.environ['JCN'],
    "URL":f"{os.environ['DRIVER']:{os.environ['DB_PORTA']}/{os.environ['DB_DATABASE']}",
    "JCN":os.environ['DB_DRIVER'],
    "DB_USER":os.environ['DB_USER'],
    "DB_PASSWORD":os.environ['DB_PASSWORD'],
    "DB_HOST": os.environ['DB_HOST'],
    "DB_PORTA": os.environ['DB_PORTA'],
    "DB_DATABASE": os.environ['DB_DATABASE']
}

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

# **Recebimento e tratamento dos dados recebidos**

In [None]:
%%time

# Executes leitura_posicional.ipynb from this notebook.
# NOTE(review): `df`, consumed later by processaRotativo(df), is not defined in
# any cell shown here — presumably it is created by this notebook; confirm.
%run leitura_posicional.ipynb

In [5]:
%%time
%%spark

# Remove all cached tables from the session's in-memory cache before the next stage.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 30.6 ms, sys: 3.86 ms, total: 34.5 ms
Wall time: 243 ms


## **Funções Auxiliares**

In [6]:
# Executes utils.ipynb (presumably the helper functions this section's title refers to).
%run utils.ipynb

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

## **Rotativo**

### **Módulos do Cálculo Rotativo**

In [7]:
%%time

# Executes modulo_calculo_rotativo.ipynb.
# NOTE(review): processaRotativo, called below, is presumably defined there — confirm.
%run modulo_calculo_rotativo.ipynb

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 23.3 ms, sys: 2 ms, total: 25.3 ms
Wall time: 37 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 24.9 ms, sys: 1.92 ms, total: 26.8 ms
Wall time: 33.2 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 26.6 ms, sys: 22 µs, total: 26.6 ms
Wall time: 31.3 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 24.7 ms, sys: 3.11 ms, total: 27.8 ms
Wall time: 31.9 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 23.5 ms, sys: 4.32 ms, total: 27.8 ms
Wall time: 31.5 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 24.8 ms, sys: 308 µs, total: 25.1 ms
Wall time: 30.3 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 23.6 ms, sys: 2.56 ms, total: 26.2 ms
Wall time: 31.7 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 23.8 ms, sys: 3.79 ms, total: 27.6 ms
Wall time: 33.5 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 24.9 ms, sys: 1.98 ms, total: 26.8 ms
Wall time: 32.5 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 33.2 ms, sys: 0 ns, total: 33.2 ms
Wall time: 44.5 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 25.6 ms, sys: 1.67 ms, total: 27.2 ms
Wall time: 31.2 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 25.1 ms, sys: 958 µs, total: 26.1 ms
Wall time: 29.9 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 26.2 ms, sys: 2.8 ms, total: 29 ms
Wall time: 35.6 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 27.7 ms, sys: 52 µs, total: 27.8 ms
Wall time: 34 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 29.6 ms, sys: 703 µs, total: 30.3 ms
Wall time: 36.8 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 24.9 ms, sys: 109 µs, total: 25 ms
Wall time: 29 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 21.1 ms, sys: 3.01 ms, total: 24.1 ms
Wall time: 28.8 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 26.6 ms, sys: 1.05 ms, total: 27.6 ms
Wall time: 34.1 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 23 ms, sys: 946 µs, total: 24 ms
Wall time: 28.7 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 20.9 ms, sys: 3.97 ms, total: 24.9 ms
Wall time: 28.9 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 22.6 ms, sys: 905 µs, total: 23.6 ms
Wall time: 27.6 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 23.5 ms, sys: 4.91 ms, total: 28.5 ms
Wall time: 33.1 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 31.8 ms, sys: 1.85 ms, total: 33.7 ms
Wall time: 37.5 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 24.7 ms, sys: 269 µs, total: 25 ms
Wall time: 29.4 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 22.2 ms, sys: 3.02 ms, total: 25.2 ms
Wall time: 29.8 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 30.6 ms, sys: 3.07 ms, total: 33.7 ms
Wall time: 43.6 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 837 ms, sys: 64.7 ms, total: 902 ms
Wall time: 1.05 s


### **Inicialização Fluxo Rotativo**


In [None]:
%%time
%%spark

# Runs the rotativo flow over `df` and checkpoints the result; the checkpoint
# directory is the one configured earlier with setCheckpointDir.
# NOTE(review): neither `df` nor processaRotativo is defined in the cells shown
# here — presumably both come from notebooks executed with %run; confirm.
processaRotativoDF = processaRotativo(df).checkpoint()

In [9]:
%%time
%%spark

# Remove all cached tables from the in-memory cache now that the rotativo result is checkpointed.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 24.6 ms, sys: 3.7 ms, total: 28.3 ms
Wall time: 30.7 ms


In [None]:
%%spark

processaRotativoDF = processaRotativoDF.na.fill(0)

## **Parcelado**

### **Módulos do Cálculo Parcelado**

In [12]:
%%time

# Executes modulo_calculo_parcelado.ipynb.
# NOTE(review): procedimentosIniciaisTramentoDados, called below, is presumably defined there — confirm.
%run modulo_calculo_parcelado.ipynb

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 27.6 ms, sys: 2.06 ms, total: 29.6 ms
Wall time: 34.5 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 27 ms, sys: 928 µs, total: 27.9 ms
Wall time: 31.6 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 23.2 ms, sys: 4.02 ms, total: 27.2 ms
Wall time: 30.5 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 27.3 ms, sys: 784 µs, total: 28 ms
Wall time: 31.8 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 24 ms, sys: 1.57 ms, total: 25.5 ms
Wall time: 29.7 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 27.9 ms, sys: 0 ns, total: 27.9 ms
Wall time: 31.5 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 24.1 ms, sys: 1.58 ms, total: 25.7 ms
Wall time: 29.7 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 25.8 ms, sys: 2.93 ms, total: 28.7 ms
Wall time: 32.9 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 25.8 ms, sys: 1.09 ms, total: 26.9 ms
Wall time: 30.9 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 25.5 ms, sys: 1.83 ms, total: 27.3 ms
Wall time: 31 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 25.9 ms, sys: 1.05 ms, total: 26.9 ms
Wall time: 30.9 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 27.4 ms, sys: 193 µs, total: 27.6 ms
Wall time: 31.4 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 28.3 ms, sys: 1.19 ms, total: 29.5 ms
Wall time: 34.1 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 28.4 ms, sys: 3.14 ms, total: 31.6 ms
Wall time: 41.4 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 26.1 ms, sys: 2.95 ms, total: 29.1 ms
Wall time: 33.7 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 26.3 ms, sys: 4.06 ms, total: 30.4 ms
Wall time: 33.3 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 27.1 ms, sys: 1 ms, total: 28.1 ms
Wall time: 31.4 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 33 ms, sys: 2.12 ms, total: 35.1 ms
Wall time: 37.9 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 38.8 ms, sys: 3.61 ms, total: 42.4 ms
Wall time: 45.9 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 25.4 ms, sys: 2.87 ms, total: 28.3 ms
Wall time: 33.3 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 26.1 ms, sys: 2.16 ms, total: 28.2 ms
Wall time: 33.5 ms


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 1.46 s, sys: 105 ms, total: 1.56 s
Wall time: 1.99 s


### **Tratamento Inicial dos dados**

In [14]:
%%spark

import pyspark.sql.functions as F

# Applies the initial data-treatment step of the parcelado flow to the rotativo output.
parcelado1 = procedimentosIniciaisTramentoDados(processaRotativoDF)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

### **Inserção dos dados faltantes**

In [15]:
%%spark

import pyspark.sql.functions as F


def _insereVariaveisFaltantes(df):
    """Add the auxiliary columns that are missing from the treated input.

    Two steps, kept in the same order as before so the resulting schema is
    unchanged:

    1. ``withColumn`` calls that add constants (``0`` / ``"S"``), copies of
       existing columns, and columns extracted from the nested ``ls_*``
       fields.
    2. One ``select("*", ...)`` that appends many zero-initialised working
       columns, each an alias of the ``cd_zero`` column created in step 1.

    All column functions are reached through the ``F`` alias imported at the
    top of this cell, so the cell no longer depends on unqualified ``col`` /
    ``lit`` names imported somewhere else in the session.

    Args:
        df: Spark DataFrame produced by the initial data treatment.

    Returns:
        A new Spark DataFrame with every original column plus the added ones.
    """
    # Step 1 - (new column name, expression), applied strictly in this order.
    colunas_adicionadas = [
        ("cd_zero", F.lit(0)),
        ("vl_slim_15_20_apvd", F.lit(0)),
        ("in_cli_novo_prtt", F.lit("S")),
        ("ic_ajst_recruta", F.col("ic_ajst_rrt")),
        ("ic_mtp_recruta", F.col("ic_mtp_rrt")),
        ("ic_red_recruta", F.col("ic_red_rrt")),
        ("wgda_teto_pstc_cdc", F.lit(0)),
        ("cd_epgr_cli", F.col("ls_epgr_cli.codigoEmpregadorCliente")),
        ("cd_ntz", F.col("ls_epgr_cli.codigonatureza")),
        ("cd_rgr_rend_lqdo", F.col("ls_epgr_cli.indicadorRegraRendimentoLiquidacao")),
        ("cd_ocp_epgr_cli", F.col("ls_epgr_cli.codigoOcupacaoEmpregadorCliente")),
        ("vl_rend_lqdo", F.col("ls_epgr_cli.valorRendimentoLiquido")),
        ("in_ocp_ppl_scdr", F.col("ls_epgr_cli.indicadorOcupacaoPrincipalSecundario")),
        ("dt_inc_scdr", F.col("ls_epgr_cli.dataInicioSecundario")),
        ("vl_rend_epgr_cli", F.col("ls_epgr_cli.valorRendimento")),
        ("dt_rend_epgr_cli", F.col("ls_epgr_cli.dataRendimento")),
        ("cd_epgr_engaj", F.col("ls_epgr_cmtd.codigoEmpregadorComprometido")),
        ("cd_slim_rnvc", F.col("ls_slim_rnv.codigoSublimiteRenovacao")),
        ("cd_epgr_cgn_flh", F.col("ls_epgr_cgn_flh.codigoEmpregadorConsignacaoFolha")),
        ("nr_cli_cvn", F.col("ls_cvn.numeroClienteConvenio")),
        ("ic_red_pstc", F.col("ls_cvn.indiceReducaoPrestacao")),
        ("cd_slim_crd", F.col("ls_cvn.codigoSublimiteCredito")),
        ("ic_ajst_cvn", F.col("ls_cvn.indiceAjusteConvenio")),
        ("ic_mtp_cvn", F.col("ls_cvn.indiceMultiplicadorConvenio")),
        ("qt_max_mm_fnto", F.col("ls_cvn.quantidadeMaximoMesFinanciamento")),
        ("vl_max_cvn", F.col("ls_cvn.valorMaximoConvenio")),
        ("vl_mnr_taxa_jur", F.col("ls_cvn.valorMenorTaxaJuro")),
        ("cd_epgr_rgm_espl", F.col("ls_epgr_rgm_espl.codigoEmpregadorRegimeEspecial")),
    ]
    for nome_coluna, expressao in colunas_adicionadas:
        df = df.withColumn(nome_coluna, expressao)

    # Step 2 - names of the zero-initialised columns.
    # The vl_slim_<code>_<sfx>, vl_grupo_<code>_<sfx> and vl_pstc_<kind>_<sfx>
    # families are generated for sfx in ("apvd", "clcd"); the order below
    # reproduces the original hand-written list exactly.
    codigos_slim = (21, 83, 111, 115, 116, 117, 119, 129, 130, 131, 137, 140,
                    142, 155, 156, 157, 159, 160, 161, 162, 163, 171, 173, 179,
                    180, 181, 228)
    codigos_grupo = (45, 46, 55, 60, 64, 65)
    tipos_pstc = ("cdc", "vclo", "rao", "ecf", "fun_bb")

    nomes_zerados = ["wgda_vl_ren_brto"]
    for sufixo in ("apvd", "clcd"):
        nomes_zerados += ["vl_slim_{}_{}".format(codigo, sufixo) for codigo in codigos_slim]
        nomes_zerados += ["vl_grupo_{}_{}".format(codigo, sufixo) for codigo in codigos_grupo]
        nomes_zerados += ["vl_pstc_{}_{}".format(tipo, sufixo) for tipo in tipos_pstc]
    nomes_zerados += [
        "aux_cd_slim",
        "gda_maior_mtp",
        "vl_renda_funci",
        "vl_renda_pres",
        "vl_pstc_ecf_demais",
        "vl_ren_utzd_pstc_ecf",
        "ren_utzd_pstc_ecf",
        "ic_ajst_cvn_137",
        "ic_ajst_cvn_171",
        "vl_pstc_137",
        "vl_max_cvn_083",
        "vl_max_cvn_137",
        "vl_max_cvn_171",
        "vl_pstc_cdc_prtt",
        "in_grava_cvn",
        "in_cli_novo",
        "vl_pstc_083",
        "waux_coef_ren",
        "in_pbco_alvo_115",
        "in_tem_lim_ant",
        "in_cli_adpc_clireab",
        "waux_pstc_slim_171",
        "waux_ic_ajst_cvn_ecf_seld",
        "waux_ic_ajst_cvn_crt_seld",
        "waux_ic_ajst_cvn_sal_seld",
        "ws_vl_pstc_far_clcd_045",
        "ws_vl_pstc_far_apvd_045",
        "vl_ftr_ajst_slim_179",
        "vl_ftr_ajst_slim_228",
        "vl_pstc_ecf_rec",
        "aux_cd_cliente",
        "maior_slim_cdc_apvd",
        "maior_slim_cdc_clcd",
        "maior_slim_funci_apvd",
        "maior_slim_funci_clcd",
        "maior_slim_ecf_apvd",
        "maior_slim_ecf_clcd",
        "waux_pc_cdc",
        "ws_psct_cdc_83_c",
        "ws_psct_cdc_115_c",
        "aux_pstc_slim",
        "aux_mtp_slim",
        "aux_far_slim",
        "aux_vl_lim_ref",
        "aux_redt_vcli",
        "wix_crit",
        "vl_grupo_5564_apvd",
        "vl_grupo_5564_clcd",
        "vl_bloco_14_apvd",
        "vl_bloco_14_clcd",
    ]

    # A single projection appends all zero columns at once.
    return df.select("*", *[F.col("cd_zero").alias(nome) for nome in nomes_zerados])


InsercaoVariaveisFaltantesDF = _insereVariaveisFaltantes(parcelado1)

# `parcelado2` is the name the next stage of the flow consumes.
parcelado2 = InsercaoVariaveisFaltantesDF

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

### **Inicialização Fluxo Parcelado**

In [16]:
%%spark

# Use 20 shuffle partitions for the stages that follow; a later cell raises the
# value to "40" before the last block.
# NOTE(review): the reason for 20 is not recorded here - confirm it still
# matches the input volume.
spark.conf.set("spark.sql.shuffle.partitions", "20")

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [17]:
%%time
%%spark

# Block 1: runs `ProcessaParcelado` (defined outside this cell) on `parcelado2`.
# DataFrame.checkpoint() is eager by default: it materialises the result and
# truncates its logical plan, so later blocks do not re-plan everything above.
# It requires a checkpoint directory to have been set on the SparkContext.
parcelado3 = ProcessaParcelado(parcelado2).checkpoint()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Entrando na função ProcessaPrestacao - Bloco 1
Entrando na função ancs0160
Entrando na funcao aplicaRegraPstcProven
Entrando na funcao calculaPrestacao
Saindo da função prestacaoCalculada tempo de excução: 0.24293065071105957 
Saindo da função RegraPstcProven tempo de excução: 0.39870309829711914 
Entrando na funcao aplicaRegraPstcCorren
Entrando na calculaPrestacao
Saindo da função prestacaoCalculada tempo de excução: 0.22305703163146973 
Entrando na funcao RecompRendaLqda
Entrando na funcao apuraTeto
Entrando na função calculaVlTeto 
Saindo da função calculaVlTeto tempo de excução: 0.09732437133789062 
Saindo da função regraApuraTeto tempo de excução: 0.4004242420196533 
Saindo da função regraRecompRendaLqda tempo de excução: 0.7609043121337891 
Entrando na funcao trata_Snl_Alerta
Saindo da função regraTrataSnAlerta tempo de excução: 6.071078062057495 
Entrando na funcao zera_Prestacoes
Saindo da função regraZeraPrestacao tempo de excução: 2.1018710136413574 
Saindo da função RegraPs

In [18]:
%%time
%%spark

# Removes every cached table/DataFrame from Spark's in-memory cache; runs right
# after `parcelado3` was checkpointed.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 32.6 ms, sys: 5.09 ms, total: 37.6 ms
Wall time: 243 ms


In [19]:
%%time
%%spark

# Block 2: logs the step, applies `RegraPstcEcfFunci` (defined outside this
# cell) to `parcelado3` and checkpoints the result to cut the lineage.
print("")
print("Entrando na função RegraPstcEcfFunci - Bloco 2")
parcelado4 = RegraPstcEcfFunci(parcelado3).checkpoint()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função RegraPstcEcfFunci - Bloco 2
Entrando na função processar
Entrando na funcao inicializa_Vrv_Trab
Sainda da função inicializa_Vrv_Trab tempo de execução 2.020948648452759 
Entrando na funcao buscaMCI
Sainda da função buscaMCI tempo de execução 0.333268404006958 
Entrando na funcao pesquisaEpgrDfrt30Pc
Entrando na funcao rotina_Principal
Sainda da função rotina_PrincipalDF tempo de execução 15.919095754623413 
Sainda da função pesquisaEpgrDfrt30Pc tempo de execução 17.452534675598145 
Entrando na funcao calcularPstcPF
Entrando na funcao calculaRndLiqPres
Saindo da função selecionarRndCvn tempo de excução: 47.03688383102417 
Entrando na funcao calculaPstcFinal
Saindo da função calculaPstcFinal tempo de excução: 23.682544708251953 
Entrando na funcao verifZeraPrestacao
Saindo da função verifZeraPrestacao tempo de excução: 7.359094619750977 
Saindo da função calcularPstcPFDF tempo de excução: 192.21284079551697 
Saindo da função processar tempo de excução: 243.10492491722

In [20]:
%%time
%%spark

# Empties Spark's in-memory cache now that `parcelado4` has been checkpointed.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 24.3 ms, sys: 5.92 ms, total: 30.2 ms
Wall time: 31.9 ms


In [21]:
%%time
%%spark

# Block 3: logs the step, applies `aplicarRegraPstcVclo` (defined outside this
# cell) to `parcelado4` and checkpoints the result.
print("")
print("Entrando na função aplicarRegraPstcVclo Bloco 3")
parcelado5 = aplicarRegraPstcVclo(parcelado4).checkpoint()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função aplicarRegraPstcVclo Bloco 3
Entrando na funcao calculaPrestacaoVCLO
Saindo da função prestacaoCalculada tempo de excução: 0.20999383926391602 
Entrando na funcao calculaPrestacaoVCLO
Saindo da função regraTrataSnAlerta_Vclo tempo de excução: 3.743278741836548 
Saindo da função RegraPstcVclo tempo de excução: 12.734370708465576CPU times: user 626 ms, sys: 62.7 ms, total: 688 ms
Wall time: 4min 6s


In [22]:
%%time
%%spark

# Empties Spark's in-memory cache now that `parcelado5` has been checkpointed.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 28.8 ms, sys: 5.83 ms, total: 34.6 ms
Wall time: 35 ms


In [23]:
%%time
%%spark

# Block 4: logs the step, applies `slimClcdCdc` (defined outside this cell) to
# `parcelado5` and checkpoints the result.
print("")  
print("Entrando na função slimClcdCdc Bloco 4")
parcelado6 = slimClcdCdc(parcelado5).checkpoint()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função slimClcdCdc Bloco 4
Entrando na funcao processaCalculo - 173 clcd
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 0.40465760231018066 
Saindo da função processaCalculo tempo de excução: 0.7215671539306641 
Entrando na funcao processaCalculo - 131 clcd
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 0.8026213645935059 
Saindo da função processaCalculo tempo de excução: 1.3709895610809326 
Entrando na funcao processaCalculo - 116 clcd
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 1.0139210224151611 
Saindo da função processaCalculo tempo de excução: 1.889063835144043 
Entrando na funcao vrf_Rnd_Slr_Bnf
Saindo da função vrf_Rnd_Slr_BnfDF tempo de excução: 15.361585140228271 
Entrando na funcao processaCalculo - 83 clcd
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 10.22116994857788 
Saindo da

In [24]:
%%time
%%spark

# Empties Spark's in-memory cache now that `parcelado6` has been checkpointed.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 24.5 ms, sys: 4.08 ms, total: 28.6 ms
Wall time: 30.6 ms


In [25]:
%%time
%%spark

# Block 5: logs the step, applies `slimApvdCdc` (defined outside this cell) to
# `parcelado6` and checkpoints the result.
# The log message previously read "Blcoo 5"; it now follows the "Bloco N"
# wording used by every other step so the run log can be searched consistently.
print("")
print("Entrando na função slimApvdCdc Bloco 5")
parcelado7 = slimApvdCdc(parcelado6).checkpoint()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função slimApvdCdc Blcoo 5
Entrando na funcao processaCalculo - 173 apvd
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 0.49417734146118164 
Saindo da função processaCalculo tempo de excução: 0.8705697059631348 
Entrando na funcao processaCalculo - 131 apvd
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 0.53045654296875 
Saindo da função processaCalculo tempo de excução: 0.9603750705718994 
Entrando na funcao processaCalculo - 116 apvd
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 0.7535810470581055 
Saindo da função processaCalculo tempo de excução: 1.4097280502319336 
Entrando na funcao vrf_Rnd_Slr_Bnf
Saindo da função vrf_Rnd_Slr_BnfDF tempo de excução: 4.668126583099365 
Entrando na funcao processaCalculo - 83 apvd
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 1.8842759132385254 
Saindo da 

In [26]:
%%time
%%spark

# Empties Spark's in-memory cache now that `parcelado7` has been checkpointed.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 24.2 ms, sys: 2.04 ms, total: 26.2 ms
Wall time: 28.6 ms


In [27]:
%%time
%%spark

# Block 6: logs the step, applies `calculaGrupoCDC` (defined outside this cell)
# to `parcelado7` and checkpoints the result.
print("")
print("Entrando na função calculaGrupoCDC Bloco 6")
parcelado8 = calculaGrupoCDC(parcelado7).checkpoint()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função calculaGrupoCDC Bloco 6
Saindo da função calculaGrupoCDC tempo de excução: 1.8605878353118896CPU times: user 225 ms, sys: 18.6 ms, total: 243 ms
Wall time: 1min 17s


In [28]:
%%time
%%spark

# Block 7: logs the step, applies `slim_Vclo` (defined outside this cell) to
# `parcelado8` and checkpoints the result.
print("")
print("Entrando na função slim_Vclo Bloco 7")
parcelado9 = slim_Vclo(parcelado8).checkpoint()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função slim_Vclo Bloco 7
Entrando na função calculaSlim 
Saindo da função calculaSlim tempo de excução: 0.18163585662841797 
Saindo da função slim_Vclo tempo de excução: 1.3679020404815674CPU times: user 132 ms, sys: 10.5 ms, total: 143 ms
Wall time: 37.4 s


In [29]:
%%time
%%spark

# Empties Spark's in-memory cache after blocks 6 and 7 (`parcelado8`,
# `parcelado9`) were checkpointed.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 35.5 ms, sys: 1.88 ms, total: 37.3 ms
Wall time: 38.3 ms


In [30]:
%%time
%%spark

# Block 8: logs the step, applies `slim_ecf` (defined outside this cell) to
# `parcelado9` and checkpoints the result.
print("")
print("Entrando na função slim_ecf Bloco 8")
parcelado10 = slim_ecf(parcelado9).checkpoint()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função slim_ecf Bloco 8
Entrando na processaCalculo - 137 apvd
Entrando na funcao calculaSlim - regra 1 
Saindo da função calculaSlim tempo de excução: 0.1445927619934082 
Saindo da função processaCalculo tempo de excução: 0.6036739349365234 
Entrando na processaCalculo - 137 clcd
Entrando na funcao calculaSlim - regra 1 
Saindo da função calculaSlim tempo de excução: 0.13337230682373047 
Saindo da função processaCalculo tempo de excução: 0.6324083805084229 
Entrando na processaCalculo - 155 apvd
Entrando na funcao calculaSlim - regra 1 
Saindo da função calculaSlim tempo de excução: 0.23195672035217285 
Saindo da função processaCalculo tempo de excução: 0.7749605178833008 
Entrando na processaCalculo - 155 clcd
Entrando na funcao calculaSlim - regra 1 
Saindo da função calculaSlim tempo de excução: 0.22062444686889648 
Saindo da função processaCalculo tempo de excução: 1.0424528121948242 
Entrando na processaCalculo - 171 apvd
Entrando na funcao calculaSlim - regra 1 
Sai

In [31]:
%%time
%%spark

# Empties Spark's in-memory cache now that `parcelado10` has been checkpointed.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 28.1 ms, sys: 1.11 ms, total: 29.2 ms
Wall time: 30.7 ms


In [32]:
%%time
%%spark

# Block 9: logs the step, applies `calculaGrupoECF` (defined outside this cell)
# to `parcelado10` and checkpoints the result.
print("")  
print("Entrando na função calculaGrupoECF Bloco 9")
parcelado11 = calculaGrupoECF(parcelado10).checkpoint()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função calculaGrupoECF Bloco 9
Saindo da função calculaGrupoECF tempo de excução: 0.17722845077514648CPU times: user 106 ms, sys: 8.23 ms, total: 114 ms
Wall time: 27.4 s


In [33]:
%%time
%%spark

# Empties Spark's in-memory cache now that `parcelado11` has been checkpointed.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 26.3 ms, sys: 1.13 ms, total: 27.4 ms
Wall time: 29.2 ms


In [34]:
%%time
%%spark

# Block 10: logs the step, applies `slim_Funci` (defined outside this cell) to
# `parcelado11` and checkpoints the result.
print("")  
print("Entrando na função slim_Funci Bloco 10")
parcelado12 = slim_Funci(parcelado11).checkpoint()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função slim_Funci Bloco 10
Entrando na funcao calculaSlim - regra 1 
Saindo da função calculaSlim tempo de excução: 0.12721681594848633 
Saindo da função processaCalculo tempo de excução: 0.5062353610992432 
Entrando na funcao calculaSlim - regra 1 
Saindo da função calculaSlim tempo de excução: 0.11584734916687012 
Saindo da função processaCalculo tempo de excução: 0.5520591735839844 
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 0.5419552326202393 
Saindo da função processaCalculo tempo de excução: 0.9812507629394531 
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 0.6233298778533936 
Saindo da função processaCalculo tempo de excução: 1.172004222869873 
Entrando na funcao calculaSlim - regra 1 
Saindo da função calculaSlim tempo de excução: 0.2926161289215088 
Saindo da função processaCalculo tempo de excução: 1.0959198474884033 
Entrando na funcao calculaSlim - regra 1 
Saindo da f

In [35]:
%%time
%%spark

# Empties Spark's in-memory cache now that `parcelado12` has been checkpointed.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 31.1 ms, sys: 6.05 ms, total: 37.1 ms
Wall time: 37.6 ms


In [36]:
%%time
%%spark

# Block 11: logs the step and applies `calculaGrupoRao` (defined outside this
# cell) to `parcelado12`.
# Unlike the previous blocks this one uses cache() instead of checkpoint():
# cache() is lazy, so it only marks the result for persistence and the data is
# stored when a later action evaluates it.
print("")  
print("Entrando na função calculaGrupoRAO Bloco 11")
parcelado13 = calculaGrupoRao(parcelado12).cache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função calculaGrupoRAO Bloco 11
Saindo da função calculoGrupoRao tempo de excução: 0.4907798767089844CPU times: user 36.2 ms, sys: 4.89 ms, total: 41.1 ms
Wall time: 1.25 s


In [37]:
%%time
%%spark

# Block 12: logs the step and applies `calculaGrupoFunc` (defined outside this
# cell) to `parcelado13`; the result is marked for caching (lazy), not
# checkpointed.
print("")  
print("Entrando na função calculaGrupoFunc Bloco 12")
parcelado14 = calculaGrupoFunc(parcelado13).cache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função calculaGrupoFunc Bloco 12
Saindo da função calculoGrupoFunc tempo de excução: 1.0931453704833984CPU times: user 47.4 ms, sys: 3 ms, total: 50.4 ms
Wall time: 2.26 s


In [38]:
%%time
%%spark

# Block 13: logs the step and applies `calculaGrupoVCLO` (defined outside this
# cell) to `parcelado14`; the result is marked for caching (lazy), not
# checkpointed.
print("")  
print("Entrando na função calculaGrupoVCLO Bloco 13")
parcelado15 = calculaGrupoVCLO(parcelado14).cache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função calculaGrupoVCLO Bloco 13
Saindo da função calculoGrupoVCLO tempo de excução: 0.2585928440093994CPU times: user 40.2 ms, sys: 5.89 ms, total: 46.1 ms
Wall time: 750 ms


In [39]:
%%time
%%spark

# Block 14: logs the step, applies `calculaGrupoRDEX` (defined outside this
# cell) to `parcelado15` and checkpoints the result. Because checkpoint() is
# eager by default, this is where the lazily cached blocks 11-13 are evaluated.
print("")  
print("Entrando na função calculaGrupoRDEX Bloco 14")
parcelado16 = calculaGrupoRDEX(parcelado15).checkpoint()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função calculaGrupoRDEX Bloco 14
Saindo da função calculoGrupoRDEX tempo de excução: 0.7204775810241699CPU times: user 167 ms, sys: 20.1 ms, total: 188 ms
Wall time: 51.5 s


In [40]:
%%time
%%spark

# Empties Spark's in-memory cache now that `parcelado16` has been checkpointed;
# this also releases the frames cached by blocks 11-13.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 35.9 ms, sys: 3.69 ms, total: 39.6 ms
Wall time: 41.6 ms


In [41]:
%%time
%%spark

# Block 15: logs the step and applies `calculaBloco` (defined outside this
# cell) to `parcelado16`; the result is marked for caching (lazy), not
# checkpointed.
print("")  
print("Entrando na função calculaBloco Bloco 15")
parcelado17 = calculaBloco(parcelado16).cache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função calculaBloco Bloco 15
Saindo da função calculoBloco tempo de excução: 0.8636205196380615CPU times: user 49.3 ms, sys: 8.94 ms, total: 58.2 ms
Wall time: 2.27 s


In [42]:
%%spark

# Raise the shuffle partition count from the "20" set earlier to "40" before
# the last block.
# NOTE(review): the reason for the increase is not recorded here - confirm.
spark.conf.set("spark.sql.shuffle.partitions", "40")

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [43]:
%%time
%%spark

# Block 16: logs the step, applies `verifGTFraude` (defined outside this cell)
# to `parcelado17` and checkpoints the result.
print("")  
print("Entrando na função verifGTFraude Bloco 16")
parcelado18 = verifGTFraude(parcelado17).checkpoint()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


Entrando na função verifGTFraude Bloco 16
Entrando na slimApvdCdc 
Entrando na funcao processaCalculo - 173 apvd
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 1.855384349822998 
Saindo da função processaCalculo tempo de excução: 3.8853251934051514 
Entrando na funcao processaCalculo - 131 apvd
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 0.8714227676391602 
Saindo da função processaCalculo tempo de excução: 2.878351926803589 
Entrando na funcao processaCalculo - 116 apvd
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 4.175079584121704 
Saindo da função processaCalculo tempo de excução: 5.54755973815918 
Entrando na funcao vrf_Rnd_Slr_Bnf
Saindo da função vrf_Rnd_Slr_BnfDF tempo de excução: 15.222918272018433 
Entrando na funcao processaCalculo - 83 apvd
Entrando na funcao calculaSlim - regra none 
Saindo da função calculaSlim tempo de excução: 13.81

In [44]:
%%time
%%spark

# Empties Spark's in-memory cache now that `parcelado18` has been checkpointed;
# this also releases the cached `parcelado17`.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 28.3 ms, sys: 3.97 ms, total: 32.3 ms
Wall time: 131 ms


In [115]:
%%time
%%spark

# `validaDF` is another name for the final stage (`parcelado18`); the output
# cells below work on it.
# NOTE(review): the execution count jumps from In [44] to In [115] here, which
# suggests cells were re-run out of order - confirm the notebook still passes a
# Restart & Run All.
validaDF = parcelado18

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 27.4 ms, sys: 1.1 ms, total: 28.5 ms
Wall time: 30.5 ms


## **Ajustando para saída**

In [116]:
%%spark

# Prepares `validaDF` for the output layout: copies working columns to the
# names used by the output, adds zero-filled columns and parses `ts_saida`
# from text into a timestamp.
# NOTE(review): this cell reassigns `validaDF` from itself, so running it twice
# applies the timestamp parsing to an already-parsed column - re-run the
# previous cell first when iterating.
# NOTE(review): how the fractional-second pattern letters ("SSSSSS") are
# interpreted depends on the Spark version - confirm the parsed values on the
# cluster.
validaDF = (
    validaDF
    # Copies of existing working columns under their output names.
    .withColumn("vl_pstc_crd_slr", col("vl_pstc_083"))
    .withColumn("ic_mtp_clc_slim_137", col("vl_mtp_clc_slim_137"))
    .withColumn("vl_pstc_chq_fndo_clcd", col("vl_pstc_ecf_clcd"))
    .withColumn("vl_pstc_chq_fndo_apvd", col("vl_pstc_ecf_apvd"))
    # Output columns filled with the constant 0.
    .withColumn("vl_pstc_fin_apvd", lit(0))
    .withColumn("vl_pstc_fin_clcd", lit(0))
    .withColumn("nr_idfr_rsco", lit(0))
    .withColumn("vl_pstc_chq_sem_fndo_otr", col("vl_pstc_ecf_demais"))
    # Text -> timestamp using the source's dash/dot separated layout.
    .withColumn(
        "ts_saida",
        to_timestamp("ts_saida", "yyyy-MM-dd-HH.mm.ss.SSSSSS"),
    )
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [117]:
%%spark
# Project validaDF onto the final output layout. Each expression casts one
# working column to a fixed SQL type and renames it to its output column name.
# The next cell persists this frame with insertInto; presumably the order of
# the expressions below must match the column order of the target table —
# verify before reordering, adding or removing any entry.
validaDF = validaDF.selectExpr(
                                # --- Identification / keys ---
                                "cast(cd_cli as int) as CD_CLI",
                                # NOTE(review): a document number cast to decimal(17,2) looks unusual — confirm against the target table.
                                "cast(nr_cpf_cnpj as decimal(17,2)) as NR_CPF_CNPJ",
                                # Timestamp is generated here (current_timestamp), not read from a column.
                                "current_timestamp() as TS_CLC_SMLC",
                                "cast(cd_mtdl_anl_crd as char(2)) as CD_MTDL_ANL_CRD",
                                "cast(nr_idfr_slim_crd as decimal(31,0)) as NR_IDFR_SLIM_CRD",
                                "cast(nr_idfr_rsco as decimal(31,0)) as NR_IDFR_RSCO",
                                # --- vl_slim_* / vl_grupo_* / vl_bloco_* values (clcd and apvd variants), decimal(17,2) ---
                                "cast(vl_slim_20_clcd as decimal(17,2))  as VL_CRT_CRD_CLCD",
                                "cast(vl_slim_178_clcd as decimal(17,2)) as VL_CRT_EXNO_CLCD",
                                "cast(vl_slim_20_apvd as decimal(17,2))  as VL_CRT_CRD_APVD",
                                "cast(vl_slim_178_apvd as decimal(17,2)) as VL_CRT_EXNO_APVD",
                                "cast(vl_slim_15_clcd as decimal(17,2))  as VL_CHQ_ESPL_CLCD",
                                "cast(vl_slim_15_apvd as decimal(17,2))  as VL_CHQ_ESPL_APVD",
                                "cast(vl_slim_53_clcd as decimal(17,2))  as VL_ADTT_DEPS_CLCD",
                                "cast(vl_slim_53_apvd as decimal(17,2))  as VL_ADTT_DEPS_APVD",
                                "cast(vl_grupo_63_clcd as decimal(17,2)) as VL_GR_CRT_CLCD",
                                "cast(vl_grupo_63_apvd as decimal(17,2)) as VL_GR_CRT_APVD",
                                "cast(vl_grupo_62_clcd as decimal(17,2)) as VL_GR_CHQ_CLCD",
                                "cast(vl_grupo_62_apvd as decimal(17,2)) as VL_GR_CHQ_APVD",
                                "cast(vl_bloco_11_clcd as decimal(17,2)) as VL_BLOC_RTV_CLCD",
                                "cast(vl_bloco_11_apvd as decimal(17,2)) as VL_BLOC_RTV_APVD",
                                # From here on the pairs are listed apvd first, then clcd.
                                "cast(vl_slim_21_apvd as decimal(17,2))  as VL_LSG_VCLO_APVD",
                                "cast(vl_slim_21_clcd as decimal(17,2))  as VL_LSG_VCLO_CLCD",
                                "cast(vl_slim_83_apvd as decimal(17,2))  as VL_CRD_SLR_APVD",
                                "cast(vl_slim_83_clcd as decimal(17,2))  as VL_CRD_SLR_CLCD",
                                "cast(vl_slim_111_apvd as decimal(17,2)) as VL_CRD_RSCT_APVD",
                                "cast(vl_slim_111_clcd as decimal(17,2)) as VL_CRD_RSCT_CLCD",
                                "cast(vl_slim_115_apvd as decimal(17,2)) as VL_CRD_BNF_APVD",
                                "cast(vl_slim_115_clcd as decimal(17,2)) as VL_CRD_BNF_CLCD",
                                "cast(vl_slim_116_apvd as decimal(17,2)) as VL_CRD_AUTC_APVD",
                                "cast(vl_slim_116_clcd as decimal(17,2)) as VL_CRD_AUTC_CLCD",
                                "cast(vl_slim_117_apvd as decimal(17,2)) as VL_CDC_VCLO_APVD",
                                "cast(vl_slim_117_clcd as decimal(17,2)) as VL_CDC_VCLO_CLCD",
                                "cast(vl_slim_119_apvd as decimal(17,2)) as VL_FNTO_BEM_APVD",
                                "cast(vl_slim_119_clcd as decimal(17,2)) as VL_FNTO_BEM_CLCD",
                                "cast(vl_slim_129_apvd as decimal(17,2)) as VL_CRD_MTRL_APVD",
                                "cast(vl_slim_129_clcd as decimal(17,2)) as VL_CRD_MTRL_CLCD",
                                "cast(vl_slim_131_apvd as decimal(17,2)) as VL_CRD_PRON_APVD",
                                "cast(vl_slim_131_clcd as decimal(17,2)) as VL_CRD_PRON_CLCD",
                                "cast(vl_slim_137_apvd as decimal(17,2)) as VL_CGN_FLH_APVD",
                                "cast(vl_slim_137_clcd as decimal(17,2)) as VL_CGN_FLH_CLCD",
                                "cast(vl_slim_140_apvd as decimal(17,2)) as VL_SLR_FUN_APVD",
                                "cast(vl_slim_140_clcd as decimal(17,2)) as VL_SLR_FUN_CLCD",
                                "cast(vl_slim_142_apvd as decimal(17,2)) as VL_CRD_RNVC_APVD",
                                "cast(vl_slim_142_clcd as decimal(17,2)) as VL_CRD_RNVC_CLCD",
                                "cast(vl_slim_155_apvd as decimal(17,2)) as VL_RSCT_CGN_APVD",
                                "cast(vl_slim_155_clcd as decimal(17,2)) as VL_RSCT_CGN_CLCD",
                                "cast(vl_slim_156_apvd as decimal(17,2)) as VL_PC_CECR_APVD",
                                "cast(vl_slim_156_clcd as decimal(17,2)) as VL_PC_CECR_CLCD",
                                "cast(vl_slim_157_apvd as decimal(17,2)) as VL_MIC_FUN_APVD",
                                "cast(vl_slim_157_clcd as decimal(17,2)) as VL_MIC_FUN_CLCD",
                                "cast(vl_slim_159_apvd as decimal(17,2)) as VL_CRT_CHQ_APVD",
                                "cast(vl_slim_159_clcd as decimal(17,2)) as VL_CRT_CHQ_CLCD",
                                "cast(vl_slim_160_apvd as decimal(17,2)) as VL_TRNS_CRT_APVD",
                                "cast(vl_slim_160_clcd as decimal(17,2)) as VL_TRNS_CRT_CLCD",
                                "cast(vl_slim_161_apvd as decimal(17,2)) as VL_CRD_ACSB_APVD",
                                "cast(vl_slim_161_clcd as decimal(17,2)) as VL_CRD_ACSB_CLCD",
                                "cast(vl_slim_162_apvd as decimal(17,2)) as VL_VCLO_FUN_APVD",
                                "cast(vl_slim_162_clcd as decimal(17,2)) as VL_VCLO_FUN_CLCD",
                                "cast(vl_slim_163_apvd as decimal(17,2)) as VL_CRD_PCR_APVD",
                                "cast(vl_slim_163_clcd as decimal(17,2)) as VL_CRD_PCR_CLCD",
                                "cast(vl_slim_171_apvd as decimal(17,2)) as VL_CRT_FLH_APVD",
                                "cast(vl_slim_171_clcd as decimal(17,2)) as VL_CRT_FLH_CLCD",
                                "cast(vl_slim_173_apvd as decimal(17,2)) as VL_CRD_SSTB_APVD",
                                "cast(vl_slim_173_clcd as decimal(17,2)) as VL_CRD_SSTB_CLCD",
                                "cast(vl_slim_179_apvd as decimal(17,2)) as VL_CPR_DVD_APVD",
                                "cast(vl_slim_179_clcd as decimal(17,2)) as VL_CPR_DVD_CLCD",
                                "cast(vl_slim_180_apvd as decimal(17,2)) as VL_REDE_EPTM_APVD",
                                "cast(vl_slim_180_clcd as decimal(17,2)) as VL_REDE_EPTM_CLCD",
                                "cast(vl_slim_181_apvd as decimal(17,2)) as VL_REDE_FNTO_APVD",
                                "cast(vl_slim_181_clcd as decimal(17,2)) as VL_REDE_FNTO_CLCD",
                                "cast(vl_slim_228_apvd as decimal(17,2)) as VL_RNVC_FUN_APVD",
                                "cast(vl_slim_228_clcd as decimal(17,2)) as VL_RNVC_FUN_CLCD",
                                # --- vl_pstc_* values ---
                                "cast(vl_pstc_cdc_apvd as decimal(17,2)) as VL_PSTC_CDC_APVD",
                                "cast(vl_pstc_cdc_clcd as decimal(17,2)) as VL_PSTC_CDC_CLCD",
                                # vl_pstc_fin_* are the constant-zero columns added in the previous cell.
                                "cast(vl_pstc_fin_apvd as decimal(17,2)) as VL_PSTC_FIN_APVD",
                                "cast(vl_pstc_fin_clcd as decimal(17,2)) as VL_PSTC_FIN_CLCD",
                                "cast(vl_pstc_rao_apvd as decimal(17,2)) as VL_PSTC_RCPO_APVD", 
                                "cast(vl_pstc_rao_clcd as decimal(17,2)) as VL_PSTC_RCPO_CLCD",
                                "cast(vl_pstc_vclo_apvd as decimal(17,2)) as VL_PSTC_VCLO_APVD",
                                "cast(vl_pstc_vclo_clcd as decimal(17,2)) as VL_PSTC_VCLO_CLCD", 
                                "cast(vl_pstc_cdc_prtt as decimal(17,2)) as VL_PSTC_CDC_PRTT",
                                "cast(vl_ren_slr_bnf as decimal(17,2)) as VL_REN_SLR_BNF",
                                # --- Remaining vl_grupo_* / vl_bloco_* / vl_slim_130 values ---
                                "cast(vl_grupo_45_apvd as decimal(17,2)) as VL_GR_PCLD_APVD",
                                "cast(vl_grupo_45_clcd as decimal(17,2)) as VL_GR_PCLD_CLCD",
                                "cast(vl_grupo_46_apvd as decimal(17,2)) as VL_GR_RSCT_APVD",
                                "cast(vl_grupo_46_clcd as decimal(17,2)) as VL_GR_RSCT_CLCD",
                                "cast(vl_grupo_55_apvd as decimal(17,2)) as VL_GR_CGN_APVD",
                                "cast(vl_grupo_55_clcd as decimal(17,2)) as VL_GR_CGN_CLCD",
                                "cast(vl_grupo_60_apvd as decimal(17,2)) as VL_GR_VCLO_APVD",
                                "cast(vl_grupo_60_clcd as decimal(17,2)) as VL_GR_VCLO_CLCD",
                                "cast(vl_grupo_64_apvd as decimal(17,2)) as VL_GR_FUN_APVD",
                                "cast(vl_grupo_64_clcd as decimal(17,2)) as VL_GR_FUN_CLCD",
                                "cast(vl_grupo_65_apvd as decimal(17,2)) as VL_GR_REDE_APVD",
                                "cast(vl_grupo_65_clcd as decimal(17,2)) as VL_GR_REDE_CLCD",
                                "cast(vl_bloco_14_apvd as decimal(17,2)) as VL_BLOC_CDC_APVD",
                                "cast(vl_bloco_14_clcd as decimal(17,2)) as VL_BLOC_CDC_CLCD",
                                "cast(vl_slim_130_clcd as decimal(17,2)) as VL_BB_CRD_BNF_CLCD",
                                "cast(vl_slim_130_apvd as decimal(17,2)) as VL_BB_CRD_BNF_APVD",
                                # NOTE(review): the only lower-case alias in this list — confirm it is intentional.
                                "cast(in_grava_cvn as smallint) as cd_grvr_cvn",
                                "cast(vl_pstc_crd_slr as int)  as VL_PSTC_CRD_SLR",
                                # --- Income values: rounded to 2 decimal places before the cast ---
                                "cast(round(vl_ren_utzd_pstc_ecf,2) as decimal(17,2)) as VL_REN_PSTC_ECF",
                                "cast(round(vl_renda_funci,2) as decimal(17,2)) as VL_REN_FUN_BB",
                                "cast(round(vl_renda_liq_ttl,2) as decimal(17,2)) as VL_REN_LQDO_TTL",
                                "cast(round(vl_renda_pres,2) as decimal(17,2)) as VL_REN_PTDO",
                                # --- Columns with narrower precisions, indices and integer limits ---
                                "cast(vl_pstc_chq_fndo_clcd as decimal(11,2)) as VL_PSTC_ECF_CLCD",
                                "cast(vl_pstc_chq_fndo_apvd as decimal(11,2)) as VL_PSTC_ECF_APVD",
                                "cast(ic_ajst_cvn_137 as decimal(9,6)) as IC_AJST_CVN_CGN",
                                "cast(vl_pstc_137 as int) as VL_PSTC_CGN",
                                "cast(ic_ajst_cvn_171 as decimal(9,6)) as IC_AJST_CRT_FLH",
                                "cast(vl_max_cvn_083 as int) as VL_MAX_CVN_CRD_SLR",
                                "cast(vl_max_cvn_137 as int) as VL_MAX_CVN_CGN",
                                "cast(vl_max_cvn_171 as int) as VL_MAX_CVN_CRT_FLH",
                                "cast(vl_pstc_fun_bb_clcd as decimal(15,2)) as VL_PSTC_FUN_CLCD",
                                "cast(vl_pstc_chq_sem_fndo_otr as decimal(15,2)) as VL_PSTC_ECF_OTR",
                                "cast(vl_pstc_fun_bb_apvd as decimal(15,2)) as VL_PSTC_FUN_APVD",
                                # Rounded to 6 decimal places, then stored with scale 9.
                                "cast(round(ic_mtp_clc_slim_137,6) as decimal(16,9)) as VL_MTP_CRD_CGN",
                                # --- Output event metadata ---
                                "cast(ts_saida as timestamp) as TS_SAID", 
                                "cast(cd_est_lmcr_saida as int) as CD_EST_LMCR_SAID",
                                "cast(cd_tip_evt_saida as int) as CD_TIP_EVT_SAID",
                                "cast(cd_ntz_evt_saida as int) as CD_NTZ_EVT_SAID"
                            )


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

### **Persistindo última versão antes do posicional**

In [None]:
%%time
%%spark

validaDF.write.mode("overwrite").insertInto(os.environ["TABELA_SAIDA"] )

In [74]:
%%time
%%spark

# Remove every cached table/DataFrame from this Spark session's in-memory
# cache now that the output has been written.
spark.catalog.clearCache()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

CPU times: user 13.7 ms, sys: 4.36 ms, total: 18.1 ms
Wall time: 24 ms


# **Módulo de Retorno do Arquivo**

In [None]:
%%time

# Run the companion notebook retorno_arquivo.ipynb from this kernel.
# NOTE(review): the following cell uses df_tratado, which is presumably
# defined by that notebook — confirm.
%run retorno_arquivo.ipynb

# Gera Json Python

In [None]:
%%spark

#books_batch

json_principal = df_tratado.drop(*['timestampRecepcaoArquivo', 'estadoLimiteCreditoEntrada', 'tipoEventoEntrada', 'naturezaEventoEntrada'])\
                             .where('codigoCliente = 458837875')

json_principal = json_principal.withColumn('jsonCol', F.to_json(F.struct([json_principal[x].alias(x) for x in json_principal.columns]))).select('jsonCol')

json_principal.coalesce(1).write.mode('append').format('text').save('/books_batch')

# Encerramento da sessão

In [116]:
# CLOSING THE SESSION
# Uses the sparkmagic `cleanup` subcommand; presumably this deletes the Livy
# session(s) opened by this notebook — confirm against the sparkmagic version in use.

%spark cleanup