## Libraries


In [1]:
import pandas as pd
from src.feature_engineering import features
import numpy as np

## Data


In [2]:
clients = pd.read_csv("data/portfolio_clientes.csv")
tpv = pd.read_csv("data/portfolio_tpv.csv")
comunicados = pd.read_csv("data/portfolio_comunicados.csv")
geral = pd.read_csv("data/portfolio_geral.csv")

### Apenas contratos que possuem comunicados

In [3]:
unique_contratos = comunicados["contrato_id"].unique()
geral_comunicados = geral[geral["contrato_id"].isin(unique_contratos)]

In [4]:
comunicados_grouped = (
    comunicados.groupby(["contrato_id", "dt_ref_portfolio", "data_acao"])[
        ["tipo_acao", "status", "acao"]
    ]
    .agg(list)
    .reset_index()
)

In [5]:
geral_comunicados_grouped = geral_comunicados.merge(
    right=comunicados_grouped, how="left", on=["contrato_id", "dt_ref_portfolio"]
)

In [6]:
geral_and_comunicados_sorted_df = geral_comunicados_grouped.sort_values(
    ["contrato_id", "dt_ref_portfolio"]
)

### Criação de Features DSP e DSPP

In [7]:
contrato_dsp_features = (
    geral_and_comunicados_sorted_df.groupby(["contrato_id"])["dsp"]
    .agg(
        [
            features.total_success_dsp5,
            features.total_success_dsp10,
            features.total_success_dsp15,
            features.total_success_dsp30,
            features.total_success_dsp60,
            features.total_success_dsp90,
            features.prop_success_dsp5,
            features.prop_success_dsp10,
            features.prop_success_dsp15,
            features.prop_success_dsp30,
            features.prop_success_dsp60,
            features.prop_success_dsp90,
        ]
    )
    .reset_index()
)

In [8]:
contrato_dspp_features = (
    geral_and_comunicados_sorted_df.groupby(["contrato_id"])["dspp"]
    .agg(
        [
            features.total_success_dspp15,
            features.total_success_dspp30,
            features.total_success_dspp45,
            features.prop_success_dspp15,
            features.prop_success_dspp30,
            features.prop_success_dspp45,
        ]
    )
    .reset_index()
)

In [9]:
contrato_dsp_dspp = contrato_dsp_features.merge(
    right=contrato_dspp_features, on="contrato_id", how="inner"
)

### Score DSP e DSPP

In [10]:
means_dsp = []
means_dspp = []
for i, row in contrato_dsp_dspp.iterrows():
    means_dsp.append(np.nanmean(row[7:13]))
    means_dspp.append(np.nanmean(row[16:19]))

contrato_dsp_dspp["score_dsp"] = means_dsp
contrato_dsp_dspp["score_dspp"] = means_dspp

  means_dspp.append(np.nanmean(row[16:19]))
  means_dsp.append(np.nanmean(row[7:13]))


### Entregou? Não entregou? Leu?

In [11]:
acionamentos_delivery = (
    geral_and_comunicados_sorted_df.groupby(["contrato_id"])["status"]
    .agg([features.get_entregue, features.get_lido, features.get_nao_entregue])
    .reset_index()
)

In [12]:
contrato_dsp_dspp_qtd_acoes = contrato_dsp_dspp.merge(
    right=acionamentos_delivery, how="inner", on="contrato_id"
)

### Valor devedor esperado

In [13]:
# features de vlr_saldo_devedor
vlr_saldo_devedor_inicial = geral_and_comunicados_sorted_df.drop_duplicates(
    ["contrato_id"]
)[["contrato_id", "vlr_saldo_devedor_esperado"]]

c_dsp_dspp_qtd_acoes_devedor = contrato_dsp_dspp_qtd_acoes.merge(
    right=vlr_saldo_devedor_inicial, how="inner", on="contrato_id"
)

### Dados cadastrais

In [14]:
x_contrato_id_nr_documento = geral_and_comunicados_sorted_df.drop_duplicates(
    ["contrato_id", "nr_documento"]
)[["contrato_id", "nr_documento"]]

In [15]:
c_dsp_dspp_qtd_acoes_devedor_w_doc = c_dsp_dspp_qtd_acoes_devedor.merge(
    right=x_contrato_id_nr_documento, how="inner", on="contrato_id"
)

In [16]:
# clientes_unique
clientes_unique_nr_doc = (
    clients.groupby("nr_documento")[
        ["tipo_empresa", "cidade", "estado", "subsegmento", "segmento"]
    ]
    .agg(lambda x: list(x) if len(x) > 1 else x)
    .reset_index()
)

In [17]:
c_dsp_dspp_qtd_acoes_devedor_w_doc_and_clients = (
    c_dsp_dspp_qtd_acoes_devedor_w_doc.merge(
        right=clientes_unique_nr_doc, on="nr_documento", how="inner"
    )
)

### TPV

In [18]:
qtd_trans_tpv = tpv.groupby("nr_documento")[["qtd_transacoes", "vlr_tpv"]].agg(
    ["mean", "min", "max", np.median, "sum"]
)

In [19]:
final_df = c_dsp_dspp_qtd_acoes_devedor_w_doc_and_clients.merge(
    right=qtd_trans_tpv, how="left", on="nr_documento"
)

  final_df = c_dsp_dspp_qtd_acoes_devedor_w_doc_and_clients.merge(


In [20]:
final_df.to_csv("data/to_analysis.csv", index=False)