In [1]:
import pandas as pd
from pandas import CategoricalDtype
import plotly.express as px
import numpy as np

In [2]:
# Load the analysis dataset; the later cells filter and plot from this frame.
df = pd.read_csv(filepath_or_buffer="data/to_analysis.csv")

In [4]:
# Display the column index of the loaded frame, to see which score, proportion
# and acionamento columns are available for the analysis below.
df.columns

Index(['contrato_id', 'total_success_dsp5', 'total_success_dsp10',
       'total_success_dsp15', 'total_success_dsp30', 'total_success_dsp60',
       'total_success_dsp90', 'prop_success_dsp5', 'prop_success_dsp10',
       'prop_success_dsp15', 'prop_success_dsp30', 'prop_success_dsp60',
       'prop_success_dsp90', 'total_success_dspp15', 'total_success_dspp30',
       'total_success_dspp45', 'prop_success_dspp15', 'prop_success_dspp30',
       'prop_success_dspp45', 'score_dsp', 'score_dspp', 'get_entregue',
       'get_lido', 'get_nao_entregue', 'vlr_saldo_devedor_esperado',
       'nr_documento', 'tipo_empresa', 'cidade', 'estado', 'subsegmento',
       'segmento', '('qtd_transacoes', 'mean')', '('qtd_transacoes', 'min')',
       '('qtd_transacoes', 'max')', '('qtd_transacoes', 'median')',
       '('qtd_transacoes', 'sum')', '('vlr_tpv', 'mean')',
       '('vlr_tpv', 'min')', '('vlr_tpv', 'max')', '('vlr_tpv', 'median')',
       '('vlr_tpv', 'sum')'],
      dtype='object')

# Scores vs Segmento e Subsegmento (DSP e DSPP)

In [21]:
# Segment labels kept when the cells below filter on df["segmento"].
segmentos = [
    "Alimentação", "Varejo", "Bens duráveis",
    "Serviços recorrentes", "Viagens e entretenimento", "Serviços",
    "Supermercado/Farmácia", "Posto", "Outros",
]

# Sub-segment labels kept when the cells below filter on df["subsegmento"].
# NOTE(review): "None" is a literal string here. Recent pandas versions parse the
# text "None" as NaN in read_csv by default, in which case this entry would never
# match any row -- confirm how missing sub-segments are stored in the CSV.
subsegmentos = [
    "Academias", "Alimentação Rápida", "Atacadista de Alimento",
    "Atacadistas Gerais", "Automotivo", "Bares e Restaurantes",
    "Educação", "Lazer & Turismo", "Lojas Diversas",
    "Material de Construção", "None", "Outros",
    "Postos de Gasolina", "Saúde", "Supermercados",
    "Vestuário",
]

In [27]:
# Initially by the DSP score: keep only the rows whose segment and sub-segment
# are both in the allow-lists, and only the columns used by the segment plot.
# A single .loc call selects rows and columns in one step, instead of copying
# the three columns for every row and then masking that intermediate copy.
df_filtered = df.loc[
    df["segmento"].isin(segmentos) & df["subsegmento"].isin(subsegmentos),
    ["nr_documento", "score_dsp", "segmento"],
]

Unnamed: 0,nr_documento,score_dsp,segmento
0,7996daab1bbe000bb5d1cc1bf317f390,0.500000,Alimentação
1,1191ebfa94d3ca2e8a02f696aafde4a4,0.291667,Varejo
2,1191ebfa94d3ca2e8a02f696aafde4a4,0.083333,Varejo
3,cd1178c3ed53ebe730ba521617cb574b,0.469336,Bens duráveis
4,40cd8202c632fef1e0d5f43f341990a7,0.166667,Varejo
...,...,...,...
12197,fa7900d2354552bfcd058da26b847ae7,1.000000,Bens duráveis
12198,0eefed947e158b4295d05fdf3954a87f,0.833333,Serviços
12199,67eb23dfd23b5a7a470c35d0db12a282,0.511111,Alimentação
12200,3a909649d774f09a56a0e423935842b0,0.250000,Alimentação


In [31]:
# Box plot of the DSP score distribution, one box per segment.
fig = px.box(data_frame=df_filtered, x="segmento", y="score_dsp")
fig.show()

In [32]:
# DSP score by sub-segment: same row filter as for the segment plot (segment and
# sub-segment both in the allow-lists), keeping the sub-segment column instead.
# A single .loc call selects rows and columns in one step, instead of copying
# the three columns for every row and then masking that intermediate copy.
df_filtered = df.loc[
    df["segmento"].isin(segmentos) & df["subsegmento"].isin(subsegmentos),
    ["nr_documento", "score_dsp", "subsegmento"],
]

# Box plot of the DSP score distribution, one box per sub-segment.
fig = px.box(df_filtered, x="subsegmento", y="score_dsp")
fig.show()

In [36]:
# Now by the DSPP score: keep only the rows whose segment and sub-segment are
# both in the allow-lists, with both grouping columns so the same frame feeds
# the two plots below.
# A single .loc call selects rows and columns in one step, instead of copying
# the four columns for every row and then masking that intermediate copy.
df_filtered = df.loc[
    df["segmento"].isin(segmentos) & df["subsegmento"].isin(subsegmentos),
    ["nr_documento", "score_dspp", "segmento", "subsegmento"],
]

# DSPP score distribution, one box per segment.
fig = px.box(df_filtered, x="segmento", y="score_dspp")
fig.show()

# DSPP score distribution, one box per sub-segment (`fig` is reused).
fig = px.box(df_filtered, x="subsegmento", y="score_dspp")
fig.show()

# Scores vs Acionamento

In [41]:
# DSP score next to the three acionamento columns, restricted to rows whose
# segment and sub-segment are both in the allow-lists. A single .loc call
# selects rows and columns in one step instead of chaining two indexing calls.
df_filtered = df.loc[
    df["segmento"].isin(segmentos) & df["subsegmento"].isin(subsegmentos),
    ["nr_documento", "score_dsp", "get_entregue", "get_nao_entregue", "get_lido"],
]

# Reshape to long format: each source row becomes three rows, one per get_*
# column, with the column name in "acionamento" and its content in "value".
df_filtered_melted = df_filtered.melt(
    id_vars=["nr_documento", "score_dsp"],
    value_vars=["get_entregue", "get_nao_entregue", "get_lido"],
    var_name="acionamento",
)

# NOTE(review): the plot below never reads "value". Because melt repeats every
# row (and its score) once per acionamento column, all three boxes receive the
# same set of scores and will look identical. If the intent is to compare
# scores by acionamento outcome, the rows probably need to be filtered on
# "value" first -- confirm what the get_* columns hold before changing this.
fig = px.box(df_filtered_melted, x="acionamento", y="score_dsp")
fig.show()

In [43]:
# DSPP score next to the three acionamento columns, restricted to rows whose
# segment and sub-segment are both in the allow-lists. A single .loc call
# selects rows and columns in one step instead of chaining two indexing calls.
df_filtered = df.loc[
    df["segmento"].isin(segmentos) & df["subsegmento"].isin(subsegmentos),
    ["nr_documento", "score_dspp", "get_entregue", "get_nao_entregue", "get_lido"],
]

# Reshape to long format: each source row becomes three rows, one per get_*
# column, with the column name in "acionamento" and its content in "value".
df_filtered_melted = df_filtered.melt(
    id_vars=["nr_documento", "score_dspp"],
    value_vars=["get_entregue", "get_nao_entregue", "get_lido"],
    var_name="acionamento",
)

# NOTE(review): the plot below never reads "value". Because melt repeats every
# row (and its score) once per acionamento column, all three boxes receive the
# same set of scores and will look identical. If the intent is to compare
# scores by acionamento outcome, the rows probably need to be filtered on
# "value" first -- confirm what the get_* columns hold before changing this.
fig = px.box(df_filtered_melted, x="acionamento", y="score_dspp")
fig.show()

# Proporção de sucesso por aplicação de filtros

In [83]:
# State and city used to restrict the rows before summarising the proportions.
filtro_estado = "SP"
filtro_cidade = "São Paulo"
# Every column whose name starts with "prop_".
prop_columns = [column for column in df.columns if column.startswith("prop_")]

# Rows matching both filters, proportion columns only. A single .loc call
# selects rows and columns in one step instead of chaining two indexing calls.
df_filtered = df.loc[
    (df["estado"] == filtro_estado) & (df["cidade"] == filtro_cidade),
    prop_columns,
]
# Long format: one row per (source row, proportion column) pair, with the
# column name in "props" and the proportion in "value".
df_melted = df_filtered.melt(value_vars=prop_columns, var_name="props")
# Median per proportion column. The built-in groupby median skips NaN by
# default, so it gives the NaN-ignoring median directly; passing np.nanmedian
# to agg() relies on pandas aliasing NumPy callables to its own methods, which
# newer pandas releases flag as deprecated.
to_plot = df_melted.groupby("props")["value"].median().reset_index()

In [84]:
# Explicit display order for the proportion columns: the dsp windows first,
# then the dspp windows, each in ascending numeric order.
prop_categories = CategoricalDtype(
    categories=[
        "prop_success_dsp5",
        "prop_success_dsp10",
        "prop_success_dsp15",
        "prop_success_dsp30",
        "prop_success_dsp60",
        "prop_success_dsp90",
        "prop_success_dspp15",
        "prop_success_dspp30",
        "prop_success_dspp45",
    ],
    ordered=True,
)
# Casting to the ordered categorical makes the sort follow the order above
# rather than the alphabetical order of the column names.
to_plot["props"] = to_plot["props"].astype(dtype=prop_categories)
to_plot_sorted = to_plot.sort_values(by="props")

In [85]:
# Bar chart of the aggregated value for each proportion column, in sorted order.
fig = px.bar(data_frame=to_plot_sorted, x="props", y="value")
fig.show()