In [1]:
import pandas as pd
import numpy as np
import acessos as ac

In [2]:
query = """

select 
l.cpf
, l.mesref
, sum(LTV) as LTV
, max(date_diff('day', cast(pr.dt_cfi_account as date), current_date)) as tempo_relacionamento
from public.rentabilidade_cartoes_diego_camilo l
left join growth_curated_zone.proposal_analysis pr on (pr.cpf = l.cpf)
where l.mesref = '2023-08'
group by 1,2
ORDER BY RAND()
LIMIT 50000


"""

df = ac.df_athena('flavia-costa', query)

In [3]:
len(df)

50000

In [5]:
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
df['Classe_LTV'] = np.where(df.LTV > df.LTV.quantile(0.75) + (10 * (df.LTV.quantile(0.75) - df.LTV.quantile(0.25))), '5. Altamente Rentável',
                   np.where(df.LTV < df.LTV.quantile(0.25) - (10 * (df.LTV.quantile(0.75) - df.LTV.quantile(0.25))), '0. Rentabilidade Muito Negativa',         
                   np.where(df.LTV < 0, '1. Rentabilidade Negativa',          
                   np.where(df.LTV == 0, '2. Rentabilidade Zero',
                   np.where(df.LTV > df.LTV.quantile(0.75), '4. Muito Rentável',
                   np.where(df.LTV > 0, '3. Rentável', 'NI'
                            ))))))

In [7]:
df['Classe_LTV'].value_counts(normalize=True)

1. Rentabilidade Negativa          0.38558
2. Rentabilidade Zero              0.20648
4. Muito Rentável                  0.20376
3. Rentável                        0.12002
5. Altamente Rentável              0.04624
0. Rentabilidade Muito Negativa    0.03792
Name: Classe_LTV, dtype: float64

In [8]:
df['meses_relacionamento'] = round(df.tempo_relacionamento / 30, 0)

In [9]:
df.head()

Unnamed: 0,cpf,mesref,LTV,tempo_relacionamento,Classe_LTV,meses_relacionamento
0,86981960982,2023-08,-5.156091,768.0,1. Rentabilidade Negativa,26.0
1,5750840573,2023-08,22.63544,2132.0,4. Muito Rentável,71.0
2,5319065400,2023-08,9.021088,1965.0,4. Muito Rentável,66.0
3,1992853576,2023-08,-8.004327,2230.0,1. Rentabilidade Negativa,74.0
4,70048524158,2023-08,7.43688,1417.0,4. Muito Rentável,47.0


In [10]:
df['fx_meses_relacionamento'] = np.where(df.meses_relacionamento <= 6, "1.Até 6 meses",
                                np.where(df.meses_relacionamento <= 12, "2.Até 12 meses",         
                                np.where(df.meses_relacionamento <= 24, "3.Até 24 meses",         
                                np.where(df.meses_relacionamento <= 48, "4.Até 48 meses",
                                np.where(df.meses_relacionamento > 48, "5.Acima de 48 meses", "ni"                 
                                         )))))

df['fx_meses_relacionamento'].value_counts(normalize=True)

3.Até 24 meses         0.29208
1.Até 6 meses          0.21194
4.Até 48 meses         0.18322
5.Acima de 48 meses    0.17078
2.Até 12 meses         0.14196
ni                     0.00002
Name: fx_meses_relacionamento, dtype: float64

In [13]:
pd.crosstab(df['fx_meses_relacionamento'], df['Classe_LTV'], normalize= 'index')

Classe_LTV,0. Rentabilidade Muito Negativa,1. Rentabilidade Negativa,2. Rentabilidade Zero,3. Rentável,4. Muito Rentável,5. Altamente Rentável
fx_meses_relacionamento,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.Até 6 meses,0.002454,0.402095,0.243748,0.146362,0.176465,0.028876
2.Até 12 meses,0.038743,0.319245,0.173711,0.170471,0.242885,0.054945
3.Até 24 meses,0.047247,0.326417,0.247261,0.133046,0.199945,0.046083
4.Até 48 meses,0.056872,0.419823,0.146491,0.088637,0.228141,0.060037
5.Acima de 48 meses,0.04497,0.484717,0.181989,0.056798,0.185502,0.046024
ni,0.0,0.0,1.0,0.0,0.0,0.0


In [14]:
pd.crosstab(df['fx_meses_relacionamento'], df['Classe_LTV'], normalize= 'columns')

Classe_LTV,0. Rentabilidade Muito Negativa,1. Rentabilidade Negativa,2. Rentabilidade Zero,3. Rentável,4. Muito Rentável,5. Altamente Rentável
fx_meses_relacionamento,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.Até 6 meses,0.013713,0.221018,0.250194,0.258457,0.183549,0.132353
2.Até 12 meses,0.145042,0.117537,0.11943,0.201633,0.169219,0.168685
3.Até 24 meses,0.363924,0.247264,0.349768,0.323779,0.286612,0.29109
4.Até 48 meses,0.274789,0.199492,0.129988,0.135311,0.205143,0.237889
5.Acima de 48 meses,0.202532,0.21469,0.150523,0.08082,0.155477,0.169983
ni,0.0,0.0,9.7e-05,0.0,0.0,0.0


In [15]:
df['fx_meses_relacionamento2'] = np.where(df.meses_relacionamento <= 24, "1.Até 24 meses",    
                                np.where(df.meses_relacionamento <= 48, "2.Até 48 meses",
                                np.where(df.meses_relacionamento > 48, "3.Acima de 48 meses", "ni"                 
                                         )))

df['fx_meses_relacionamento2'].value_counts(normalize=True)

1.Até 24 meses         0.64598
2.Até 48 meses         0.18322
3.Acima de 48 meses    0.17078
ni                     0.00002
Name: fx_meses_relacionamento2, dtype: float64

In [16]:
pd.crosstab(df['fx_meses_relacionamento2'], df['Classe_LTV'], normalize= 'columns')

Classe_LTV,0. Rentabilidade Muito Negativa,1. Rentabilidade Negativa,2. Rentabilidade Zero,3. Rentável,4. Muito Rentável,5. Altamente Rentável
fx_meses_relacionamento2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.Até 24 meses,0.522679,0.585819,0.719392,0.783869,0.63938,0.592128
2.Até 48 meses,0.274789,0.199492,0.129988,0.135311,0.205143,0.237889
3.Acima de 48 meses,0.202532,0.21469,0.150523,0.08082,0.155477,0.169983
ni,0.0,0.0,9.7e-05,0.0,0.0,0.0


In [17]:
pd.crosstab(df['fx_meses_relacionamento2'], df['Classe_LTV'], normalize= 'index')

Classe_LTV,0. Rentabilidade Muito Negativa,1. Rentabilidade Negativa,2. Rentabilidade Zero,3. Rentável,4. Muito Rentável,5. Altamente Rentável
fx_meses_relacionamento2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.Até 24 meses,0.030682,0.34967,0.229945,0.145639,0.201678,0.042385
2.Até 48 meses,0.056872,0.419823,0.146491,0.088637,0.228141,0.060037
3.Acima de 48 meses,0.04497,0.484717,0.181989,0.056798,0.185502,0.046024
ni,0.0,0.0,1.0,0.0,0.0,0.0


Clientes novos concentram mais rentabilidade zero mas não necessariamente possui mais clientes com rentabilidade negativa

### Verificando estabilidade

In [24]:
query2 = """

select * from (
select 
l.cpf
, l.mesref
, sum(LTV) as LTV
, max(date_diff('day', cast(pr.dt_cfi_account as date), current_date)) as tempo_relacionamento
, row_number() over (partition by l.mesref order by RAND()) as rank_mes
from public.rentabilidade_cartoes_diego_camilo l
left join growth_curated_zone.proposal_analysis pr on (pr.cpf = l.cpf)
where substring(cast(l.mesref as varchar), 1, 4) = '2023'
group by 1,2
)
where rank_mes <= 10000

"""

df2 = ac.df_athena('flavia-costa', query2)

In [25]:
len(df2)

80000

In [26]:
df2.mesref.value_counts()

2023-04    10000
2023-02    10000
2023-05    10000
2023-06    10000
2023-07    10000
2023-01    10000
2023-03    10000
2023-08    10000
Name: mesref, dtype: int64

In [27]:
df2['Classe_LTV'] = np.where(df2.LTV > df2.LTV.quantile(0.75) + (10 * (df2.LTV.quantile(0.75) - df2.LTV.quantile(0.25))), '5. Altamente Rentável',
                   np.where(df2.LTV < df2.LTV.quantile(0.25) - (10 * (df2.LTV.quantile(0.75) - df2.LTV.quantile(0.25))), '0. Rentabilidade Muito Negativa',         
                   np.where(df2.LTV < 0, '1. Rentabilidade Negativa',          
                   np.where(df2.LTV == 0, '2. Rentabilidade Zero',
                   np.where(df2.LTV > df.LTV.quantile(0.75), '4. Muito Rentável',
                   np.where(df2.LTV > 0, '3. Rentável', 'NI'
                            ))))))

In [29]:
pd.crosstab(df2['mesref'], df2['Classe_LTV'], normalize= 'index')

Classe_LTV,0. Rentabilidade Muito Negativa,1. Rentabilidade Negativa,2. Rentabilidade Zero,3. Rentável,4. Muito Rentável,5. Altamente Rentável
mesref,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01,0.0402,0.3749,0.2192,0.1104,0.2091,0.0462
2023-02,0.0433,0.3836,0.2182,0.1139,0.1966,0.0444
2023-03,0.0392,0.3814,0.2117,0.1112,0.2067,0.0498
2023-04,0.0413,0.378,0.2121,0.1146,0.2014,0.0526
2023-05,0.043,0.3744,0.2111,0.1189,0.203,0.0496
2023-06,0.0376,0.381,0.2052,0.1198,0.2101,0.0463
2023-07,0.0424,0.3684,0.2154,0.117,0.2096,0.0472
2023-08,0.0373,0.3871,0.2024,0.1195,0.2037,0.05


In [28]:
pd.crosstab(df2['mesref'], df2['Classe_LTV'], normalize= 'columns')

Classe_LTV,0. Rentabilidade Muito Negativa,1. Rentabilidade Negativa,2. Rentabilidade Zero,3. Rentável,4. Muito Rentável,5. Altamente Rentável
mesref,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01,0.123959,0.123778,0.129299,0.119313,0.127484,0.119658
2023-02,0.133518,0.126651,0.128709,0.123095,0.119863,0.114996
2023-03,0.120876,0.125924,0.124875,0.120177,0.126021,0.128982
2023-04,0.127351,0.124802,0.125111,0.123852,0.12279,0.136234
2023-05,0.132593,0.123613,0.124521,0.128499,0.123765,0.128464
2023-06,0.115942,0.125792,0.121041,0.129472,0.128094,0.119917
2023-07,0.130743,0.121632,0.127057,0.126445,0.127789,0.122248
2023-08,0.115017,0.127806,0.119389,0.129147,0.124192,0.1295


Faixas de rentabilidade tem estabilidade nos meses

In [31]:
df2['meses_relacionamento'] = round(df2.tempo_relacionamento / 30, 0)

df2['fx_meses_relacionamento2'] = np.where(df2.meses_relacionamento <= 24, "1.Até 24 meses",    
                                np.where(df2.meses_relacionamento <= 48, "2.Até 48 meses",
                                np.where(df2.meses_relacionamento > 48, "3.Acima de 48 meses", "ni"                 
                                         )))

df2['fx_meses_relacionamento2'].value_counts(normalize=True)

1.Até 24 meses         0.579037
2.Até 48 meses         0.217275
3.Acima de 48 meses    0.203600
ni                     0.000087
Name: fx_meses_relacionamento2, dtype: float64

In [32]:
pd.crosstab(df2['fx_meses_relacionamento2'], df2['Classe_LTV'], normalize= 'index')

Classe_LTV,0. Rentabilidade Muito Negativa,1. Rentabilidade Negativa,2. Rentabilidade Zero,3. Rentável,4. Muito Rentável,5. Altamente Rentável
fx_meses_relacionamento2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.Até 24 meses,0.031604,0.338191,0.252704,0.139628,0.195972,0.041901
2.Até 48 meses,0.059544,0.391037,0.140433,0.101657,0.242147,0.065182
3.Acima de 48 meses,0.045678,0.480354,0.171906,0.0625,0.191245,0.048318
ni,0.0,0.142857,0.857143,0.0,0.0,0.0


Conclusão sobre novos clientes permanece