# **Bibliotecas e Funções**


In [1]:
%%capture

!pip install shap
!pip install optuna

In [2]:
import pandas as pd
import numpy as np

O arquivo 'functions_pred_cruzada' contém as funções que serão utilizadas para selecionar e pré-processar os dados e para treinar e validar os modelos de machine learning.

In [3]:
# Download the file identified by this id with gdown (quiet mode).
# NOTE(review): presumably this is functions_pred_cruzada.py, imported right
# below -- confirm that the id points to that module.
!gdown 1uVoArwRTJmvbyFLyFniwFvWGHMfiNIWO --quiet

# Star import: brings every public name of functions_pred_cruzada into the
# notebook namespace.
# NOTE(review): consider importing the needed names explicitly so it is clear
# where each helper comes from.
from functions_pred_cruzada import *

# **Dados**

In [5]:
# Dados brutos com todos os tipos de câncer
!gdown 1AaEC5jTcInC2fwbVG7BW3BeJfXq5OBlO --quiet

df_geral = pd.read_csv('pacigeral_12_23.csv')
print(df_geral.shape)
df_geral.head(3)

Columns (23,24,25,32,36,37,73,74,75,78,89,90,91) have mixed types. Specify dtype option on import or set low_memory=False.


(1178688, 100)


Unnamed: 0,ESCOLARI,IDADE,SEXO,UFNASC,UFRESID,IBGE,CIDADE,CATEATEND,DTCONSULT,CLINICA,...,REC03,REC04,IBGEATEN,CIDO,DSCCIDO,HABILIT,HABIT11,HABILIT1,HABILIT2,CIDADEH
0,4,17,2,SP,SP,3538709,PIRACICABA,9,2011-06-21,24,...,,,3538709,80003,NEOPLASIA MALIGNA,9,UNACON com Serviços de Radioterapia e de Hemat...,1,1,Piracicaba
1,9,1,2,SP,SP,3535507,PARAGUACU PAULISTA,9,2005-02-25,3,...,,,3506003,80003,NEOPLASIA MALIGNA,14,Inativo,6,5,Bauru
2,9,18,2,SP,SP,3548500,SANTOS,9,2012-05-14,24,...,,,3548500,80003,NEOPLASIA MALIGNA,2,UNACON com Serviço de Radioterapia,1,1,Santos


In [6]:
# Frequency of each ULTINFO code in the raw data.
df_geral['ULTINFO'].value_counts()

2    517064
3    343093
4    180941
1    137590
Name: ULTINFO, dtype: int64

In [7]:
# Binary outcome flag: ULTINFO codes below 3 become obito = 0, every other
# value becomes obito = 1. np.where evaluates the whole column at once
# instead of looping over each row in Python.
df_geral['obito'] = np.where(df_geral['ULTINFO'] < 3, 0, 1)
df_geral['obito'].value_counts()

0    654654
1    524034
Name: obito, dtype: int64

In [8]:
# Work on a copy so df_geral keeps the raw values.
df = df_geral.copy()

# Parse the diagnosis date and the last-information date as datetimes.
list_datas = ['DTDIAG', 'DTULTINFO']
for nome_coluna in list_datas:
    df[nome_coluna] = pd.to_datetime(df[nome_coluna])

# Days elapsed between diagnosis and the last recorded information.
df['ULTIDIAG'] = (df['DTULTINFO'] - df['DTDIAG']).dt.days

# sobrevida_ano{1,3,5} is 1 when ULTIDIAG exceeds that many 365-day years and
# 0 otherwise (a missing ULTIDIAG compares as False, so it yields 0).
for anos in (1, 3, 5):
    df[f'sobrevida_ano{anos}'] = np.where(df['ULTIDIAG'] > anos * 365, 1, 0)

df.head(1)

Unnamed: 0,ESCOLARI,IDADE,SEXO,UFNASC,UFRESID,IBGE,CIDADE,CATEATEND,DTCONSULT,CLINICA,...,HABILIT,HABIT11,HABILIT1,HABILIT2,CIDADEH,obito,ULTIDIAG,sobrevida_ano1,sobrevida_ano3,sobrevida_ano5
0,4,17,2,SP,SP,3538709,PIRACICABA,9,2011-06-21,24,...,9,UNACON com Serviços de Radioterapia e de Hemat...,1,1,Piracicaba,0,2633,1,1,1


**Tipos com maior incidência**

In [9]:
# Relative frequency of the 15 most common TOPOGRUP values.
df['TOPOGRUP'].value_counts(normalize=True).head(15)

C44    0.228854
C50    0.136985
C61    0.103433
C34    0.047110
C53    0.046320
C18    0.038405
C42    0.037690
C16    0.036604
C20    0.029200
C73    0.023743
C77    0.018551
C15    0.018082
C67    0.017893
C64    0.015958
C32    0.015779
Name: TOPOGRUP, dtype: float64

*   C44 - Pele 22,89%
*   C50 - Mama 13,70%
*   C61 - Próstata 10,34%
*   C34 - Pulmão 4,71%
*   C53 - Colo de Útero 4,63%
*   C18 - Cólon 3,84%
*   C42 - Sistemas hematopoético e reticuloendotelial 3,77%
*   C16 - Estômago 3,66%
*   C20 - Reto 2,92%
*   C73 - Tireoide 2,37%
*   C77 - Linfonodos 1,86%
*   C15 - Esôfago 1,81%
*   C67 - Bexiga 1,79%
*   C64 - Rim 1,60%
*   C32 - Laringe 1,58%



In [10]:
# Outcome shares per TOPOGRUP: one row per group with the fraction of records
# where obito == 0 (obito_0) and obito == 1 (obito_1).
# pd.crosstab builds the whole table in one vectorized pass instead of growing
# a DataFrame with pd.concat inside a loop, and reindex() guarantees that both
# outcome columns exist (filled with 0.0) even when every record of a group
# has the same outcome, which previously raised a KeyError.
topogrup = (
    pd.crosstab(df['TOPOGRUP'], df['obito'], normalize='index')
    .reindex(columns=[0, 1], fill_value=0.0)
    .rename(columns={0: 'obito_0', 1: 'obito_1'})
    .rename_axis(columns=None)
)

# Rank the groups from the highest to the lowest share of obito == 1.
topogrup_ordenado = topogrup.sort_values(by='obito_1', ascending=False)
print(topogrup_ordenado.shape)
topogrup_ordenado.head(10)

(70, 2)


Unnamed: 0_level_0,obito_0,obito_1
TOPOGRUP,Unnamed: 1_level_1,Unnamed: 2_level_1
C80,0.111694,0.888306
C15,0.1249,0.8751
C23,0.134347,0.865653
C34,0.156209,0.843791
C13,0.164808,0.835192
C26,0.17052,0.82948
C12,0.183929,0.816071
C25,0.184514,0.815486
C24,0.211816,0.788184
C14,0.212579,0.787421


In [11]:
# Drop the rows that have obito == 0 and sobrevida_ano1 == 0 at the same time;
# every other row is kept.
df_ano1 = df[~((df.obito == 0) & (df.sobrevida_ano1 == 0))].reset_index(drop=True)

# Outcome shares per TOPOGRUP (fraction of obito == 0 and obito == 1 per group).
# pd.crosstab replaces the row-by-row pd.concat loop, and reindex() keeps both
# outcome columns (filled with 0.0) even when a group has a single outcome,
# which previously raised a KeyError.
topogrup_ano1 = (
    pd.crosstab(df_ano1['TOPOGRUP'], df_ano1['obito'], normalize='index')
    .reindex(columns=[0, 1], fill_value=0.0)
    .rename(columns={0: 'obito_0', 1: 'obito_1'})
    .rename_axis(columns=None)
)

# Rank the groups from the highest to the lowest share of obito == 1.
topogrup_ano1_ordenado = topogrup_ano1.sort_values(by='obito_1', ascending=False)
print(topogrup_ano1_ordenado.shape)
topogrup_ano1_ordenado.head(10)

(70, 2)


Unnamed: 0_level_0,obito_0,obito_1
TOPOGRUP,Unnamed: 1_level_1,Unnamed: 2_level_1
C80,0.084717,0.915283
C15,0.089618,0.910382
C23,0.09256,0.90744
C34,0.10929,0.89071
C25,0.117778,0.882222
C26,0.132931,0.867069
C13,0.134442,0.865558
C12,0.158815,0.841185
C24,0.159896,0.840104
C14,0.175231,0.824769


In [12]:
# Drop the rows that have obito == 0 and sobrevida_ano3 == 0 at the same time;
# every other row is kept.
df_ano3 = df[~((df.obito == 0) & (df.sobrevida_ano3 == 0))].reset_index(drop=True)

# Outcome shares per TOPOGRUP (fraction of obito == 0 and obito == 1 per group).
# pd.crosstab replaces the row-by-row pd.concat loop, and reindex() keeps both
# outcome columns (filled with 0.0) even when a group has a single outcome,
# which previously raised a KeyError.
topogrup_ano3 = (
    pd.crosstab(df_ano3['TOPOGRUP'], df_ano3['obito'], normalize='index')
    .reindex(columns=[0, 1], fill_value=0.0)
    .rename(columns={0: 'obito_0', 1: 'obito_1'})
    .rename_axis(columns=None)
)

# Rank the groups from the highest to the lowest share of obito == 1.
topogrup_ano3_ordenado = topogrup_ano3.sort_values(by='obito_1', ascending=False)
print(topogrup_ano3_ordenado.shape)
topogrup_ano3_ordenado.head(10)

(70, 2)


Unnamed: 0_level_0,obito_0,obito_1
TOPOGRUP,Unnamed: 1_level_1,Unnamed: 2_level_1
C80,0.057906,0.942094
C15,0.05803,0.94197
C39,0.058824,0.941176
C23,0.05966,0.94034
C34,0.063089,0.936911
C25,0.067222,0.932778
C26,0.071197,0.928803
C24,0.10427,0.89573
C13,0.104665,0.895335
C12,0.116168,0.883832


In [13]:
# Drop the rows that have obito == 0 and sobrevida_ano5 == 0 at the same time;
# every other row is kept.
df_ano5 = df[~((df.obito == 0) & (df.sobrevida_ano5 == 0))].reset_index(drop=True)

# Outcome shares per TOPOGRUP (fraction of obito == 0 and obito == 1 per group).
# pd.crosstab replaces the row-by-row pd.concat loop, and reindex() keeps both
# outcome columns (filled with 0.0) even when a group has a single outcome,
# which previously raised a KeyError.
topogrup_ano5 = (
    pd.crosstab(df_ano5['TOPOGRUP'], df_ano5['obito'], normalize='index')
    .reindex(columns=[0, 1], fill_value=0.0)
    .rename(columns={0: 'obito_0', 1: 'obito_1'})
    .rename_axis(columns=None)
)

# Rank the groups from the highest to the lowest share of obito == 1.
topogrup_ano5_ordenado = topogrup_ano5.sort_values(by='obito_1', ascending=False)
print(topogrup_ano5_ordenado.shape)
topogrup_ano5_ordenado.head(10)

(70, 2)


Unnamed: 0_level_0,obito_0,obito_1
TOPOGRUP,Unnamed: 1_level_1,Unnamed: 2_level_1
C15,0.038162,0.961838
C34,0.039582,0.960418
C26,0.040134,0.959866
C25,0.041388,0.958612
C80,0.042519,0.957481
C23,0.042523,0.957477
C39,0.058824,0.941176
C24,0.074784,0.925216
C22,0.078903,0.921097
C13,0.079712,0.920288


**Tipos com maior incidência por sexo**

In [14]:
# Split the records by the SEXO code: 1 goes to df_masc, 2 goes to df_fem.
df_masc = df[df['SEXO'].eq(1)]
df_fem = df[df['SEXO'].eq(2)]

# Show the size of each subset.
for subconjunto in (df_masc, df_fem):
    print(subconjunto.shape)

(585417, 105)
(593271, 105)


**Masculino**

In [15]:
# Relative frequency of the 10 most common TOPOGRUP values in df_masc.
df_masc['TOPOGRUP'].value_counts(normalize=True).head(10)

C44    0.242531
C61    0.208253
C34    0.057234
C16    0.047406
C42    0.040819
C18    0.037650
C20    0.032604
C15    0.030290
C32    0.027686
C67    0.026477
Name: TOPOGRUP, dtype: float64

*   C44 - Pele 24,25%
*   C61 - Próstata 20,83%
*   C34 - Pulmão 5,72%
*   C16 - Estômago 4,74%
*   C42 - Sistemas hematopoético e reticuloendotelial 4,08%
*   C18 - Cólon 3,76%
*   C20 - Reto 3,26%
*   C15 - Esôfago 3,03%
*   C32 - Laringe 2,77%
*   C67 - Bexiga 2,65%

In [16]:
# Outcome shares per TOPOGRUP within df_masc: one row per group with the
# fraction of records where obito == 0 (obito_0) and obito == 1 (obito_1).
# pd.crosstab replaces the row-by-row pd.concat loop, and reindex() keeps both
# outcome columns (filled with 0.0) even when a group has a single outcome,
# which previously raised a KeyError.
topogrup_masc = (
    pd.crosstab(df_masc['TOPOGRUP'], df_masc['obito'], normalize='index')
    .reindex(columns=[0, 1], fill_value=0.0)
    .rename(columns={0: 'obito_0', 1: 'obito_1'})
    .rename_axis(columns=None)
)

# Rank the groups from the highest to the lowest share of obito == 1.
topogrup_masc_ordenado = topogrup_masc.sort_values(by='obito_1', ascending=False)
print(topogrup_masc_ordenado.shape)
topogrup_masc_ordenado.head(10)

(62, 2)


Unnamed: 0_level_0,obito_0,obito_1
TOPOGRUP,Unnamed: 1_level_1,Unnamed: 2_level_1
C80,0.113089,0.886911
C15,0.11764,0.88236
C34,0.125052,0.874948
C13,0.155141,0.844859
C25,0.162946,0.837054
C23,0.171073,0.828927
C12,0.177831,0.822169
C14,0.181115,0.818885
C26,0.186747,0.813253
C01,0.201921,0.798079


In [17]:
# Drop the df_masc rows that have obito == 0 and sobrevida_ano1 == 0 at the
# same time; every other row is kept.
df_masc_ano1 = df_masc[~((df_masc.obito == 0) & (df_masc.sobrevida_ano1 == 0))].reset_index(drop=True)

# Outcome shares per TOPOGRUP (fraction of obito == 0 and obito == 1 per group).
# pd.crosstab replaces the row-by-row pd.concat loop, and reindex() keeps both
# outcome columns (filled with 0.0) even when a group has a single outcome,
# which previously raised a KeyError.
topogrup_masc_ano1 = (
    pd.crosstab(df_masc_ano1['TOPOGRUP'], df_masc_ano1['obito'], normalize='index')
    .reindex(columns=[0, 1], fill_value=0.0)
    .rename(columns={0: 'obito_0', 1: 'obito_1'})
    .rename_axis(columns=None)
)

# Rank the groups from the highest to the lowest share of obito == 1.
topogrup_masc_ano1_ordenado = topogrup_masc_ano1.sort_values(by='obito_1', ascending=False)
print(topogrup_masc_ano1_ordenado.shape)
topogrup_masc_ano1_ordenado.head(10)

(62, 2)


Unnamed: 0_level_0,obito_0,obito_1
TOPOGRUP,Unnamed: 1_level_1,Unnamed: 2_level_1
C15,0.082615,0.917385
C34,0.084133,0.915867
C80,0.085647,0.914353
C25,0.097042,0.902958
C23,0.12623,0.87377
C13,0.126515,0.873485
C26,0.129032,0.870968
C14,0.142626,0.857374
C12,0.153183,0.846817
C01,0.167377,0.832623


In [18]:
# Drop the df_masc rows that have obito == 0 and sobrevida_ano3 == 0 at the
# same time; every other row is kept.
df_masc_ano3 = df_masc[~((df_masc.obito == 0) & (df_masc.sobrevida_ano3 == 0))].reset_index(drop=True)

# Outcome shares per TOPOGRUP (fraction of obito == 0 and obito == 1 per group).
# pd.crosstab replaces the row-by-row pd.concat loop, and reindex() keeps both
# outcome columns (filled with 0.0) even when a group has a single outcome,
# which previously raised a KeyError.
topogrup_masc_ano3 = (
    pd.crosstab(df_masc_ano3['TOPOGRUP'], df_masc_ano3['obito'], normalize='index')
    .reindex(columns=[0, 1], fill_value=0.0)
    .rename(columns={0: 'obito_0', 1: 'obito_1'})
    .rename_axis(columns=None)
)

# Rank the groups from the highest to the lowest share of obito == 1.
topogrup_masc_ano3_ordenado = topogrup_masc_ano3.sort_values(by='obito_1', ascending=False)
print(topogrup_masc_ano3_ordenado.shape)
topogrup_masc_ano3_ordenado.head(10)

(62, 2)


Unnamed: 0_level_0,obito_0,obito_1
TOPOGRUP,Unnamed: 1_level_1,Unnamed: 2_level_1
C34,0.04744,0.95256
C25,0.051627,0.948373
C15,0.052045,0.947955
C80,0.058168,0.941832
C23,0.084192,0.915808
C39,0.090909,0.909091
C13,0.098492,0.901508
C26,0.10596,0.89404
C12,0.111207,0.888793
C24,0.114786,0.885214


In [19]:
# Drop the df_masc rows that have obito == 0 and sobrevida_ano5 == 0 at the
# same time; every other row is kept.
df_masc_ano5 = df_masc[~((df_masc.obito == 0) & (df_masc.sobrevida_ano5 == 0))].reset_index(drop=True)

# Outcome shares per TOPOGRUP (fraction of obito == 0 and obito == 1 per group).
# pd.crosstab replaces the row-by-row pd.concat loop, and reindex() keeps both
# outcome columns (filled with 0.0) even when a group has a single outcome,
# which previously raised a KeyError.
topogrup_masc_ano5 = (
    pd.crosstab(df_masc_ano5['TOPOGRUP'], df_masc_ano5['obito'], normalize='index')
    .reindex(columns=[0, 1], fill_value=0.0)
    .rename(columns={0: 'obito_0', 1: 'obito_1'})
    .rename_axis(columns=None)
)

# Rank the groups from the highest to the lowest share of obito == 1.
topogrup_masc_ano5_ordenado = topogrup_masc_ano5.sort_values(by='obito_1', ascending=False)
print(topogrup_masc_ano5_ordenado.shape)
topogrup_masc_ano5_ordenado.head(10)

(62, 2)


Unnamed: 0_level_0,obito_0,obito_1
TOPOGRUP,Unnamed: 1_level_1,Unnamed: 2_level_1
C34,0.029143,0.970857
C25,0.030839,0.969161
C15,0.03372,0.96628
C80,0.042149,0.957851
C26,0.049296,0.950704
C23,0.056637,0.943363
C13,0.07444,0.92556
C22,0.078766,0.921234
C12,0.083963,0.916037
C10,0.08429,0.91571


**Feminino**

In [20]:
# Relative frequency of the 10 most common TOPOGRUP values in df_fem.
df_fem['TOPOGRUP'].value_counts(normalize=True).head(10)

C50    0.270308
C44    0.215359
C53    0.092027
C18    0.039149
C73    0.037465
C34    0.037120
C42    0.034603
C54    0.029765
C16    0.025946
C20    0.025841
Name: TOPOGRUP, dtype: float64

*   C50 - Mama 27,03%
*   C44 - Pele 21,54%
*   C53 - Colo de Útero 9,20%
*   C18 - Cólon 3,91%
*   C73 - Tireoide 3,75%
*   C34 - Pulmão 3,71%
*   C42 - Sistemas hematopoético e reticuloendotelial 3,46%
*   C54 - Corpo do Útero 2,98%
*   C16 - Estômago 2,59%
*   C20 - Reto 2,58%

In [21]:
# Outcome shares per TOPOGRUP within df_fem: one row per group with the
# fraction of records where obito == 0 (obito_0) and obito == 1 (obito_1).
# pd.crosstab replaces the row-by-row pd.concat loop, and reindex() keeps both
# outcome columns (filled with 0.0) even when a group has a single outcome,
# which previously raised a KeyError.
topogrup_fem = (
    pd.crosstab(df_fem['TOPOGRUP'], df_fem['obito'], normalize='index')
    .reindex(columns=[0, 1], fill_value=0.0)
    .rename(columns={0: 'obito_0', 1: 'obito_1'})
    .rename_axis(columns=None)
)

# Rank the groups from the highest to the lowest share of obito == 1.
topogrup_fem_ordenado = topogrup_fem.sort_values(by='obito_1', ascending=False)
print(topogrup_fem_ordenado.shape)
topogrup_fem_ordenado.head(10)

(66, 2)


Unnamed: 0_level_0,obito_0,obito_1
TOPOGRUP,Unnamed: 1_level_1,Unnamed: 2_level_1
C80,0.109916,0.890084
C23,0.12065,0.87935
C26,0.155556,0.844444
C15,0.160849,0.839151
C24,0.202294,0.797706
C34,0.203615,0.796385
C25,0.206684,0.793316
C22,0.251667,0.748333
C13,0.25641,0.74359
C12,0.258537,0.741463


In [22]:
# Drop the df_fem rows that have obito == 0 and sobrevida_ano1 == 0 at the
# same time; every other row is kept.
df_fem_ano1 = df_fem[~((df_fem.obito == 0) & (df_fem.sobrevida_ano1 == 0))].reset_index(drop=True)

# Outcome shares per TOPOGRUP (fraction of obito == 0 and obito == 1 per group).
# pd.crosstab replaces the row-by-row pd.concat loop, and reindex() keeps both
# outcome columns (filled with 0.0) even when a group has a single outcome,
# which previously raised a KeyError.
topogrup_fem_ano1 = (
    pd.crosstab(df_fem_ano1['TOPOGRUP'], df_fem_ano1['obito'], normalize='index')
    .reindex(columns=[0, 1], fill_value=0.0)
    .rename(columns={0: 'obito_0', 1: 'obito_1'})
    .rename_axis(columns=None)
)

# Rank the groups from the highest to the lowest share of obito == 1.
topogrup_fem_ano1_ordenado = topogrup_fem_ano1.sort_values(by='obito_1', ascending=False)
print(topogrup_fem_ano1_ordenado.shape)
topogrup_fem_ano1_ordenado.head(10)

(66, 2)


Unnamed: 0_level_0,obito_0,obito_1
TOPOGRUP,Unnamed: 1_level_1,Unnamed: 2_level_1
C23,0.080097,0.919903
C80,0.083533,0.916467
C15,0.124417,0.875583
C26,0.136364,0.863636
C25,0.139217,0.860783
C34,0.148393,0.851607
C24,0.151885,0.848115
C22,0.182711,0.817289
C13,0.21148,0.78852
C12,0.228426,0.771574


In [23]:
# Drop the df_fem rows that have obito == 0 and sobrevida_ano3 == 0 at the
# same time; every other row is kept.
df_fem_ano3 = df_fem[~((df_fem.obito == 0) & (df_fem.sobrevida_ano3 == 0))].reset_index(drop=True)

# Outcome shares per TOPOGRUP (fraction of obito == 0 and obito == 1 per group).
# pd.crosstab replaces the row-by-row pd.concat loop. reindex() fills a
# missing outcome column with 0.0, so groups where every record has
# obito == 1 (or every record has obito == 0) are handled uniformly, with no
# special case inside a loop and no KeyError for the first group.
topogrup_fem_ano3 = (
    pd.crosstab(df_fem_ano3['TOPOGRUP'], df_fem_ano3['obito'], normalize='index')
    .reindex(columns=[0, 1], fill_value=0.0)
    .rename(columns={0: 'obito_0', 1: 'obito_1'})
    .rename_axis(columns=None)
)

# Rank the groups from the highest to the lowest share of obito == 1.
topogrup_fem_ano3_ordenado = topogrup_fem_ano3.sort_values(by='obito_1', ascending=False)
print(topogrup_fem_ano3_ordenado.shape)
topogrup_fem_ano3_ordenado.head(10)

(66, 2)


Unnamed: 0_level_0,obito_0,obito_1
TOPOGRUP,Unnamed: 1_level_1,Unnamed: 2_level_1
C39,0.0,1.0
C26,0.037975,0.962025
C23,0.05072,0.94928
C80,0.057574,0.942426
C25,0.083566,0.916434
C15,0.088012,0.911988
C34,0.08813,0.91187
C24,0.094675,0.905325
C22,0.118812,0.881188
C13,0.166134,0.833866


In [24]:
# Drop the df_fem rows that have obito == 0 and sobrevida_ano5 == 0 at the
# same time; every other row is kept.
df_fem_ano5 = df_fem[~((df_fem.obito == 0) & (df_fem.sobrevida_ano5 == 0))].reset_index(drop=True)

# Outcome shares per TOPOGRUP (fraction of obito == 0 and obito == 1 per group).
# pd.crosstab replaces the row-by-row pd.concat loop. reindex() fills a
# missing outcome column with 0.0, so groups where every record has
# obito == 1 (or every record has obito == 0) are handled uniformly, with no
# special case inside a loop and no KeyError for the first group.
topogrup_fem_ano5 = (
    pd.crosstab(df_fem_ano5['TOPOGRUP'], df_fem_ano5['obito'], normalize='index')
    .reindex(columns=[0, 1], fill_value=0.0)
    .rename(columns={0: 'obito_0', 1: 'obito_1'})
    .rename_axis(columns=None)
)

# Rank the groups from the highest to the lowest share of obito == 1.
topogrup_fem_ano5_ordenado = topogrup_fem_ano5.sort_values(by='obito_1', ascending=False)
print(topogrup_fem_ano5_ordenado.shape)
topogrup_fem_ano5_ordenado.head(10)

(66, 2)


Unnamed: 0_level_0,obito_0,obito_1
TOPOGRUP,Unnamed: 1_level_1,Unnamed: 2_level_1
C39,0.0,1.0
C26,0.031847,0.968153
C23,0.03746,0.96254
C80,0.042989,0.957011
C25,0.052573,0.947427
C34,0.056539,0.943461
C15,0.060644,0.939356
C24,0.063074,0.936926
C22,0.079201,0.920799
C13,0.13289,0.86711
