In [1]:
import pandas as pd
import numpy as np
pd.options.display.float_format = '{:.1f}'.format

In [2]:
# recuperar os dataframes a partir dos arquivos pickle

### POR CNES
# df procedimentos por cnes
df_proc_cnes = pd.read_pickle('./data/dfpo1/df_proc_cnes.pkl')
# matriz de desvios médios por cnes
m_desvios_med_n_cnes = pd.read_pickle('./data/dfpo1/m_desvios_med_n_cnes.pkl')

### DF IMPORTANTE
# df desvios da mediana da quantidade de procedimentos por estabelcimento por mes/ano
# este df é o principal a ser analizado por IRIS, pois a matriz não contém os dados de estabelecimentos que podem ser determinantes 
df_desv_med_n_proc_cnes = pd.read_pickle('./data/dfpo1/df_desv_med_n_proc_cnes.pkl')

### POR MUNICÍPIO
# df procedimentos por município
df_proc_mun = pd.read_pickle('./data/dfpo1/df_proc_mun.pkl')
# df populações dos municípios
df_pop = pd.read_pickle('./data/dfpo1/df_pop.pkl')
# matriz de desvios médios por cnes
m_desvios_med_n_mun = pd.read_pickle('./data/dfpo1/m_desvios_med_n_mun.pkl')

### DF IMPORTANTE
# df desvios da mediana da quantidade de procedimentos por município por mes ano
# este df é o principal a ser analizado por IRIS, pois a matriz não contém os dados de municípios que também podem ser determinantes nesse nível mais elevado de agregação de dados, que consideramos o principal
df_desv_med_n_proc_mun = pd.read_pickle('./data/dfpo1/df_desv_med_n_proc_mun.pkl')


In [3]:
# Obtenha uma lista de todas as colunas
all_cols = df_proc_cnes.columns

# Filtre essa lista para obter apenas as colunas que começam com "20"
num_cols = [col for col in all_cols if str(col).startswith('20')]

# Inicializando um novo DataFrame para armazenar os desvios anuais
desvios_med_n_cnes_anual = pd.DataFrame(index=df_proc_cnes.index)

# Iterando sobre os anos
for year in range(2010, 2023):
    # Selecionando as colunas para este ano
    cols_ano = [col for col in num_cols if str(year) == str(col)[:4]]
    
    # Substituindo NaN pela mediana de cada linha para as colunas deste ano
    df_proc_cnes[cols_ano] = df_proc_cnes[cols_ano].apply(lambda x: x.fillna(x.median()), axis=1)
    
    # Calculando o desvio da mediana para cada célula para as colunas deste ano
    desvios_med_n_cnes_ano = df_proc_cnes[cols_ano].apply(lambda x: x - x.median(), axis=1)
    
    # Adicionando os desvios deste ano ao DataFrame de desvios anuais
    desvios_med_n_cnes_anual = pd.concat([desvios_med_n_cnes_anual, desvios_med_n_cnes_ano], axis=1)

# Print the annual deviation DataFrame
desvios_med_n_cnes_anual

Unnamed: 0,201001,201002,201003,201004,201005,201006,201007,201008,201009,201010,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,-4.0,1.0,4.0,-2.0,2.0,-2.0,-2.0,-1.0,2.0,30.0,...,-7.0,11.0,1.0,45.0,146.0,-3.0,52.0,238.0,-9.0,-3.0
1,5.0,-2.0,26.0,-22.0,-37.0,-26.0,-56.0,51.0,42.0,-29.0,...,-14.0,-7.0,-16.0,40.0,75.0,78.0,-24.0,15.0,133.0,7.0
2,2.5,7.5,-5.5,-2.5,-0.5,-3.5,-3.5,8.5,-2.5,2.5,...,-13.0,-17.0,-25.0,43.0,274.0,54.0,87.0,13.0,187.0,-16.0
3,-10.5,-8.5,17.5,-15.5,-0.5,13.5,0.5,4.5,-1.5,-5.5,...,-3.0,-1.0,-1.0,-6.0,74.0,10.0,33.0,-9.0,20.0,3.0
4,-4.0,4.0,-4.0,-2.0,-6.0,-2.0,-12.0,13.0,8.0,5.0,...,-1.0,-3.0,-1.0,-8.0,22.0,247.0,-7.0,6.0,1.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,-47.0,21.0,2.0,9.0,28.0,13.0,9.0,-5.0,-2.0,-23.0,...,0.0,0.0,-32.0,0.0,0.0,0.0,0.0,0.0,-50.0,-25.0
55601,-24.5,-2.5,53.5,-8.5,3.5,-4.5,1.5,17.5,7.5,-1.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-21.0,-8.0
55602,0.0,-15.0,0.0,3.0,0.0,-1.0,-17.0,-15.0,22.0,29.0,...,-6.0,14.0,2.0,31.0,-15.0,-6.0,-39.0,-2.0,-28.0,9.0
55603,0.0,-25.0,16.0,44.0,36.0,1.0,-17.0,-24.0,1.0,-4.0,...,3.5,0.5,10.5,72.5,-35.5,-12.5,-112.5,-4.5,-66.5,-0.5


In [4]:
df_pop_reshaped = pd.read_pickle('./data/dfpo1/df_pop_reshaped.pkl')
df_pop_156cols = df_pop_reshaped

In [5]:
df_pop_156cols

anomes,codigo,201001,201002,201003,201004,201005,201006,201007,201008,201009,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
codigo_municipio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
110001,1100015,24392,24392,24392,24392,24392,24392,24392,24392,24392,...,22516,22516,22516,22516,22516,22516,22516,22516,22516,22516
110002,1100023,90353,90353,90353,90353,90353,90353,90353,90353,90353,...,111148,111148,111148,111148,111148,111148,111148,111148,111148,111148
110003,1100031,6313,6313,6313,6313,6313,6313,6313,6313,6313,...,5067,5067,5067,5067,5067,5067,5067,5067,5067,5067
110004,1100049,78574,78574,78574,78574,78574,78574,78574,78574,78574,...,86416,86416,86416,86416,86416,86416,86416,86416,86416,86416
110005,1100056,17029,17029,17029,17029,17029,17029,17029,17029,17029,...,16088,16088,16088,16088,16088,16088,16088,16088,16088,16088
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
522200,5222005,12548,12548,12548,12548,12548,12548,12548,12548,12548,...,14088,14088,14088,14088,14088,14088,14088,14088,14088,14088
522205,5222054,7371,7371,7371,7371,7371,7371,7371,7371,7371,...,9002,9002,9002,9002,9002,9002,9002,9002,9002,9002
522220,5222203,4735,4735,4735,4735,4735,4735,4735,4735,4735,...,6451,6451,6451,6451,6451,6451,6451,6451,6451,6451
522230,5222302,5145,5145,5145,5145,5145,5145,5145,5145,5145,...,5941,5941,5941,5941,5941,5941,5941,5941,5941,5941


In [6]:
df_mun_desv_mediana = df_desv_med_n_proc_mun

In [7]:
df_mun_desv_mediana

Unnamed: 0,codigo_municipio,municipio,uf,codigo_procedimento,procedimento,201001,201002,201003,201004,201005,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,110001,ALTA FLORESTA D'OESTE,RO,303010037,TRATAMENTO DE OUTRAS DOENÇAS BACTERIANAS,-4.0,1.0,4.0,-2.0,2.0,...,-4.0,14.0,4.0,48.0,149.0,0.0,55.0,241.0,-6.0,0.0
1,110001,ALTA FLORESTA D'OESTE,RO,303010061,TRATAMENTO DE DOENÇAS INFECCIOSAS INTESTINAIS,79.0,72.0,100.0,52.0,37.0,...,-21.0,-14.0,-23.0,33.0,68.0,71.0,-31.0,8.0,126.0,0.0
2,110001,ALTA FLORESTA D'OESTE,RO,303010193,TRATAMENTO DE OUTRAS DOENÇAS CAUSADAS POR VÍRU...,-5.0,0.0,-13.0,-10.0,-8.0,...,3.0,-1.0,-9.0,59.0,290.0,70.0,103.0,29.0,203.0,0.0
3,110001,ALTA FLORESTA D'OESTE,RO,303060107,TRATAMENTO DE CRISE HIPERTENSIVA,-11.0,-9.0,17.0,-16.0,-1.0,...,-6.0,-4.0,-4.0,-9.0,71.0,7.0,30.0,-12.0,17.0,0.0
4,110001,ALTA FLORESTA D'OESTE,RO,303070064,TRATAMENTO DE DOENCAS DO ESOFAGO ESTOMAGO E DU...,-4.0,4.0,-4.0,-2.0,-6.0,...,-4.0,-6.0,-4.0,-11.0,19.0,244.0,-10.0,3.0,-2.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53028,530010,BRASILIA,DF,416120024,MASTECTOMIA RADICAL COM LINFADENECTOMIA AXILAR...,37.5,-21.0,-34.0,-3.7,-0.5,...,-16.0,-6.8,-30.2,-32.2,-22.5,-34.0,-22.2,73.3,-15.0,-2.2
53029,530010,BRASILIA,DF,416120059,SEGMENTECTOMIA/QUADRANTECTOMIA/SETORECTOMIA DE...,-29.0,0.0,21.5,-31.0,-12.0,...,41.6,-1.0,20.5,5.4,-5.2,29.6,13.8,88.0,24.5,12.7
53030,530010,BRASILIA,DF,503010014,AÇÕES RELACIONADAS A DOAÇÃO DE ÓRGÃOS E TECIDO...,-86.8,81.2,-89.8,-76.8,-45.8,...,-24.8,-33.8,-75.6,14.8,-49.8,46.7,-32.2,-50.8,-21.8,-76.2
53031,530010,BRASILIA,DF,505010097,TRANSPLANTE DE CORNEA,-20.0,-42.0,-3.5,-30.5,0.0,...,16.5,65.0,8.5,14.5,4.0,-22.0,-12.0,33.5,94.5,-46.0


In [8]:
df_cnes_desv_mediana = df_desv_med_n_proc_cnes

In [9]:
df_cnes_desv_mediana

Unnamed: 0,codigo_municipio,municipio,uf,cnes,codigo_procedimento,procedimento,201001,201002,201003,201004,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,110001,ALTA FLORESTA D'OESTE,RO,2679477,303010037,TRATAMENTO DE OUTRAS DOENÇAS BACTERIANAS,-4.0,1.0,4.0,-2.0,...,-4.0,14.0,4.0,48.0,149.0,0.0,55.0,241.0,-6.0,0.0
1,110001,ALTA FLORESTA D'OESTE,RO,2679477,303010061,TRATAMENTO DE DOENÇAS INFECCIOSAS INTESTINAIS,79.0,72.0,100.0,52.0,...,-21.0,-14.0,-23.0,33.0,68.0,71.0,-31.0,8.0,126.0,0.0
2,110001,ALTA FLORESTA D'OESTE,RO,2679477,303010193,TRATAMENTO DE OUTRAS DOENÇAS CAUSADAS POR VÍRU...,-5.0,0.0,-13.0,-10.0,...,3.0,-1.0,-9.0,59.0,290.0,70.0,103.0,29.0,203.0,0.0
3,110001,ALTA FLORESTA D'OESTE,RO,2679477,303060107,TRATAMENTO DE CRISE HIPERTENSIVA,-11.0,-9.0,17.0,-16.0,...,-6.0,-4.0,-4.0,-9.0,71.0,7.0,30.0,-12.0,17.0,0.0
4,110001,ALTA FLORESTA D'OESTE,RO,2679477,303070064,TRATAMENTO DE DOENCAS DO ESOFAGO ESTOMAGO E DU...,-4.0,4.0,-4.0,-2.0,...,-4.0,-6.0,-4.0,-11.0,19.0,244.0,-10.0,3.0,-2.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,530010,BRASILIA,DF,3077098,70103013,APARELHO DE AMPLIFICAÇÃO SONORA INDIVIDUAL (AA...,-47.0,21.0,2.0,9.0,...,0.0,0.0,-32.0,0.0,0.0,0.0,0.0,0.0,-50.0,-25.0
55601,530010,BRASILIA,DF,3077098,70103014,APARELHO DE AMPLIFICAÇÃO SONORA INDIVIDUAL (AA...,-13.0,9.0,65.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-21.0,-8.0
55602,530010,BRASILIA,DF,3206874,50301001,AÇÕES RELACIONADAS A DOAÇÃO DE ÓRGÃOS E TECIDO...,0.0,-15.0,0.0,3.0,...,-15.0,5.0,-7.0,22.0,-24.0,-15.0,-48.0,-11.0,-37.0,0.0
55603,530010,BRASILIA,DF,3206874,50401002,PROCESSAMENTO DE CORNEA / ESCLERA,0.0,-25.0,16.0,44.0,...,4.0,1.0,11.0,73.0,-35.0,-12.0,-112.0,-4.0,-66.0,0.0


In [10]:
df_cnes_freq = df_proc_cnes

In [11]:
df_cnes_freq

Unnamed: 0,codigo_municipio,municipio,uf,cnes,codigo_procedimento,procedimento,201001,201002,201003,201004,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,110001,ALTA FLORESTA D'OESTE,RO,2679477,303010037,TRATAMENTO DE OUTRAS DOENÇAS BACTERIANAS,4.0,9.0,12.0,6.0,...,4.0,22.0,12.0,56.0,157.0,8.0,63.0,249.0,2.0,8.0
1,110001,ALTA FLORESTA D'OESTE,RO,2679477,303010061,TRATAMENTO DE DOENÇAS INFECCIOSAS INTESTINAIS,116.0,109.0,137.0,89.0,...,16.0,23.0,14.0,70.0,105.0,108.0,6.0,45.0,163.0,37.0
2,110001,ALTA FLORESTA D'OESTE,RO,2679477,303010193,TRATAMENTO DE OUTRAS DOENÇAS CAUSADAS POR VÍRU...,12.0,17.0,4.0,7.0,...,20.0,16.0,8.0,76.0,307.0,87.0,120.0,46.0,220.0,17.0
3,110001,ALTA FLORESTA D'OESTE,RO,2679477,303060107,TRATAMENTO DE CRISE HIPERTENSIVA,9.0,11.0,37.0,4.0,...,14.0,16.0,16.0,11.0,91.0,27.0,50.0,8.0,37.0,20.0
4,110001,ALTA FLORESTA D'OESTE,RO,2679477,303070064,TRATAMENTO DE DOENCAS DO ESOFAGO ESTOMAGO E DU...,10.0,18.0,10.0,12.0,...,10.0,8.0,10.0,3.0,33.0,258.0,4.0,17.0,12.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,530010,BRASILIA,DF,3077098,70103013,APARELHO DE AMPLIFICAÇÃO SONORA INDIVIDUAL (AA...,16.0,84.0,65.0,72.0,...,63.0,63.0,31.0,63.0,63.0,63.0,63.0,63.0,13.0,38.0
55601,530010,BRASILIA,DF,3077098,70103014,APARELHO DE AMPLIFICAÇÃO SONORA INDIVIDUAL (AA...,14.0,36.0,92.0,30.0,...,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,6.0,19.0
55602,530010,BRASILIA,DF,3206874,50301001,AÇÕES RELACIONADAS A DOAÇÃO DE ÓRGÃOS E TECIDO...,77.0,62.0,77.0,80.0,...,62.0,82.0,70.0,99.0,53.0,62.0,29.0,66.0,40.0,77.0
55603,530010,BRASILIA,DF,3206874,50401002,PROCESSAMENTO DE CORNEA / ESCLERA,154.0,129.0,170.0,198.0,...,158.0,155.0,165.0,227.0,119.0,142.0,42.0,150.0,88.0,154.0


In [12]:
df_mun_freq = df_proc_mun

In [13]:
df_mun_freq

Unnamed: 0,codigo_municipio,municipio,uf,codigo_procedimento,procedimento,201001,201002,201003,201004,201005,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,110001,ALTA FLORESTA D'OESTE,RO,303010037,TRATAMENTO DE OUTRAS DOENÇAS BACTERIANAS,4.0,9.0,12.0,6.0,10.0,...,4.0,22.0,12.0,56.0,157.0,8.0,63.0,249.0,2.0,8.0
1,110001,ALTA FLORESTA D'OESTE,RO,303010061,TRATAMENTO DE DOENÇAS INFECCIOSAS INTESTINAIS,116.0,109.0,137.0,89.0,74.0,...,16.0,23.0,14.0,70.0,105.0,108.0,6.0,45.0,163.0,37.0
2,110001,ALTA FLORESTA D'OESTE,RO,303010193,TRATAMENTO DE OUTRAS DOENÇAS CAUSADAS POR VÍRU...,12.0,17.0,4.0,7.0,9.0,...,20.0,16.0,8.0,76.0,307.0,87.0,120.0,46.0,220.0,17.0
3,110001,ALTA FLORESTA D'OESTE,RO,303060107,TRATAMENTO DE CRISE HIPERTENSIVA,9.0,11.0,37.0,4.0,19.0,...,14.0,16.0,16.0,11.0,91.0,27.0,50.0,8.0,37.0,20.0
4,110001,ALTA FLORESTA D'OESTE,RO,303070064,TRATAMENTO DE DOENCAS DO ESOFAGO ESTOMAGO E DU...,10.0,18.0,10.0,12.0,8.0,...,10.0,8.0,10.0,3.0,33.0,258.0,4.0,17.0,12.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53028,530010,BRASILIA,DF,416120024,MASTECTOMIA RADICAL COM LINFADENECTOMIA AXILAR...,87.5,29.0,16.0,46.3,49.5,...,34.0,43.2,19.8,17.8,27.5,16.0,27.8,123.3,35.0,47.8
53029,530010,BRASILIA,DF,416120059,SEGMENTECTOMIA/QUADRANTECTOMIA/SETORECTOMIA DE...,6.0,35.0,56.5,4.0,23.0,...,76.6,34.0,55.5,40.4,29.8,64.6,48.8,123.0,59.5,47.7
53030,530010,BRASILIA,DF,503010014,AÇÕES RELACIONADAS A DOAÇÃO DE ÓRGÃOS E TECIDO...,16.0,184.0,13.0,26.0,57.0,...,78.0,69.0,27.2,117.7,53.0,149.5,70.7,52.0,81.0,26.7
53031,530010,BRASILIA,DF,505010097,TRANSPLANTE DE CORNEA,31.0,9.0,47.5,20.5,51.0,...,67.5,116.0,59.5,65.5,55.0,29.0,39.0,84.5,145.5,5.0


### 1. Com os principais dataframes carregados e com nomes padronizados, agora passo a introduzir algumas features que julgo relevantes para o treinamento da rede neural IRIS

#### Dataframes com dados de procedimentos agregados por estabelecimento de saúde (CNES)

1. Tipo de estabelecimento 
2. Porte do estabelecimento (ainda não inclusoa até 18/05/23)
3. Tipo de Unidade
4. Clientela
5. Turno de atendimento

#### Dataframes com dados de procedimentos agregados por município

1. Porte populacional do município (atual)
2. Regional de Saúde (Manscuso vai avaliar a pertinência)


In [14]:
df_cnes_features = pd.read_excel('./data/cnes/tb_cnes_features.xlsx')
df_cnes_features

Unnamed: 0,CO_CNES,CO_DISTRITO_SANITARIO,CO_DISTRITO_ADMINISTRATIVO,CO_MICRO_REGIAO,CO_MUNICIPIO_GESTOR,CO_REGIAO_SAUDE,CO_ESTADO_GESTOR,NO_RAZAO_SOCIAL,NO_FANTASIA,CO_TIPO_ESTABELECIMENTO,TP_UNIDADE,CO_CLIENTELA,CO_TURNO_ATENDIMENTO,CO_CPFDIRETORCLN,CO_MOTIVO_DESAB,NU_LATITUDE,NU_LONGITUDE,CO_NATUREZA_JUR,CO_ATIVIDADE_PRINCIPAL
0,9127801,,,,313460,,31,SECRETARIA DE ESTADO DE ADMINISTRACAO PRISIONAL,PRESIDIO DE JABOTICATUBAS,16.0,2,3.0,4.0,5095926601.0,,-19.5,-43.7,1023,1.0
1,9133224,,,,320520,,32,HENRIQUE TOMMASI NETTO ANALISES CLINICAS LTDA,HENRIQUE TOMMASI POSTO DE COLETA IBES,18.0,39,3.0,3.0,5756217703.0,,-20.4,-40.3,2062,2.0
2,9141952,,,,317010,,31,CURAE SERVICOS MEDICOS LTDA,CURAE IMUNIZACAO E IMUNOTERAPIA,18.0,36,3.0,3.0,13867913854.0,,-19.7,-47.9,2062,2.0
3,9148639,,,,210530,,21,YGM ODONTOLOGIA EIRELI ME,CLINICA ODONTOLOGICA DR YUCATAN MAIA,16.0,36,3.0,3.0,2495875130.0,,-5.5,-47.5,2305,1.0
4,9147616,,,,260410,,26,PREFERITURA MUNICIPAL DE CARUARU,UNIDADE MUNICIPAL DE FISIOTERAPIA,15.0,36,2.0,3.0,4920721404.0,,-8.3,-36.0,1244,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
312664,7875231,10.0,,,530010,,53,L2 IP INSTITUTO DE PESQUISAS CLINICAS LTDA,L2 IP,25.0,22,1.0,3.0,38149184104.0,,-15.8,-47.9,2062,17.0
312665,7144172,,,,530010,,53,IMUNOCENTRO CENTRO INTEGRADO DE ALERGIA PEDIAT...,IMUNOCENTRO FILIAL,25.0,36,1.0,3.0,22054006149.0,,-15.8,-47.9,2062,17.0
312666,83623,9901.0,,,530010,,53,PRIME VACINAS LTDA,PRIME VACINAS,25.0,36,1.0,3.0,90660439115.0,,-15.8,-48.0,2062,17.0
312667,9872183,9901.0,,,530010,,53,MULTICLINICA KAIROS LTDA,MULTICLINICA KAIROS,16.0,22,1.0,3.0,61977632149.0,6.0,,,2062,1.0


In [15]:
df_cnes_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 312669 entries, 0 to 312668
Data columns (total 19 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   CO_CNES                     312669 non-null  int64  
 1   CO_DISTRITO_SANITARIO       26139 non-null   float64
 2   CO_DISTRITO_ADMINISTRATIVO  3941 non-null    float64
 3   CO_MICRO_REGIAO             12049 non-null   float64
 4   CO_MUNICIPIO_GESTOR         312669 non-null  int64  
 5   CO_REGIAO_SAUDE             142600 non-null  float64
 6   CO_ESTADO_GESTOR            312669 non-null  int64  
 7   NO_RAZAO_SOCIAL             312667 non-null  object 
 8   NO_FANTASIA                 312624 non-null  object 
 9   CO_TIPO_ESTABELECIMENTO     281310 non-null  float64
 10  TP_UNIDADE                  312669 non-null  int64  
 11  CO_CLIENTELA                307526 non-null  float64
 12  CO_TURNO_ATENDIMENTO        311547 non-null  float64
 13  CO_CPFDIRETORC

In [16]:
ls_to_int = ['CO_DISTRITO_SANITARIO', 'CO_DISTRITO_ADMINISTRATIVO', 'CO_MICRO_REGIAO', 
             'CO_REGIAO_SAUDE', 'CO_TIPO_ESTABELECIMENTO', 'CO_CLIENTELA', 'CO_TURNO_ATENDIMENTO', 
             'CO_CPFDIRETORCLN', 'CO_MOTIVO_DESAB', 'CO_ATIVIDADE_PRINCIPAL']

for col in ls_to_int:
    df_cnes_features[col] = df_cnes_features[col].fillna(-1).astype(int)



In [17]:
df_cnes_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 312669 entries, 0 to 312668
Data columns (total 19 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   CO_CNES                     312669 non-null  int64  
 1   CO_DISTRITO_SANITARIO       312669 non-null  int32  
 2   CO_DISTRITO_ADMINISTRATIVO  312669 non-null  int32  
 3   CO_MICRO_REGIAO             312669 non-null  int32  
 4   CO_MUNICIPIO_GESTOR         312669 non-null  int64  
 5   CO_REGIAO_SAUDE             312669 non-null  int32  
 6   CO_ESTADO_GESTOR            312669 non-null  int64  
 7   NO_RAZAO_SOCIAL             312667 non-null  object 
 8   NO_FANTASIA                 312624 non-null  object 
 9   CO_TIPO_ESTABELECIMENTO     312669 non-null  int32  
 10  TP_UNIDADE                  312669 non-null  int64  
 11  CO_CLIENTELA                312669 non-null  int32  
 12  CO_TURNO_ATENDIMENTO        312669 non-null  int32  
 13  CO_CPFDIRETORC

In [18]:
df_cnes_features.columns = df_cnes_features.columns.str.lower()
df_cnes_features.columns

Index(['co_cnes', 'co_distrito_sanitario', 'co_distrito_administrativo',
       'co_micro_regiao', 'co_municipio_gestor', 'co_regiao_saude',
       'co_estado_gestor', 'no_razao_social', 'no_fantasia',
       'co_tipo_estabelecimento', 'tp_unidade', 'co_clientela',
       'co_turno_atendimento', 'co_cpfdiretorcln', 'co_motivo_desab',
       'nu_latitude', 'nu_longitude', 'co_natureza_jur',
       'co_atividade_principal'],
      dtype='object')

In [19]:
df_cnes_features = df_cnes_features[['co_cnes', 'co_distrito_sanitario', 'co_distrito_administrativo',
       'co_micro_regiao', 'co_municipio_gestor', 'co_regiao_saude', 'co_estado_gestor', 'no_razao_social',
       'no_fantasia', 'co_tipo_estabelecimento', 'tp_unidade', 'co_clientela', 'co_turno_atendimento',
       'co_natureza_jur','co_atividade_principal', 'co_cpfdiretorcln', 'co_motivo_desab',
       'nu_latitude', 'nu_longitude']]

In [20]:
df_cnes_features

Unnamed: 0,co_cnes,co_distrito_sanitario,co_distrito_administrativo,co_micro_regiao,co_municipio_gestor,co_regiao_saude,co_estado_gestor,no_razao_social,no_fantasia,co_tipo_estabelecimento,tp_unidade,co_clientela,co_turno_atendimento,co_natureza_jur,co_atividade_principal,co_cpfdiretorcln,co_motivo_desab,nu_latitude,nu_longitude
0,9127801,-1,-1,-1,313460,-1,31,SECRETARIA DE ESTADO DE ADMINISTRACAO PRISIONAL,PRESIDIO DE JABOTICATUBAS,16,2,3,4,1023,1,-2147483648,-1,-19.5,-43.7
1,9133224,-1,-1,-1,320520,-1,32,HENRIQUE TOMMASI NETTO ANALISES CLINICAS LTDA,HENRIQUE TOMMASI POSTO DE COLETA IBES,18,39,3,3,2062,2,-2147483648,-1,-20.4,-40.3
2,9141952,-1,-1,-1,317010,-1,31,CURAE SERVICOS MEDICOS LTDA,CURAE IMUNIZACAO E IMUNOTERAPIA,18,36,3,3,2062,2,-2147483648,-1,-19.7,-47.9
3,9148639,-1,-1,-1,210530,-1,21,YGM ODONTOLOGIA EIRELI ME,CLINICA ODONTOLOGICA DR YUCATAN MAIA,16,36,3,3,2305,1,-2147483648,-1,-5.5,-47.5
4,9147616,-1,-1,-1,260410,-1,26,PREFERITURA MUNICIPAL DE CARUARU,UNIDADE MUNICIPAL DE FISIOTERAPIA,15,36,2,3,1244,4,-2147483648,-1,-8.3,-36.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
312664,7875231,10,-1,-1,530010,-1,53,L2 IP INSTITUTO DE PESQUISAS CLINICAS LTDA,L2 IP,25,22,1,3,2062,17,-2147483648,-1,-15.8,-47.9
312665,7144172,-1,-1,-1,530010,-1,53,IMUNOCENTRO CENTRO INTEGRADO DE ALERGIA PEDIAT...,IMUNOCENTRO FILIAL,25,36,1,3,2062,17,-2147483648,-1,-15.8,-47.9
312666,83623,9901,-1,-1,530010,-1,53,PRIME VACINAS LTDA,PRIME VACINAS,25,36,1,3,2062,17,-2147483648,-1,-15.8,-48.0
312667,9872183,9901,-1,-1,530010,-1,53,MULTICLINICA KAIROS LTDA,MULTICLINICA KAIROS,16,22,1,3,2062,1,-2147483648,6,,


In [21]:
# Divide df_cnes_freq em duas partes
df_cnes_freq_1 = df_cnes_freq[['codigo_municipio', 'municipio', 'uf', 'cnes', 'codigo_procedimento', 'procedimento']]
df_cnes_freq_2 = df_cnes_freq.drop(columns=['codigo_municipio', 'municipio', 'uf', 'cnes', 'codigo_procedimento', 'procedimento'])

# Mescla df_cnes_freq_1 com df_cnes_features
df_merged = pd.merge(df_cnes_freq_1, df_cnes_features, how='left', left_on='cnes', right_on='co_cnes')

# Concatena df_merged com df_cnes_freq_2
df_cnes_iris_pre = pd.concat([df_merged, df_cnes_freq_2], axis=1)
df_cnes_iris_pre = df_cnes_iris_pre.drop(columns=['co_cnes', 'co_municipio_gestor', 'co_estado_gestor'])
df_cnes_iris_pre


Unnamed: 0,codigo_municipio,municipio,uf,cnes,codigo_procedimento,procedimento,co_distrito_sanitario,co_distrito_administrativo,co_micro_regiao,co_regiao_saude,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,110001,ALTA FLORESTA D'OESTE,RO,2679477,303010037,TRATAMENTO DE OUTRAS DOENÇAS BACTERIANAS,-1,-1,-1,-1,...,4.0,22.0,12.0,56.0,157.0,8.0,63.0,249.0,2.0,8.0
1,110001,ALTA FLORESTA D'OESTE,RO,2679477,303010061,TRATAMENTO DE DOENÇAS INFECCIOSAS INTESTINAIS,-1,-1,-1,-1,...,16.0,23.0,14.0,70.0,105.0,108.0,6.0,45.0,163.0,37.0
2,110001,ALTA FLORESTA D'OESTE,RO,2679477,303010193,TRATAMENTO DE OUTRAS DOENÇAS CAUSADAS POR VÍRU...,-1,-1,-1,-1,...,20.0,16.0,8.0,76.0,307.0,87.0,120.0,46.0,220.0,17.0
3,110001,ALTA FLORESTA D'OESTE,RO,2679477,303060107,TRATAMENTO DE CRISE HIPERTENSIVA,-1,-1,-1,-1,...,14.0,16.0,16.0,11.0,91.0,27.0,50.0,8.0,37.0,20.0
4,110001,ALTA FLORESTA D'OESTE,RO,2679477,303070064,TRATAMENTO DE DOENCAS DO ESOFAGO ESTOMAGO E DU...,-1,-1,-1,-1,...,10.0,8.0,10.0,3.0,33.0,258.0,4.0,17.0,12.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,530010,BRASILIA,DF,3077098,70103013,APARELHO DE AMPLIFICAÇÃO SONORA INDIVIDUAL (AA...,-1,-1,-1,-1,...,63.0,63.0,31.0,63.0,63.0,63.0,63.0,63.0,13.0,38.0
55601,530010,BRASILIA,DF,3077098,70103014,APARELHO DE AMPLIFICAÇÃO SONORA INDIVIDUAL (AA...,-1,-1,-1,-1,...,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,6.0,19.0
55602,530010,BRASILIA,DF,3206874,50301001,AÇÕES RELACIONADAS A DOAÇÃO DE ÓRGÃOS E TECIDO...,10,-1,-1,-1,...,62.0,82.0,70.0,99.0,53.0,62.0,29.0,66.0,40.0,77.0
55603,530010,BRASILIA,DF,3206874,50401002,PROCESSAMENTO DE CORNEA / ESCLERA,10,-1,-1,-1,...,158.0,155.0,165.0,227.0,119.0,142.0,42.0,150.0,88.0,154.0


In [22]:
df_cnes_iris_pre = df_cnes_iris_pre.drop(columns=['municipio',
                                                  'uf', 
                                                  'procedimento', 
                                                  'no_razao_social',
                                                  'no_fantasia'])

In [23]:
df_cnes_iris_pre = df_cnes_iris_pre.drop(columns=[ 'co_cpfdiretorcln', 'co_motivo_desab', 'nu_latitude','nu_longitude',])

In [24]:
list(df_cnes_iris_pre.columns)

['codigo_municipio',
 'cnes',
 'codigo_procedimento',
 'co_distrito_sanitario',
 'co_distrito_administrativo',
 'co_micro_regiao',
 'co_regiao_saude',
 'co_tipo_estabelecimento',
 'tp_unidade',
 'co_clientela',
 'co_turno_atendimento',
 'co_natureza_jur',
 'co_atividade_principal',
 201001,
 201002,
 201003,
 201004,
 201005,
 201006,
 201007,
 201008,
 201009,
 201010,
 201011,
 201012,
 201101,
 201102,
 201103,
 201104,
 201105,
 201106,
 201107,
 201108,
 201109,
 201110,
 201111,
 201112,
 201201,
 201202,
 201203,
 201204,
 201205,
 201206,
 201207,
 201208,
 201209,
 201210,
 201211,
 201212,
 201301,
 201302,
 201303,
 201304,
 201305,
 201306,
 201307,
 201308,
 201309,
 201310,
 201311,
 201312,
 201401,
 201402,
 201403,
 201404,
 201405,
 201406,
 201407,
 201408,
 201409,
 201410,
 201411,
 201412,
 201501,
 201502,
 201503,
 201504,
 201505,
 201506,
 201507,
 201508,
 201509,
 201510,
 201511,
 201512,
 201601,
 201602,
 201603,
 201604,
 201605,
 201606,
 201607,
 20160

In [25]:
df_cnes_iris_pre[['co_distrito_sanitario', 
                  'co_distrito_administrativo', 
                  'co_micro_regiao', 
                  'co_regiao_saude', 
                  'co_tipo_estabelecimento', 
                  'tp_unidade', 
                  'co_clientela', 
                  'co_turno_atendimento', 
                  'co_natureza_jur', 
                  'co_atividade_principal']].nunique()

co_distrito_sanitario         27
co_distrito_administrativo    48
co_micro_regiao               62
co_regiao_saude               80
co_tipo_estabelecimento       17
tp_unidade                    16
co_clientela                   4
co_turno_atendimento           7
co_natureza_jur               27
co_atividade_principal        25
dtype: int64

In [26]:
df_cnes_iris_pre = df_cnes_iris_pre.drop(columns=['co_distrito_sanitario', 
                                                  'co_distrito_administrativo', 
                                                  'co_micro_regiao', 
                                                  'co_regiao_saude', 
                                                  'co_natureza_jur', 
                                                  'co_atividade_principal'])

In [27]:
# Importando a biblioteca necessária
from sklearn.preprocessing import OneHotEncoder

# Instanciando o OneHotEncoder
encoder = OneHotEncoder(sparse=False)

# Seleciona as colunas que serão transformadas pelo OneHotEncoder
categorical_cols = ['co_tipo_estabelecimento', 'tp_unidade', 'co_clientela', 'co_turno_atendimento']

# Ajusta o OneHotEncoder e transforme as colunas
onehot_cols = encoder.fit_transform(df_cnes_iris_pre[categorical_cols])

# O resultado é uma matriz numpy. Para transformá-la de volta em um DataFrame:
onehot_df = pd.DataFrame(onehot_cols, columns=encoder.get_feature_names_out(categorical_cols))

# Concatena este DataFrame onehot_df com seu DataFrame original (sem as colunas categóricas)
df_cnes_iris_pre = pd.concat([df_cnes_iris_pre.drop(categorical_cols, axis=1), onehot_df], axis=1)




In [28]:
df_cnes_iris_pre

Unnamed: 0,codigo_municipio,cnes,codigo_procedimento,201001,201002,201003,201004,201005,201006,201007,...,co_clientela_1,co_clientela_2,co_clientela_3,co_turno_atendimento_1,co_turno_atendimento_2,co_turno_atendimento_3,co_turno_atendimento_4,co_turno_atendimento_5,co_turno_atendimento_6,co_turno_atendimento_7
0,110001,2679477,303010037,4.0,9.0,12.0,6.0,10.0,6.0,6.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,110001,2679477,303010061,116.0,109.0,137.0,89.0,74.0,85.0,55.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,110001,2679477,303010193,12.0,17.0,4.0,7.0,9.0,6.0,6.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,110001,2679477,303060107,9.0,11.0,37.0,4.0,19.0,33.0,20.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,110001,2679477,303070064,10.0,18.0,10.0,12.0,8.0,12.0,2.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,530010,3077098,70103013,16.0,84.0,65.0,72.0,91.0,76.0,72.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
55601,530010,3077098,70103014,14.0,36.0,92.0,30.0,42.0,34.0,40.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
55602,530010,3206874,50301001,77.0,62.0,77.0,80.0,77.0,76.0,60.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
55603,530010,3206874,50401002,154.0,129.0,170.0,198.0,190.0,155.0,137.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [29]:
list(df_cnes_iris_pre.columns)

['codigo_municipio',
 'cnes',
 'codigo_procedimento',
 201001,
 201002,
 201003,
 201004,
 201005,
 201006,
 201007,
 201008,
 201009,
 201010,
 201011,
 201012,
 201101,
 201102,
 201103,
 201104,
 201105,
 201106,
 201107,
 201108,
 201109,
 201110,
 201111,
 201112,
 201201,
 201202,
 201203,
 201204,
 201205,
 201206,
 201207,
 201208,
 201209,
 201210,
 201211,
 201212,
 201301,
 201302,
 201303,
 201304,
 201305,
 201306,
 201307,
 201308,
 201309,
 201310,
 201311,
 201312,
 201401,
 201402,
 201403,
 201404,
 201405,
 201406,
 201407,
 201408,
 201409,
 201410,
 201411,
 201412,
 201501,
 201502,
 201503,
 201504,
 201505,
 201506,
 201507,
 201508,
 201509,
 201510,
 201511,
 201512,
 201601,
 201602,
 201603,
 201604,
 201605,
 201606,
 201607,
 201608,
 201609,
 201610,
 201611,
 201612,
 201701,
 201702,
 201703,
 201704,
 201705,
 201706,
 201707,
 201708,
 201709,
 201710,
 201711,
 201712,
 201801,
 201802,
 201803,
 201804,
 201805,
 201806,
 201807,
 201808,
 201809,
 

In [30]:
from sklearn.preprocessing import MinMaxScaler

# selecionando apenas as colunas com as séries temporais
temporal_cols = [col for col in df_cnes_iris_pre.columns if str(col).startswith('20')]
scaler = MinMaxScaler()
df_cnes_iris_pre[temporal_cols] = scaler.fit_transform(df_cnes_iris_pre[temporal_cols])


In [31]:
pd.options.display.float_format = '{:.8f}'.format
df_cnes_iris_pre

Unnamed: 0,codigo_municipio,cnes,codigo_procedimento,201001,201002,201003,201004,201005,201006,201007,...,co_clientela_1,co_clientela_2,co_clientela_3,co_turno_atendimento_1,co_turno_atendimento_2,co_turno_atendimento_3,co_turno_atendimento_4,co_turno_atendimento_5,co_turno_atendimento_6,co_turno_atendimento_7
0,110001,2679477,303010037,0.00000515,0.00001159,0.00002102,0.00000829,0.00001252,0.00001069,0.00000840,...,0.00000000,0.00000000,1.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,1.00000000,0.00000000
1,110001,2679477,303010061,0.00014938,0.00014036,0.00023992,0.00012294,0.00009265,0.00015140,0.00007702,...,0.00000000,0.00000000,1.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,1.00000000,0.00000000
2,110001,2679477,303010193,0.00001545,0.00002189,0.00000701,0.00000967,0.00001127,0.00001069,0.00000840,...,0.00000000,0.00000000,1.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,1.00000000,0.00000000
3,110001,2679477,303060107,0.00001159,0.00001417,0.00006480,0.00000553,0.00002379,0.00005878,0.00002801,...,0.00000000,0.00000000,1.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,1.00000000,0.00000000
4,110001,2679477,303070064,0.00001288,0.00002318,0.00001751,0.00001658,0.00001002,0.00002137,0.00000280,...,0.00000000,0.00000000,1.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,1.00000000,0.00000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,530010,3077098,70103013,0.00002060,0.00010817,0.00011383,0.00009945,0.00011393,0.00013537,0.00010082,...,0.00000000,0.00000000,1.00000000,0.00000000,0.00000000,1.00000000,0.00000000,0.00000000,0.00000000,0.00000000
55601,530010,3077098,70103014,0.00001803,0.00004636,0.00016112,0.00004144,0.00005258,0.00006056,0.00005601,...,0.00000000,0.00000000,1.00000000,0.00000000,0.00000000,1.00000000,0.00000000,0.00000000,0.00000000,0.00000000
55602,530010,3206874,50301001,0.00009916,0.00007984,0.00013485,0.00011050,0.00009640,0.00013537,0.00008402,...,0.00000000,0.00000000,1.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,1.00000000,0.00000000
55603,530010,3206874,50401002,0.00019831,0.00016612,0.00029771,0.00027350,0.00023788,0.00027608,0.00019184,...,0.00000000,0.00000000,1.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,1.00000000,0.00000000


A configuração específica do array 3D dependerá de como o DataFrame original está organizado e da maneira como você planeja usar os dados. No entanto, usando a configuração proposta, poderíamos ter o seguinte:

- A dimensão x representa os estabelecimentos de saúde (CNES). Portanto, o comprimento dessa dimensão seria igual ao número de estabelecimentos únicos no dataframe.

- A dimensão y representa os procedimentos. Como mencionado, neste caso, cada array 3D é específico para um único procedimento, então a dimensão y teria apenas um valor (o procedimento específico para aquele array).

- A dimensão z representa o tempo. No seu caso, parece que você tem 156 pontos de tempo (de 201001 a 202212), então essa dimensão teria um comprimento de 156.

Cada elemento no array 3D representaria a quantidade do procedimento específico (definido pela dimensão y) realizado no estabelecimento de saúde específico (definido pela dimensão x) em um ponto de tempo específico (definido pela dimensão z).

Por favor, observe que a estrutura exata do array 3D e como os dados estão organizados dentro dele dependerá de como o DataFrame original df_cnes_iris_pre está estruturado e dos valores contidos nele.

Além disso, observe que a estrutura exata pode variar com base em suas necessidades específicas. Por exemplo, se você descobrir que é mais útil ter os procedimentos como a dimensão x e os estabelecimentos de saúde como a dimensão y, você poderia reorganizar o array para se ajustar a esse formato. A chave é que o array 3D permite que você represente três variáveis ​​distintas (neste caso, estabelecimentos de saúde, procedimentos e tempo) em uma única estrutura de dados.

In [32]:
list(df_cnes_iris_pre.columns)

['codigo_municipio',
 'cnes',
 'codigo_procedimento',
 201001,
 201002,
 201003,
 201004,
 201005,
 201006,
 201007,
 201008,
 201009,
 201010,
 201011,
 201012,
 201101,
 201102,
 201103,
 201104,
 201105,
 201106,
 201107,
 201108,
 201109,
 201110,
 201111,
 201112,
 201201,
 201202,
 201203,
 201204,
 201205,
 201206,
 201207,
 201208,
 201209,
 201210,
 201211,
 201212,
 201301,
 201302,
 201303,
 201304,
 201305,
 201306,
 201307,
 201308,
 201309,
 201310,
 201311,
 201312,
 201401,
 201402,
 201403,
 201404,
 201405,
 201406,
 201407,
 201408,
 201409,
 201410,
 201411,
 201412,
 201501,
 201502,
 201503,
 201504,
 201505,
 201506,
 201507,
 201508,
 201509,
 201510,
 201511,
 201512,
 201601,
 201602,
 201603,
 201604,
 201605,
 201606,
 201607,
 201608,
 201609,
 201610,
 201611,
 201612,
 201701,
 201702,
 201703,
 201704,
 201705,
 201706,
 201707,
 201708,
 201709,
 201710,
 201711,
 201712,
 201801,
 201802,
 201803,
 201804,
 201805,
 201806,
 201807,
 201808,
 201809,
 

In [33]:
df_cnes_iris_pre.drop(columns=['codigo_municipio',
                                'co_tipo_estabelecimento_1',
                                'co_tipo_estabelecimento_2',
                                'co_tipo_estabelecimento_3',
                                'co_tipo_estabelecimento_6',
                                'co_tipo_estabelecimento_7',
                                'co_tipo_estabelecimento_8',
                                'co_tipo_estabelecimento_9',
                                'co_tipo_estabelecimento_10',
                                'co_tipo_estabelecimento_12',
                                'co_tipo_estabelecimento_13',
                                'co_tipo_estabelecimento_14',
                                'co_tipo_estabelecimento_15',
                                'co_tipo_estabelecimento_16',
                                'co_tipo_estabelecimento_17',
                                'co_tipo_estabelecimento_18',
                                'co_tipo_estabelecimento_19',
                                'co_tipo_estabelecimento_25',
                                'tp_unidade_2',
                                'tp_unidade_4',
                                'tp_unidade_5',
                                'tp_unidade_7',
                                'tp_unidade_15',
                                'tp_unidade_20',
                                'tp_unidade_21',
                                'tp_unidade_36',
                                'tp_unidade_39',
                                'tp_unidade_43',
                                'tp_unidade_62',
                                'tp_unidade_68',
                                'tp_unidade_69',
                                'tp_unidade_70',
                                'tp_unidade_73',
                                'tp_unidade_84',
                                'co_clientela_-1',
                                'co_clientela_1',
                                'co_clientela_2',
                                'co_clientela_3',
                                'co_turno_atendimento_1',
                                'co_turno_atendimento_2',
                                'co_turno_atendimento_3',
                                'co_turno_atendimento_4',
                                'co_turno_atendimento_5',
                                'co_turno_atendimento_6',
                                'co_turno_atendimento_7'], 
                                inplace=True)

In [34]:
df_cnes_iris_pre

Unnamed: 0,cnes,codigo_procedimento,201001,201002,201003,201004,201005,201006,201007,201008,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,2679477,303010037,0.00000515,0.00001159,0.00002102,0.00000829,0.00001252,0.00001069,0.00000840,0.00002120,...,0.00000369,0.00002012,0.00001499,0.00005110,0.00014303,0.00000708,0.00005552,0.00022169,0.00000179,0.00000865
1,2679477,303010061,0.00014938,0.00014036,0.00023992,0.00012294,0.00009265,0.00015140,0.00007702,0.00049069,...,0.00001477,0.00002104,0.00001748,0.00006387,0.00009565,0.00009560,0.00000529,0.00004007,0.00014597,0.00004002
2,2679477,303010193,0.00001545,0.00002189,0.00000701,0.00000967,0.00001127,0.00001069,0.00000840,0.00005452,...,0.00001846,0.00001463,0.00000999,0.00006934,0.00027968,0.00007701,0.00010576,0.00004096,0.00019702,0.00001839
3,2679477,303060107,0.00001159,0.00001417,0.00006480,0.00000553,0.00002379,0.00005878,0.00002801,0.00007269,...,0.00001292,0.00001463,0.00001998,0.00001004,0.00008290,0.00002390,0.00004407,0.00000712,0.00003314,0.00002163
4,2679477,303070064,0.00001288,0.00002318,0.00001751,0.00001658,0.00001002,0.00002137,0.00000280,0.00008178,...,0.00000923,0.00000732,0.00001249,0.00000274,0.00003006,0.00022838,0.00000353,0.00001514,0.00001075,0.00001514
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,3077098,70103013,0.00002060,0.00010817,0.00011383,0.00009945,0.00011393,0.00013537,0.00010082,0.00017568,...,0.00005816,0.00005762,0.00003871,0.00005748,0.00005739,0.00005577,0.00005552,0.00005609,0.00001164,0.00004110
55601,3077098,70103014,0.00001803,0.00004636,0.00016112,0.00004144,0.00005258,0.00006056,0.00005601,0.00016962,...,0.00002492,0.00002470,0.00003372,0.00002464,0.00002460,0.00002390,0.00002380,0.00002404,0.00000537,0.00002055
55602,3206874,50301001,0.00009916,0.00007984,0.00013485,0.00011050,0.00009640,0.00013537,0.00008402,0.00018780,...,0.00005723,0.00007500,0.00008741,0.00009033,0.00004828,0.00005488,0.00002556,0.00005876,0.00003582,0.00008329
55603,3206874,50401002,0.00019831,0.00016612,0.00029771,0.00027350,0.00023788,0.00027608,0.00019184,0.00039376,...,0.00014586,0.00014177,0.00020605,0.00020712,0.00010841,0.00012570,0.00003702,0.00013355,0.00007881,0.00016658


In [35]:
df_X1 = df_cnes_iris_pre

In [36]:
df_pop_cnes_iris_pre = pd.read_pickle('./data/dfpo1/df_proc_cnes_milhab.pkl')

In [37]:
df_pop_cnes_iris_pre

Unnamed: 0,codigo_municipio,municipio,uf,cnes,codigo_procedimento,procedimento,201001,201002,201003,201004,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,110001,ALTA FLORESTA D'OESTE,RO,2679477,303010037,TRATAMENTO DE OUTRAS DOENÇAS BACTERIANAS,0.16398819,0.36897343,0.49196458,0.24598229,...,0.17765145,0.97708296,0.53295434,2.48712027,6.97281933,0.35530290,2.79801030,11.05880263,0.08882572,0.35530290
1,110001,ALTA FLORESTA D'OESTE,RO,2679477,303010061,TRATAMENTO DE DOENÇAS INFECCIOSAS INTESTINAIS,4.75565759,4.46867826,5.61659561,3.64873729,...,0.71060579,1.02149583,0.62178007,3.10890034,4.66335051,4.79658909,0.26647717,1.99857879,7.23929650,1.64327589
2,110001,ALTA FLORESTA D'OESTE,RO,2679477,303010193,TRATAMENTO DE OUTRAS DOENÇAS CAUSADAS POR VÍRU...,0.49196458,0.69694982,0.16398819,0.28697934,...,0.88825724,0.71060579,0.35530290,3.37537751,13.63474862,3.86391899,5.32954344,2.04299165,9.77082963,0.75501865
3,110001,ALTA FLORESTA D'OESTE,RO,2679477,303060107,TRATAMENTO DE CRISE HIPERTENSIVA,0.36897343,0.45096753,1.51689078,0.16398819,...,0.62178007,0.71060579,0.71060579,0.48854148,4.04157044,1.19914727,2.22064310,0.35530290,1.64327589,0.88825724
4,110001,ALTA FLORESTA D'OESTE,RO,2679477,303070064,TRATAMENTO DE DOENCAS DO ESOFAGO ESTOMAGO E DU...,0.40997048,0.73794687,0.40997048,0.49196458,...,0.44412862,0.35530290,0.44412862,0.13323859,1.46562444,11.45851839,0.17765145,0.75501865,0.53295434,0.62178007
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,530010,BRASILIA,DF,3077098,70103013,APARELHO DE AMPLIFICAÇÃO SONORA INDIVIDUAL (AA...,0.00622529,0.03268279,0.02529025,0.02801382,...,0.02035985,0.02035985,0.01001834,0.02035985,0.02035985,0.02035985,0.02035985,0.02035985,0.00420124,0.01228055
55601,530010,BRASILIA,DF,3077098,70103014,APARELHO DE AMPLIFICAÇÃO SONORA INDIVIDUAL (AA...,0.00544713,0.01400691,0.03579544,0.01167243,...,0.00872565,0.00872565,0.00872565,0.00872565,0.00872565,0.00872565,0.00872565,0.00872565,0.00193903,0.00614027
55602,530010,BRASILIA,DF,3206874,50301001,AÇÕES RELACIONADAS A DOAÇÃO DE ÓRGÃOS E TECIDO...,0.02995922,0.02412301,0.02995922,0.03112647,...,0.02003668,0.02650013,0.02262206,0.03199405,0.01712813,0.02003668,0.00937200,0.02132937,0.01292689,0.02488426
55603,530010,BRASILIA,DF,3206874,50401002,PROCESSAMENTO DE CORNEA / ESCLERA,0.05991845,0.05019143,0.06614374,0.07703801,...,0.05106122,0.05009170,0.05332342,0.07336010,0.03845750,0.04589046,0.01357323,0.04847584,0.02843916,0.04976853


In [38]:
df_pop_cnes_iris_pre.drop(columns=['codigo_municipio',
                                'municipio',
                                'uf', 
                                'procedimento'], 
                                inplace=True)

In [39]:
df_pop_cnes_iris_pre

Unnamed: 0,cnes,codigo_procedimento,201001,201002,201003,201004,201005,201006,201007,201008,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,2679477,303010037,0.16398819,0.36897343,0.49196458,0.24598229,0.40997048,0.24598229,0.24598229,0.28697934,...,0.17765145,0.97708296,0.53295434,2.48712027,6.97281933,0.35530290,2.79801030,11.05880263,0.08882572,0.35530290
1,2679477,303010061,4.75565759,4.46867826,5.61659561,3.64873729,3.03378157,3.48474910,2.25483765,6.64152181,...,0.71060579,1.02149583,0.62178007,3.10890034,4.66335051,4.79658909,0.26647717,1.99857879,7.23929650,1.64327589
2,2679477,303010193,0.49196458,0.69694982,0.16398819,0.28697934,0.36897343,0.24598229,0.24598229,0.73794687,...,0.88825724,0.71060579,0.35530290,3.37537751,13.63474862,3.86391899,5.32954344,2.04299165,9.77082963,0.75501865
3,2679477,303060107,0.36897343,0.45096753,1.51689078,0.16398819,0.77894392,1.35290259,0.81994096,0.98392916,...,0.62178007,0.71060579,0.71060579,0.48854148,4.04157044,1.19914727,2.22064310,0.35530290,1.64327589,0.88825724
4,2679477,303070064,0.40997048,0.73794687,0.40997048,0.49196458,0.32797639,0.49196458,0.08199410,1.10692030,...,0.44412862,0.35530290,0.44412862,0.13323859,1.46562444,11.45851839,0.17765145,0.75501865,0.53295434,0.62178007
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,3077098,70103013,0.00622529,0.03268279,0.02529025,0.02801382,0.03540636,0.02957014,0.02801382,0.02256669,...,0.02035985,0.02035985,0.01001834,0.02035985,0.02035985,0.02035985,0.02035985,0.02035985,0.00420124,0.01228055
55601,3077098,70103014,0.00544713,0.01400691,0.03579544,0.01167243,0.01634140,0.01322875,0.01556323,0.02178853,...,0.00872565,0.00872565,0.00872565,0.00872565,0.00872565,0.00872565,0.00872565,0.00872565,0.00193903,0.00614027
55602,3206874,50301001,0.02995922,0.02412301,0.02995922,0.03112647,0.02995922,0.02957014,0.02334485,0.02412301,...,0.02003668,0.02650013,0.02262206,0.03199405,0.01712813,0.02003668,0.00937200,0.02132937,0.01292689,0.02488426
55603,3206874,50401002,0.05991845,0.05019143,0.06614374,0.07703801,0.07392536,0.06030753,0.05330407,0.05058051,...,0.05106122,0.05009170,0.05332342,0.07336010,0.03845750,0.04589046,0.01357323,0.04847584,0.02843916,0.04976853


In [40]:
df_X2 = df_pop_cnes_iris_pre

In [41]:
df_cnes_freq.drop(columns=['codigo_municipio',
                                'municipio',
                                'uf', 
                                'procedimento'], 
                                inplace=True)

df_cnes_freq

Unnamed: 0,cnes,codigo_procedimento,201001,201002,201003,201004,201005,201006,201007,201008,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,2679477,303010037,4.00000000,9.00000000,12.00000000,6.00000000,10.00000000,6.00000000,6.00000000,7.00000000,...,4.00000000,22.00000000,12.00000000,56.00000000,157.00000000,8.00000000,63.00000000,249.00000000,2.00000000,8.00000000
1,2679477,303010061,116.00000000,109.00000000,137.00000000,89.00000000,74.00000000,85.00000000,55.00000000,162.00000000,...,16.00000000,23.00000000,14.00000000,70.00000000,105.00000000,108.00000000,6.00000000,45.00000000,163.00000000,37.00000000
2,2679477,303010193,12.00000000,17.00000000,4.00000000,7.00000000,9.00000000,6.00000000,6.00000000,18.00000000,...,20.00000000,16.00000000,8.00000000,76.00000000,307.00000000,87.00000000,120.00000000,46.00000000,220.00000000,17.00000000
3,2679477,303060107,9.00000000,11.00000000,37.00000000,4.00000000,19.00000000,33.00000000,20.00000000,24.00000000,...,14.00000000,16.00000000,16.00000000,11.00000000,91.00000000,27.00000000,50.00000000,8.00000000,37.00000000,20.00000000
4,2679477,303070064,10.00000000,18.00000000,10.00000000,12.00000000,8.00000000,12.00000000,2.00000000,27.00000000,...,10.00000000,8.00000000,10.00000000,3.00000000,33.00000000,258.00000000,4.00000000,17.00000000,12.00000000,14.00000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,3077098,70103013,16.00000000,84.00000000,65.00000000,72.00000000,91.00000000,76.00000000,72.00000000,58.00000000,...,63.00000000,63.00000000,31.00000000,63.00000000,63.00000000,63.00000000,63.00000000,63.00000000,13.00000000,38.00000000
55601,3077098,70103014,14.00000000,36.00000000,92.00000000,30.00000000,42.00000000,34.00000000,40.00000000,56.00000000,...,27.00000000,27.00000000,27.00000000,27.00000000,27.00000000,27.00000000,27.00000000,27.00000000,6.00000000,19.00000000
55602,3206874,50301001,77.00000000,62.00000000,77.00000000,80.00000000,77.00000000,76.00000000,60.00000000,62.00000000,...,62.00000000,82.00000000,70.00000000,99.00000000,53.00000000,62.00000000,29.00000000,66.00000000,40.00000000,77.00000000
55603,3206874,50401002,154.00000000,129.00000000,170.00000000,198.00000000,190.00000000,155.00000000,137.00000000,130.00000000,...,158.00000000,155.00000000,165.00000000,227.00000000,119.00000000,142.00000000,42.00000000,150.00000000,88.00000000,154.00000000


In [43]:
df_cnes_freq_copy = df_cnes_freq.copy()

# Assegurando que todos os nomes de colunas são strings
df_cnes_freq_copy.columns = df_cnes_freq_copy.columns.astype(str)

for year in range(2010, 2023):
    # Lista com os nomes das colunas dos meses do ano atual
    months = [f"{year}{month:02d}" for month in range(1, 13)]
    
    # Interseção dos meses com as colunas do DataFrame
    months_in_df = list(set(months).intersection(set(df_cnes_freq_copy.columns)))
    
    # Calcule a mediana para o ano atual e adicione ao DataFrame
    median_for_year = df_cnes_freq_copy[months_in_df].median(axis=1)
    df_cnes_freq_copy[str(year) + '_median'] = median_for_year

In [44]:
df_cnes_freq_copy

Unnamed: 0,cnes,codigo_procedimento,201001,201002,201003,201004,201005,201006,201007,201008,...,2013_median,2014_median,2015_median,2016_median,2017_median,2018_median,2019_median,2020_median,2021_median,2022_median
0,2679477,303010037,4.00000000,9.00000000,12.00000000,6.00000000,10.00000000,6.00000000,6.00000000,7.00000000,...,8.00000000,14.00000000,9.50000000,9.50000000,11.00000000,8.00000000,6.00000000,8.00000000,8.00000000,11.00000000
1,2679477,303010061,116.00000000,109.00000000,137.00000000,89.00000000,74.00000000,85.00000000,55.00000000,162.00000000,...,42.00000000,55.50000000,41.50000000,29.50000000,27.50000000,23.00000000,18.00000000,7.00000000,11.00000000,30.00000000
2,2679477,303010193,12.00000000,17.00000000,4.00000000,7.00000000,9.00000000,6.00000000,6.00000000,18.00000000,...,18.50000000,30.00000000,36.00000000,26.00000000,15.00000000,13.50000000,13.00000000,11.50000000,11.00000000,33.00000000
3,2679477,303060107,9.00000000,11.00000000,37.00000000,4.00000000,19.00000000,33.00000000,20.00000000,24.00000000,...,26.00000000,25.00000000,26.00000000,18.00000000,26.50000000,24.00000000,22.50000000,12.00000000,12.00000000,17.00000000
4,2679477,303070064,10.00000000,18.00000000,10.00000000,12.00000000,8.00000000,12.00000000,2.00000000,27.00000000,...,14.00000000,18.00000000,20.00000000,17.00000000,15.50000000,13.00000000,8.00000000,6.00000000,6.00000000,11.00000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,3077098,70103013,16.00000000,84.00000000,65.00000000,72.00000000,91.00000000,76.00000000,72.00000000,58.00000000,...,63.50000000,68.00000000,80.00000000,58.00000000,60.50000000,63.00000000,63.00000000,63.00000000,63.00000000,63.00000000
55601,3077098,70103014,14.00000000,36.00000000,92.00000000,30.00000000,42.00000000,34.00000000,40.00000000,56.00000000,...,48.00000000,24.00000000,37.50000000,25.00000000,26.50000000,27.00000000,27.00000000,27.00000000,27.00000000,27.00000000
55602,3206874,50301001,77.00000000,62.00000000,77.00000000,80.00000000,77.00000000,76.00000000,60.00000000,62.00000000,...,70.50000000,92.00000000,100.50000000,87.00000000,75.00000000,81.00000000,85.50000000,38.50000000,72.00000000,68.00000000
55603,3206874,50401002,154.00000000,129.00000000,170.00000000,198.00000000,190.00000000,155.00000000,137.00000000,130.00000000,...,144.50000000,183.00000000,221.50000000,199.50000000,154.00000000,154.50000000,168.50000000,96.50000000,148.00000000,154.50000000


In [45]:
for year in range(2010, 2023):
    # Lista com os nomes das colunas dos meses do ano atual
    months = [f"{year}{month:02d}" for month in range(1, 13)]
    
    # Interseção dos meses com as colunas do DataFrame
    months_in_df = list(set(months).intersection(set(df_cnes_freq_copy.columns)))
    
    # Calcule a diferença entre os dados originais e a mediana
    for month in months_in_df:
        df_cnes_freq_copy[month] -= df_cnes_freq_copy[str(year) + '_median']


In [46]:
df_cnes_freq_copy

Unnamed: 0,cnes,codigo_procedimento,201001,201002,201003,201004,201005,201006,201007,201008,...,2013_median,2014_median,2015_median,2016_median,2017_median,2018_median,2019_median,2020_median,2021_median,2022_median
0,2679477,303010037,-4.00000000,1.00000000,4.00000000,-2.00000000,2.00000000,-2.00000000,-2.00000000,-1.00000000,...,8.00000000,14.00000000,9.50000000,9.50000000,11.00000000,8.00000000,6.00000000,8.00000000,8.00000000,11.00000000
1,2679477,303010061,5.00000000,-2.00000000,26.00000000,-22.00000000,-37.00000000,-26.00000000,-56.00000000,51.00000000,...,42.00000000,55.50000000,41.50000000,29.50000000,27.50000000,23.00000000,18.00000000,7.00000000,11.00000000,30.00000000
2,2679477,303010193,2.50000000,7.50000000,-5.50000000,-2.50000000,-0.50000000,-3.50000000,-3.50000000,8.50000000,...,18.50000000,30.00000000,36.00000000,26.00000000,15.00000000,13.50000000,13.00000000,11.50000000,11.00000000,33.00000000
3,2679477,303060107,-10.50000000,-8.50000000,17.50000000,-15.50000000,-0.50000000,13.50000000,0.50000000,4.50000000,...,26.00000000,25.00000000,26.00000000,18.00000000,26.50000000,24.00000000,22.50000000,12.00000000,12.00000000,17.00000000
4,2679477,303070064,-4.00000000,4.00000000,-4.00000000,-2.00000000,-6.00000000,-2.00000000,-12.00000000,13.00000000,...,14.00000000,18.00000000,20.00000000,17.00000000,15.50000000,13.00000000,8.00000000,6.00000000,6.00000000,11.00000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,3077098,70103013,-47.00000000,21.00000000,2.00000000,9.00000000,28.00000000,13.00000000,9.00000000,-5.00000000,...,63.50000000,68.00000000,80.00000000,58.00000000,60.50000000,63.00000000,63.00000000,63.00000000,63.00000000,63.00000000
55601,3077098,70103014,-24.50000000,-2.50000000,53.50000000,-8.50000000,3.50000000,-4.50000000,1.50000000,17.50000000,...,48.00000000,24.00000000,37.50000000,25.00000000,26.50000000,27.00000000,27.00000000,27.00000000,27.00000000,27.00000000
55602,3206874,50301001,0.00000000,-15.00000000,0.00000000,3.00000000,0.00000000,-1.00000000,-17.00000000,-15.00000000,...,70.50000000,92.00000000,100.50000000,87.00000000,75.00000000,81.00000000,85.50000000,38.50000000,72.00000000,68.00000000
55603,3206874,50401002,0.00000000,-25.00000000,16.00000000,44.00000000,36.00000000,1.00000000,-17.00000000,-24.00000000,...,144.50000000,183.00000000,221.50000000,199.50000000,154.00000000,154.50000000,168.50000000,96.50000000,148.00000000,154.50000000


In [47]:
df_X3 = df_cnes_freq_copy[df_cnes_freq_copy.columns.drop(list(df_cnes_freq_copy.filter(regex='_median')))]
df_X3

Unnamed: 0,cnes,codigo_procedimento,201001,201002,201003,201004,201005,201006,201007,201008,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,2679477,303010037,-4.00000000,1.00000000,4.00000000,-2.00000000,2.00000000,-2.00000000,-2.00000000,-1.00000000,...,-7.00000000,11.00000000,1.00000000,45.00000000,146.00000000,-3.00000000,52.00000000,238.00000000,-9.00000000,-3.00000000
1,2679477,303010061,5.00000000,-2.00000000,26.00000000,-22.00000000,-37.00000000,-26.00000000,-56.00000000,51.00000000,...,-14.00000000,-7.00000000,-16.00000000,40.00000000,75.00000000,78.00000000,-24.00000000,15.00000000,133.00000000,7.00000000
2,2679477,303010193,2.50000000,7.50000000,-5.50000000,-2.50000000,-0.50000000,-3.50000000,-3.50000000,8.50000000,...,-13.00000000,-17.00000000,-25.00000000,43.00000000,274.00000000,54.00000000,87.00000000,13.00000000,187.00000000,-16.00000000
3,2679477,303060107,-10.50000000,-8.50000000,17.50000000,-15.50000000,-0.50000000,13.50000000,0.50000000,4.50000000,...,-3.00000000,-1.00000000,-1.00000000,-6.00000000,74.00000000,10.00000000,33.00000000,-9.00000000,20.00000000,3.00000000
4,2679477,303070064,-4.00000000,4.00000000,-4.00000000,-2.00000000,-6.00000000,-2.00000000,-12.00000000,13.00000000,...,-1.00000000,-3.00000000,-1.00000000,-8.00000000,22.00000000,247.00000000,-7.00000000,6.00000000,1.00000000,3.00000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,3077098,70103013,-47.00000000,21.00000000,2.00000000,9.00000000,28.00000000,13.00000000,9.00000000,-5.00000000,...,0.00000000,0.00000000,-32.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,-50.00000000,-25.00000000
55601,3077098,70103014,-24.50000000,-2.50000000,53.50000000,-8.50000000,3.50000000,-4.50000000,1.50000000,17.50000000,...,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,-21.00000000,-8.00000000
55602,3206874,50301001,0.00000000,-15.00000000,0.00000000,3.00000000,0.00000000,-1.00000000,-17.00000000,-15.00000000,...,-6.00000000,14.00000000,2.00000000,31.00000000,-15.00000000,-6.00000000,-39.00000000,-2.00000000,-28.00000000,9.00000000
55603,3206874,50401002,0.00000000,-25.00000000,16.00000000,44.00000000,36.00000000,1.00000000,-17.00000000,-24.00000000,...,3.50000000,0.50000000,10.50000000,72.50000000,-35.50000000,-12.50000000,-112.50000000,-4.50000000,-66.50000000,-0.50000000


In [48]:
df_X1

Unnamed: 0,cnes,codigo_procedimento,201001,201002,201003,201004,201005,201006,201007,201008,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,2679477,303010037,0.00000515,0.00001159,0.00002102,0.00000829,0.00001252,0.00001069,0.00000840,0.00002120,...,0.00000369,0.00002012,0.00001499,0.00005110,0.00014303,0.00000708,0.00005552,0.00022169,0.00000179,0.00000865
1,2679477,303010061,0.00014938,0.00014036,0.00023992,0.00012294,0.00009265,0.00015140,0.00007702,0.00049069,...,0.00001477,0.00002104,0.00001748,0.00006387,0.00009565,0.00009560,0.00000529,0.00004007,0.00014597,0.00004002
2,2679477,303010193,0.00001545,0.00002189,0.00000701,0.00000967,0.00001127,0.00001069,0.00000840,0.00005452,...,0.00001846,0.00001463,0.00000999,0.00006934,0.00027968,0.00007701,0.00010576,0.00004096,0.00019702,0.00001839
3,2679477,303060107,0.00001159,0.00001417,0.00006480,0.00000553,0.00002379,0.00005878,0.00002801,0.00007269,...,0.00001292,0.00001463,0.00001998,0.00001004,0.00008290,0.00002390,0.00004407,0.00000712,0.00003314,0.00002163
4,2679477,303070064,0.00001288,0.00002318,0.00001751,0.00001658,0.00001002,0.00002137,0.00000280,0.00008178,...,0.00000923,0.00000732,0.00001249,0.00000274,0.00003006,0.00022838,0.00000353,0.00001514,0.00001075,0.00001514
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,3077098,70103013,0.00002060,0.00010817,0.00011383,0.00009945,0.00011393,0.00013537,0.00010082,0.00017568,...,0.00005816,0.00005762,0.00003871,0.00005748,0.00005739,0.00005577,0.00005552,0.00005609,0.00001164,0.00004110
55601,3077098,70103014,0.00001803,0.00004636,0.00016112,0.00004144,0.00005258,0.00006056,0.00005601,0.00016962,...,0.00002492,0.00002470,0.00003372,0.00002464,0.00002460,0.00002390,0.00002380,0.00002404,0.00000537,0.00002055
55602,3206874,50301001,0.00009916,0.00007984,0.00013485,0.00011050,0.00009640,0.00013537,0.00008402,0.00018780,...,0.00005723,0.00007500,0.00008741,0.00009033,0.00004828,0.00005488,0.00002556,0.00005876,0.00003582,0.00008329
55603,3206874,50401002,0.00019831,0.00016612,0.00029771,0.00027350,0.00023788,0.00027608,0.00019184,0.00039376,...,0.00014586,0.00014177,0.00020605,0.00020712,0.00010841,0.00012570,0.00003702,0.00013355,0.00007881,0.00016658


In [49]:
df_X2

Unnamed: 0,cnes,codigo_procedimento,201001,201002,201003,201004,201005,201006,201007,201008,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,2679477,303010037,0.16398819,0.36897343,0.49196458,0.24598229,0.40997048,0.24598229,0.24598229,0.28697934,...,0.17765145,0.97708296,0.53295434,2.48712027,6.97281933,0.35530290,2.79801030,11.05880263,0.08882572,0.35530290
1,2679477,303010061,4.75565759,4.46867826,5.61659561,3.64873729,3.03378157,3.48474910,2.25483765,6.64152181,...,0.71060579,1.02149583,0.62178007,3.10890034,4.66335051,4.79658909,0.26647717,1.99857879,7.23929650,1.64327589
2,2679477,303010193,0.49196458,0.69694982,0.16398819,0.28697934,0.36897343,0.24598229,0.24598229,0.73794687,...,0.88825724,0.71060579,0.35530290,3.37537751,13.63474862,3.86391899,5.32954344,2.04299165,9.77082963,0.75501865
3,2679477,303060107,0.36897343,0.45096753,1.51689078,0.16398819,0.77894392,1.35290259,0.81994096,0.98392916,...,0.62178007,0.71060579,0.71060579,0.48854148,4.04157044,1.19914727,2.22064310,0.35530290,1.64327589,0.88825724
4,2679477,303070064,0.40997048,0.73794687,0.40997048,0.49196458,0.32797639,0.49196458,0.08199410,1.10692030,...,0.44412862,0.35530290,0.44412862,0.13323859,1.46562444,11.45851839,0.17765145,0.75501865,0.53295434,0.62178007
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,3077098,70103013,0.00622529,0.03268279,0.02529025,0.02801382,0.03540636,0.02957014,0.02801382,0.02256669,...,0.02035985,0.02035985,0.01001834,0.02035985,0.02035985,0.02035985,0.02035985,0.02035985,0.00420124,0.01228055
55601,3077098,70103014,0.00544713,0.01400691,0.03579544,0.01167243,0.01634140,0.01322875,0.01556323,0.02178853,...,0.00872565,0.00872565,0.00872565,0.00872565,0.00872565,0.00872565,0.00872565,0.00872565,0.00193903,0.00614027
55602,3206874,50301001,0.02995922,0.02412301,0.02995922,0.03112647,0.02995922,0.02957014,0.02334485,0.02412301,...,0.02003668,0.02650013,0.02262206,0.03199405,0.01712813,0.02003668,0.00937200,0.02132937,0.01292689,0.02488426
55603,3206874,50401002,0.05991845,0.05019143,0.06614374,0.07703801,0.07392536,0.06030753,0.05330407,0.05058051,...,0.05106122,0.05009170,0.05332342,0.07336010,0.03845750,0.04589046,0.01357323,0.04847584,0.02843916,0.04976853


In [50]:
# Cria a lista das colunas que devem ser normalizadas (as séries temporais)
cols_to_normalize_X2 = [col for col in df_X2.columns if col not in ['cnes', 'codigo_procedimento']]

# Normaliza as colunas relevantes em df_X2
df_X2[cols_to_normalize_X2] = scaler.fit_transform(df_X2[cols_to_normalize_X2])

# Cria a lista das colunas que devem ser normalizadas para df_X3
cols_to_normalize_X3 = [col for col in df_X3.columns if col not in ['cnes', 'codigo_procedimento']]

# Agora fazemos o mesmo para df_X3
df_X3[cols_to_normalize_X3] = scaler.fit_transform(df_X3[cols_to_normalize_X3])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_X3[cols_to_normalize_X3] = scaler.fit_transform(df_X3[cols_to_normalize_X3])


In [51]:
df_X1

Unnamed: 0,cnes,codigo_procedimento,201001,201002,201003,201004,201005,201006,201007,201008,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,2679477,303010037,0.00000515,0.00001159,0.00002102,0.00000829,0.00001252,0.00001069,0.00000840,0.00002120,...,0.00000369,0.00002012,0.00001499,0.00005110,0.00014303,0.00000708,0.00005552,0.00022169,0.00000179,0.00000865
1,2679477,303010061,0.00014938,0.00014036,0.00023992,0.00012294,0.00009265,0.00015140,0.00007702,0.00049069,...,0.00001477,0.00002104,0.00001748,0.00006387,0.00009565,0.00009560,0.00000529,0.00004007,0.00014597,0.00004002
2,2679477,303010193,0.00001545,0.00002189,0.00000701,0.00000967,0.00001127,0.00001069,0.00000840,0.00005452,...,0.00001846,0.00001463,0.00000999,0.00006934,0.00027968,0.00007701,0.00010576,0.00004096,0.00019702,0.00001839
3,2679477,303060107,0.00001159,0.00001417,0.00006480,0.00000553,0.00002379,0.00005878,0.00002801,0.00007269,...,0.00001292,0.00001463,0.00001998,0.00001004,0.00008290,0.00002390,0.00004407,0.00000712,0.00003314,0.00002163
4,2679477,303070064,0.00001288,0.00002318,0.00001751,0.00001658,0.00001002,0.00002137,0.00000280,0.00008178,...,0.00000923,0.00000732,0.00001249,0.00000274,0.00003006,0.00022838,0.00000353,0.00001514,0.00001075,0.00001514
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,3077098,70103013,0.00002060,0.00010817,0.00011383,0.00009945,0.00011393,0.00013537,0.00010082,0.00017568,...,0.00005816,0.00005762,0.00003871,0.00005748,0.00005739,0.00005577,0.00005552,0.00005609,0.00001164,0.00004110
55601,3077098,70103014,0.00001803,0.00004636,0.00016112,0.00004144,0.00005258,0.00006056,0.00005601,0.00016962,...,0.00002492,0.00002470,0.00003372,0.00002464,0.00002460,0.00002390,0.00002380,0.00002404,0.00000537,0.00002055
55602,3206874,50301001,0.00009916,0.00007984,0.00013485,0.00011050,0.00009640,0.00013537,0.00008402,0.00018780,...,0.00005723,0.00007500,0.00008741,0.00009033,0.00004828,0.00005488,0.00002556,0.00005876,0.00003582,0.00008329
55603,3206874,50401002,0.00019831,0.00016612,0.00029771,0.00027350,0.00023788,0.00027608,0.00019184,0.00039376,...,0.00014586,0.00014177,0.00020605,0.00020712,0.00010841,0.00012570,0.00003702,0.00013355,0.00007881,0.00016658


In [52]:
df_X2

Unnamed: 0,cnes,codigo_procedimento,201001,201002,201003,201004,201005,201006,201007,201008,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,2679477,303010037,0.00008895,0.00020015,0.00030346,0.00016080,0.00026313,0.00015918,0.00015649,0.00018257,...,0.00008471,0.00046161,0.00034917,0.00117215,0.00328108,0.00016245,0.00127375,0.00508572,0.00004109,0.00016808
1,2679477,303010061,0.00257968,0.00242401,0.00346452,0.00238513,0.00194714,0.00225500,0.00143450,0.00422527,...,0.00033883,0.00048259,0.00040736,0.00146519,0.00219435,0.00219314,0.00012131,0.00091911,0.00334870,0.00077737
2,2679477,303010193,0.00026686,0.00037806,0.00010115,0.00018759,0.00023681,0.00015918,0.00015649,0.00046947,...,0.00042354,0.00033572,0.00023278,0.00159078,0.00641587,0.00176670,0.00242619,0.00093953,0.00451972,0.00035717
3,2679477,303060107,0.00020015,0.00024462,0.00093567,0.00010720,0.00049994,0.00087547,0.00052164,0.00062597,...,0.00029648,0.00033572,0.00046556,0.00023024,0.00190177,0.00054829,0.00101091,0.00016340,0.00076013,0.00042020
4,2679477,303070064,0.00022239,0.00040029,0.00025288,0.00032159,0.00021050,0.00031835,0.00005216,0.00070421,...,0.00021177,0.00016786,0.00029097,0.00006279,0.00068965,0.00523917,0.00008087,0.00034722,0.00024653,0.00029414
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,3077098,70103013,0.00000338,0.00001773,0.00001560,0.00001831,0.00002272,0.00001914,0.00001782,0.00001436,...,0.00000971,0.00000962,0.00000656,0.00000960,0.00000958,0.00000931,0.00000927,0.00000936,0.00000194,0.00000581
55601,3077098,70103014,0.00000295,0.00000760,0.00002208,0.00000763,0.00001049,0.00000856,0.00000990,0.00001386,...,0.00000416,0.00000412,0.00000572,0.00000411,0.00000411,0.00000399,0.00000397,0.00000401,0.00000090,0.00000290
55602,3206874,50301001,0.00001625,0.00001309,0.00001848,0.00002035,0.00001923,0.00001914,0.00001485,0.00001535,...,0.00000955,0.00001252,0.00001482,0.00001508,0.00000806,0.00000916,0.00000427,0.00000981,0.00000598,0.00001177
55603,3206874,50401002,0.00003250,0.00002723,0.00004080,0.00005036,0.00004745,0.00003903,0.00003391,0.00003218,...,0.00002435,0.00002367,0.00003494,0.00003457,0.00001810,0.00002098,0.00000618,0.00002229,0.00001316,0.00002354


In [53]:
df_X3

Unnamed: 0,cnes,codigo_procedimento,201001,201002,201003,201004,201005,201006,201007,201008,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
0,2679477,303010037,0.17727303,0.17728062,0.71482831,0.29652652,0.46022536,0.22063579,0.09997533,0.78161983,...,0.77718480,0.64810568,0.90126325,0.45411671,0.75422231,0.63169326,0.64855384,0.70178093,0.85807632,0.78327030
1,2679477,303010061,0.17728669,0.17727607,0.71489114,0.29650284,0.46019389,0.22059245,0.09987404,0.78178856,...,0.77715145,0.64800669,0.90124723,0.45409512,0.75389165,0.63221841,0.64821115,0.70058018,0.85845030,0.78331412
2,2679477,303010193,0.17728290,0.17729048,0.71480118,0.29652592,0.46022334,0.22063308,0.09997252,0.78165065,...,0.77715621,0.64795169,0.90123875,0.45410807,0.75481843,0.63206281,0.64871165,0.70056941,0.85859252,0.78321332
3,2679477,303060107,0.17726317,0.17726620,0.71486686,0.29651053,0.46022334,0.22066377,0.09998002,0.78163767,...,0.77720386,0.64803968,0.90126137,0.45389655,0.75388699,0.63177754,0.64846817,0.70045095,0.85815269,0.78329659
4,2679477,303070064,0.17727303,0.17728517,0.71480546,0.29652652,0.46021890,0.22063579,0.09995658,0.78166526,...,0.77721339,0.64802869,0.90126137,0.45388791,0.75364482,0.63331410,0.64828780,0.70053172,0.85810265,0.78329659
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55600,3077098,70103013,0.17720777,0.17731097,0.71482260,0.29653954,0.46024634,0.22066287,0.09999597,0.78160685,...,0.77721816,0.64804518,0.90123215,0.45392245,0.75354236,0.63171271,0.64831937,0.70049941,0.85796833,0.78317388
55601,3077098,70103014,0.17724192,0.17727531,0.71496967,0.29651882,0.46022657,0.22063127,0.09998190,0.78167986,...,0.77721816,0.64804518,0.90126231,0.45392245,0.75354236,0.63171271,0.64831937,0.70049941,0.85804471,0.78324838
55602,3206874,50301001,0.17727910,0.17725634,0.71481689,0.29653244,0.46022374,0.22063759,0.09994720,0.78157440,...,0.77718957,0.64812218,0.90126419,0.45405627,0.75347251,0.63167381,0.64814351,0.70048865,0.85802628,0.78332289
55603,3206874,50401002,0.17727910,0.17724116,0.71486258,0.29658098,0.46025279,0.22064120,0.09994720,0.78154519,...,0.77723483,0.64804793,0.90127220,0.45423542,0.75337703,0.63163167,0.64781210,0.70047518,0.85792488,0.78328125


In [58]:
import xarray as xr

# Combinar os três dataframes ao longo de um novo eixo

# Primeiro, vamos setar 'cnes' e 'codigo_procedimento' como index para cada dataframe
# ao correr mais de uma vez as 3 linhas abaixo na segunda vez resulta em erro porque já são indices 
df_X1.set_index(['cnes', 'codigo_procedimento'], inplace=True)
df_X2.set_index(['cnes', 'codigo_procedimento'], inplace=True)
df_X3.set_index(['cnes', 'codigo_procedimento'], inplace=True)

# Converter os dataframes para xarray DataArray
da_X1 = xr.DataArray(df_X1)
da_X2 = xr.DataArray(df_X2)
da_X3 = xr.DataArray(df_X3)

# Concatener os três DataArrays ao longo de um novo eixo
X3D = xr.concat([da_X1, da_X2, da_X3], dim='measurement')

# Agora, 'X3D' é um DataArray 3D onde cada ponto na série temporal é um vetor de três elementos


In [60]:
df_X1

Unnamed: 0_level_0,Unnamed: 1_level_0,201001,201002,201003,201004,201005,201006,201007,201008,201009,201010,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
cnes,codigo_procedimento,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2679477,303010037,0.00000515,0.00001159,0.00002102,0.00000829,0.00001252,0.00001069,0.00000840,0.00002120,0.00001715,0.00007057,...,0.00000369,0.00002012,0.00001499,0.00005110,0.00014303,0.00000708,0.00005552,0.00022169,0.00000179,0.00000865
2679477,303010061,0.00014938,0.00014036,0.00023992,0.00012294,0.00009265,0.00015140,0.00007702,0.00049069,0.00026234,0.00015229,...,0.00001477,0.00002104,0.00001748,0.00006387,0.00009565,0.00009560,0.00000529,0.00004007,0.00014597,0.00004002
2679477,303010193,0.00001545,0.00002189,0.00000701,0.00000967,0.00001127,0.00001069,0.00000840,0.00005452,0.00001200,0.00002229,...,0.00001846,0.00001463,0.00000999,0.00006934,0.00027968,0.00007701,0.00010576,0.00004096,0.00019702,0.00001839
2679477,303060107,0.00001159,0.00001417,0.00006480,0.00000553,0.00002379,0.00005878,0.00002801,0.00007269,0.00003086,0.00002600,...,0.00001292,0.00001463,0.00001998,0.00001004,0.00008290,0.00002390,0.00004407,0.00000712,0.00003314,0.00002163
2679477,303070064,0.00001288,0.00002318,0.00001751,0.00001658,0.00001002,0.00002137,0.00000280,0.00008178,0.00003772,0.00003529,...,0.00000923,0.00000732,0.00001249,0.00000274,0.00003006,0.00022838,0.00000353,0.00001514,0.00001075,0.00001514
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3077098,70103013,0.00002060,0.00010817,0.00011383,0.00009945,0.00011393,0.00013537,0.00010082,0.00017568,0.00010459,0.00007429,...,0.00005816,0.00005762,0.00003871,0.00005748,0.00005739,0.00005577,0.00005552,0.00005609,0.00001164,0.00004110
3077098,70103014,0.00001803,0.00004636,0.00016112,0.00004144,0.00005258,0.00006056,0.00005601,0.00016962,0.00007887,0.00006872,...,0.00002492,0.00002470,0.00003372,0.00002464,0.00002460,0.00002390,0.00002380,0.00002404,0.00000537,0.00002055
3206874,50301001,0.00009916,0.00007984,0.00013485,0.00011050,0.00009640,0.00013537,0.00008402,0.00018780,0.00016975,0.00019686,...,0.00005723,0.00007500,0.00008741,0.00009033,0.00004828,0.00005488,0.00002556,0.00005876,0.00003582,0.00008329
3206874,50401002,0.00019831,0.00016612,0.00029771,0.00027350,0.00023788,0.00027608,0.00019184,0.00039376,0.00026577,0.00027858,...,0.00014586,0.00014177,0.00020605,0.00020712,0.00010841,0.00012570,0.00003702,0.00013355,0.00007881,0.00016658


In [64]:
df_X2


Unnamed: 0_level_0,Unnamed: 1_level_0,201001,201002,201003,201004,201005,201006,201007,201008,201009,201010,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
cnes,codigo_procedimento,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2679477,303010037,0.00008895,0.00020015,0.00030346,0.00016080,0.00026313,0.00015918,0.00015649,0.00018257,0.00026768,0.00099111,...,0.00008471,0.00046161,0.00034917,0.00117215,0.00328108,0.00016245,0.00127375,0.00508572,0.00004109,0.00016808
2679477,303010061,0.00257968,0.00242401,0.00346452,0.00238513,0.00194714,0.00225500,0.00143450,0.00422527,0.00409555,0.00213872,...,0.00033883,0.00048259,0.00040736,0.00146519,0.00219435,0.00219314,0.00012131,0.00091911,0.00334870,0.00077737
2679477,303010193,0.00026686,0.00037806,0.00010115,0.00018759,0.00023681,0.00015918,0.00015649,0.00046947,0.00018738,0.00031298,...,0.00042354,0.00033572,0.00023278,0.00159078,0.00641587,0.00176670,0.00242619,0.00093953,0.00451972,0.00035717
2679477,303060107,0.00020015,0.00024462,0.00093567,0.00010720,0.00049994,0.00087547,0.00052164,0.00062597,0.00048183,0.00036515,...,0.00029648,0.00033572,0.00046556,0.00023024,0.00190177,0.00054829,0.00101091,0.00016340,0.00076013,0.00042020
2679477,303070064,0.00022239,0.00040029,0.00025288,0.00032159,0.00021050,0.00031835,0.00005216,0.00070421,0.00058890,0.00049556,...,0.00021177,0.00016786,0.00029097,0.00006279,0.00068965,0.00523917,0.00008087,0.00034722,0.00024653,0.00029414
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3077098,70103013,0.00000338,0.00001773,0.00001560,0.00001831,0.00002272,0.00001914,0.00001782,0.00001436,0.00001550,0.00000990,...,0.00000971,0.00000962,0.00000656,0.00000960,0.00000958,0.00000931,0.00000927,0.00000936,0.00000194,0.00000581
3077098,70103014,0.00000295,0.00000760,0.00002208,0.00000763,0.00001049,0.00000856,0.00000990,0.00001386,0.00001169,0.00000916,...,0.00000416,0.00000412,0.00000572,0.00000411,0.00000411,0.00000399,0.00000397,0.00000401,0.00000090,0.00000290
3206874,50301001,0.00001625,0.00001309,0.00001848,0.00002035,0.00001923,0.00001914,0.00001485,0.00001535,0.00002515,0.00002624,...,0.00000955,0.00001252,0.00001482,0.00001508,0.00000806,0.00000916,0.00000427,0.00000981,0.00000598,0.00001177
3206874,50401002,0.00003250,0.00002723,0.00004080,0.00005036,0.00004745,0.00003903,0.00003391,0.00003218,0.00003938,0.00003713,...,0.00002435,0.00002367,0.00003494,0.00003457,0.00001810,0.00002098,0.00000618,0.00002229,0.00001316,0.00002354


In [65]:
df_X3

Unnamed: 0_level_0,Unnamed: 1_level_0,201001,201002,201003,201004,201005,201006,201007,201008,201009,201010,...,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212
cnes,codigo_procedimento,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2679477,303010037,0.17727303,0.17728062,0.71482831,0.29652652,0.46022536,0.22063579,0.09997533,0.78161983,0.76544556,0.72750956,...,0.77718480,0.64810568,0.90126325,0.45411671,0.75422231,0.63169326,0.64855384,0.70178093,0.85807632,0.78327030
2679477,303010061,0.17728669,0.17727607,0.71489114,0.29650284,0.46019389,0.22059245,0.09987404,0.78178856,0.76564284,0.72713467,...,0.77715145,0.64800669,0.90124723,0.45409512,0.75389165,0.63221841,0.64821115,0.70058018,0.85845030,0.78331412
2679477,303010193,0.17728290,0.17729048,0.71480118,0.29652592,0.46022334,0.22063308,0.09997252,0.78165065,0.76542336,0.72733482,...,0.77715621,0.64795169,0.90123875,0.45410807,0.75481843,0.63206281,0.64871165,0.70056941,0.85859252,0.78321332
2679477,303060107,0.17726317,0.17726620,0.71486686,0.29651053,0.46022334,0.22066377,0.09998002,0.78163767,0.76542829,0.72728399,...,0.77720386,0.64803968,0.90126137,0.45389655,0.75388699,0.63177754,0.64846817,0.70045095,0.85815269,0.78329659
2679477,303070064,0.17727303,0.17728517,0.71480546,0.29652652,0.46021890,0.22063579,0.09995658,0.78166526,0.76547515,0.72735071,...,0.77721339,0.64802869,0.90126137,0.45388791,0.75364482,0.63331410,0.64828780,0.70053172,0.85810265,0.78329659
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3077098,70103013,0.17720777,0.17731097,0.71482260,0.29653954,0.46024634,0.22066287,0.09999597,0.78160685,0.76542583,0.72717279,...,0.77721816,0.64804518,0.90123215,0.45392245,0.75354236,0.63171271,0.64831937,0.70049941,0.85796833,0.78317388
3077098,70103014,0.17724192,0.17727531,0.71496967,0.29651882,0.46022657,0.22063127,0.09998190,0.78167986,0.76547268,0.72730941,...,0.77721816,0.64804518,0.90126231,0.45392245,0.75354236,0.63171271,0.64831937,0.70049941,0.85804471,0.78324838
3206874,50301001,0.17727910,0.17725634,0.71481689,0.29653244,0.46022374,0.22063759,0.09994720,0.78157440,0.76554420,0.72750321,...,0.77718957,0.64812218,0.90126419,0.45405627,0.75347251,0.63167381,0.64814351,0.70048865,0.85802628,0.78332289
3206874,50401002,0.17727910,0.17724116,0.71486258,0.29658098,0.46025279,0.22064120,0.09994720,0.78154519,0.76544062,0.72729352,...,0.77723483,0.64804793,0.90127220,0.45423542,0.75337703,0.63163167,0.64781210,0.70047518,0.85792488,0.78328125


In [61]:
da_X1

In [62]:
da_X2

In [63]:
da_X3

In [59]:
X3D

In [67]:
df_X1.to_pickle('./IrisAutoencoder_Pytorch/train_data/df_X1.pkl')
df_X2.to_pickle('./IrisAutoencoder_Pytorch/train_data/df_X2.pkl')
df_X3.to_pickle('./IrisAutoencoder_Pytorch/train_data/df_X3.pkl')