In [1]:
import pandas as pd
import chardet

# Relative path to the semicolon-delimited CSV file
file_path = "../IAPS/banco_completo.csv"

# Detect the encoding from the raw bytes instead of assuming one.
# Only the read needs the open handle, so the file is closed before the
# detection result is inspected.
with open(file_path, 'rb') as f:
    result = chardet.detect(f.read())
file_encoding = result['encoding']

# Load data into a pandas DataFrame with the detected encoding and ';' delimiter
daf = pd.read_csv(file_path, encoding=file_encoding, delimiter=';')

# DataFrame.info() writes its summary to stdout itself and returns None;
# wrapping it in print() would emit a stray "None" line after the summary.
daf.info()

# Show the first few rows of the DataFrame
print(daf.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4789 entries, 0 to 4788
Data columns (total 50 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   ID                          4789 non-null   int64  
 1   dt_nasc                     4789 non-null   object 
 2   sexo                        4789 non-null   object 
 3   racacor                     4789 non-null   int64  
 4   dt_obito                    3302 non-null   object 
 5   causabas                    3302 non-null   object 
 6   qt_apac                     4789 non-null   int64  
 7   qt_aih                      4789 non-null   int64  
 8   obito                       4789 non-null   int64  
 9   obito_internacao            2034 non-null   float64
 10  dt_diag                     4789 non-null   object 
 11  estadio                     4789 non-null   int64  
 12  dt_proc_min                 4789 non-null   object 
 13  idade                       4789 

In [2]:
# Render the DataFrame as the cell output; for a frame this large pandas
# elides the middle rows and columns (shown as "..." in the output).
daf

Unnamed: 0,ID,dt_nasc,sexo,racacor,dt_obito,causabas,qt_apac,qt_aih,obito,obito_internacao,...,regiao_pcn,macro_pcn,estabel_saude,cidade_estabel,estado_estabel,regiao_estabel,macro_estabel,regiao_bras_estabel,finaliquimio,finaliradio
0,62980,02/05/1957,M,3,22/12/2014,C349,1,4,1,1.0,...,Belo Horizonte,Centro,SANTA CASA DE BELO HORIZONTE,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,,
1,402050,04/07/1960,M,1,06/06/2012,C349,2,2,1,1.0,...,Frutal,Triângulo do Sul,FUNDACAO PIO XII BARRETOS,BARRETOS,SP,Outros Estados,Outros Estados,Sudeste,Paliativa,
2,605650,20/03/1950,M,1,14/10/2014,C349,2,8,1,1.0,...,Belo Horizonte,Centro,ASSOCIACAO MARIO PENNA,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,Demais,
3,1162820,07/06/1937,F,1,11/02/2014,C349,1,1,1,,...,Sete Lagoas,Centro,HOSPITAL FELICIO ROCHO,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,Paliativa,
4,1279111,30/12/1939,M,99,21/03/2009,C349,4,1,1,1.0,...,Juiz de Fora,Sudeste,HOSPITAL MARIA JOSE BAETA REIS ASCOMCER,Juiz de Fora,MG,Juiz de Fora,Sudeste,Sudeste,Demais,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4784,999248701,01/08/1952,M,1,,,2,0,0,,...,Uberaba,Triângulo do Sul,HOSPITAL DOUTOR HELIO ANGOTTI,Uberaba,MG,Uberaba,Triângulo do Sul,Sudeste,Paliativa,
4785,999359850,30/07/1953,M,3,16/06/2013,C349,1,2,1,,...,Ubá,Sudeste,HOSPITAL DO CANCER DE MURIAE,Muriaé,MG,Muriaé,Sudeste,Sudeste,Paliativa,
4786,999364940,27/08/1940,M,1,17/03/2009,C349,5,0,1,,...,Poços de Caldas,Sul,FUNDACAO PIO XII BARRETOS,BARRETOS,SP,Outros Estados,Outros Estados,Sudeste,Demais,
4787,999442190,12/02/1937,M,1,12/02/2014,C349,4,3,1,1.0,...,Ponte Nova,Leste do Sul,HOSPITAL FELICIO ROCHO,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,,Demais


In [8]:
import pandas as pd
from scipy.stats import chi2_contingency

# Chi-square test of independence between death ('obito') and whether the
# record's establishment ('estabel_saude') is the target hospital.
# Requires `daf`, the DataFrame loaded from the CSV in the first cell.

# Establishment compared against all other establishments
TARGET_HOSPITAL = 'HOSPITAL JULIA KUBITSCHEK'

# Flag rows belonging to the target establishment. The vectorized comparison
# produces the same boolean column as an element-wise lambda, without a
# Python-level call per row.
daf['is_hospital_julia_kubitschek'] = daf['estabel_saude'].eq(TARGET_HOSPITAL)

# Split the rows on the flag; both subsets are reused by later cells
hospital_julia_kubitschek_rows = daf[daf['is_hospital_julia_kubitschek']]
not_hospital_julia_kubitschek_rows = daf[~daf['is_hospital_julia_kubitschek']]

# Contingency table: rows are 'obito' values, columns are the hospital flag
contingency_table = pd.crosstab(index=daf['obito'], columns=daf['is_hospital_julia_kubitschek'])

# Perform the chi-square test.
# NOTE(review): scipy applies Yates' continuity correction by default when the
# table has one degree of freedom — confirm that is the intended statistic.
chi2, p_value, dof, expected = chi2_contingency(contingency_table)

# Print the results
print("Chi-Square Statistic:", chi2)
print("P-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies Table:")
print(expected)


Chi-Square Statistic: 5.859209179748884
P-value: 0.01549575477027298
Degrees of Freedom: 1
Expected Frequencies Table:
[[1479.85842556    7.14157444]
 [3286.14157444   15.85842556]]


In [10]:
# Observed counts of obito == 1 and obito == 0, first for the target hospital
# rows and then for all remaining rows (one number per line).
for subset in (hospital_julia_kubitschek_rows, not_hospital_julia_kubitschek_rows):
    for outcome in (1, 0):
        matches = subset['obito'] == outcome
        print(matches.sum())

10
13
3292
1474


In [11]:
# Show the subset of rows whose 'estabel_saude' is 'HOSPITAL JULIA KUBITSCHEK'
# (the frame filtered on the 'is_hospital_julia_kubitschek' flag).
hospital_julia_kubitschek_rows

Unnamed: 0,ID,dt_nasc,sexo,racacor,dt_obito,causabas,qt_apac,qt_aih,obito,obito_internacao,...,macro_pcn,estabel_saude,cidade_estabel,estado_estabel,regiao_estabel,macro_estabel,regiao_bras_estabel,finaliquimio,finaliradio,is_hospital_julia_kubitschek
401,91831030,15/03/1960,F,99,,,3,5,0,,...,Centro,HOSPITAL JULIA KUBITSCHEK,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,,,True
452,101571811,18/02/1941,M,99,,,2,1,0,,...,Centro,HOSPITAL JULIA KUBITSCHEK,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,,,True
613,129661631,19/05/1940,M,99,,,1,5,0,,...,Leste,HOSPITAL JULIA KUBITSCHEK,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,,,True
734,153903230,05/07/1951,M,99,20/09/2012,C349,2,3,1,1.0,...,Centro,HOSPITAL JULIA KUBITSCHEK,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,,,True
990,203597870,23/10/1948,F,99,17/03/2015,C349,11,1,1,,...,Centro,HOSPITAL JULIA KUBITSCHEK,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,,,True
1168,242023211,11/07/1952,M,3,29/04/2015,C349,2,5,1,1.0,...,Leste do Sul,HOSPITAL JULIA KUBITSCHEK,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,,,True
1694,355887670,06/10/1930,F,99,,,3,1,0,,...,Centro,HOSPITAL JULIA KUBITSCHEK,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,,,True
1780,374566470,17/02/1961,M,99,,,2,1,0,,...,Centro,HOSPITAL JULIA KUBITSCHEK,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,,,True
1833,384251680,14/11/1951,M,99,24/07/2013,C349,2,2,1,1.0,...,Centro,HOSPITAL JULIA KUBITSCHEK,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,,,True
1971,416595880,12/10/1965,M,3,10/01/2013,C349,2,5,1,1.0,...,Oeste,HOSPITAL JULIA KUBITSCHEK,Belo Horizonte,MG,Belo Horizonte,Centro,Sudeste,,,True
