Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import pyplot
from sklearn.feature_selection import chi2
import scipy.stats as stats
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import seaborn as sn
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.preprocessing import StandardScaler
import warnings

Datasets

In [None]:
# Load the pre-processed dataset; the path is relative to the notebook's working directory.
df_original = pd.read_csv('data/df_preprocessed.csv', delimiter=',')

In [None]:
# Working copy of the raw frame (df_original stays untouched), minus the
# 'Unnamed: 0' column -- presumably an index column written by an earlier to_csv.
df = df_original.copy(deep=True).drop(columns=['Unnamed: 0'])

In [None]:
df['VARIANTE_COVID'].value_counts()  # variant codes -- 1: Gamma, 2: Delta, 3: Omicron

In [None]:
# (rows, columns) of the working frame right after loading.
df.shape

CONSTANTS

In [None]:
# Binary recodings of VARIANTE_COVID for each pairwise comparison
# (raw codes -- 1: Gamma, 2: Delta, 3: Omicron). The first variant named is coded 1.
DIC_GAMA_VS_OMICROM = {1: 1, 3: 0}   # Gamma: 1, Omicron: 0
DIC_DELTA_VS_OMICROM = {2: 1, 3: 0}  # Delta: 1, Omicron: 0
DIC_GAMA_VS_DELTA = {1: 1, 2: 0}     # Gamma: 1, Delta: 0

# Code -> label dictionaries. 'Missing' maps to itself so that the missing
# category survives Series.map instead of turning into NaN.
DIC_RACA = {
    1: 'Branca',
    2: 'Preta',
    3: 'Amarela',
    4: 'Parda',
    5: 'Indigena',
    'Missing': 'Missing',
}
DIC_OBESIDADE = {1: 'Yes', 2: 'No', 'Missing': 'Missing'}
DIC_CARDIOPATI = {1: 'Yes', 2: 'No', 'Missing': 'Missing'}
DIC_DIABETES = {1: 'Yes', 2: 'No', 'Missing': 'Missing'}
DIC_VACINA = {1: 'Yes', 2: 'No', 'Missing': 'Missing'}
DIC_ESCOLA = {
    0: 'Sem escolaridade',
    1: 'Fundamental 1',
    2: 'Fundamental 2',
    3: 'Médio',
    4: 'Superior',
    5: 'Not applicable',
    'Missing': 'Missing',
}
DIC_GRAVIDEZ = {
    1: 'Trimestre 1',
    2: 'Trimestre 2',
    3: 'Trimestre 3',
    4: 'Idade Gestacional Ignorada',
    5: 'Puerperas',
    'Missing': 'Missing',
}

### Pre processing

In [None]:
# (rows, columns) before the missing-value recoding below.
df.shape

Percentage of the number '9' in each column. As in the pre-processing all NANs were replaced by 9, now a column having 60 percent of '9' indicates that 60 percent of its values represent ignored values or NANs. 

In [None]:
# Percentage of cells equal to 9 in each column, largest first.
nine_counts = df.eq(9).sum()
nine_counts.div(df.shape[0]).mul(100).sort_values(ascending=False)

Categorizing missing values

In [None]:
# Treat the placeholder 9 as missing in every column, then give all missing
# values (the former 9s and any pre-existing NaN) an explicit 'Missing' category.
# NOTE(review): the replacement is applied to all columns, so a genuine value of 9
# in a non-coded column would also become 'Missing' -- confirm none exists.
df = df.replace(9, np.nan).fillna('Missing')

Mapping dictionaries

In [None]:
# Decode the categorical codes into readable labels. Series.map turns any code
# that is absent from the dictionary into NaN.
df['CS_RACA'] = df['CS_RACA'].map(DIC_RACA)
df['GRAVIDEZ'] = df['GRAVIDEZ'].map(DIC_GRAVIDEZ)
df['VACINA_COV'] = df['VACINA_COV'].map(DIC_VACINA)
df['CARDIOPATI'] = df['CARDIOPATI'].map(DIC_CARDIOPATI)

# Age bands. right=False makes each bin closed on the left and open on the right
# ([0, 20), [20, 25), ..., [40, 100)) so that the bins agree with the labels: with
# the default right=True an age of exactly 20 would be labelled '0-19', 25 would be
# labelled '20-24', and so on. The last edge is 100 so that age 99 is still '40+'.
# Coercing to numeric turns a 'Missing' age into NaN instead of raising in pd.cut.
age_years = pd.to_numeric(df['NU_IDADE_N'], errors='coerce')
df['Age_group'] = pd.cut(
    age_years,
    bins=[0, 20, 25, 30, 35, 40, 100],
    right=False,
    labels=['0-19', '20-24', '25-29', '30-34', '35-39', '40+'],
)

df['DIABETES'] = df['DIABETES'].map(DIC_DIABETES)
df['OBESIDADE'] = df['OBESIDADE'].map(DIC_OBESIDADE)
df['CS_ESCOL_N'] = df['CS_ESCOL_N'].map(DIC_ESCOLA)

# Binary outcomes for the regressions; every other value (e.g. 'Missing', or
# EVOLUCAO == 3) is left unchanged by replace.
df['UTI'] = df['UTI'].replace({1: 1, 2: 0})            # ICU admission: 1 = yes, 0 = no
df['EVOLUCAO'] = df['EVOLUCAO'].replace({1: 0, 2: 1})  # outcome: 0 = cure, 1 = death

# 1. Table 3: Risk estimates for adverse maternal and perinatal outcomes among COVID-19 infection laboratory-confirmed case:

In [None]:
def analyze_and_plot(response, predictor, data, iloc_start, iloc_end, variant1, variant2, covariates=None):
    """Fit a logistic regression, print its odds ratios and plot outcome-by-variant counts.

    Parameters
    ----------
    response : str
        Name of the binary (0/1) outcome column, e.g. "UTI" or "EVOLUCAO".
    predictor : str
        Name of the 0/1 exposure column used as the main term of the formula.
    data : pandas.DataFrame
        Frame containing ``response``, ``predictor`` and every covariate.
    iloc_start, iloc_end : int
        Positional slice (``iloc[iloc_start:iloc_end]``) of the exponentiated
        coefficient table that gets printed; position 0 is the intercept.
    variant1, variant2 : str
        Display names for ``predictor`` values 1 and 0 respectively.
    covariates : str, optional
        Extra right-hand-side formula terms, appended after ``predictor``.

    Returns
    -------
    None
        The function only prints the odds ratios and shows a heatmap.
    """
    if response == "EVOLUCAO":
        data = data[data["EVOLUCAO"] != 3]  # ignore "Death from other causes"

    formula = f"{response} ~ {predictor}"
    if covariates:
        formula += f" + {covariates}"

    print(f"Logistic regression model {response}:")
    model = smf.logit(formula, data=data).fit()

    # Exponentiate the coefficients and their confidence bounds to get odds ratios.
    conf_int = model.conf_int()
    odds_ratios = np.exp(
        pd.DataFrame(
            {
                "OR": model.params,
                "Lower CI": conf_int[0],
                "Upper CI": conf_int[1],
            }
        )
    )
    # NOTE(review): the slice is positional, so with covariates it depends on the
    # row order of the fitted design matrix -- confirm it contains the predictor row.
    odds_ratios = odds_ratios.iloc[iloc_start:iloc_end]

    print()
    print(f"Odds ratios for {predictor}:")
    print(odds_ratios)
    print()

    # Outcome (rows) by variant (columns) contingency table.
    crosstab = pd.crosstab(data[response], data[predictor])
    variant_names = {1: variant1, 0: variant2}
    if response == "UTI":
        crosstab.rename(columns=variant_names, index={1: "YES", 0: "NO"}, inplace=True)
    else:
        crosstab.rename(columns=variant_names, index={0: "Cure", 1: "Death"}, inplace=True)

    fig, ax = plt.subplots(figsize=(8, 6))
    # annot=False: the counts are written once, by the loop below. Leaving annot=True
    # as well would draw every value twice on top of each other.
    sn.heatmap(crosstab, annot=False, cmap="coolwarm", cbar=True, ax=ax)
    for i in range(crosstab.shape[0]):
        for j in range(crosstab.shape[1]):
            ax.text(j + 0.5, i + 0.5, crosstab.iloc[i, j],
                    ha='center', va='center', color='black', fontsize=10)

    ax.set_xlabel(predictor)
    ax.set_ylabel(response)
    plt.show()

Logistic Regression for Covid Variant (GAMMA VS OMICROM)

In [None]:
# Gamma vs. Omicron (Delta rows excluded), restricted to a known ICU status.
keep_rows = (df['VARIANTE_COVID'] != 2) & (df['UTI'] != 'Missing')
df_temp = df.loc[keep_rows].reset_index(drop=True)
df_temp = df_temp.assign(
    UTI=df_temp['UTI'].astype(int),
    VARIANTE_COVID=df_temp['VARIANTE_COVID'].map(DIC_GAMA_VS_OMICROM),  # Gamma: 1, Omicron: 0
)

analyze_and_plot("UTI", "VARIANTE_COVID", df_temp, 1, 3, "GAMMA", "OMICROM")

Interpretation:

Exposed group : Patient has the gamma variant

Non-exposed group : Patient has the omicrom variant

Event : Patient transferred to ICU

- a : Patient with gamma and transferred to ICU = 2737
- c : Patient with omicrom and transferred to ICU = 407
- b : Patient with gamma and not transferred to ICU = 4747
- d : Patient with omicrom and not transferred to ICU = 3031

$OR = \frac{a/b}{c/d} = 4.2938$

Therefore, the group of patients with the gamma variant has 4.2938 times the odds of being transferred to the ICU compared to omicron variant patients.


In [None]:
# Gamma vs. Omicron (Delta rows excluded), restricted to a known outcome.
keep_rows = (df['VARIANTE_COVID'] != 2) & (df['EVOLUCAO'] != 'Missing')
df_temp = df.loc[keep_rows].reset_index(drop=True)
df_temp = df_temp.assign(
    EVOLUCAO=df_temp['EVOLUCAO'].astype(int),
    VARIANTE_COVID=df_temp['VARIANTE_COVID'].map(DIC_GAMA_VS_OMICROM),  # Gamma: 1, Omicron: 0
)

analyze_and_plot("EVOLUCAO", "VARIANTE_COVID", df_temp, 1, 3, "GAMMA", "OMICROM")

Interpretation:

Exposed group  : Patient has the gamma variant

Non-exposed group : Patient has the omicrom variant

Event : Patient died (True)

- a : Patient with gamma and died = 1138
- c : Patient with omicrom and died= 135
- b : Patient with gamma and cured = 6460
- d : Patient with omicrom and cured = 3512

$OR = \frac{a/b}{c/d} = 4.5827$

Therefore, patients with the gamma variant have 4.5827 times the odds of dying compared to patients with the omicron variant.

### Logistic Regression for Covid Variant (GAMA VS OMICROM) - Adjusted for age group, vaccination status, region, skin color, gestational period, education, heart disease and obesity

In [None]:
# Gamma vs. Omicron (Delta rows excluded), known ICU status, adjusted model.
keep_rows = (df['VARIANTE_COVID'] != 2) & (df['UTI'] != 'Missing')
df_temp = df.loc[keep_rows].reset_index(drop=True)
df_temp = df_temp.assign(
    UTI=df_temp['UTI'].astype(int),
    VARIANTE_COVID=df_temp['VARIANTE_COVID'].map(DIC_GAMA_VS_OMICROM),  # Gamma: 1, Omicron: 0
)

analyze_and_plot(
    "UTI", "VARIANTE_COVID", df_temp, 1, 32, "GAMMA", "OMICROM",
    covariates="Age_group + VACINA_COV + C(Regiao_nome) + C(CS_RACA) + C(GRAVIDEZ) + CS_ESCOL_N + CARDIOPATI + OBESIDADE",
)

In [None]:
# Gamma vs. Omicron (Delta rows excluded), known outcome, adjusted model.
keep_rows = (df['VARIANTE_COVID'] != 2) & (df['EVOLUCAO'] != 'Missing')
df_temp = df.loc[keep_rows].reset_index(drop=True)
df_temp = df_temp.assign(
    EVOLUCAO=df_temp['EVOLUCAO'].astype(int),
    VARIANTE_COVID=df_temp['VARIANTE_COVID'].map(DIC_GAMA_VS_OMICROM),  # Gamma: 1, Omicron: 0
)

analyze_and_plot(
    "EVOLUCAO", "VARIANTE_COVID", df_temp, 1, 32, "GAMMA", "OMICROM",
    covariates="Age_group + VACINA_COV + C(Regiao_nome) + C(CS_RACA) + C(GRAVIDEZ) + CS_ESCOL_N + CARDIOPATI + OBESIDADE",
)

Logistic regression for Covid Variant (DELTA VS OMICROM)

In [None]:
# Delta vs. Omicron (Gamma rows excluded), restricted to a known ICU status.
keep_rows = (df['VARIANTE_COVID'] != 1) & (df['UTI'] != 'Missing')
df_temp = df.loc[keep_rows].reset_index(drop=True)
df_temp = df_temp.assign(
    UTI=df_temp['UTI'].astype(int),
    VARIANTE_COVID=df_temp['VARIANTE_COVID'].map(DIC_DELTA_VS_OMICROM),  # Delta: 1, Omicron: 0
)

analyze_and_plot("UTI", "VARIANTE_COVID", df_temp, 1, 3, "DELTA", "OMICROM")

In [None]:
# Delta vs. Omicron (Gamma rows excluded), restricted to a known outcome.
keep_rows = (df['VARIANTE_COVID'] != 1) & (df['EVOLUCAO'] != 'Missing')
df_temp = df.loc[keep_rows].reset_index(drop=True)
df_temp = df_temp.assign(
    EVOLUCAO=df_temp['EVOLUCAO'].astype(int),
    VARIANTE_COVID=df_temp['VARIANTE_COVID'].map(DIC_DELTA_VS_OMICROM),  # Delta: 1, Omicron: 0
)

analyze_and_plot("EVOLUCAO", "VARIANTE_COVID", df_temp, 1, 3, "DELTA", "OMICROM")

Logistic Regression for Covid Variant (DELTA VS OMICROM) - Adjusted for age group, vaccination status, region, skin color, gestational period, education, heart disease and obesity

In [None]:
# Delta vs. Omicron (Gamma rows excluded), known ICU status, adjusted model.
keep_rows = (df['VARIANTE_COVID'] != 1) & (df['UTI'] != 'Missing')
df_temp = df.loc[keep_rows].reset_index(drop=True)
df_temp = df_temp.assign(
    UTI=df_temp['UTI'].astype(int),
    VARIANTE_COVID=df_temp['VARIANTE_COVID'].map(DIC_DELTA_VS_OMICROM),  # Delta: 1, Omicron: 0
)

analyze_and_plot(
    "UTI", "VARIANTE_COVID", df_temp, 1, 32, "DELTA", "OMICROM",
    covariates="Age_group + VACINA_COV + C(Regiao_nome) + C(CS_RACA) + C(GRAVIDEZ) + CS_ESCOL_N + CARDIOPATI + OBESIDADE",
)

In [None]:
# Delta vs. Omicron (Gamma rows excluded), known outcome, adjusted model.
keep_rows = (df['VARIANTE_COVID'] != 1) & (df['EVOLUCAO'] != 'Missing')
df_temp = df.loc[keep_rows].reset_index(drop=True)
df_temp = df_temp.assign(
    EVOLUCAO=df_temp['EVOLUCAO'].astype(int),
    VARIANTE_COVID=df_temp['VARIANTE_COVID'].map(DIC_DELTA_VS_OMICROM),  # Delta: 1, Omicron: 0
)

analyze_and_plot(
    "EVOLUCAO", "VARIANTE_COVID", df_temp, 1, 32, "DELTA", "OMICROM",
    covariates="Age_group + VACINA_COV + C(Regiao_nome) + C(CS_RACA) + C(GRAVIDEZ) + CS_ESCOL_N + CARDIOPATI + OBESIDADE",
)

Logistic Regression for Covid Variant (GAMMA VS DELTA)

In [None]:
# Gamma vs. Delta (Omicron rows excluded), restricted to a known ICU status.
keep_rows = (df['VARIANTE_COVID'] != 3) & (df['UTI'] != 'Missing')
df_temp = df.loc[keep_rows].reset_index(drop=True)
df_temp = df_temp.assign(
    UTI=df_temp['UTI'].astype(int),
    VARIANTE_COVID=df_temp['VARIANTE_COVID'].map(DIC_GAMA_VS_DELTA),  # Gamma: 1, Delta: 0
)

analyze_and_plot("UTI", "VARIANTE_COVID", df_temp, 1, 3, "GAMMA", "DELTA")

In [None]:
# Gamma vs. Delta (Omicron rows excluded), restricted to a known outcome.
keep_rows = (df['VARIANTE_COVID'] != 3) & (df['EVOLUCAO'] != 'Missing')
df_temp = df.loc[keep_rows].reset_index(drop=True)
df_temp = df_temp.assign(
    EVOLUCAO=df_temp['EVOLUCAO'].astype(int),
    VARIANTE_COVID=df_temp['VARIANTE_COVID'].map(DIC_GAMA_VS_DELTA),  # Gamma: 1, Delta: 0
)

analyze_and_plot("EVOLUCAO", "VARIANTE_COVID", df_temp, 1, 3, "GAMMA", "DELTA")

Logistic Regression for Covid Variant (GAMMA VS DELTA) - Adjusted for age group, vaccination status, region, skin color, gestational period, education, heart disease and obesity

In [None]:
# Gamma vs. Delta (Omicron rows excluded), known ICU status, adjusted model.
keep_rows = (df['VARIANTE_COVID'] != 3) & (df['UTI'] != 'Missing')
df_temp = df.loc[keep_rows].reset_index(drop=True)
df_temp = df_temp.assign(
    UTI=df_temp['UTI'].astype(int),
    VARIANTE_COVID=df_temp['VARIANTE_COVID'].map(DIC_GAMA_VS_DELTA),  # Gamma: 1, Delta: 0
)

analyze_and_plot(
    "UTI", "VARIANTE_COVID", df_temp, 1, 32, "GAMMA", "DELTA",
    covariates="Age_group + VACINA_COV + C(Regiao_nome) + C(CS_RACA) + C(GRAVIDEZ) + CS_ESCOL_N + CARDIOPATI + OBESIDADE",
)

In [None]:
# Gamma vs. Delta (Omicron rows excluded), known outcome, adjusted model.
keep_rows = (df['VARIANTE_COVID'] != 3) & (df['EVOLUCAO'] != 'Missing')
df_temp = df.loc[keep_rows].reset_index(drop=True)
df_temp = df_temp.assign(
    EVOLUCAO=df_temp['EVOLUCAO'].astype(int),
    VARIANTE_COVID=df_temp['VARIANTE_COVID'].map(DIC_GAMA_VS_DELTA),  # Gamma: 1, Delta: 0
)

analyze_and_plot(
    "EVOLUCAO", "VARIANTE_COVID", df_temp, 1, 32, "GAMMA", "DELTA",
    covariates="Age_group + VACINA_COV + C(Regiao_nome) + C(CS_RACA) + C(GRAVIDEZ) + CS_ESCOL_N + CARDIOPATI + OBESIDADE",
)

### Logistic regression for Covid Variant (DELTA VS OMICROM) for each region

In [None]:
# Delta vs. Omicron (Gamma rows excluded).
df_temp = df.loc[df['VARIANTE_COVID'] != 1].copy()
df_temp.reset_index(inplace=True, drop=True)

# Composite adverse outcome UTI_EVOLUCAO:
#   1 = ICU admission and/or death, 0 = neither, NaN = cannot be classified.
# EVOLUCAO codes at this point: 0 = cure, 1 = death, 3 = death from other causes.
lista_obito = [1, 3]
died = df_temp['EVOLUCAO'].isin(lista_obito)
cured = df_temp['EVOLUCAO'] == 0
in_icu = df_temp['UTI'] == 1
not_in_icu = df_temp['UTI'] == 0

# Same four cases as a row-by-row loop would test, evaluated on whole columns.
# NOTE(review): a death with an unknown ICU status stays unclassified and is
# dropped below -- confirm this exclusion is intended.
adverse = (died & (in_icu | not_in_icu)) | (cured & in_icu)
uneventful = cured & not_in_icu
df_temp['UTI_EVOLUCAO'] = np.select([adverse, uneventful], [1.0, 0.0], default=np.nan)

# Drop unclassified rows directly; a chained `df[col].fillna(..., inplace=True)`
# does not modify the frame once pandas copy-on-write is active.
df_temp = df_temp.dropna(subset=['UTI_EVOLUCAO']).copy()

df_temp['VARIANTE_COVID'] = df_temp['VARIANTE_COVID'].map(DIC_DELTA_VS_OMICROM)  # Delta: 1, Omicron: 0

In [None]:
# Rows with (1) and without (0) the composite adverse outcome (ICU admission and/or death).
df_temp['UTI_EVOLUCAO'].value_counts()

In [None]:
# One sub-frame per region, selected on the 'Regiao_nome' column.
regiao = df_temp['Regiao_nome']

df_temp_sudeste = df_temp[regiao.eq('Southeast')]
df_temp_Sul = df_temp[regiao.eq('South')]
df_temp_Nordeste = df_temp[regiao.eq('Northeast')]
df_temp_Centro_oeste = df_temp[regiao.eq('Midwest')]
df_temp_Norte = df_temp[regiao.eq('North')]

In [None]:
# Unadjusted odds ratio (with 95% CI) between the variant indicator and the
# composite adverse outcome, per region and for the whole country.
# Delta is coded 1, so OR > 1 means adverse outcomes go with Delta.
region_list = ['Southeast', 'South', 'Northeast', 'Midwest', 'North', 'Brazil']
data_list = [df_temp_sudeste, df_temp_Sul, df_temp_Nordeste, df_temp_Centro_oeste, df_temp_Norte, df_temp]
odds_ratios = []
lowerCI = []
upperCI = []

for region, data_ in zip(region_list, data_list):
    # NOTE(review): the blanket filter also hides statsmodels fit warnings.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model = smf.logit("VARIANTE_COVID ~ UTI_EVOLUCAO", data=data_).fit(disp=False)
    # Row 0 is the intercept and row 1 the UTI_EVOLUCAO term. Use .iloc: integer
    # keys on a label-indexed Series are deprecated as positional lookups.
    conf_int = model.conf_int()
    odds_ratios.append(np.exp(model.params.iloc[1]))
    lowerCI.append(np.exp(conf_int.iloc[1, 0]))
    upperCI.append(np.exp(conf_int.iloc[1, 1]))

# Odds ratios and confidence bounds, one row per region.
or_ = pd.DataFrame({"OR": odds_ratios, "Lower CI": lowerCI, "Upper CI": upperCI})
or_.index = region_list
or_ = np.round(or_, 2)
print()
display(or_)

# Forest-style plot: point = OR, horizontal bar = 95% CI.
fig, ax = plt.subplots(figsize=(10, 6))
ax.errorbar(or_['OR'], or_.index, xerr=[or_['OR'] - or_['Lower CI'], or_['Upper CI'] - or_['OR']],
            fmt='o', markersize=8, capsize=8, color='black', ecolor='gray')

ax.set_xlabel("Odds ratio")
ax.set_title('Omicron VS Delta')
ax.set_xlim(0, 10)
ax.legend(['Odds Ratio'])
ax.grid(axis='x', linestyle='--', alpha=0.6)

plt.show()

# Additional information about variant cases for each region
crosstab_delta_omicrom_REGION = pd.crosstab(df_temp['Regiao_nome'], df_temp['VARIANTE_COVID'])
crosstab_delta_omicrom_REGION.rename(columns={0: "OMICRON", 1: "DELTA"}, inplace=True)
print()
print("Number of variant cases for each region:")
print(crosstab_delta_omicrom_REGION)

Logistic regression for Covid Variant (GAMA VS OMICROM) for each region

In [None]:
# Gamma vs. Omicron (Delta rows excluded).
df_temp = df.loc[df['VARIANTE_COVID'] != 2].copy()
df_temp.reset_index(inplace=True, drop=True)

# Composite adverse outcome UTI_EVOLUCAO:
#   1 = ICU admission and/or death, 0 = neither, NaN = cannot be classified.
# EVOLUCAO codes at this point: 0 = cure, 1 = death, 3 = death from other causes.
lista_obito = [1, 3]
died = df_temp['EVOLUCAO'].isin(lista_obito)
cured = df_temp['EVOLUCAO'] == 0
in_icu = df_temp['UTI'] == 1
not_in_icu = df_temp['UTI'] == 0

# Same four cases as a row-by-row loop would test, evaluated on whole columns.
# NOTE(review): a death with an unknown ICU status stays unclassified and is
# dropped below -- confirm this exclusion is intended.
adverse = (died & (in_icu | not_in_icu)) | (cured & in_icu)
uneventful = cured & not_in_icu
df_temp['UTI_EVOLUCAO'] = np.select([adverse, uneventful], [1.0, 0.0], default=np.nan)

# Drop unclassified rows directly; a chained `df[col].fillna(..., inplace=True)`
# does not modify the frame once pandas copy-on-write is active.
df_temp = df_temp.dropna(subset=['UTI_EVOLUCAO']).copy()

df_temp['VARIANTE_COVID'] = df_temp['VARIANTE_COVID'].map(DIC_GAMA_VS_OMICROM)  # Gamma: 1, Omicron: 0

In [None]:
# Rows with (1) and without (0) the composite adverse outcome (ICU admission and/or death).
df_temp['UTI_EVOLUCAO'].value_counts()

In [None]:
# One sub-frame per region, selected on the 'Regiao_nome' column.
regiao = df_temp['Regiao_nome']

df_temp_sudeste = df_temp[regiao.eq('Southeast')]
df_temp_Sul = df_temp[regiao.eq('South')]
df_temp_Nordeste = df_temp[regiao.eq('Northeast')]
df_temp_Centro_oeste = df_temp[regiao.eq('Midwest')]
df_temp_Norte = df_temp[regiao.eq('North')]

In [None]:
# Unadjusted odds ratio (with 95% CI) between the variant indicator and the
# composite adverse outcome, per region and for the whole country.
# Gamma is coded 1, so OR > 1 means adverse outcomes go with Gamma.
region_list = ['Southeast', 'South', 'Northeast', 'Midwest', 'North', 'Brazil']
data_list = [df_temp_sudeste, df_temp_Sul, df_temp_Nordeste, df_temp_Centro_oeste, df_temp_Norte, df_temp]
odds_ratios = []
lowerCI = []
upperCI = []

for region, data_ in zip(region_list, data_list):
    # NOTE(review): the blanket filter also hides statsmodels fit warnings.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model = smf.logit("VARIANTE_COVID ~ UTI_EVOLUCAO", data=data_).fit(disp=False)
    # Row 0 is the intercept and row 1 the UTI_EVOLUCAO term. Use .iloc: integer
    # keys on a label-indexed Series are deprecated as positional lookups.
    conf_int = model.conf_int()
    odds_ratios.append(np.exp(model.params.iloc[1]))
    lowerCI.append(np.exp(conf_int.iloc[1, 0]))
    upperCI.append(np.exp(conf_int.iloc[1, 1]))

# Odds ratios and confidence bounds, one row per region.
or_ = pd.DataFrame({"OR": odds_ratios, "Lower CI": lowerCI, "Upper CI": upperCI})
or_.index = region_list
or_ = np.round(or_, 2)
print()
display(or_)

# Forest-style plot: point = OR, horizontal bar = 95% CI.
fig, ax = plt.subplots(figsize=(10, 6))
ax.errorbar(or_['OR'], or_.index, xerr=[or_['OR'] - or_['Lower CI'], or_['Upper CI'] - or_['OR']],
            fmt='o', markersize=8, capsize=8, color='black', ecolor='gray')

ax.set_xlabel("Odds ratio")
ax.set_title('Omicron VS Gamma')
ax.set_xlim(1, 9)
ax.legend(['Odds Ratio'])
ax.grid(axis='x', linestyle='--', alpha=0.6)

plt.show()

# Additional information about variant cases for each region
crosstab_gamma_omicrom_REGION = pd.crosstab(df_temp['Regiao_nome'], df_temp['VARIANTE_COVID'])
crosstab_gamma_omicrom_REGION.rename(columns={0: "OMICRON", 1: "GAMMA"}, inplace=True)
print()
print("Number of variant cases for each region:")
print(crosstab_gamma_omicrom_REGION)

Logistic regression for Covid Variant (GAMMA VS DELTA) for each region

In [None]:
# Gamma vs. Delta (Omicron rows excluded).
df_temp = df.loc[df['VARIANTE_COVID'] != 3].copy()
df_temp.reset_index(inplace=True, drop=True)

# Composite adverse outcome UTI_EVOLUCAO:
#   1 = ICU admission and/or death, 0 = neither, NaN = cannot be classified.
# EVOLUCAO codes at this point: 0 = cure, 1 = death, 3 = death from other causes.
lista_obito = [1, 3]
died = df_temp['EVOLUCAO'].isin(lista_obito)
cured = df_temp['EVOLUCAO'] == 0
in_icu = df_temp['UTI'] == 1
not_in_icu = df_temp['UTI'] == 0

# Same four cases as a row-by-row loop would test, evaluated on whole columns.
# NOTE(review): a death with an unknown ICU status stays unclassified and is
# dropped below -- confirm this exclusion is intended.
adverse = (died & (in_icu | not_in_icu)) | (cured & in_icu)
uneventful = cured & not_in_icu
df_temp['UTI_EVOLUCAO'] = np.select([adverse, uneventful], [1.0, 0.0], default=np.nan)

# Drop unclassified rows directly; a chained `df[col].fillna(..., inplace=True)`
# does not modify the frame once pandas copy-on-write is active.
df_temp = df_temp.dropna(subset=['UTI_EVOLUCAO']).copy()

df_temp['VARIANTE_COVID'] = df_temp['VARIANTE_COVID'].map(DIC_GAMA_VS_DELTA)  # Gamma: 1, Delta: 0

In [None]:
# One sub-frame per region, selected on the 'Regiao_nome' column.
regiao = df_temp['Regiao_nome']

df_temp_sudeste = df_temp[regiao.eq('Southeast')]
df_temp_Sul = df_temp[regiao.eq('South')]
df_temp_Nordeste = df_temp[regiao.eq('Northeast')]
df_temp_Centro_oeste = df_temp[regiao.eq('Midwest')]
df_temp_Norte = df_temp[regiao.eq('North')]

In [None]:
# Unadjusted odds ratio (with 95% CI) between the variant indicator and the
# composite adverse outcome, per region and for the whole country.
# Gamma is coded 1, so OR > 1 means adverse outcomes go with Gamma.
region_list = ['Southeast', 'South', 'Northeast', 'Midwest', 'North', 'Brazil']
data_list = [df_temp_sudeste, df_temp_Sul, df_temp_Nordeste, df_temp_Centro_oeste, df_temp_Norte, df_temp]
odds_ratios = []
lowerCI = []
upperCI = []

for region, data_ in zip(region_list, data_list):
    # NOTE(review): the blanket filter also hides statsmodels fit warnings.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model = smf.logit("VARIANTE_COVID ~ UTI_EVOLUCAO", data=data_).fit(disp=False)
    # Row 0 is the intercept and row 1 the UTI_EVOLUCAO term. Use .iloc: integer
    # keys on a label-indexed Series are deprecated as positional lookups.
    conf_int = model.conf_int()
    odds_ratios.append(np.exp(model.params.iloc[1]))
    lowerCI.append(np.exp(conf_int.iloc[1, 0]))
    upperCI.append(np.exp(conf_int.iloc[1, 1]))

# Odds ratios and confidence bounds, one row per region.
or_ = pd.DataFrame({"OR": odds_ratios, "Lower CI": lowerCI, "Upper CI": upperCI})
or_.index = region_list
or_ = np.round(or_, 2)
print()
display(or_)

# Forest-style plot: point = OR, horizontal bar = 95% CI.
fig, ax = plt.subplots(figsize=(10, 6))
ax.errorbar(or_['OR'], or_.index, xerr=[or_['OR'] - or_['Lower CI'], or_['Upper CI'] - or_['OR']],
            fmt='o', markersize=8, capsize=8, color='black', ecolor='gray')

ax.set_xlabel("Odds ratio")
ax.set_title('Gamma VS Delta')
ax.set_xlim(0.5, 3.5)
ax.legend(['Odds Ratio'])
ax.grid(axis='x', linestyle='--', alpha=0.6)

plt.show()

# Additional information about variant cases for each region
crosstab_gamma_delta_REGION = pd.crosstab(df_temp['Regiao_nome'], df_temp['VARIANTE_COVID'])
crosstab_gamma_delta_REGION.rename(columns={0: "DELTA", 1: "GAMMA"}, inplace=True)
print()
print("Number of variant cases for each region:")
print(crosstab_gamma_delta_REGION)

## Table 3: Association of clinical features of SARS-CoV-2 infection at admission and adverse maternal outcomes, by SARS-CoV-2 VOCs, in Brazil and regions (one table for each outcome: ICU admission, intubation, maternal death).

Logistic regression for ICU admission

In [None]:
# Symptom columns used as single predictors in the tables below.
# NOTE(review): the name `vars` shadows the Python builtin of the same name.
vars = [
    'FEBRE', 'TOSSE', 'GARGANTA', 'DISPNEIA', 'DESC_RESP', 'SATURACAO',
    'DIARREIA', 'VOMITO', 'DOR_ABD', 'FADIGA', 'PERD_OLFT', 'PERD_PALA',
]
# Columns that must be non-missing: the ICU outcome followed by every symptom.
colunas = ['UTI', *vars]

In [None]:
# Keep only rows where the ICU outcome and every symptom are known.
complete_rows = (df[colunas] != 'Missing').all(axis=1)
df_temp = df.loc[complete_rows].copy()

# Recode 1 -> 1 (yes) and 2 -> 0 (no); any other value is left as it is.
# A per-column Series.map replaces the deprecated DataFrame.applymap.
dic = {1: 1, 2: 0}
df_temp[colunas] = df_temp[colunas].apply(lambda col: col.map(lambda x: dic.get(x, x)))

# One frame per variant (1: Gamma, 2: Delta, 3: Omicron).
df_temp_gama = df_temp.loc[df_temp['VARIANTE_COVID'] == 1].copy()
df_temp_delta = df_temp.loc[df_temp['VARIANTE_COVID'] == 2].copy()
df_temp_omicrom = df_temp.loc[df_temp['VARIANTE_COVID'] == 3].copy()

In [None]:
# Pooled (all variants together) single-predictor models; only their p-values are used.
# NOTE(review): the p-value column therefore does not belong to any one of the
# per-variant odds ratios shown next to it.
models = {}
p_values = {}
for i, feature in enumerate(vars):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        models[f"model_{i}"] = smf.logit(f"UTI ~ {feature}", data=df_temp).fit(disp=0)
    # Row 0 is the intercept, row 1 the symptom. Use .iloc: integer keys on a
    # label-indexed Series are deprecated as positional lookups.
    p_values[f"p_value_{i}"] = models[f"model_{i}"].pvalues.iloc[1]

lista_p_values = np.round(list(p_values.values()), 4)


def _icu_or_column(data, column_name):
    """Return a one-column frame of 'OR (lower - upper)' strings, one row per symptom.

    Fits ``UTI ~ <symptom>`` on ``data`` for every entry of ``vars`` and formats
    the exponentiated coefficient with its confidence bounds, rounded to 2 decimals.
    """
    rows = []
    for var in vars:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            model = smf.logit(f"UTI ~ {var}", data=data).fit(disp=0)
        conf_int = model.conf_int()
        rows.append(np.exp([model.params.iloc[1], conf_int.iloc[1, 0], conf_int.iloc[1, 1]]))
    table = pd.DataFrame(rows, index=vars, columns=["OR", "Lower CI", "Upper CI"])
    table = np.round(table, 2).astype(str)
    formatted = table["OR"] + " (" + table["Lower CI"] + " - " + table["Upper CI"] + ")"
    return formatted.to_frame(column_name)


# Same univariate analysis within each variant.
or_gama = _icu_or_column(df_temp_gama, 'OR GAMA (95% CI)')
or_delta = _icu_or_column(df_temp_delta, 'OR DELTA (95% CI)')
or_omicrom = _icu_or_column(df_temp_omicrom, 'OR OMICROM (95% CI)')

or_UTI = pd.concat([or_gama, or_delta, or_omicrom], axis=1)
or_UTI['p-value'] = list(lista_p_values)
or_UTI

Logistic regression for Death

In [None]:
# Distribution of the recoded outcome: 0 = cure, 1 = death; other values
# (3, 'Missing') were left unchanged by the recoding.
df['EVOLUCAO'].value_counts()

In [None]:
# Keep only rows where every column in `colunas` (ICU status and symptoms) is known.
complete_rows = (df[colunas] != 'Missing').all(axis=1)
df_temp = df.loc[complete_rows].copy()

# Recode 1 -> 1 (yes) and 2 -> 0 (no); any other value is left as it is.
# A per-column Series.map replaces the deprecated DataFrame.applymap.
dic = {1: 1, 2: 0}
df_temp[colunas] = df_temp[colunas].apply(lambda col: col.map(lambda x: dic.get(x, x)))

# Binary death indicator: codes 1 and 3 both count as death here.
# NOTE(review): analyze_and_plot excludes code 3 ("Death from other causes")
# instead of counting it as a death -- confirm which definition is intended.
dic_evolucao = {1: 1, 0: 0, 3: 1, "Missing": "Missing"}
df_temp['EVOLUCAO'] = df_temp['EVOLUCAO'].map(dic_evolucao)
df_temp = df_temp[df_temp['EVOLUCAO'] != 'Missing'].copy()
df_temp['EVOLUCAO'] = df_temp['EVOLUCAO'].astype(int)

# One frame per variant (1: Gamma, 2: Delta, 3: Omicron).
df_temp_gama = df_temp.loc[df_temp['VARIANTE_COVID'] == 1].copy()
df_temp_delta = df_temp.loc[df_temp['VARIANTE_COVID'] == 2].copy()
df_temp_omicrom = df_temp.loc[df_temp['VARIANTE_COVID'] == 3].copy()

In [None]:
# Pooled (all variants together) single-predictor models; only their p-values are used.
# NOTE(review): the p-value column therefore does not belong to any one of the
# per-variant odds ratios shown next to it.
models = {}
p_values = {}
for i, feature in enumerate(vars):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        models[f"model_{i}"] = smf.logit(f"EVOLUCAO ~ {feature}", data=df_temp).fit(disp=0)
    # Row 0 is the intercept, row 1 the symptom. Use .iloc: integer keys on a
    # label-indexed Series are deprecated as positional lookups.
    p_values[f"p_value_{i}"] = models[f"model_{i}"].pvalues.iloc[1]

lista_p_values = np.round(list(p_values.values()), 4)


def _death_or_column(data, column_name):
    """Return a one-column frame of 'OR (lower - upper)' strings, one row per symptom.

    Fits ``EVOLUCAO ~ <symptom>`` on ``data`` for every entry of ``vars`` and formats
    the exponentiated coefficient with its confidence bounds, rounded to 2 decimals.
    """
    rows = []
    for var in vars:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            model = smf.logit(f"EVOLUCAO ~ {var}", data=data).fit(disp=0)
        conf_int = model.conf_int()
        rows.append(np.exp([model.params.iloc[1], conf_int.iloc[1, 0], conf_int.iloc[1, 1]]))
    table = pd.DataFrame(rows, index=vars, columns=["OR", "Lower CI", "Upper CI"])
    table = np.round(table, 2).astype(str)
    formatted = table["OR"] + " (" + table["Lower CI"] + " - " + table["Upper CI"] + ")"
    return formatted.to_frame(column_name)


# Same univariate analysis within each variant.
or_gama = _death_or_column(df_temp_gama, 'OR GAMA (95% CI)')
or_delta = _death_or_column(df_temp_delta, 'OR DELTA (95% CI)')
or_omicrom = _death_or_column(df_temp_omicrom, 'OR OMICROM (95% CI)')

or_OBITO = pd.concat([or_gama, or_delta, or_omicrom], axis=1)
or_OBITO['p-value'] = list(lista_p_values)
or_OBITO

### To make a comparison with the general vaccination rate among cases, showing that the most severe cases were not vaccinated 

Analysis comparing vaccination coverage among cases of maternal death and ICU admission vs. patients who did not have either outcome for Gamma

In [None]:
# Gamma cases only.
df_temp = df.loc[df['VARIANTE_COVID'] == 1].copy()
df_temp.reset_index(inplace=True, drop=True)

# Composite adverse outcome UTI_EVOLUCAO:
#   1 = ICU admission and/or death, 0 = neither, NaN = cannot be classified.
# EVOLUCAO codes at this point: 0 = cure, 1 = death, 3 = death from other causes.
lista_obito = [1, 3]
died = df_temp['EVOLUCAO'].isin(lista_obito)
cured = df_temp['EVOLUCAO'] == 0
in_icu = df_temp['UTI'] == 1
not_in_icu = df_temp['UTI'] == 0

# Same four cases as a row-by-row loop would test, evaluated on whole columns.
# NOTE(review): a death with an unknown ICU status stays unclassified and is
# dropped below -- confirm this exclusion is intended.
adverse = (died & (in_icu | not_in_icu)) | (cured & in_icu)
uneventful = cured & not_in_icu
df_temp['UTI_EVOLUCAO'] = np.select([adverse, uneventful], [1.0, 0.0], default=np.nan)

# Drop unclassified rows directly; a chained `df[col].fillna(..., inplace=True)`
# does not modify the frame once pandas copy-on-write is active, and the
# integer cast below would then fail on the remaining NaN values.
df_temp = df_temp.dropna(subset=['UTI_EVOLUCAO']).copy()
df_temp['UTI_EVOLUCAO'] = df_temp['UTI_EVOLUCAO'].astype(int)

# Known vaccination status only, as a 0/1 indicator.
df_temp = df_temp[df_temp['VACINA_COV'] != 'Missing'].copy()
df_temp['VACINA_COV'] = df_temp['VACINA_COV'].map({"No": 0, "Yes": 1})
df_temp.reset_index(inplace=True, drop=True)

In [None]:
# Logistic regression model: composite outcome explained by vaccination status
print("Logistic regression model:")
model = smf.logit("UTI_EVOLUCAO ~ VACINA_COV", data=df_temp).fit(disp=False)
print(model.summary())

# Odds ratios: exponentiate the coefficients and their confidence bounds
conf_int = model.conf_int()  # column 0 = lower bound, column 1 = upper bound
odds_ratios = pd.DataFrame(
    {
        "OR": model.params,
        "Lower CI": conf_int[0],
        "Upper CI": conf_int[1],
    }
)
odds_ratios = np.exp(odds_ratios)
odds_ratios = odds_ratios.iloc[1:3]  # drop the first row (the intercept term)

# Print odds ratios
print()
print("Odds ratios for VACINA_COV:")
print(odds_ratios)
print()

# Crosstab with rows = outcome and columns = vaccination status, so that the
# tick labels and axis labels below describe the counts that are actually shown.
# (Previously the table was vaccination x outcome while the labels described
# outcome x vaccination, which misattributed the off-diagonal cells.)
crosstab = pd.crosstab(df_temp["UTI_EVOLUCAO"], df_temp["VACINA_COV"])
crosstab = crosstab.rename(
    index={0: "Otherwise", 1: "Severe cases"},
    columns={0: "NO", 1: "YES"},
)

fig, ax = plt.subplots(figsize=(8, 6))
# Counts are written once by the loop below; annot=True on top of it would draw
# every number twice.
sn.heatmap(crosstab, annot=False, cmap="Blues", cbar=True, ax=ax)

# Add values to each cell
for i in range(crosstab.shape[0]):
    for j in range(crosstab.shape[1]):
        ax.text(j + 0.5, i + 0.5, crosstab.iloc[i, j],
                ha='center', va='center', color='black', fontsize=10)

ax.set_title("Gamma: severe outcome by COVID-19 vaccination status")
ax.set_xlabel("VACINA_COV")
ax.set_ylabel("UTI_EVOLUCAO")
plt.show()

Analysis comparing vaccination coverage between cases of maternal death or ICU admission and patients who had neither outcome, for the Delta variant

In [None]:
# Delta subset (VARIANTE_COVID == 2); work on a copy so `df` is left untouched.
df_temp = df.loc[df['VARIANTE_COVID'] == 2].copy()  # Delta

# Composite outcome UTI_EVOLUCAO:
#   1   -> died (EVOLUCAO in lista_obito) with UTI recorded as 0 or 1,
#          or did not die (EVOLUCAO == 0) but was admitted to the ICU (UTI == 1)
#   0   -> did not die and was not admitted to the ICU
#   NaN -> any other combination (e.g. 'Missing' in either column); dropped below
lista_obito = [1, 3]
died = df_temp['EVOLUCAO'].isin(lista_obito)
survived = df_temp['EVOLUCAO'].eq(0)
in_icu = df_temp['UTI'].eq(1)
not_in_icu = df_temp['UTI'].eq(0)

severe = (died & (in_icu | not_in_icu)) | (survived & in_icu)
not_severe = survived & not_in_icu
df_temp['UTI_EVOLUCAO'] = np.select([severe, not_severe], [1.0, 0.0], default=np.nan)

# Keep only rows with a defined outcome and a known vaccination status, then
# encode both as integers. dropna replaces the former chained
# `fillna('Missing', inplace=True)` on a column selection, which does not update
# the frame when pandas Copy-on-Write is active.
df_temp = (
    df_temp
    .dropna(subset=['UTI_EVOLUCAO'])
    .loc[lambda frame: frame['VACINA_COV'] != 'Missing']
    .assign(
        UTI_EVOLUCAO=lambda frame: frame['UTI_EVOLUCAO'].astype(int),
        # NOTE(review): assumes only "Yes"/"No" remain once 'Missing' is filtered
        # out; the explicit cast raises if any other value is present.
        VACINA_COV=lambda frame: frame['VACINA_COV'].replace({"No": 0, "Yes": 1}).astype(int),
    )
    .reset_index(drop=True)
)

In [None]:
# Logistic regression model: composite outcome explained by vaccination status
print("Logistic regression model:")
model = smf.logit("UTI_EVOLUCAO ~ VACINA_COV", data=df_temp).fit(disp=False)
print(model.summary())

# Odds ratios: exponentiate the coefficients and their confidence bounds
conf_int = model.conf_int()  # column 0 = lower bound, column 1 = upper bound
odds_ratios = pd.DataFrame(
    {
        "OR": model.params,
        "Lower CI": conf_int[0],
        "Upper CI": conf_int[1],
    }
)
odds_ratios = np.exp(odds_ratios)
odds_ratios = odds_ratios.iloc[1:3]  # drop the first row (the intercept term)

# Print odds ratios
print()
print("Odds ratios for VACINA_COV:")
print(odds_ratios)
print()

# Crosstab with rows = outcome and columns = vaccination status, so that the
# tick labels and axis labels below describe the counts that are actually shown.
# (Previously the table was vaccination x outcome while the labels described
# outcome x vaccination, which misattributed the off-diagonal cells.)
crosstab = pd.crosstab(df_temp["UTI_EVOLUCAO"], df_temp["VACINA_COV"])
crosstab = crosstab.rename(
    index={0: "Otherwise", 1: "Severe cases"},
    columns={0: "NO", 1: "YES"},
)

fig, ax = plt.subplots(figsize=(8, 6))
# Counts are written once by the loop below; annot=True on top of it would draw
# every number twice.
sn.heatmap(crosstab, annot=False, cmap="Blues", cbar=True, ax=ax)

# Add values to each cell
for i in range(crosstab.shape[0]):
    for j in range(crosstab.shape[1]):
        ax.text(j + 0.5, i + 0.5, crosstab.iloc[i, j],
                ha='center', va='center', color='black', fontsize=10)

ax.set_title("Delta: severe outcome by COVID-19 vaccination status")
ax.set_xlabel("VACINA_COV")
ax.set_ylabel("UTI_EVOLUCAO")
plt.show()

Analysis comparing vaccination coverage between cases of maternal death or ICU admission and patients who had neither outcome, for the Omicron variant

In [None]:
# Omicron subset (VARIANTE_COVID == 3); work on a copy so `df` is left untouched.
df_temp = df.loc[df['VARIANTE_COVID'] == 3].copy()  # Omicron

# Composite outcome UTI_EVOLUCAO:
#   1   -> died (EVOLUCAO in lista_obito) with UTI recorded as 0 or 1,
#          or did not die (EVOLUCAO == 0) but was admitted to the ICU (UTI == 1)
#   0   -> did not die and was not admitted to the ICU
#   NaN -> any other combination (e.g. 'Missing' in either column); dropped below
lista_obito = [1, 3]
died = df_temp['EVOLUCAO'].isin(lista_obito)
survived = df_temp['EVOLUCAO'].eq(0)
in_icu = df_temp['UTI'].eq(1)
not_in_icu = df_temp['UTI'].eq(0)

severe = (died & (in_icu | not_in_icu)) | (survived & in_icu)
not_severe = survived & not_in_icu
df_temp['UTI_EVOLUCAO'] = np.select([severe, not_severe], [1.0, 0.0], default=np.nan)

# Keep only rows with a defined outcome and a known vaccination status, then
# encode both as integers. dropna replaces the former chained
# `fillna('Missing', inplace=True)` on a column selection, which does not update
# the frame when pandas Copy-on-Write is active.
df_temp = (
    df_temp
    .dropna(subset=['UTI_EVOLUCAO'])
    .loc[lambda frame: frame['VACINA_COV'] != 'Missing']
    .assign(
        UTI_EVOLUCAO=lambda frame: frame['UTI_EVOLUCAO'].astype(int),
        # NOTE(review): assumes only "Yes"/"No" remain once 'Missing' is filtered
        # out; the explicit cast raises if any other value is present.
        VACINA_COV=lambda frame: frame['VACINA_COV'].replace({"No": 0, "Yes": 1}).astype(int),
    )
    .reset_index(drop=True)
)

In [None]:
# Logistic regression model: composite outcome explained by vaccination status
print("Logistic regression model:")
model = smf.logit("UTI_EVOLUCAO ~ VACINA_COV", data=df_temp).fit(disp=False)
print(model.summary())

# Odds ratios: exponentiate the coefficients and their confidence bounds
conf_int = model.conf_int()  # column 0 = lower bound, column 1 = upper bound
odds_ratios = pd.DataFrame(
    {
        "OR": model.params,
        "Lower CI": conf_int[0],
        "Upper CI": conf_int[1],
    }
)
odds_ratios = np.exp(odds_ratios)
odds_ratios = odds_ratios.iloc[1:3]  # drop the first row (the intercept term)

# Print odds ratios
print()
print("Odds ratios for VACINA_COV:")
print(odds_ratios)
print()

# Crosstab with rows = outcome and columns = vaccination status, so that the
# tick labels and axis labels below describe the counts that are actually shown.
# (Previously the table was vaccination x outcome while the labels described
# outcome x vaccination, which misattributed the off-diagonal cells.)
crosstab = pd.crosstab(df_temp["UTI_EVOLUCAO"], df_temp["VACINA_COV"])
crosstab = crosstab.rename(
    index={0: "Otherwise", 1: "Severe cases"},
    columns={0: "NO", 1: "YES"},
)

fig, ax = plt.subplots(figsize=(8, 6))
# Counts are written once by the loop below; annot=True on top of it would draw
# every number twice.
sn.heatmap(crosstab, annot=False, cmap="Blues", cbar=True, ax=ax)

# Add values to each cell
for i in range(crosstab.shape[0]):
    for j in range(crosstab.shape[1]):
        ax.text(j + 0.5, i + 0.5, crosstab.iloc[i, j],
                ha='center', va='center', color='black', fontsize=10)

ax.set_title("Omicron: severe outcome by COVID-19 vaccination status")
ax.set_xlabel("VACINA_COV")
ax.set_ylabel("UTI_EVOLUCAO")
plt.show()

**Final considerations for the research**




*   There is no information to perform the logistic regression for intubation (I couldn't find the variable in the data dictionary)
*   No information on Neonatal severe outcomes was found in the data dictionary
*   No information in the data dictionary to build table 4
*   No information on Pregnancy outcomes was found in the data dictionary







