# Inferential statistics

## Importing useful libraries

In [1]:
# Importing the pandas library for data manipulation
import pandas as pd

# Import numpy library for efficient numeric operations
import numpy as np

# Import statistics library for basic statistical functions
import statistics as st

# Import the chisquare function to perform a chi-square test.
from scipy.stats import chisquare, chi2_contingency

# Import the fisher exact test.
from scipy.stats import fisher_exact

## Importing the data used

In [2]:
# Load the patient table from the Excel workbook, using the 'Número' column as the row index.
df = pd.read_excel('dados_cancerpositivo.xlsx', index_col = 'Número')
# Display the loaded frame.
# NOTE(review): the rendered output includes the 'Medical record' column — consider clearing it before sharing.
df

Unnamed: 0_level_0,Medical record,Date of collection,Histopathological,Estrogen receptor expression,Progesterone receptor expression,HER2-amplified,KI67,Molecular subtype,Tumor size,Grade,...,Menopausal status,Weight,Height,BMI,Exposure to pesticides,Chemoresistance,Recurrence,Death,Municipality,HER
Número,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,26214,2015-05-27,1.0,1.0,1.0,0.0,1.0,2.0,20.0,1.0,...,1.0,57.0,1.60,22.265625,1.0,0.0,0.0,0.0,Dois Vizinhos,
3,24773,2015-04-06,1.0,0.0,0.0,0.0,1.0,5.0,14.0,2.0,...,1.0,84.0,1.62,32.007316,1.0,0.0,0.0,0.0,Capanema,
5,26248,2015-06-08,1.0,0.0,0.0,0.0,1.0,5.0,30.0,2.0,...,1.0,64.0,1.52,27.700831,1.0,1.0,1.0,0.0,Planalto,
7,25778,2015-06-10,1.0,1.0,1.0,1.0,1.0,4.0,25.0,1.0,...,0.0,52.0,1.55,21.644121,1.0,1.0,0.0,0.0,Dois Vizinhos,
8,15847,2015-06-10,1.0,1.0,1.0,0.0,1.0,2.0,16.0,2.0,...,1.0,52.0,1.55,21.644121,1.0,1.0,1.0,1.0,Dois Vizinhos,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
933,44730,2022-12-06,1.0,1.0,1.0,1.0,1.0,4.0,15.0,2.0,...,,,,,,0.0,0.0,0.0,,
942,46377,2022-12-23,1.0,1.0,1.0,0.0,0.0,1.0,17.0,1.0,...,,,,,,0.0,0.0,0.0,,
946,44770,2023-02-08,1.0,1.0,0.0,0.0,1.0,2.0,8.0,3.0,...,,,,,,0.0,0.0,0.0,,
948,45672,2023-02-09,1.0,1.0,1.0,0.0,1.0,1.0,15.0,2.0,...,,,,,,0.0,0.0,0.0,,


Recoding the molecular subtype column so that the chi-square tables show readable labels.
The numeric codes 1, 2, 4 and 5 are replaced by Luminal A, Luminal B (labelled `B Luminal` in the code), HER2-amplified and Triple-negative, respectively.

In [3]:
# Recode the numeric subtype codes (column position 7) into text labels.
# The result is assigned back to the column instead of calling
# `replace(..., inplace=True)` on a column selection: that chained in-place
# form does not update `df` when pandas Copy-on-Write is active.
# 'B Luminal' is kept as written because the table-building functions label
# their p-value columns with the first letter of each group name.
coluna_subtipo = df.columns[7]
df[coluna_subtipo] = df[coluna_subtipo].replace(
    {
        1: 'Luminal A',
        2: 'B Luminal',
        4: 'HER2-amplified',
        5: 'Triple-negative',
    }
)
df[coluna_subtipo].value_counts()

B Luminal          122
Luminal A          118
HER2-amplified      59
Triple-negative     56
Name: Molecular subtype, dtype: int64

In [4]:
# Keep an independent copy of the recoded frame.
# NOTE(review): df_copia is not referenced again in the visible part of the notebook — confirm it is still needed.
df_copia = df.copy()

## Defining the populations of interest

### Separating the population exposed to pesticides from the total population

In [5]:
# Rows whose exposure flag equals 1. Rows with a missing exposure value do not match this filter.
exposto = df[df["Exposure to pesticides"]==1]

### Separating the population not exposed to pesticides from the total population

In [6]:
# Rows whose exposure flag equals 0. Rows with a missing exposure value do not match this filter.
n_exposto = df[df["Exposure to pesticides"]==0]

## Defining the functions for calculating chi-square

In [8]:
def print_do_p_value(p: np.array) -> list:
    """Format p-values as display strings.

    Parameters
    ----------
    p : np.array
        Iterable of numpy floats (each element must provide ``.round``).

    Returns
    -------
    list
        One string per input value: ``"<0.01"`` when the value is below 0.01,
        otherwise the value rounded to two decimals. NaN fails the comparison
        and is therefore rendered as ``"nan"``.
    """
    return_list = []
    for valor in p:
        if valor < 0.01:
            return_list.append("<0.01")
        else:
            return_list.append(str(valor.round(2)))
    return return_list

In [9]:
def teste_estatistico(df_retorno: pd.DataFrame, index_teste: list) -> list:
    """Run a chi-square test between the selected group columns, row by row.

    Parameters
    ----------
    df_retorno : pd.DataFrame
        Table with one row per parameter and one column per group.
    index_teste : list
        Positions of the group columns to compare.

    Returns
    -------
    list
        One formatted p-value per row of ``df_retorno``, as produced by
        ``print_do_p_value``.

    Notes
    -----
    ``scipy.stats.chisquare`` is called without expected frequencies, i.e. as
    a one-way test of the selected values against their common mean. When all
    selected values of a row are zero the statistic is 0/0, so the p-value is
    NaN and numpy emits a RuntimeWarning.
    """
    selecionadas = df_retorno.iloc[:, index_teste]
    # chisquare works along axis 0, so the compared groups must lie on that
    # axis: after transposing, each column of the array is one parameter.
    observado = np.transpose(np.array(selecionadas))
    _estatistica, p_valores = chisquare(observado)
    return print_do_p_value(p_valores)

### Functions for comparing means and frequencies across RISK STRATIFICATION groups

In [10]:
def tabela_freq_med(df: pd.DataFrame, coluna_grupo: list) -> pd.DataFrame:
    """Compare the mean of each parameter across the risk-stratification groups.

    Parameters
    ----------
    df : pd.DataFrame
        Patient-level table (for example the exposed or non-exposed subset).
    coluna_grupo : list
        Names of the columns to use. The list must contain the grouping
        column named by the notebook-level variable ``coluna_filter_1``; the
        remaining numeric columns are averaged per group.

    Returns
    -------
    pd.DataFrame
        One row per numeric parameter. The leading columns hold the group
        means rounded to two decimals; three further columns, labelled with
        the first letter of each compared group (for example ``"H/L"``),
        hold the formatted p-values returned by ``teste_estatistico``.

    Raises
    ------
    IndexError
        If the grouping column has fewer than three groups.
    """
    # Select with the argument that was passed in, not a notebook-level list.
    # numeric_only=True keeps text columns in the selection out of the
    # aggregation; pandas >= 2.0 no longer drops them silently.
    # NOTE(review): columns stored with object dtype are excluded even if they
    # hold numbers — confirm the dtypes produced by read_excel.
    medias = df.loc[:, coluna_grupo].groupby([coluna_filter_1]).mean(numeric_only=True)
    df_retorno = medias.T.copy().round(2)

    # Capture the group names before any p-value column is appended.
    grupos = [str(rotulo) for rotulo in df_retorno.columns[:3]]

    # The p-values are computed from the rounded means held in df_retorno.
    for i, j in [(0, 1), (1, 2), (0, 2)]:
        rotulo_par = grupos[i][0] + "/" + grupos[j][0]
        df_retorno[rotulo_par] = teste_estatistico(df_retorno, [i, j])

    return df_retorno

In [11]:
def tabela_freq_soma(df: pd.DataFrame, coluna_grupo: list) -> pd.DataFrame:
    """Compare the count of each binary parameter across the risk-stratification groups.

    Parameters
    ----------
    df : pd.DataFrame
        Patient-level table (for example the exposed or non-exposed subset).
    coluna_grupo : list
        Names of the columns to use. The list must contain the grouping
        column named by the notebook-level variable ``coluna_filter_1``; the
        remaining numeric columns are summed per group.

    Returns
    -------
    pd.DataFrame
        One row per numeric parameter. The leading columns hold the per-group
        sums; three further columns, labelled with the first letter of each
        compared group (for example ``"H/L"``), hold the formatted p-values
        returned by ``teste_estatistico``.

    Raises
    ------
    IndexError
        If the grouping column has fewer than three groups.
    """
    # Select with the argument that was passed in, not a notebook-level list.
    # numeric_only=True keeps text columns in the selection out of the
    # aggregation; on pandas >= 2.0 a plain sum() would concatenate them.
    # NOTE(review): columns stored with object dtype are excluded even if they
    # hold numbers — confirm the dtypes produced by read_excel.
    somas = df.loc[:, coluna_grupo].groupby([coluna_filter_1]).sum(numeric_only=True)
    df_retorno = somas.T.copy()

    # Capture the group names before any p-value column is appended.
    grupos = [str(rotulo) for rotulo in df_retorno.columns[:3]]

    for i, j in [(0, 1), (1, 2), (0, 2)]:
        rotulo_par = grupos[i][0] + "/" + grupos[j][0]
        df_retorno[rotulo_par] = teste_estatistico(df_retorno, [i, j])

    return df_retorno

### Functions for comparing means and frequencies across MOLECULAR SUBTYPES

In [12]:
def tabela_freq_med_sub(df: pd.DataFrame, coluna_grupo_med: list) -> pd.DataFrame:
    """Compare the mean of each parameter across the molecular subtypes.

    Parameters
    ----------
    df : pd.DataFrame
        Patient-level table (for example the exposed or non-exposed subset).
    coluna_grupo_med : list
        Names of the columns to use. The list must contain the grouping
        column named by the notebook-level variable ``coluna_filter_2``; the
        remaining numeric columns are averaged per group.

    Returns
    -------
    pd.DataFrame
        One row per numeric parameter. The leading columns hold the group
        means rounded to two decimals; six further columns, labelled with the
        first letter of each compared group (for example ``"B/H"``), hold the
        formatted p-values returned by ``teste_estatistico``.

    Raises
    ------
    IndexError
        If the grouping column has fewer than four groups.
    """
    # numeric_only=True keeps text columns in the selection out of the
    # aggregation; pandas >= 2.0 no longer drops them silently.
    # NOTE(review): columns stored with object dtype are excluded even if they
    # hold numbers — confirm the dtypes produced by read_excel.
    medias = df.loc[:, coluna_grupo_med].groupby([coluna_filter_2]).mean(numeric_only=True)
    df_retorno = medias.T.copy().round(2)

    # Capture the group names before any p-value column is appended.
    grupos = [str(rotulo) for rotulo in df_retorno.columns[:4]]

    # Pair order fixes the order of the appended p-value columns.
    pares = [(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (2, 0)]
    # The p-values are computed from the rounded means held in df_retorno.
    for i, j in pares:
        rotulo_par = grupos[i][0] + "/" + grupos[j][0]
        df_retorno[rotulo_par] = teste_estatistico(df_retorno, [i, j])

    return df_retorno

In [13]:
def tabela_freq_soma_sub(df: pd.DataFrame, coluna_grupo_sum: list) -> pd.DataFrame:
    """Compare the count of each binary parameter across the molecular subtypes.

    Parameters
    ----------
    df : pd.DataFrame
        Patient-level table (for example the exposed or non-exposed subset).
    coluna_grupo_sum : list
        Names of the columns to use. The list must contain the grouping
        column named by the notebook-level variable ``coluna_filter_2``; the
        remaining numeric columns are summed per group.

    Returns
    -------
    pd.DataFrame
        One row per numeric parameter. The leading columns hold the per-group
        sums; six further columns, labelled with the first letter of each
        compared group (for example ``"B/H"``), hold the formatted p-values
        returned by ``teste_estatistico``.

    Raises
    ------
    IndexError
        If the grouping column has fewer than four groups.
    """
    # numeric_only=True keeps text columns in the selection out of the
    # aggregation; on pandas >= 2.0 a plain sum() would concatenate them.
    # NOTE(review): columns stored with object dtype are excluded even if they
    # hold numbers — confirm the dtypes produced by read_excel.
    somas = df.loc[:, coluna_grupo_sum].groupby([coluna_filter_2]).sum(numeric_only=True)
    df_retorno = somas.T.copy()

    # Capture the group names before any p-value column is appended.
    grupos = [str(rotulo) for rotulo in df_retorno.columns[:4]]

    # Pair order fixes the order of the appended p-value columns.
    pares = [(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (2, 0)]
    for i, j in pares:
        rotulo_par = grupos[i][0] + "/" + grupos[j][0]
        df_retorno[rotulo_par] = teste_estatistico(df_retorno, [i, j])

    return df_retorno

### Function for calculation using the frequency of EXPOSURE TO PESTICIDES

In [14]:
def tabela_freq_soma_teste(df: pd.DataFrame, coluna_grupo: list) -> pd.DataFrame:
    """Compare the count of each binary parameter between the two exposure groups.

    Parameters
    ----------
    df : pd.DataFrame
        Patient-level table.
    coluna_grupo : list
        Names of the columns to use. The list must contain the grouping
        column named by the notebook-level variable ``coluna_filter_3``; the
        remaining numeric columns are summed per group.

    Returns
    -------
    pd.DataFrame
        One row per numeric parameter. The leading columns hold the per-group
        sums; one further column, labelled ``"<group 0>/<group 1>"`` with the
        full group values, holds the formatted p-values returned by
        ``teste_estatistico``.

    Raises
    ------
    IndexError
        If the grouping column has fewer than two groups.
    """
    # Select with the argument that was passed in, not a notebook-level list.
    # numeric_only=True keeps text columns in the selection out of the
    # aggregation; on pandas >= 2.0 a plain sum() would concatenate them.
    # NOTE(review): columns stored with object dtype are excluded even if they
    # hold numbers — confirm the dtypes produced by read_excel.
    somas = df.loc[:, coluna_grupo].groupby([coluna_filter_3]).sum(numeric_only=True)
    df_retorno = somas.T.copy()

    # Group values may be numeric here, so they are converted to text for the label.
    index_df_1 = str(df_retorno.columns[0]) + "/" + str(df_retorno.columns[1])

    df_retorno[index_df_1] = teste_estatistico(df_retorno, [0, 1])

    return df_retorno

### Separating the filters used in the function

In [15]:
# Grouping columns, read as notebook-level variables by the table-building functions
coluna_filter_1 = "Risk stratification"
coluna_filter_2 = "Molecular subtype"
coluna_filter_3 = 'Exposure to pesticides'

# Column selections, picked by position in df, for the mean-based and the sum-based tables.
# NOTE(review): positional indices break silently if the spreadsheet layout changes. Position 7 is
# 'Molecular subtype'; position 13 is presumably 'Risk stratification' — confirm against df.columns.
coluna_grupo_med = list(df.columns[[8, 14,16, 17, 18, 7, 13]].values.tolist())
coluna_grupo_sum = list(df.columns[[3,4,5,6,10,11,12,15,19,20,21,22, 7, 13]].values.tolist())

## Defining the functions for calculating fisher exact test

Fisher's exact test was calculated only for parameters with a patient frequency of less than 5.

In [16]:
def teste_fisher(data):
# performing fishers exact test on the data
    odd_ratio, p_value = fisher_exact(data)
    print ('odd ratio is : ' + str(round(odd_ratio,2)) + ' p_value is : ' + str(round(p_value,2)))

## Calculating the Chi-square test

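In the Chi-square test of independence, the mean of the quantitative parameters and the frequency of the binary parameters were evaluated for exploratory purposes. Only results with clinical significance were reported in the article. Note: as implemented, the p-values come from `scipy.stats.chisquare` applied to each pair of group values (a one-way test against their common mean), not from `chi2_contingency`.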

### Calculating for the population exposed to pesticides

#### Are patients exposed to pesticides different from each other according to RISK STRATIFICATION?

In [17]:
comp_exposto_soma = tabela_freq_soma(exposto, coluna_grupo_sum)
comp_exposto_soma

  terms = (f_obs_float - f_exp)**2 / f_exp


Risk stratification,High,Low,Medium,H/L,L/M,H/M
Estrogen receptor expression,25.0,15.0,83.0,0.11,<0.01,<0.01
Progesterone receptor expression,16.0,10.0,54.0,0.24,<0.01,<0.01
HER2-amplified,23.0,0.0,0.0,<0.01,,<0.01
KI67,46.0,2.0,62.0,<0.01,<0.01,0.12
Angiolymphatic emboli,24.0,1.0,24.0,<0.01,<0.01,1.0
Lymph node,33.0,0.0,31.0,<0.01,<0.01,0.8
Distant metastasis,37.0,0.0,35.0,<0.01,<0.01,0.81
Menopausal status,41.0,11.0,66.0,<0.01,<0.01,0.02
Exposure to pesticides,72.0,15.0,95.0,<0.01,<0.01,0.08
Chemoresistance,21.0,1.0,18.0,<0.01,<0.01,0.63


In [18]:
comp_exposto_med = tabela_freq_med(exposto, coluna_grupo_med)
comp_exposto_med

Risk stratification,High,Low,Medium,H/L,L/M,H/M
Tumor size,38.44,12.07,27.35,<0.01,0.01,0.17
Age at diagnosis,54.1,58.8,56.74,0.66,0.85,0.8
Weight,72.88,66.55,72.13,0.59,0.64,0.95
Height,1.6,1.61,1.59,1.0,0.99,1.0
BMI,28.0,26.21,28.11,0.81,0.8,0.99


#### Are patients with exposure to pesticides distinct in comparison to MOLECULAR SUBTYPE?

In [19]:
comp_exposto_soma_sm_alt = tabela_freq_soma_sub(exposto, coluna_grupo_sum)
comp_exposto_soma_sm_alt

  terms = (f_obs_float - f_exp)**2 / f_exp


Molecular subtype,B Luminal,HER2-amplified,Luminal A,Triple-negative,B/H,H/L,L/T,T/B,H/T,L/B
Estrogen receptor expression,65.0,10.0,58.0,0.0,<0.01,<0.01,<0.01,<0.01,<0.01,0.53
Progesterone receptor expression,38.0,8.0,43.0,0.0,<0.01,<0.01,<0.01,<0.01,<0.01,0.58
HER2-amplified,0.0,24.0,0.0,0.0,<0.01,<0.01,,,<0.01,
KI67,66.0,27.0,8.0,19.0,<0.01,<0.01,0.03,<0.01,0.24,<0.01
Angiolymphatic emboli,18.0,6.0,15.0,13.0,0.01,0.05,0.71,0.37,0.11,0.6
Lymph node,30.0,9.0,19.0,10.0,<0.01,0.06,0.09,<0.01,0.82,0.12
Distant metastasis,30.0,13.0,22.0,11.0,<0.01,0.13,0.06,<0.01,0.68,0.27
Menopausal status,46.0,14.0,42.0,25.0,<0.01,<0.01,0.04,0.01,0.08,0.67
Exposure to pesticides,66.0,33.0,61.0,38.0,<0.01,<0.01,0.02,<0.01,0.55,0.66
Chemoresistance,15.0,10.0,8.0,11.0,0.32,0.64,0.49,0.43,0.83,0.14


In [20]:
# Saving the tables obtained for the population exposed to pesticides
#tabela_exp = comp_exposto_soma_sm_alt.to_excel("tabela_qquadrado_expost_alt.xlsx")

In [21]:
comp_exposto_med_sm = tabela_freq_med_sub(exposto, coluna_grupo_med)
comp_exposto_med_sm

Molecular subtype,B Luminal,HER2-amplified,Luminal A,Triple-negative,B/H,H/L,L/T,T/B,H/T,L/B
Tumor size,33.0,34.24,23.85,33.35,0.88,0.17,0.21,0.97,0.91,0.22
Age at diagnosis,57.48,51.72,56.92,55.47,0.58,0.62,0.89,0.85,0.72,0.96
Weight,70.76,75.98,68.38,73.9,0.67,0.53,0.64,0.79,0.87,0.84
Height,1.6,1.62,1.59,1.6,0.99,0.99,1.0,1.0,0.99,1.0
BMI,27.03,28.81,27.27,28.59,0.81,0.84,0.86,0.83,0.98,0.97


## Calculating Fisher's exact test only for parameters with patient frequency less than 5.

In [22]:
# Column names used to build the contingency tables for Fisher's exact test
recorrencia = 'Recurrence'
obito = 'Death'
subtipo = 'Molecular subtype'

Calculation for recurrence

In [23]:
tab_reco = pd.crosstab(exposto[subtipo], exposto[recorrencia])
tab_reco

Recurrence,0.0,1.0
Molecular subtype,Unnamed: 1_level_1,Unnamed: 2_level_1
B Luminal,59,7
HER2-amplified,27,5
Luminal A,56,4
Triple-negative,33,5


In [24]:
# Testing the Luminal B and Luminal A comparison
teste_fisher(tab_reco.iloc[[0,2],:])

odd ratio is : 0.6 p_value is : 0.54


Calculation for death

In [25]:
tab_obito =  pd.crosstab(exposto[subtipo], exposto[obito])
tab_obito

Death,0.0,1.0
Molecular subtype,Unnamed: 1_level_1,Unnamed: 2_level_1
B Luminal,63,3
HER2-amplified,31,2
Luminal A,59,2
Triple-negative,30,8


In [26]:
# Testing the Luminal B and HER2-amplified comparison
teste_fisher(tab_obito.iloc[[0,1],:])

odd ratio is : 1.35 p_value is : 1.0


In [27]:
# Testing the Luminal B and Luminal A comparison
teste_fisher(tab_obito.iloc[[0,2],:])

odd ratio is : 0.71 p_value is : 1.0


In [28]:
# Testing the Luminal B and Triple-negative comparison
teste_fisher(tab_obito.iloc[[0,3],:])

odd ratio is : 5.6 p_value is : 0.02


### Calculating for the population not exposed to pesticides

#### Are patients not exposed to pesticides different from each other according to RISK STRATIFICATION?

In [29]:
comp_n_exposto_soma = tabela_freq_soma(n_exposto, coluna_grupo_sum)
comp_n_exposto_soma

  terms = (f_obs_float - f_exp)**2 / f_exp


Risk stratification,High,Low,Medium,H/L,L/M,H/M
Estrogen receptor expression,15.0,10.0,70.0,0.32,<0.01,<0.01
Progesterone receptor expression,10.0,9.0,51.0,0.82,<0.01,<0.01
HER2-amplified,14.0,0.0,2.0,<0.01,0.16,<0.01
KI67,31.0,1.0,42.0,<0.01,<0.01,0.2
Angiolymphatic emboli,14.0,1.0,15.0,<0.01,<0.01,0.85
Lymph node,16.0,0.0,17.0,<0.01,<0.01,0.86
Distant metastasis,17.0,0.0,19.0,<0.01,<0.01,0.74
Menopausal status,24.0,7.0,48.0,<0.01,<0.01,<0.01
Exposure to pesticides,0.0,0.0,0.0,,,
Chemoresistance,9.0,0.0,14.0,<0.01,<0.01,0.3


In [30]:
comp_n_exposto_med = tabela_freq_med(n_exposto, coluna_grupo_med)
comp_n_exposto_med

Risk stratification,High,Low,Medium,H/L,L/M,H/M
Tumor size,40.34,18.2,24.48,<0.01,0.34,0.05
Age at diagnosis,53.97,60.89,56.73,0.52,0.7,0.79
Weight,70.24,74.25,73.31,0.74,0.94,0.8
Height,1.61,1.63,1.6,0.99,0.99,1.0
BMI,27.5,26.86,28.3,0.93,0.85,0.91


#### Are patients without exposure to pesticides distinct in comparison to MOLECULAR SUBTYPE?

In [31]:
comp_n_exposto_soma_sm_alt = tabela_freq_soma_sub(n_exposto, coluna_grupo_sum)
comp_n_exposto_soma_sm_alt

  terms = (f_obs_float - f_exp)**2 / f_exp


Molecular subtype,B Luminal,HER2-amplified,Luminal A,Triple-negative,B/H,H/L,L/T,T/B,H/T,L/B
Estrogen receptor expression,47.0,8.0,51.0,0.0,<0.01,<0.01,<0.01,<0.01,<0.01,0.69
Progesterone receptor expression,36.0,7.0,35.0,0.0,<0.01,<0.01,<0.01,<0.01,<0.01,0.91
HER2-amplified,0.0,18.0,0.0,0.0,<0.01,<0.01,,,<0.01,
KI67,46.0,18.0,6.0,14.0,<0.01,0.01,0.07,<0.01,0.48,<0.01
Angiolymphatic emboli,16.0,4.0,7.0,4.0,<0.01,0.37,0.37,<0.01,1.0,0.06
Lymph node,15.0,3.0,9.0,5.0,<0.01,0.08,0.29,0.03,0.48,0.22
Distant metastasis,16.0,4.0,10.0,5.0,<0.01,0.11,0.2,0.02,0.74,0.24
Menopausal status,25.0,9.0,39.0,12.0,<0.01,<0.01,<0.01,0.03,0.51,0.08
Exposure to pesticides,0.0,0.0,0.0,0.0,,,,,,
Chemoresistance,11.0,3.0,6.0,3.0,0.03,0.32,0.32,0.03,1.0,0.23


In [32]:
# Saving the tables obtained for the population not exposed to pesticides
#tabela_n_exp = comp_n_exposto_soma_sm_alt.to_excel("tabela_qquadrado_naoexpost_alt.xlsx")

In [33]:
comp_n_exposto_med_sm = tabela_freq_med_sub(n_exposto, coluna_grupo_med)
comp_n_exposto_med_sm

Molecular subtype,B Luminal,HER2-amplified,Luminal A,Triple-negative,B/H,H/L,L/T,T/B,H/T,L/B
Tumor size,27.43,28.95,21.96,44.38,0.84,0.33,<0.01,0.05,0.07,0.44
Age at diagnosis,54.63,50.89,58.46,55.88,0.72,0.47,0.81,0.91,0.63,0.72
Weight,72.72,63.94,74.87,75.34,0.45,0.35,0.97,0.83,0.33,0.86
Height,1.59,1.58,1.62,1.64,1.0,0.98,0.99,0.98,0.97,0.99
BMI,27.88,25.58,28.43,28.66,0.75,0.7,0.98,0.92,0.68,0.94


## Calculating Fisher's exact test only for parameters with patient frequency less than 5.

In [34]:
# Column names used to build the contingency tables for Fisher's exact test
# in the non-exposed population (rows: molecular subtype, columns: each outcome).
metastasi = 'Distant metastasis'
menopausa = 'Menopausal status'
linfonodo = 'Lymph node'
embolo = 'Angiolymphatic emboli'
quimio = 'Chemoresistance'
recorrencia = 'Recurrence'
obito = 'Death'
subtipo = 'Molecular subtype'

Calculating angiolymphatic emboli

In [35]:
tab_embolon = pd.crosstab(n_exposto[subtipo], n_exposto[embolo])
tab_embolon

Angiolymphatic emboli,0.0,1.0
Molecular subtype,Unnamed: 1_level_1,Unnamed: 2_level_1
B Luminal,29,16
HER2-amplified,16,4
Luminal A,42,7
Triple-negative,11,4


In [36]:
# Testing the Luminal B and HER2-amplified comparison
teste_fisher(tab_embolon.iloc[[0,1],:])

odd ratio is : 0.45 p_value is : 0.26


In [37]:
# Testing the Luminal B and Triple-negative comparison
teste_fisher(tab_embolon.iloc[[0,3],:])

odd ratio is : 0.66 p_value is : 0.75


Calculating lymph node

In [38]:
tab_linfonodon = pd.crosstab(n_exposto[subtipo], n_exposto[linfonodo])
tab_linfonodon

Lymph node,0.0,1.0
Molecular subtype,Unnamed: 1_level_1,Unnamed: 2_level_1
B Luminal,24,15
HER2-amplified,7,3
Luminal A,31,9
Triple-negative,7,5


In [39]:
# Testing the Luminal B and HER2-amplified comparison
teste_fisher(tab_linfonodon.iloc[[0,1],:])

odd ratio is : 0.69 p_value is : 0.73


Calculating metastasis

In [40]:
tab_metastasen = pd.crosstab(n_exposto[subtipo], n_exposto[metastasi])
tab_metastasen

Distant metastasis,0.0,1.0
Molecular subtype,Unnamed: 1_level_1,Unnamed: 2_level_1
B Luminal,22,16
HER2-amplified,9,4
Luminal A,28,10
Triple-negative,7,5


In [41]:
# Testing the Luminal B and HER2-amplified comparison
teste_fisher(tab_metastasen.iloc[[0,1],:])

odd ratio is : 0.61 p_value is : 0.53


Calculating chemoresistance

In [42]:
tab_quimion = pd.crosstab(n_exposto[subtipo], n_exposto[quimio])
tab_quimion

Chemoresistance,0.0,1.0
Molecular subtype,Unnamed: 1_level_1,Unnamed: 2_level_1
B Luminal,36,11
HER2-amplified,18,3
Luminal A,44,6
Triple-negative,13,3


In [43]:
# Testing the Luminal B and HER2-amplified comparison
teste_fisher(tab_quimion.iloc[[0,1],:])

odd ratio is : 0.55 p_value is : 0.52


In [44]:
# Testing the Luminal B and Triple-negative comparison
teste_fisher(tab_quimion.iloc[[0,3],:])

odd ratio is : 0.76 p_value is : 1.0


Calculating recurrence

In [45]:
tab_recon = pd.crosstab(n_exposto[subtipo], n_exposto[recorrencia])
tab_recon

Recurrence,0.0,1.0
Molecular subtype,Unnamed: 1_level_1,Unnamed: 2_level_1
B Luminal,42,5
HER2-amplified,19,2
Luminal A,47,4
Triple-negative,13,3


In [48]:
# Testing the Luminal B and Luminal A comparison
teste_fisher(tab_recon.iloc[[0,2],:])

odd ratio is : 0.71 p_value is : 0.73


In [49]:
# Testing the Luminal B and HER2-amplified comparison
teste_fisher(tab_recon.iloc[[0,1],:])

odd ratio is : 0.88 p_value is : 1.0


In [50]:
# Testing the Luminal B and Triple-negative comparison
teste_fisher(tab_recon.iloc[[0,3],:])

odd ratio is : 1.94 p_value is : 0.41


Calculating death

In [51]:
tab_obiton = pd.crosstab(n_exposto[subtipo], n_exposto[obito])
tab_obiton

Death,0.0,1.0
Molecular subtype,Unnamed: 1_level_1,Unnamed: 2_level_1
B Luminal,45,2
HER2-amplified,21,0
Luminal A,51,0
Triple-negative,10,6


In [52]:
# Testing the Luminal B and Luminal A comparison
teste_fisher(tab_obiton.iloc[[0,2],:])

odd ratio is : 0.0 p_value is : 0.23


In [53]:
# Testing the Luminal B and HER2-amplified comparison
teste_fisher(tab_obiton.iloc[[0,1],:])

odd ratio is : 0.0 p_value is : 1.0


In [54]:
# Testing the Luminal B and Triple-negative comparison
teste_fisher(tab_obiton.iloc[[0,3],:])

odd ratio is : 13.5 p_value is : 0.0
