In [0]:
!pip install hypothesize

In [0]:
from hypothesize.utilities import create_example_data

### How to compare two groups

#### Load data from a CSV or create some random data

In [21]:
# Alternative data sources (these need `import pandas as pd`, and the second
# one also needs `import numpy as np`):
#   df = pd.read_csv("two_groups_data.csv")
#   df = pd.DataFrame(np.random.rand(30, 2), columns=['Group_1', 'Group_2'])

# Used here: random example data for a two-group design.
df = create_example_data(design_values=2)

# Preview the first rows.
df.head()

Unnamed: 0,cell_1,cell_2
0,0.329851,0.526237
1,0.606983,0.983393
2,0.810906,0.081501
3,0.423467,0.528037
4,0.925547,0.729418


#### Import the desired function and pass in the data for each group
- This example uses the bootstrapped-t method with 20% trimmed means
- The output is a dictionary containing the results (95% confidence interval, p_value, test statistics, etc...)

In [22]:
from hypothesize.compare_groups_with_single_factor import yuenbt

# Compare the two groups (bootstrap-t method, 20% trimmed means).
# NOTE(review): bootstrap output presumably varies between runs; check whether
# yuenbt accepts a seed if reproducible numbers are required.
results = yuenbt(df['cell_1'], df['cell_2'])

# The returned dictionary holds the confidence interval under 'ci'.
print(results['ci'])

[-0.11603944424803112, 0.18613575064035434]


---

### How to compare three groups

#### Load data from a CSV or create some random data

In [23]:
import pandas as pd

# Alternative data sources (the second one also needs `import numpy as np`):
#   df = pd.read_csv("one_way_data.csv")
#   df = pd.DataFrame(np.random.rand(30, 3), columns=['Group_1', 'Group_2', 'Group_3'])

# Used here: random example data for a three-group design.
df = create_example_data(design_values=3)

# Preview the first rows.
df.head()

Unnamed: 0,cell_1,cell_2,cell_3
0,0.73784,0.989855,0.773771
1,0.563954,0.803775,0.488092
2,0.455691,0.399078,0.766576
3,0.406155,0.990134,0.130478
4,0.707244,0.142606,0.76368


#### Import the desired functions and pass in the inputs
- One approach is to use a set of linear contrasts that will test all pairwise comparisons
- Then, the bootstrap-t method and the 20% trimmed mean can be used
- CIs are adjusted to control the family-wise error rate (FWE)
- All pairwise contrasts can be created automatically using the `con1way` function
- The results are a dictionary of DataFrames that contain various statistics (p_value, CIs, standard error, test statistics, etc)

In [0]:
from hypothesize.compare_groups_with_single_factor import linconb
from hypothesize.utilities import con1way

# Run the bootstrap-t linear-contrast test on all groups; con1way(3) builds
# the pairwise contrasts for three groups.
results = linconb(
    df,
    con=con1way(3),
)

In [25]:
# Per-contrast test statistic, standard error and p-value.
results['test']

Unnamed: 0,contrast_index,test,se,p_value
0,0.0,-0.772485,0.076008,0.474124
1,1.0,-1.145491,0.061955,0.268781
2,2.0,-0.151003,0.081146,0.883139


In [26]:
# Per-contrast estimate (psihat) with its lower/upper confidence bounds.
results['psihat']

Unnamed: 0,contrast_index,psihat,ci_low,ci_up
0,0.0,-0.058715,-0.240198,0.122768
1,1.0,-0.070968,-0.218896,0.07696
2,2.0,-0.012253,-0.206006,0.181499


---

### How to compare groups in a factorial design

#### Load data from a CSV or create some random data

In [27]:
import pandas as pd

# Alternative data sources (the second one also needs `import numpy as np`):
#   df = pd.read_csv("two_way_data.csv")
#   df = pd.DataFrame(
#       np.random.rand(30, 6),
#       columns=['Cell_1_1', 'Cell_1_2', 'Cell_1_3', 'Cell_2_1', 'Cell_2_2', 'Cell_2_3'],
#   )

# Used here: random example data for a 2-by-3 factorial design.
df = create_example_data(design_values=[2, 3])

# Preview the first rows.
df.head()

Unnamed: 0,cell_1_1,cell_1_2,cell_1_3,cell_2_1,cell_2_2,cell_2_3
0,0.167022,0.614963,0.519649,0.228407,0.819594,0.556258
1,0.850018,0.927924,0.475344,0.294832,0.492162,0.463243
2,0.955372,0.957722,0.490048,0.981387,0.213012,0.161629
3,0.057583,0.803063,0.860049,0.464877,0.019053,0.377123
4,0.984513,0.858128,0.331574,0.067155,0.484409,0.870925


#### Import the desired function and pass in the data
- This example uses a 2-by-3 design
- One approach is to use a set of linear contrasts that will test all main effects and interactions
- Then, the bootstrap-t method and the 20% trimmed mean can be used
- The results are a dictionary of DataFrames that contain various statistics for each factor and the interactions

In [0]:
from hypothesize.compare_groups_with_two_factors import bwmcp

# J=2 and K=3 match the 2-by-3 design of the example data.
results = bwmcp(
    J=2,
    K=3,
    x=df,
)

In [29]:
# Contrast results for factor A.
results['factor_A']

Unnamed: 0,con_num,psihat,se,test,crit_value,p_value
0,0.0,-0.024799,0.119185,-0.208069,2.009311,0.834725


In [30]:
# Contrast results for factor B.
results['factor_B']

Unnamed: 0,con_num,psihat,se,test,crit_value,p_value
0,0.0,0.078105,0.121623,0.642185,2.345532,0.547579
1,1.0,0.045894,0.128648,0.356742,2.345532,0.707846
2,2.0,-0.03221,0.113123,-0.284738,2.345532,0.772955


In [31]:
# Contrast results stored under 'factor_AB' (presumably the A-by-B interaction).
results['factor_AB']

Unnamed: 0,con_num,psihat,se,test,crit_value,p_value
0,0.0,0.018329,0.121623,0.150703,2.326647,0.891486
1,1.0,-0.146916,0.128648,-1.142001,2.326647,0.252087
2,2.0,-0.165245,0.113123,-1.460755,2.326647,0.145242


---

### How to compute a robust correlation

#### Load data from a CSV or create some random data

In [32]:
import pandas as pd

# Alternative data sources (the second one also needs `import numpy as np`):
#   df = pd.read_csv("two_groups_data.csv")
#   df = pd.DataFrame(np.random.rand(30, 2), columns=['Group_1', 'Group_2'])

# Used here: random example data with two columns.
df = create_example_data(design_values=2)

# Preview the first rows.
df.head()

Unnamed: 0,cell_1,cell_2
0,0.349846,0.166183
1,0.88076,0.933885
2,0.400145,0.462919
3,0.000669,0.765621
4,0.084667,0.729029


#### Import the desired function and pass in the data for each group
- One approach is to winsorize the x and y data
- A heteroscedastic method for testing zero correlation is also provided in this package but not shown here 
  - Please see the function `corb`, which uses the percentile bootstrap to compute a 1-alpha CI and p_value for any correlation
- The output is a dictionary containing various statistics (the winsorized correlation, winsorized covariance, etc...)

In [33]:
from hypothesize.measuring_associations import wincor

# Winsorized correlation between the two columns.
results = wincor(df['cell_1'], df['cell_2'])

# The correlation itself is stored under 'wcor'.
results['wcor']

0.008829355421979232