In [2]:
import numpy as np
from math import sqrt 
from statsmodels.stats.power import TTestIndPower 

# Tutorial I found most helpful for power analysis: 
# https://www.geeksforgeeks.org/introduction-to-power-analysis-in-python/

no_lig = np.array([16394000.0, 11548300.0, 16756800.0, 15643300.0]) # These were the values using the no ligand SASA based structure data # ([16829900.0, 10818400.0, 5520290.0])
lig = np.array([54221300.0, 5043110.0, 34082400.0, 25628200.0]) # These were the values using the no ligand SASA based structure data # ([23684500.0, 10786000.0, 15460900.0])

#calculation of effect size 
# size of samples in pilot study 
n1, n2 = len(lig), len(no_lig)
  
# variance of samples in pilot study 
s1, s2 = np.std(lig)**2, np.std(no_lig)**2 
  
# calculate the pooled standard deviation  
# (Cohen's d) 
s = sqrt(((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2)) 
  
# means of the samples 
u1, u2 = np.mean(lig), np.mean(no_lig)
  
# calculate the effect size 
d = (u1 - u2) / s 
print(f'Effect size: {d}') 
  
# factors for power analysis 
alpha = 0.05
power = 0.8
  
# perform power analysis to find sample size  
# for given effect 
obj = TTestIndPower() 
n = obj.solve_power(effect_size=d, alpha=alpha, power=power, 
                    ratio=1, alternative='two-sided') 

print('Sample size/Number needed in each group: {:.3f}'.format(n))

Effect size: 1.1668963053065435
Sample size/Number needed in each group: 12.565
