Skip to content
This repository has been archived by the owner on Oct 29, 2020. It is now read-only.

Commit

Permalink
Merge pull request #93 from NeuroDataDesign/ananyas
Browse files Browse the repository at this point in the history
Two sample tests
  • Loading branch information
tpsatish95 committed Dec 14, 2018
2 parents 3da3bef + 0cca8da commit 898690f
Show file tree
Hide file tree
Showing 9 changed files with 39,920 additions and 0 deletions.
9,399 changes: 9,399 additions & 0 deletions demos/salary_csv.csv

Large diffs are not rendered by default.

20,424 changes: 20,424 additions & 0 deletions demos/two_sample_demo_notebook.ipynb

Large diffs are not rendered by default.

457 changes: 457 additions & 0 deletions mgcpy/benchmarks/Power Curves for Two-Sample Tests.ipynb

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions mgcpy/benchmarks/power_two_sample.py
@@ -0,0 +1,59 @@
import numpy as np
import math
from mgcpy.independence_tests.dcorr import DCorr
from transform_matrices import Transform_Matrices
from scipy.ndimage import rotate

def power(independence_test, sample_generator, num_samples=100, theta=0, num_dimensions=2, noise=0.0, repeats=1000, alpha=.05):
'''
Estimate power
:param independence_test: an object whose class inherits from the Independence_Test abstract class
:type: Object(Independence_Test)
:param sample_generator: a function used to generate simulation from simulations.py with parameters given by the following arguments
- num_samples: default to 100
- num_dimensions: default to 1
- noise: default to 0
:type: function
:param num_samples: the number of samples generated by the simulation
:type: int
:param num_dimensions: the number of dimensions of the samples generated by the simulation
:type: int
:param noise: the noise used in simulation
:type: float
:param repeats: the number of times we generate new samples to estimate the null/alternative distribution
:type: int
:param alpha: the type I error level
:type: float
:return empirical_power: the estimated power
:type: float
'''
def compute_distance_matrix(data_matrix_X, data_matrix_Y):
# obtain the pairwise distance matrix for X and Y
dist_mtx_X = squareform(pdist(data_matrix_X, metric='euclidean'))
dist_mtx_Y = squareform(pdist(data_matrix_Y, metric='euclidean'))
return (dist_mtx_X, dist_mtx_Y)

# test statistics under the null, used to estimate the cutoff value under the null distribution
test_stats_null = np.zeros(repeats)
# test statistic under the alternative
test_stats_alternative = np.zeros(repeats)
for rep in range(repeats):
# generate new samples for each iteration
X, Y = sample_generator(num_samples, num_dimensions, noise)
data_matrix_X=Transform_Matrices(X,Y)[0]
data_matrix_Y=Transform_Matrices(X,Y)[1]
data_matrix_Y=data_matrix_Y[:,np.newaxis]
data_matrix_X=rotate(data_matrix_X,theta)
# data_matrix_X,data_matrix_Y=compute_distance_matrix(X,Y)
# permutation test
permuted_y = np.random.permutation(data_matrix_Y)
test_stats_null[rep], _ = independence_test.test_statistic(data_matrix_X=data_matrix_X, data_matrix_Y=permuted_y)
test_stats_alternative[rep], _ = independence_test.test_statistic(data_matrix_X=data_matrix_X, data_matrix_Y=data_matrix_Y)

# the cutoff is determined so that 1-alpha of the test statistics under the null distribution
# is less than the cutoff
cutoff = np.sort(test_stats_null)[math.ceil(repeats*(1-alpha))]
# the proportion of test statistics under the alternative which is no less than the cutoff (in which case
# the null is rejected) is the empirical power
empirical_power = np.where(test_stats_alternative >= cutoff)[0].shape[0] / repeats
return empirical_power

0 comments on commit 898690f

Please sign in to comment.