# pyNetCor Demo 

## Create Data

In [38]:
import numpy as np

features = 1000
samples = 100
# Seed the generator so that Restart & Run All reproduces the same matrices.
seed = 42
rng = np.random.default_rng(seed)
# Two independently drawn (features, samples) matrices of uniform values in [0, 1).
arr1 = rng.random((features, samples))
arr2 = rng.random((features, samples))

## Calculate correlation matrix

In [39]:
# Build the complete correlation matrix in a single call and compare it
# against NumPy's result for the same input.

from pynetcor.cor import corrcoef

# NumPy reference matrix.
cor_default_values = np.corrcoef(arr1)
# pyNetCor matrix computed from the same array.
cor_test_values = corrcoef(arr1)

matrices_agree = np.allclose(cor_default_values, cor_test_values)
assert matrices_agree

## Top-k correlation search

Identify the accurate top k correlations (Pearson correlation).

In [40]:
from pynetcor.cor import cor_topk

k = 15

# NumPy reference: flatten the full correlation matrix, rank every entry by
# absolute value (largest first) and keep the first k values.
# NOTE(review): the flattened matrix still contains the diagonal
# self-correlations (all 1.0), so with k=15 the reference is made of those
# entries only -- confirm this is the comparison that is intended.
cor_matrix = np.corrcoef(arr1)
cor_ref = cor_matrix.flatten()
abs_descending = np.argsort(-np.abs(cor_ref))
topk_order = abs_descending[:k]
topk_values = cor_ref[topk_order]
val_ref = topk_values

# pyNetCor: column 2 of the result is compared against the reference values
# (presumably the correlation column -- verify against the pyNetCor docs).
topk_result = cor_topk(arr1, arr1, k=k)
val = topk_result[:, 2]

assert np.allclose(val, val_ref)

## Top-k differential correlation search

Identify the accurate top k differences in correlation between pairs of features across two states or time points.

In [41]:
from pynetcor.cor import cor_topkdiff

k = 15

# NumPy reference: element-wise difference between the correlation matrix of
# arr1 and that of arr2, ranked by absolute difference (largest first).
cm1 = np.corrcoef(arr1)
cm2 = np.corrcoef(arr2)
m_diff = np.subtract(cm1, cm2)
v_diff = m_diff.flatten()
abs_diff = np.abs(v_diff)
order = np.argsort(-abs_diff)
topkdiff_ref = abs_diff[order][:k]

# pyNetCor: take the absolute value of column 2 of the result, which is what
# gets compared with the reference differences
# (presumably the difference column -- verify against the pyNetCor docs).
topkdiff_result = cor_topkdiff(
    x1=arr1, y1=arr2, x2=arr1, y2=arr2, k=k, method="pearson", threads=8
)
topkdiff = np.abs(topkdiff_result[:, 2])

assert np.allclose(topkdiff, topkdiff_ref)

## P-value computation

Compute the P-values for correlations (Pearson or Spearman) using the Student's t-distribution. The approximation method is significantly faster than the classical method, with absolute errors generally below 1e-8.

In [42]:
# Import the submodule explicitly: a bare `import scipy` is not guaranteed to
# expose `scipy.stats` on older SciPy releases.
import scipy.stats
from pynetcor.cor import pvalue_student_t

# Reference: scipy pearsonr over every ordered pair of columns of arr1
# (self-pairs included), so each variable has arr1.shape[0] observations.
n_columns = arr1.shape[1]
correlations = []
pvalues = []
for i in range(n_columns):
    for j in range(n_columns):
        r, p = scipy.stats.pearsonr(arr1[:, i], arr1[:, j])
        correlations.append(r)
        pvalues.append(p)
correlations = np.asarray(correlations)
pvalues = np.asarray(pvalues)

# pyNetCor: derive the P-values from the correlations alone. The second
# argument is (number of observations - 2), presumably the degrees of freedom
# of the t-distribution -- confirm against the pyNetCor docs.
derived_pvalues = pvalue_student_t(
    correlations, arr1.shape[0] - 2, approx=False, threads=8
)

assert np.allclose(pvalues, derived_pvalues)

## Unified implementation for calculating correlations and P-values.

In [43]:
# Import the submodule explicitly: a bare `import scipy` is not guaranteed to
# expose `scipy.stats` on older SciPy releases.
import scipy.stats
from pynetcor.cor import cortest

# Pearson correlation & P-value approximation.
# arr1 is transposed so that its columns become the rows handed to cortest.
cortest_result = cortest(arr1.T, approx_pvalue=True, threads=8)

# Reference: scipy pearsonr over every unordered pair of columns (i < j),
# visited in row-major order. The assertions below compare element-wise, so
# they rely on cortest returning its pairs in that same order.
n_columns = arr1.shape[1]
correlations = []
pvalues = []
for i in range(n_columns):
    for j in range(i + 1, n_columns):
        r, p = scipy.stats.pearsonr(arr1[:, i], arr1[:, j])
        correlations.append(r)
        pvalues.append(p)
correlations = np.asarray(correlations)
pvalues = np.asarray(pvalues)

# Column 2 is checked against the correlations, column 3 against the P-values.
assert np.allclose(cortest_result[:, 2], correlations)
assert np.allclose(cortest_result[:, 3], pvalues)

### Chunked computation, recommended for large-scale analyses that exceed RAM

In [44]:
# Import the submodule explicitly: a bare `import scipy` is not guaranteed to
# expose `scipy.stats` on older SciPy releases.
import scipy.stats
from pynetcor.cor import chunked_cortest

# Collect every chunk yielded by chunked_cortest and stack them row-wise.
# Materialising with list() avoids a loop variable named `iter`, which would
# shadow the builtin for every later cell in the kernel.
cortest_lst = list(chunked_cortest(arr1.T, approx_pvalue=True, threads=8))
cortest_result = np.concatenate(cortest_lst, axis=0)

# Reference: scipy pearsonr over every unordered pair of columns (i < j),
# visited in row-major order. The assertions below compare element-wise, so
# they rely on the concatenated chunks following that same order.
n_columns = arr1.shape[1]
correlations = []
pvalues = []
for i in range(n_columns):
    for j in range(i + 1, n_columns):
        r, p = scipy.stats.pearsonr(arr1[:, i], arr1[:, j])
        correlations.append(r)
        pvalues.append(p)
correlations = np.asarray(correlations)
pvalues = np.asarray(pvalues)

# Column 2 is checked against the correlations, column 3 against the P-values.
assert np.allclose(cortest_result[:, 2], correlations)
assert np.allclose(cortest_result[:, 3], pvalues)

### Multiple testing correction: holm, hochberg, bonferroni, BH, BY.

In [45]:
# This cell calls scipy.stats.pearsonr, so import the submodule here rather
# than relying on an earlier cell having loaded it.
import scipy.stats
from statsmodels.stats.multitest import multipletests
from pynetcor.cor import cortest

# Pearson correlation & multiple testing correction
cortest_result = cortest(arr1.T, adjust_pvalue=True, adjust_method="BH", threads=8)

# Reference: scipy pearsonr over every unordered pair of columns (i < j),
# followed by the statsmodels "fdr_bh" correction of the raw P-values.
n_columns = arr1.shape[1]
correlations = []
pvalues = []
for i in range(n_columns):
    for j in range(i + 1, n_columns):
        r, p = scipy.stats.pearsonr(arr1[:, i], arr1[:, j])
        correlations.append(r)
        pvalues.append(p)
correlations = np.asarray(correlations)
pvalues = np.asarray(pvalues)
# multipletests returns a tuple; index 1 holds the corrected P-values.
adjusted_pvalues = multipletests(pvalues, method="fdr_bh")[1]

# Column 4 of the cortest result is checked against the adjusted P-values.
assert np.allclose(cortest_result[:, 4], adjusted_pvalues)