In [1]:
import sys
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

sys.path.insert(0, '../../')
import ccal
%matplotlib inline


<13:30:03.486418> Checking dependencies ...
<13:30:03.488519> Using the following packages:
<13:30:03.490248> 	matplotlib (v1.5.1)
<13:30:03.490283> 	numpy (v1.10.4)
<13:30:03.490305> 	pandas (v0.18.0)
<13:30:03.490327> 	rpy2 (v2.7.9)
<13:30:03.490335> 	scikit-learn (v0.17.1)
<13:30:03.490359> 	scipy (v0.17.0)
<13:30:03.490367> 	seaborn (v0.7.0)


In [2]:
nelement = 5
features = ccal.support.make_random_features(10, nelement)
ref = ccal.support.make_random_features(1, nelement)

In [10]:
import math
from numpy.random import choice
import scipy.stats as stats
from statsmodels.sandbox.stats.multicomp import multipletests
information_coefficient = ccal.information.information_coefficient
confidence = 0.95
nperm = 3

In [7]:
scores = ccal.analyze.compute_against_reference(features, ref)
scores

Unnamed: 0,IC
Feature 0,0.209236
Feature 1,0.43283
Feature 2,-0.274587
Feature 3,-0.511382
Feature 4,-0.068896
Feature 5,-0.323407
Feature 6,0.408133
Feature 7,-0.19211
Feature 8,0.068516
Feature 9,-0.603373


In [21]:
print('Bootstrapping to get {} confidence interval ...'.format(confidence))
nsampling = 10
confidence = 0.95
features_nrow, features_ncol = features.shape

# Bootstrap to get CI
nsample = math.ceil(0.632 * features_ncol)

# Sample
sampled_scores = np.empty((features_nrow, nsampling))
for i in range(nsampling):
    sample_indices = choice(features.columns.tolist(), nsample).tolist()
    sampled_features = features.ix[:, sample_indices]
    sampled_ref = ref.ix[sample_indices]    
    for j, (idx, s) in enumerate(sampled_features.iterrows()):
        sampled_scores[j, i] = information_coefficient(s, sampled_ref)

# Get confidence interval
confidence_intervals = pd.DataFrame(index=features.index, columns=['{0:.2f} Quantile'.format(1 - confidence), '{0:.2f} Qualtile'.format(confidence)])
z_critical = stats.norm.ppf(q=confidence)
for i, f in enumerate(sampled_scores):
    mean = f.mean()
    stdev = f.std()
    moe = z_critical * (stdev / math.sqrt(f.size))
    confidence_intervals.iloc[i] = mean - moe, mean + moe

Bootstrapping to get 0.95 confidence interval ...


In [22]:
print('Performing permutation test with {} permutations ...'.format(nperm))
permutation_scores = np.empty((features_nrow, nperm))
shuffled_ref = np.array(ref)
for i in range(nperm):
    np.random.shuffle(shuffled_ref)
    for j, (idx, s) in enumerate(features.iterrows()):
        permutation_scores[j, i] = information_coefficient(s, shuffled_ref)

permutation_pvals = pd.DataFrame(index=features.index, columns=['Local P-Value', 'Global P-Value'])
all_permutation_scores = permutation_scores.flatten()
for i, feature in enumerate(permutation_scores):
    # Local P-Value
    local_pval = float(sum(feature > float(scores.iloc[i])) / nperm)
    if not local_pval:
        loval_pval = float(1 / nperm)
    permutation_pvals.ix[i, 'Local P-Value'] = local_pval
        
    # Global P-Value
    global_pval = float(sum(feature > float(scores.iloc[i])) / (nperm * features_nrow))
    if not global_pval:
        global_pval = float(1 / (nperm * features_nrow))
    permutation_pvals.ix[i, 'Global P-Value'] = global_pval

Performing permutation test with 3 permutations ...


  v = v[overlap] + jitters[i]


In [23]:
fdrs = pd.DataFrame(multipletests(permutation_pvals.ix[:, 'Global P-Value'], method='fdr_bh')[1],
                    index=features.index, columns=['FDR (BH)'])

In [24]:
pd.concat([confidence_intervals, permutation_pvals, fdrs], axis=1)

Unnamed: 0,0.05,0.95,Local P-Value,Global P-Value,FDR (BH)
Feature 0,0.156929,0.680053,0.0,0.0333333,0.0666667
Feature 1,0.227398,0.73092,0.666667,0.0666667,0.0833333
Feature 2,-0.711176,-0.183365,1.0,0.1,0.1
Feature 3,-0.821584,-0.614619,1.0,0.1,0.1
Feature 4,-0.492775,0.163776,0.333333,0.0333333,0.0666667
Feature 5,-0.587211,0.0476509,0.0,0.0333333,0.0666667
Feature 6,0.174126,0.67741,0.0,0.0333333,0.0666667
Feature 7,-0.35923,0.370021,0.0,0.0333333,0.0666667
Feature 8,-0.16808,0.48267,0.666667,0.0666667,0.0833333
Feature 9,-0.627633,0.0305804,0.666667,0.0666667,0.0833333
