# Test Notebook for Confidence Regions Toolbox

## Import Packages

In [1]:
# Install the third-party packages used by this notebook into the kernel's
# environment (one %pip call per package; versions are not pinned).
# NOTE(review): `yaml` is imported in the next cell but PyYAML is not installed here — confirm it is already available.
%pip install numpy
%pip install pandas
%pip install matplotlib
%pip install nilearn
%pip install nibabel
%pip install dask

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [1]:
# Basic imports
import time

import numpy
import numpy as np
import yaml
import matplotlib.pyplot as plt

# Imports from Confidence Regions Toolbox
from generate import generate_CRs
from coverage import check_violations

# Import supporting functions
from lib.set_theory import *
from lib.boundary import *
from lib.regression import *

# Import data generation
from tests.generate_2d_data import *
from tests.generate_ni_data import *


## Data Generation

Initial parameters.

In [None]:
# How many fields are simulated
m = 3

# How many subjects each simulated dataset contains
nSub = 80

# How many simulation realizations to run
nReals = 100

# How many bootstrap resamples to draw
nBoot = 5000

# Threshold c
c = 2

# Grid of 21 evenly spaced p-values running from 0 to 1 inclusive
p = np.linspace(start=0, stop=1, num=21)

# Length of the p-value grid
nPvals = p.shape[0]

# Tau: one over the square root of the number of subjects
tau = 1/np.sqrt(nSub)

# Dimensions of simulated data: nSub entries on the first axis, then 100 x 100
data_dim = np.array([nSub] + [100]*2)

Create some circular signals.

In [None]:
# ---------------------------------------------------------------
# Mus
# ---------------------------------------------------------------
# Specification dicts for the m signals, keyed 'mu1', 'mu2', ...
mu_specs = {}

# Get some evenly spaced center points, one row per field. The call does not
# depend on the loop index, so it is made once up front
# (assumes circle_points is deterministic — TODO confirm).
centers = circle_points(np.array([25]),np.array([m]))

# Loop through mus, adding each field in turn
for i in range(m):

    mu_specs['mu'+str(i+1)] = {
        # Mu type
        'type': 'circle2D',
        # Mu FWHM
        'fwhm': np.array([5,5]),
        # Mu r
        'r': 40,
        # Mu magnitude
        'mag': 3,
        # Mu center. The builtin int is used because the np.int alias was
        # deprecated in NumPy 1.20 and removed in 1.24.
        'center': centers[i,:].astype(int),
    }

# ---------------------------------------------------------------
# Epsilons
# ---------------------------------------------------------------
# Specification dicts for the m noise fields, keyed 'noise1', 'noise2', ...
noise_specs = {}

# Loop through noises, adding each field in turn
for i in range(m):

    noise_specs['noise'+str(i+1)] = {
        # Add FWHM (note the upper-case key here versus 'fwhm' in mu_specs;
        # both spellings are kept exactly as the downstream readers expect)
        'FWHM': np.array([0, 3, 3]),
        # Add type
        'type': 'homogen',
    }

Generate the data.

In [None]:
# Per-field pieces are collected here and joined once after the loop
mu_list = []
data_list = []

# An explicit loop (rather than a comprehension) is used so that the last
# field's noise, mu and data remain available to later cells.
for i in np.arange(m):

    # Suffix shared by this field's spec keys ('1', '2', ...)
    field = str(i+1)

    # Noise first, then the signal, then their sum
    noise = get_noise(noise_specs['noise'+field], data_dim)
    mu = get_mu(mu_specs['mu'+field], data_dim)
    data = mu + noise

    # Keep the signal as-is and the data with a new leading axis of length one
    mu_list.append(mu)
    data_list.append(data.reshape(1,*(data.shape)))

# Join the signals along their existing first axis
mus = np.concatenate(mu_list,axis=0)

# Join the data along the new leading axis, giving one entry per field
datas = np.concatenate(data_list,axis=0)

## Make Some Plots

In [None]:
# Mean of datas over axis 1 (presumably the subject axis), shown for the
# third field (index 2 on the first axis). This is an average image, not the
# data of any single subject.
plt.imshow(np.mean(datas,axis=1)[2,:,:])
plt.colorbar()

## Generate Confidence Regions

In [None]:
# Output directory
# NOTE(review): hard-coded, user-specific absolute path — change before running on another machine.
out_dir = '/home/tommaullin/Documents'

# Generate Confidence Regions from the simulated data, the threshold c and the
# p-value grid p; the four returned values are unpacked in the order returned.
# NOTE(review): the neuroimaging example further down calls generate_CRs with a
# different argument list (muhat_file, sigmahat_file, resid_files, c, p) —
# confirm which signature is current.
FcHat_minus, FcHat_plus, FcHat, a = generate_CRs(datas, c, p, out_dir)

In [None]:
# Position in the p-value grid to inspect
i = 20

# Report a and p at that position
print(a[i],p[i])

# Add the plus, point-estimate and minus images together (the factors of 1
# presumably turn boolean masks into numbers) and display the sum
overlay = 1*FcHat_plus[i,:,:]+1*FcHat+1*FcHat_minus[i,:,:]
plt.imshow(overlay)
plt.colorbar()


In [None]:
# Report a at the position i chosen in the previous cell
print(a[i])

# Display the plus image minus the minus image at that position
difference = 1*FcHat_plus[i,:,:]-1*FcHat_minus[i,:,:]
plt.imshow(difference)
plt.colorbar()

## Assess Coverage

In [None]:
# Get Fc: elementwise mask of where mu exceeds the threshold c
# NOTE(review): `mu` is the value left behind by the last iteration of the
# data-generation loop (a single field), not the combined `mus`, and Fc is not
# referenced again in this notebook — confirm intent.
Fc = mu > c


In [None]:
# Get the results: pass the FcHat_plus / FcHat_minus regions, the data, the
# true signals, the threshold c, tau and a to check_violations.
# NOTE(review): the structure of `results` is not visible here; other cells
# index results[0] and results[1].
results = check_violations(FcHat_plus, FcHat_minus, datas, mus, c, tau, a)

In [None]:
# Display the second element of the check_violations output
# NOTE(review): what this element holds is not visible from this notebook — confirm against check_violations.
results[1]

## Assess Coverage over Repeated Runs

In [None]:
import time

# Set the number of repetitions
nReps = 1000

# Loop through the repetitions. Each repetition simulates a fresh dataset,
# builds confidence regions from it and checks them for violations; the first
# element of each check is folded into a running mean across repetitions.
for j in np.arange(nReps):
    print(j)

    # ----------------------------------------------------------------
    # Data generation
    # ----------------------------------------------------------------
    start = time.time()

    # Per-field pieces, joined once after the inner loop
    mu_list = []
    data_list = []

    for i in np.arange(m):
        # Obtain noise
        noise = get_noise(noise_specs['noise'+str(i+1)], data_dim)

        # Obtain mu
        mu = get_mu(mu_specs['mu'+str(i+1)], data_dim)

        # Create the data
        data = mu + noise

        # Save mu, and the data with a new leading axis of length one
        mu_list.append(mu)
        data_list.append(data.reshape(1,*(data.shape)))

    # Join the signals along their first axis and the data along the new axis
    mus = np.concatenate(mu_list,axis=0)
    datas = np.concatenate(data_list,axis=0)

    # Seconds spent generating data (kept for ad-hoc inspection)
    generation_seconds = time.time() - start

    # ----------------------------------------------------------------
    # Confidence region generation
    # ----------------------------------------------------------------
    start = time.time()

    # Generate Confidence Regions
    FcHat_minus, FcHat_plus, FcHat, a = generate_CRs(datas, c, p, out_dir)

    # Seconds spent generating confidence regions
    cr_seconds = time.time() - start

    # ----------------------------------------------------------------
    # Violation checking
    # ----------------------------------------------------------------
    start = time.time()

    # Get the results
    results = check_violations(FcHat_plus, FcHat_minus, datas, mus, c, tau, a)

    # Seconds spent checking violations
    check_seconds = time.time() - start

    # ----------------------------------------------------------------
    # Average the results
    # ----------------------------------------------------------------
    # Flatten this repetition's result to a 1D float vector
    new_results = np.asarray(results[0], dtype=float).reshape(-1)

    if j == 0:
        avg_results = new_results
    else:
        # Incremental mean: after this update every repetition so far carries
        # equal weight 1/(j+1). (Simply averaging the previous mean with the
        # new result would give the newest repetition a weight of 1/2.)
        avg_results = avg_results + (new_results - avg_results)/(j+1)


In [None]:
# Draw the averaged results as a function of p on an explicit figure/axes pair
fig, ax = plt.subplots()
ax.plot(p,avg_results)

# Label the axes and title the figure
ax.set_xlabel('p')
ax.set_ylabel('Average number of violations')
ax.set_title('Average number of violations against p')

plt.show()

## Neuroimaging Example

First, we generate some test data.

In [2]:
# Parameters for data generation. Going by the shapes printed by
# generate_data, n is the number of observations and n_params the number of
# regression parameters.
n = 30

# Named n_params rather than p so that the p-value grid `p` used elsewhere in
# this notebook is not overwritten by an unrelated integer.
n_params = 3

# NOTE(review): hard-coded, user-specific absolute path — change before running on another machine.
out_dir = '/home/tommaullin/Documents/ConfSets_Rehaul/'

# Generate some test data
y_files, beta_files, X = generate_data(n, n_params, out_dir)

beta shape (reg):  (120, 120, 120, 3, 1)
Xbeta shape (reg):  (120, 120, 120, 30, 1)


Compute the mean and standard deviation images.

In [3]:
# Get the estimated betahat, sigmahat and residuals
# regression is given the y files, the design X and the output directory, and
# its three return values are unpacked in order. With chunk_size=20 the printed
# chunk shapes show the 30 observations being processed as 20 and then 10.
# NOTE(review): the second output is named var_betahat_files although the
# comment above calls it sigmahat — confirm whether it holds variances or standard deviations.
betahat_files, var_betahat_files, resid_files = regression(y_files, X, out_dir, chunk_size=20)

chunk shapes:  (100, 100, 100, 20, 1) (1, 1, 1, 20, 3)
chunk shapes:  (100, 100, 100, 10, 1) (1, 1, 1, 10, 3)
chunk check:  True
beta shape (100, 100, 100, 3, 1)


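We're going to use the first $\hat{\beta}$ as our estimate of the signal, $\hat{\mu}$, together with the first entry of the matching variance output as $\hat{\sigma}$.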

In [4]:
# Get muhat and sigmahat: take the first entry of each regression output
muhat_file = betahat_files[0]
# NOTE(review): the source name says "var" but the value is used as sigmahat — confirm whether a square root is needed.
sigmahat_file = var_betahat_files[0]

# Threshold c
c = 2

# Get p values: 21 evenly spaced values from 0 to 1 inclusive
p = np.linspace(0,1,21)

Let's try to generate some CRs.

In [6]:
# Generate CRs from the muhat and sigmahat files, the residual files, the
# threshold c and the p-value grid p; the four return values are unpacked in order.
# NOTE(review): the 2D simulation cells call generate_CRs(datas, c, p, out_dir),
# a different argument list — confirm which signature is current.
FcHat_minus, FcHat_plus, FcHat, a_estBdry = generate_CRs(muhat_file, sigmahat_file, resid_files, c, p)

marker 5.2:  20.25902533531189
marker 6:  0.0005853176116943359
marker 7:  2.86102294921875e-06
marker 8:  0.0001087188720703125
marker 9:  0.00012183189392089844
alpha:  [1]
marker 3:  0.00013184547424316406
marker 4:  0.004904508590698242
marker 5.2:  20.262287616729736
marker 6:  0.00014662742614746094
marker 7:  5.0067901611328125e-06
marker 8:  0.00012040138244628906
marker 9:  0.00015211105346679688
alpha:  [1]
marker 3:  5.53131103515625e-05
marker 4:  0.003541707992553711
marker 5.2:  20.267202377319336
marker 6:  0.0002605915069580078
marker 7:  3.5762786865234375e-06
marker 8:  0.0001423358917236328
marker 9:  0.0001659393310546875
alpha:  [1]
marker 3:  7.414817810058594e-05
marker 4:  0.0029070377349853516
marker 5.2:  20.27080225944519
marker 6:  0.00024580955505371094
marker 7:  4.0531158447265625e-06
marker 8:  0.000141143798828125
marker 9:  0.00018715858459472656
alpha:  [1]
marker 3:  8.320808410644531e-05
marker 4:  0.00399017333984375
marker 5.2:  20.2774875164032
m

  g = ((muHats-c)/(sigmas*tau))


In [None]:
# Position in the p-value grid to inspect
i = 20

# Index along the first axis of the volume at which to cut.
# NOTE(review): this name shadows Python's builtin `slice`; it is kept because
# the following cells read it.
slice = 51

# Report the a_estBdry value and p at the chosen position
print(a_estBdry[i],p[i])

# Add the plus, point-estimate and minus images for this cut and display the sum
overlay = 1*FcHat_plus[i,slice,:,:]+1*FcHat[slice,:,:]+1*FcHat_minus[i,slice,:,:]
plt.imshow(overlay)
plt.colorbar()


In [None]:
# Load in muhat and sigmahat as arrays
# NOTE(review): `nib` (presumably nibabel) is not imported explicitly in this
# notebook — confirm that one of the star imports provides it.
muhat = nib.load(muhat_file).get_fdata()
sigmahat = nib.load(sigmahat_file).get_fdata()

# Make image of (muhat - c)/sigmahat at index `slice` on the first axis
plt.imshow((muhat[slice,:,:]-c)/sigmahat[slice,:,:])
plt.colorbar()
plt.show()

In [None]:

# Make image of muhat itself at index `slice` on the first axis
plt.imshow(muhat[slice,:,:])
plt.colorbar()
plt.show()

In [None]:
# Scratch timing: measure how long np.array2string takes on a two-element array
# and print the elapsed wall-clock seconds.
# NOTE(review): this rebinds `a`, which the 2D cells use for the generate_CRs
# output — re-run those cells before reusing `a` there.
a = np.array([1,2])

t1 = time.time()
np.array2string(a)
t2 = time.time()
print(t2-t1)

In [8]:
# Scratch check: the 100th percentile of a list of identical values is that value
np.percentile([1,1],100)

1.0