## 1) store data:
### 1a) import histo data into a HDF5 file

# Project name: 
    The Functional Neuroanatomy of the Human Subthalamic Nucleus
    
    Code by Gilles de Hollander; cleaned up and commented by Max Keuken

# Goal of the project: 
    To investigate the internal organisation of the human subthalamic nucleus using a combination of histology and MRI. The non-demented control tissue has been originally analyzed by Gilles de Hollander. The following scripts will be cleaned up to only contain the parts of code that are actually used and relevant for the project. 

### The subject ID codes that correspond to the control data (n=7):
13095, 14037, 14051, 14069, 15033, 15035, 15055

# Layout of the analysis script
### 1) Combine and store the data:
    1) import histo data into a HDF5 file that contains the histo data and the STN masks in the folder:
       /home/mkeuken1/data/post_mortem/new_data_format/

### 2) Plot the data:
    2) load in the HDF5 data files using the base.py script. The base.py script loads in the data, sets the resolution but also smooths the data with 0.15 and 0.3mm fwhm. 

### 3) Statistical analysis of the 27 PCA intensity sectors
    3a) Creating the 27 PCA sectors where for each stain, across the subjects we will test whether they differ from 0
    3b) Doing the actual statistical testing: t-tests which are FDR corrected for multiple comparisons.

### 4) Mixture models based on global intensity distribution

### 5) Mixture models based on voxel gradient vectors

### 1) Combine and store the data
#### Importing the histological data as well as the masks of the STN and save them into a HDF5 file.
 

In [5]:
############
# What is the exact dataset that we are working with?
############

# The stain data of the following tissue blocks: 13095, 14037, 14051, 14069, 15033, 15035, 15055 
# 
# The specific data files are the processed files that will also be shared via DANS/Figshare. 
# The DANS/Figshare has the following folder structure:
#   Subject ID/
#              stain/
#                    unique stain/
#                                 orig/ (not relevant for this project, the multipage tiff as from the microscope)
#                                 proc/ (these are the files we will use for this project)
#              blockface/               (not relevant for this project)
#              MRI/                     (not relevant for this project)
#
# The stain data in the proc/ folder is aligned to the Blockface space
#
# All stain to blockface registration steps were visually inspected by Anneke Alkemade. If the registration failed, 
#   this stain and slice was excluded. See "exclusion_list.txt" for an overview. 
# 
# For this project the processed .png files (as indicated in the proc.DANS/Figshare folder) were renamed and
#   copied to the following folder:
#      data/STN_Histo/stacked_slides/
#
# How were the files renamed?
#    13095_vglut1_proc_1800_7561_2_blockface.png -> 13095_vglut1_1800_7561.png
#
#  and moved to their respective subjectID folder:
#    data/STN_Histo/stacked_slides/subjectID/
#
############
# Start code
############

# Importing a number of different tools
import re
import pandas
import glob
import h5py
import scipy as sp
from scipy import ndimage
import natsort
import numpy as np
import os

# Find the stains.png images per tissue blocks that have been registered to the blockface images
fns = glob.glob('/home/mkeuken1/data/post_mortem/stacked_slides/*/*')

# Expected path layout: .../<subject_id>_png/<stain>_<slice>_<number>_<id>.png
# Raw strings keep the `\.` escape intact for the regex engine.
reg = re.compile(
    r'.*/(?P<subject_id>[0-9]{5})_png/(?P<stain>[A-Za-z0-9]+)'
    r'_(?P<slice>[0-9]+)_[0-9]+_(?P<id>[0-9]+)\.png')

# Match every path exactly once and keep only the files that follow the
# naming scheme; the path and its parsed fields stay paired this way.
matches = [(fn, reg.match(fn)) for fn in fns]
matches = [(fn, match) for fn, match in matches if match is not None]

if not matches:
    raise ValueError('No stain images that follow the expected naming scheme were found')

df = pandas.DataFrame([match.groupdict() for fn, match in matches])
df['fn'] = [fn for fn, match in matches]

# The regex groups are strings; the numeric fields are stored as integers
for column in ('subject_id', 'slice', 'id'):
    df[column] = df[column].astype(int)

# Keep a single image per subject / slice / stain (the last one listed wins)
df = df.drop_duplicates(['subject_id', 'slice', 'stain'], keep='last')

# The stain names in the file names are lower case, so rename them to upper case
def correct_stain(stain):
    """Return the upper-case name for a known lower-case stain label.

    Any label that is not one of the twelve known lower-case stain names is
    returned unchanged.
    """
    known_stains = ('calr', 'fer', 'gabra3', 'gad6567', 'mbp', 'parv',
                    'sert', 'smi32', 'syn', 'th', 'transf', 'vglut1')

    for lowercase_name in known_stains:
        if stain == lowercase_name:
            return lowercase_name.upper()

    # Everything else (e.g. labels that are already upper case) passes through
    return stain

# Store the stain labels as upper-case strings
df['stain'] = df.stain.map(correct_stain).astype(str)

# Make a data structure that will be used for combining the histo data
df.to_pickle('/home/mkeuken1/data/post_mortem/data.pandas')

# Find the masks of the STN that were based on two raters who parcellated the STN using the PARV and SMI32 stains.
# Expected path layout: .../histo_masks/<subject_id>_RegMasks_<rater>/<stain>_<slice>_<number>_<id>.png
reg3 = re.compile(
    r'/home/mkeuken1/data/post_mortem/histo_masks/'
    r'(?P<subject_id>[0-9]{5})_RegMasks_(?P<rater>[A-Z]+)/'
    r'(?P<stain>[A-Z0-9a-z_]+)_(?P<slice>[0-9]+)_([0-9]+)_(?P<id>[0-9]+)\.png')

fns = glob.glob('/home/mkeuken1/data/post_mortem/histo_masks/*_RegMasks_*/*_*_*_*.png')

# The glob is looser than the regex; report offending files by name instead of
# failing on a None match object.
mask_matches = [reg3.match(fn) for fn in fns]
unmatched = [fn for fn, match in zip(fns, mask_matches) if match is None]
if unmatched:
    raise ValueError('Mask files that do not follow the expected naming scheme: %s' % unmatched)

masks = pandas.DataFrame([match.groupdict() for match in mask_matches])
masks['fn'] = fns
masks['subject_id'] = masks['subject_id'].astype(int)
masks['slice'] = masks['slice'].astype(int)

# Index by subject / slice / stain / rater so that the masks of one subject can
# be selected with a single label lookup
masks = masks.set_index(['subject_id', 'slice', 'stain', 'rater']).sort_index()

masks.to_pickle('/home/mkeuken1/data/post_mortem/masks.pandas')

mask_stains = ['PARV', 'SMI32']
raters_a = ['KH', 'MT']

# There were a few masks missing (either due to not correct saving or skipping), so MCKeuken and AAlkemade parcellated the
# remaining ones
raters_b = ['MCK', 'AA']

# A for loop that creates the .HDF5 files per tissue block 
# Build one HDF5 file per tissue block. Each file holds
#   'data': array of shape (slice, image rows, image columns, stain), NaN where no image exists
#   'mask': boolean array of shape (slice, image rows, image columns, 4)
# Grouping on the column name (not a one-element list) keeps subject_id a scalar.
for subject_id, d in df.groupby('subject_id'):
    print(subject_id)

    slices = natsort.natsorted(d.slice.unique())

    print(slices)

    stains = natsort.natsorted(d.stain.unique())
    # All images of a tissue block are taken to share the shape of the first one
    resolution = ndimage.imread(d.fn.iloc[0]).shape

    # NaN marks the slice / stain combinations for which no image is available
    data_array = np.full((len(slices),) + resolution + (len(stains),), np.nan)

    print('Storing data')
    for _, row in d.iterrows():

        slice_idx = slices.index(row['slice'])
        stain_idx = stains.index(row['stain'])

        data_array[slice_idx, ..., stain_idx] = ndimage.imread(row.fn)

    # Last axis: [PARV rater 1, PARV rater 2, SMI32 rater 1, SMI32 rater 2]
    mask_array = np.zeros((len(slices),) + resolution + (4,))

    print('Storing masks')
    # Select this subject's masks once; reused below when pickling
    subject_masks = masks.loc[subject_id].reset_index()

    for _, row in subject_masks.iterrows():

        slice_idx = slices.index(row['slice'])

        # The replacement raters (raters_b) fill the same two slots as raters_a
        raters = raters_a if row.rater in raters_a else raters_b
        last_idx = mask_stains.index(row.stain) * 2 + raters.index(row.rater)

        im = ndimage.imread(row.fn)
        # A pixel is part of the mask when it is brighter than the 70th percentile
        mask_array[slice_idx, ..., last_idx] = im > np.percentile(im, 70)

    print('Creating HDF5 file')
    p = '/home/mkeuken1/data/post_mortem/new_data_format/%s/' % subject_id

    if not os.path.exists(p):
        os.makedirs(p)

    # The file name has no placeholder, so it must not be %-formatted: doing so
    # raises TypeError for ordinary ints. Mode 'a' creates the file when it is
    # missing; the context manager closes it even if a write fails.
    with h5py.File(os.path.join(p, 'images.hdf5'), 'a') as new_file:
        new_file.create_dataset('data', data=data_array)
        new_file.create_dataset('mask', data=mask_array.astype(bool))

    d.to_pickle(os.path.join(p, 'data.pandas'))
    subject_masks.to_pickle(os.path.join(p, 'masks.pandas'))


13095
[950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350, 1400, 1450, 1500, 1550, 1600, 1650, 1700, 1750, 1800, 1850, 1900, 1950, 2000, 2050, 2100, 2150]
Storing data
Storing masks
Creating HDF5 file
14037
[850, 900, 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350, 1400, 1450, 1500, 1550, 1600, 1650, 1700, 1750, 1800, 1850, 1900, 1950, 2000, 2050, 2100, 2150, 2200, 2250]
Storing data
Storing masks
Creating HDF5 file
14051
[800, 850, 900, 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350, 1400, 1450, 1500, 1550, 1600, 1650, 1700, 1750, 1800, 1850, 1900, 1950, 2000, 2050]
Storing data
Storing masks
Creating HDF5 file
14069
[800, 850, 900, 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350, 1400, 1450, 1500, 1550, 1600, 1650, 1700, 1750, 1800, 1850, 1900, 1950, 2000, 2050, 2100, 2150, 2200, 2250, 2300]
Storing data
Storing masks
Creating HDF5 file
15033
[300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350, 1400, 145

### 2) Plot the data:
#### There are two different types of plots that we are going for here. The first type is a plot that displays the intensity histogram of the stain, combined with a tri-planar view of the STN. This is done per subject and stain. The second type of plot is used to check whether the MRI data aligns with the blockface images, whether the stains align with the blockface images, and finally whether the masks of the STN are located in a plausible location. 

#### It should be noted that we are not using the intensity per pixel but that we smooth the data a bit. Namely with a Gaussian smoothing kernel 0.3mm fwhm. For the original analysis we also used 0.15mm fwhm. 

In [6]:
############
# What does the data look like?
############
# To visualize the data we plot the stacked stains in a tri-planar view. This allows us to check whether there
#   are slices that are still completely misaligned. 
# We also create an intensity histogram to get an initial feeling for what the data distribution looks like.
#
# Given the high resolution of the data and that we are interested in the distribution throughout the STN we decided 
#   to smooth the data a bit, either with a 0.3mm or a 0.15mm fwhm Gaussian kernel. 
############
# Start code
############
#
# Importing a number of different tools
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt

from pystain import StainDataset
import os
import numpy as np

import seaborn as sns
sns.set_context('poster')
sns.set_style('whitegrid')

# Which tissue blocks are we going to visualize?
# These are the seven control subject IDs listed at the top of this document.
subject_ids = [13095, 14037, 14051, 14069, 15033, 15035, 15055]

# Ensure that the color coding is normalized between the min and max per stain
def cmap_hist(data, bins=None, cmap=plt.cm.hot, vmin=None, vmax=None):
    """Plot a histogram of ``data`` whose bars are coloured by bin position.

    data       : array of values to histogram; its ``min()`` / ``max()`` are
                 used when ``vmin`` / ``vmax`` are not given.
    bins       : passed on to ``plt.hist``.
    cmap       : colormap that is called with the normalised bin centre.
    vmin, vmax : values that map to the lower and upper end of the colormap.

    The histogram is drawn on the current matplotlib axes; nothing is returned.
    """
    _, bins, patches = plt.hist(data, bins=bins)
    bin_centers = 0.5 * (bins[:-1] + bins[1:])

    if vmin is None:
        vmin = data.min()
    if vmax is None:
        vmax = data.max()

    # Scale the bin centres to the interval [0, 1]. The divisor has to be the
    # span (vmax - vmin), otherwise vmax does not map to 1 whenever vmin != 0.
    span = vmax - vmin
    if span == 0:
        # Degenerate range: give every bar the colour of the lower bound
        col = bin_centers * 0.
    else:
        col = (bin_centers - vmin) / span

    for c, p in zip(col, patches):
        plt.setp(p, 'facecolor', cmap(c))

# Create the figures per stain, per tissue block, per smoothing kernel.
# For every subject / kernel combination one PDF is written that contains, per
# stain, an intensity histogram page followed by a 3x3 slice overview page.
for subject_id in subject_ids[:]:
    for fwhm in [0.15, 0.3]:
        dataset = StainDataset(subject_id, fwhm=fwhm)
        dataset.get_vminmax((0, 99))

        d = '/home/mkeuken1/data/post_mortem/visualize_stains_v1/%s/' % (subject_id)

        if not os.path.exists(d):
            os.makedirs(d)

        fn = os.path.join(d, 'stains_%s.pdf' % fwhm)
        pdf = PdfPages(fn)

        # Make sure the PDF is finalised even when plotting one of the stains fails
        try:
            for i, stain in enumerate(dataset.stains):
                print('Plotting %s' % stain)
                plt.figure()
                # thresholded mask area is where at least 3 masks overlay
                data = dataset.smoothed_data.value[dataset.thresholded_mask, i]
                data = data[~np.isnan(data)]
                bins = np.linspace(0, dataset.vmax[i], 100)
                cmap_hist(data, bins, plt.cm.hot, vmin=dataset.vmin[i], vmax=dataset.vmax[i])
                plt.title(stain)
                plt.savefig(pdf, format='pdf')

                plt.close(plt.gcf())

                plt.figure()

                # Columns: orientation, rows: relative position of the slice.
                # Separate loop names keep the stain index `i` intact and avoid
                # shadowing the builtin `slice`.
                for col, orientation in enumerate(['coronal', 'axial', 'sagittal']):
                    for row, q in enumerate([.25, .5, .75]):
                        ax = plt.subplot(3, 3, col + row * 3 + 1)
                        slice_idx = dataset.get_proportional_slice(q, orientation)
                        dataset.plot_slice(slice=slice_idx, stain=stain, orientation=orientation, cmap=plt.cm.hot)
                        ax.set_anchor('NW')

                plt.gcf().set_size_inches(20, 20)
                plt.suptitle(stain)
                plt.savefig(pdf, format='pdf')
                plt.close(plt.gcf())
        finally:
            pdf.close()

/home/mkeuken1/data/post_mortem/new_data_format/13095/images.hdf5
data_smoothed_0.15_thr_3 not cached
 *** CALR ***
All slices available for stain CALR!
 *** FER ***
All slices available for stain FER!
 *** GABRA3 ***
All slices available for stain GABRA3!
 *** GAD6567 ***
All slices available for stain GAD6567!
 *** MBP ***
All slices available for stain MBP!
 *** PARV ***
All slices available for stain PARV!
 *** SERT ***
Slices that are not available for stain SERT:
 * slice 1400 (can be interpolated)
float64 (1896, 1512)
float64 (25, 1896, 1512, 12)
 * slice 2100 (can be interpolated)
float64 (1896, 1512)
float64 (25, 1896, 1512, 12)
 *** SMI32 ***
All slices available for stain SMI32!
 *** SYN ***
All slices available for stain SYN!
 *** TH ***
Slices that are not available for stain TH:
 * slice 1200 (can be interpolated)
float64 (1896, 1512)
float64 (25, 1896, 1512, 12)
 * slice 1750 (can be interpolated)
float64 (1896, 1512)
float64 (25, 1896, 1512, 12)
 *** TRANSF ***
Slices t

### 4) Statistical analysis of the 27 PCA sectors
#### 4a) For each subject the data is collected and masked so that only the data inside the masks remains. A two-component PCA is then run, of which the first component runs along the dorsal axis and the second component along the lateral axis. Next, in the Y direction (the anterior/posterior axis), the structure is divided into three parts. Afterwards, each of the dorsal and lateral PCA components is also divided into three parts. This is done for each of the three Y parts, resulting in 3x3x3 = 27 sectors. 

#### The data of those 27 sectors are then combined across subjects per stain. 


In [8]:
############
# Is the data uniformly distributed over the STN?
############
#
# To test this question we divide the STN into 27 sectors based on a PCA analysis where we identify the three main 
#   axes, each of which is then divided into three parts. 
#
# The mean intensity per stain is subtracted from each ellipsoid, so that if the data is uniformly distributed each
#   sector would be equal to zero. If there are sectors that have a signal lower than the overall mean these sectors
#   will have a negative value and vice versa for higher signals. 
# 

from sklearn.decomposition import PCA
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context('poster')
sns.set_style('whitegrid')
import pandas
from pystain import StainDataset

# One dataset is loaded up front only to read the voxel resolution
subject_id = 13095
ds = StainDataset(subject_id)

# Maps voxel indices (i0, i1, i2) to millimetres:
#   x_mm =  xy_resolution * i2
#   y_mm = -z_resolution  * i0   (i0 is stored as 'slice' below)
#   z_mm = -xy_resolution * i1
conversion_matrix = np.array([[0, 0, ds.xy_resolution],
                              [-ds.z_resolution, 0, 0],
                              [0, -ds.xy_resolution, 0]])
results = []

subject_ids = [13095, 14037, 14051, 14069, 15033, 15035, 15055]


for subject_id in subject_ids[:]:
    ds = StainDataset(subject_id, fwhm=0.3)

    # Get coordinates of mask and bring them to mm
    x, y, z = np.where(ds.thresholded_mask)
    coords = np.column_stack((x, y, z))
    coords_mm = conversion_matrix.dot(coords.T).T
    coords_mm -= coords_mm.mean(0)

    # Fit two components (on the x_mm / z_mm plane) and make sure first axis
    #   walks dorsal and second component lateral.
    # Only the fitted components are needed, so fit() replaces fit_transform().
    pca = PCA()
    pca.fit((coords_mm - coords_mm.mean(0))[:, (0, 2)])

    components = pca.components_
    print(components)

    # Flip the sign of a component when it points the other way
    if components[0, 1] < 0:
        components[0] = -components[0]

    if components[1, 0] < 0:
        components[1] = -components[1]

    print(components)

    coords_dataframe = pandas.DataFrame(coords_mm, columns=['x_mm', 'y_mm', 'z_mm'])
    coords_dataframe['slice'] = x

    # Project every voxel onto both components in a single matrix product
    projected = components.dot(coords_mm[:, (0, 2)].T)
    coords_dataframe['pc1'] = projected[0, :]
    coords_dataframe['pc2'] = projected[1, :]

    # Component scores relative to the mean score of the voxel's own slice
    coords_dataframe[['pc1_slice_center', 'pc2_slice_center']] = coords_dataframe.groupby(['slice'])[['pc1', 'pc2']].apply(lambda g: g - g.mean())

    # Three equally filled parts along y_mm ...
    coords_dataframe['slice_3'] = pandas.qcut(coords_dataframe.y_mm, 3, labels=['posterior', 'middle', 'anterior'])

    # ... then three parts along pc1 within each of those, and three parts
    # along pc2 within each (slice_3, pc1_3) combination: 3 x 3 x 3 sectors
    coords_dataframe['pc1_3'] = coords_dataframe.groupby('slice_3').pc1.apply(lambda d: pandas.qcut(d, 3, labels=['ventral', 'middle', 'dorsal']))
    coords_dataframe['pc2_3'] = coords_dataframe.groupby(['slice_3', 'pc1_3']).pc2.apply(lambda d: pandas.qcut(d, 3, labels=['medial', 'middle', 'lateral']))

    # Mean smoothed intensity per sector, one column per stain
    df = pandas.concat((ds.smoothed_dataframe, coords_dataframe), axis=1)
    tmp = df.pivot_table(index=['pc1_3', 'pc2_3', 'slice_3'], values=ds.stains, aggfunc='mean')
    tmp['subject_id'] = subject_id

    results.append(tmp.copy())

# Combine all subjects once, after the loop (only the final table is used), and
# reshape to long format: one row per subject / sector / stain.
df = pandas.concat(results).reset_index().set_index(['subject_id', 'slice_3', 'pc1_3', 'pc2_3'])
df = pandas.melt(df.reset_index(), id_vars=['subject_id', 'slice_3', 'pc1_3', 'pc2_3'], var_name='stain')

# z-score the sector means within every subject / stain combination. Only the
# numeric 'value' column is transformed; the sector labels are left alone.
df['value'] = df.groupby(['subject_id', 'stain'])['value'].transform(lambda v: (v - v.mean()) / v.std())


def plot_ellipse_values(values, ellipse_pars=None, size=(1000, 1000), vmin=None, vmax=None, cmap=plt.cm.coolwarm, **kwargs):
    """Draw an ellipse that is split into a grid of sectors coloured by ``values``.

    values       : n-by-m array; values[i, j] colours the sector that is the
                   i-th part along the ellipse axis with semi-length ``a``
                   (direction ``theta``) and the j-th part along the axis with
                   semi-length ``b``.
    ellipse_pars : optional tuple (a, b, x, y, theta): the two semi-axis
                   lengths, the centre in pixels and the rotation in radians.
                   When None: a=350, b=150, centre (500, 500), 45 degrees.
    size         : (number of columns, number of rows) of the image in pixels.
    vmin, vmax, cmap and any extra keyword arguments are passed to plt.imshow.

    Pixels outside the ellipse are masked. Returns the object returned by
    plt.imshow.
    """
    if ellipse_pars is None:
        a, b, x, y = 350, 150, 500, 500
        theta = 45. / 180 * np.pi
    else:
        a, b, x, y, theta = ellipse_pars

    # Coefficients of the rotated ellipse written as the general conic
    # A*X^2 + B*X*Y + C*Y^2 + D*X + E*Y + F = 0 (negative inside the ellipse)
    A = a**2 * (np.sin(theta))**2 + b**2 * (np.cos(theta))**2
    B = 2 * (b**2 - a**2) * np.sin(theta) * np.cos(theta)
    C = a**2 * np.cos(theta)**2 + b**2 * np.sin(theta)**2
    D = -2 * A * x - B * y
    E = -B * x - 2 * C * y
    F = A * x**2 + B * x * y + C * y**2 - a**2 * b**2

    X, Y = np.meshgrid(np.arange(size[0]), np.arange(size[1]))

    in_ellipse = A * X**2 + B * X * Y + C * Y**2 + D * X + E * Y + F < 0

    # Unit vectors along the two ellipse axes
    pc1 = np.array([[np.cos(theta)], [np.sin(theta)]])
    pc2 = np.array([[np.cos(theta - np.pi / 2.)], [np.sin(theta - np.pi / 2.)]])

    # Signed distance of every pixel from the centre along each axis
    offsets = np.array([(X - x).ravel(), (Y - y).ravel()])
    pc1_distance = pc1.T.dot(offsets).reshape(X.shape)
    pc2_distance = pc2.T.dot(offsets).reshape(X.shape)

    # Map [-a, a] and [-b, b] onto the sector indices 0 .. n-1 and 0 .. m-1
    pc1_quantile = np.floor((pc1_distance / a + 1) / 2. * values.shape[0])
    pc2_quantile = np.floor((pc2_distance / b + 1) / 2. * values.shape[1])

    im = np.zeros_like(X, dtype=float)

    for pc1_q in np.arange(values.shape[0]):
        for pc2_q in np.arange(values.shape[1]):
            sector = in_ellipse & (pc1_quantile == pc1_q) & (pc2_quantile == pc2_q)
            im[sector] = values[pc1_q, pc2_q]

    im = np.ma.masked_array(im, ~in_ellipse)
    cax = plt.imshow(im, origin='lower', cmap=cmap, vmin=vmin, vmax=vmax, **kwargs)
    # A boolean switches the grid off regardless of how string flags are treated
    plt.grid(False)
    sns.despine()

    return cax

/home/mkeuken1/data/post_mortem/new_data_format/13095/images.hdf5
/home/mkeuken1/data/post_mortem/new_data_format/13095/images.hdf5
[[-0.98094749 -0.19427308]
 [-0.19427308  0.98094749]]
[[ 0.98094749  0.19427308]
 [ 0.19427308 -0.98094749]]
/home/mkeuken1/data/post_mortem/new_data_format/14037/images.hdf5
[[-0.95611755 -0.29298334]
 [ 0.29298334 -0.95611755]]
[[ 0.95611755  0.29298334]
 [ 0.29298334 -0.95611755]]
/home/mkeuken1/data/post_mortem/new_data_format/14051/images.hdf5
[[-0.78933812 -0.61395874]
 [ 0.61395874 -0.78933812]]
[[ 0.78933812  0.61395874]
 [ 0.61395874 -0.78933812]]
/home/mkeuken1/data/post_mortem/new_data_format/14069/images.hdf5
[[-0.70764237 -0.70657079]
 [ 0.70657079 -0.70764237]]
[[ 0.70764237  0.70657079]
 [ 0.70657079 -0.70764237]]
/home/mkeuken1/data/post_mortem/new_data_format/15033/images.hdf5
[[-0.66358108 -0.74810437]
 [ 0.74810437 -0.66358108]]
[[ 0.66358108  0.74810437]
 [ 0.74810437 -0.66358108]]
/home/mkeuken1/data/post_mortem/new_data_format/15035/

### 4) Statistical analysis of the 27 PCA sectors
#### 4b) For each stain and sector we do a simple t-test to compare whether the intensity values are different from zero. This is corrected for multiple comparisons using a fdr correction, critical p-value of 0.05.

#### The sectors that survive the fdr correction are then plotted on the ellipsoid, where red indicates above average intensity, blue indicates below average intensity. 
 

In [9]:
from statsmodels.sandbox.stats import multicomp
from matplotlib import patches
import scipy as sp
sns.set_style('white')

pca_folder = '/home/mkeuken1/data/post_mortem/visualize_stains_v1/PCA_sectors'
if not os.path.exists(pca_folder):
    os.makedirs(pca_folder)

# Row / column order of every 3x3 sector table below
pc1_labels = ['ventral', 'middle', 'dorsal']
pc2_labels = ['medial', 'middle', 'lateral']

# One PDF per stain with three ellipses (anterior, middle, posterior). Each
# ellipse shows the t-values of the sectors that survive the FDR correction.
for stain, d in df.groupby('stain'):
    fn = os.path.join(pca_folder, '{stain}_big_picture_coolwarm.pdf'.format(stain=stain))
    pdf = PdfPages(fn)

    # Make sure the PDF is finalised even when one of the panels fails
    try:
        fig, axes = plt.subplots(nrows=1, ncols=3)

        for slice_label, d2 in d.groupby('slice_3'):

            ax = plt.subplot(1, 3, ['anterior', 'middle', 'posterior'].index(slice_label) + 1)

            sector_values = d2.groupby(['pc1_3', 'pc2_3'])['value']

            # 3x3 tables (pc1 part x pc2 part): sample size, one-sample t-test
            # against zero (t-value and p-value) and mean per sector
            n = sector_values.apply(len).unstack(1).loc[pc1_labels, pc2_labels]
            t = sector_values.apply(lambda v: sp.stats.ttest_1samp(v, 0, nan_policy='omit')[0]).unstack(1).loc[pc1_labels, pc2_labels]
            p = sector_values.apply(lambda v: sp.stats.ttest_1samp(v, 0, nan_policy='omit')[1]).unstack(1).loc[pc1_labels, pc2_labels]
            mean = sector_values.mean().unstack(1).loc[pc1_labels, pc2_labels]

            # FDR correction over the nine sectors of this panel. A new frame is
            # built instead of writing through `p.values`, which is not
            # guaranteed to be a writable view of the frame.
            corrected = multicomp.fdrcorrection0(p.values.ravel())[1]
            p = pandas.DataFrame(corrected.reshape(p.shape), index=p.index, columns=p.columns)

            # The middle panel gets a slightly larger ellipse
            if slice_label == 'middle':
                a, b, x, y, theta = 350, 150, 300, 275, 45
            else:
                a, b, x, y, theta = 300, 125, 300, 275, 45.

            # Sectors with a corrected p-value of 0.05 or more become NaN
            plot_ellipse_values(t[p < 0.05].values, size=(600, 550), ellipse_pars=(a, b, x, y, theta / 180. * np.pi), vmin=-7, vmax=7, cmap=plt.cm.coolwarm)

            # Outline of the ellipse on top of the coloured sectors
            e1 = patches.Ellipse((x, y), a * 2, b * 2,
                                 angle=theta, linewidth=2, fill=False, zorder=2)

            ax.add_patch(e1)

            plt.xticks([])
            plt.yticks([])

            sns.despine(bottom=True, left=True)

            print(stain)
            print(p.values)

        plt.suptitle(stain, fontsize=24)
        fig.set_size_inches(15., 4.)
        pdf.savefig(fig, transparent=True)
    finally:
        pdf.close()

 

CALR
[[  5.34068634e-04   2.50732216e-04   1.86304348e-04]
 [  5.26880268e-02   2.27548328e-02   1.09905774e-03]
 [  2.61606877e-01   2.66856031e-01   2.57288596e-01]]
CALR
[[  2.40918794e-01   1.67301334e-01   7.81614479e-02]
 [  2.26324607e-03   1.84200296e-03   6.96200095e-01]
 [  1.07619416e-04   1.37581657e-03   1.24354205e-01]]
CALR
[[  5.65716880e-01   1.77379063e-02   1.17241325e-01]
 [  1.69706272e-04   1.69706272e-04   1.69706272e-04]
 [  2.98076855e-04   1.69706272e-04   1.40439224e-03]]
FER
[[ 0.75432804  0.75100287  0.75432804]
 [ 0.75432804  0.75432804  0.75432804]
 [ 0.75100287  0.75100287  0.75432804]]
FER
[[ 0.40201533  0.40201533  0.40201533]
 [ 0.40201533  0.73414561  0.73414561]
 [ 0.89583131  0.73414561  0.89583131]]
FER
[[ 0.77889694  0.92115964  0.8753716 ]
 [ 0.77889694  0.77889694  0.77889694]
 [ 0.8753716   0.77889694  0.77889694]]
GABRA3
[[ 0.22834683  0.12574622  0.18244092]
 [ 0.82977949  0.27963894  0.22834683]
 [ 0.12574622  0.22834683  0.91441154]]
GABRA

### Mixture analysis