In [None]:
import pandas as pd
import numpy as np
import sqlite3
import os

from allensdk.api.queries.ontologies_api import OntologiesApi
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


import matplotlib as mpl
# Write fonts into saved PDFs as Type 42 (TrueType) instead of matplotlib's default Type 3.
mpl.rcParams['pdf.fonttype'] = 42

In [None]:
# Folder that holds the animal tracking sheet read below and receives the saved PDF figures.
# NOTE(review): absolute, user-specific path - must be edited before running on another machine.
path = r'/Users/jenniferwh/Dropbox (Allen Institute)/Mesoscale Connectome Papers in Progress/2018 Plaque'

In [None]:
# Read the whole `correct_volumes` table from the SQLite database into a DataFrame.
# NOTE(review): `conn` is never closed in the cells visible here - close it once no later cell needs it.
# NOTE(review): this is a Windows UNC path while `path` is a macOS-style path - confirm the
# notebook can reach both locations from the same machine.
conn = sqlite3.connect(r"\\allen\aibs\ccf\Maitham\Alzheimer_Data\jennifer_structure_data.db")
dframe = pd.read_sql_query(r"select * from correct_volumes", conn)

In [None]:
# Keep every row whose 'Side' is not 'l' (presumably the left hemisphere - TODO confirm the coding).
dat = dframe[dframe['Side'] != 'l']

In [None]:
# Preview the first rows of the filtered volume table.
dat.head()

In [None]:
# Number of distinct image series present in the filtered volume table.
len(dat['Image_Series_ID'].unique())

In [None]:
# Connectivity cache (used below for the structure tree) and ontology API client.
mcc = MouseConnectivityCache(manifest_file='../connectivity/mouse_connectivity_manifest.json',
                            resolution=25) # Switch to 25 um for visualizations
oapi = OntologiesApi()
# IDs of the structures in the 'Mouse Connectivity - Summary' structure set.
# The inner single quotes are part of the query string passed to the API.
summary_structures = oapi.get_structures(structure_set_names="'Mouse Connectivity - Summary'")
summary_structure_ids = [item['id'] for item in summary_structures]
print(len(summary_structure_ids))
# IDs of the structures in the 'Mouse - Coarse' structure set.
coarse_structures = oapi.get_structures(structure_set_names="'Mouse - Coarse'")
coarse_structure_ids = [item['id'] for item in coarse_structures]
print(len(coarse_structure_ids))
# Combined ID list; 997 is appended explicitly (presumably the root structure ID - TODO confirm).
structures = summary_structure_ids + coarse_structure_ids + [997]

In [None]:
# Lookup tables between structure acronyms and structure IDs.
# ia_map is indexed by acronym to obtain an ID; ai_map is its inverse (ID -> acronym).
structure_tree = mcc.get_structure_tree()
ia_map = structure_tree.get_id_acronym_map()
# dict.items() works on Python 2 and 3; dict.iteritems() no longer exists on Python 3.
ai_map = {value: key for key, value in ia_map.items()}

In [None]:
# Load the animal tracking sheet from `path` and report how many rows it has.
meta = pd.read_csv(os.path.join(path, 'T503 animal tracking sheet.csv'))
print(len(meta))

In [None]:
# List the column names of the tracking sheet.
meta.keys()

In [None]:
# Keep only the rows whose 'Deformation fields call (R hemisphere)' is 'Pass'.
# .copy() makes `dataset` an independent frame, so adding a column below does not
# write to a slice of `meta` (which triggers pandas' SettingWithCopyWarning).
dataset = meta[meta['Deformation fields call (R hemisphere)'] == 'Pass'].copy()
print(len(dataset))
# The 'Link to images in LIMS' column is converted to integer image series IDs.
isids = [int(value) for value in dataset['Link to images in LIMS']]
print(len(isids))
dataset['image_series_id'] = isids

In [None]:
# Image series that have volume data but are not in the passing `dataset`.
dat[~dat['Image_Series_ID'].isin(dataset['image_series_id'])]['Image_Series_ID'].unique()
# This looks good - both of the listed image series are expected to have failed.

In [None]:
# Restrict `dataset` to image series that actually have volume data in `dat`.
dataset_ids = dat['Image_Series_ID'].unique()
print(len(dataset_ids))

dataset = dataset[dataset['image_series_id'].isin(dataset_ids)]
# .copy() gives later cells an independent frame to assign columns on via .loc,
# instead of a filtered slice (which triggers pandas' SettingWithCopyWarning).
dataset = dataset[dataset['image_series_id'] != 549363956].copy() #failing this one
print(len(dataset))

In [None]:
# Distinct values of the 'Experiment' column.
dataset.Experiment.unique()

In [None]:
# Rows whose 'Experiment' is exactly 'Filter PIlot'.
# NOTE(review): the capital 'I' must match the spelling used in the tracking sheet - confirm.
dataset[dataset['Experiment'] == 'Filter PIlot']

In [None]:
# Label each animal's 'Mouse Line' from substrings of its 'Specimen ID'.
# Order matters: a later match overwrites an earlier one, so an ID containing both
# 'APP/PS1' and 'rTg4510' (or 'Cre') ends up with the more specific combined label.
mouse_line_by_substring = [
    ('APP/PS1', 'APP/PS1'),
    ('APP_PS1', 'APP/PS1'),
    ('J20', 'J20'),
    ('Tg2576', 'Tg2576'),
    ('rTg4510', 'APP/PS1/rTg4510'),
    ('Cre', 'APP/PS1/Cre'),
]
for specimen_substring, mouse_line_label in mouse_line_by_substring:
    matches = dataset['Specimen ID'].str.contains(specimen_substring)
    dataset.loc[matches, 'Mouse Line'] = mouse_line_label

In [None]:
# Specimen IDs of the rows whose 'Mouse Line' is still null.
dataset[dataset['Mouse Line'].isnull()]['Specimen ID']

In [None]:
# Distinct 'Mouse Line' labels present in the dataset.
dataset['Mouse Line'].unique()

In [None]:
# 'Dataset' column for the animals labelled 'APP/PS1/rTg4510'.
dataset[dataset['Mouse Line'] == 'APP/PS1/rTg4510']['Dataset']

In [None]:
# Assign each listed image series to an analysis group, printing the size of each
# group right after it is assigned. Image series not listed here keep a null 'group'.
group_members = [
    ('control', [501931571, 505146658, 505147620, 505270489, 504727983,
                 514738621, 515754694, 515755412, 515917061, 519142254,
                 519726404, 519730753, 525771071, 525793327, 525794145,
                 526187745, 526188472, 526189182, 530695533, 584513037,
                 588346881, 588185697, 588668135, 588521040, 589702175,
                 591392216, 591394969, 591392970, 591533591, 591536022,
                 685071020, 688072968, 652523559, 686682385, 697635588]),
    ('APP', [650144906, 690736862, 692167387, 698112037]),
    ('tau', [650144198, 682198929, 683620148]),
    ('APP/tau', [682203974, 693151433, 693665781]),
]
for group_name, member_ids in group_members:
    in_group = dataset['image_series_id'].isin(member_ids)
    dataset.loc[in_group, 'group'] = group_name
    print(len(dataset[dataset['group'] == group_name]))

In [None]:
# Keep only the animals that were assigned to a group.
has_group = dataset['group'].notnull()
dataset = dataset[has_group]
print(len(dataset))

In [None]:
# Distinct group labels remaining in the dataset.
dataset['group'].unique()

In [None]:
def get_structure_data(structure):
    """Collect the volume of one structure for every experiment in ``dataset``.

    Reads the module-level ``ia_map``, ``dataset`` and ``dat``.

    Parameters
    ----------
    structure : str
        Key of ``ia_map`` (a structure acronym); mapped to the structure ID
        used in ``dat['Structure_ID']``.

    Returns
    -------
    pandas.DataFrame
        One row per experiment with columns ``structure`` (the value passed
        in), ``group`` and ``volume``. Rows are ordered group by group,
        following ``dataset['group'].unique()``.

    Raises
    ------
    KeyError
        If ``structure`` is not a key of ``ia_map``.
    IndexError
        If ``dat`` has no row for this structure in one of the experiments.
    """
    structure_id = ia_map[structure]
    # The structure filter is the same for every experiment, so apply it once
    # instead of re-scanning all of `dat` inside the loop.
    structure_rows = dat[dat['Structure_ID'] == structure_id]
    volume = []
    groups = []
    for group in dataset['group'].unique():
        isids = dataset[dataset['group'] == group]['image_series_id'].values
        for isid in isids:
            groups.append(group)
            experiment_rows = structure_rows[structure_rows['Image_Series_ID'] == isid]
            # First matching row wins if an experiment has several rows.
            volume.append(experiment_rows['Volume'].values[0])
    structuredat = pd.DataFrame({'structure': structure,
                                 'group': groups, 'volume': volume})
    return structuredat

def get_relative_structure_data(structures):
    """Express each non-control experiment's structure volume as a percent of control.

    Reads the module-level ``dataset``, ``dat`` and ``ai_map``.

    Parameters
    ----------
    structures : iterable of int
        Structure IDs as found in ``dat['Structure_ID']``.

    Returns
    -------
    pandas.DataFrame
        One row per (structure, non-control experiment) with columns
        ``structure`` (acronym from ``ai_map``), ``group`` and ``volume``,
        where ``volume`` is ``100 * volume / mean control volume`` for that
        structure. If there are no control rows for a structure the mean is
        NaN and so are its relative volumes.

    Raises
    ------
    KeyError
        If a structure ID is not a key of ``ai_map``.
    IndexError
        If ``dat`` has no row for a structure in one of the experiments.
    """
    control_isids = dataset[dataset['group'] == 'control']['image_series_id'].values
    # Group membership does not depend on the structure, so resolve it once
    # rather than re-filtering `dataset` for every structure.
    experiments_by_group = [
        (group, dataset[dataset['group'] == group]['image_series_id'].values)
        for group in dataset['group'].unique()
        if group != 'control'
    ]
    # Distinct local names: the original rebound the `structures` parameter
    # to its output list, which obscured what was input and what was result.
    structure_names = []
    groups = []
    volumes = []
    for structure_id in structures:
        structure_rows = dat[dat['Structure_ID'] == structure_id]
        refvolume = structure_rows[
            structure_rows['Image_Series_ID'].isin(control_isids)]['Volume'].mean()
        for group, isids in experiments_by_group:
            for isid in isids:
                structure_names.append(ai_map[structure_id])
                groups.append(group)
                volume = structure_rows[
                    structure_rows['Image_Series_ID'] == isid]['Volume'].values[0]
                volumes.append((volume/refvolume)*100)
    structuredat = pd.DataFrame({'structure': structure_names,
                                 'group': groups, 'volume': volumes})
    return structuredat

In [None]:
# Absolute volumes for every coarse structure plus the fiber tracts, stacked into one frame.
# Frames are collected in a list and concatenated once: this avoids seeding the result
# with an empty frame whose columns were given as an unordered set, and avoids
# re-copying the growing frame on every iteration.
structure_frames = []
for structure in coarse_structure_ids:
    structure_acronym = ai_map[structure]
    structure_frames.append(get_structure_data(structure_acronym))
structure_frames.append(get_structure_data('fiber tracts'))
pltdat = pd.concat(structure_frames)
# Long format for seaborn: the 'volume' column becomes 'structure volume'.
mdf = pd.melt(pltdat, id_vars = ['structure', 'group'], value_name = 'structure volume')

In [None]:
# Preview the last rows of the melted plotting table.
mdf.tail()

In [None]:
# Violin plot of absolute structure volume per structure, split by group, saved as a PDF.
fig, ax = plt.subplots(1, figsize = (15, 5))
# x/y are passed by keyword because recent seaborn releases no longer accept them
# positionally. The stray `font_size` argument was dropped: it is not a violinplot
# parameter, and all text sizes are set explicitly below.
g = sns.violinplot(x='structure', y='structure volume', hue='group', data=mdf, ax=ax)
g.set_title('Volume of major structure divisions in 12 mo mice', fontsize=30)
g.set_xlabel('Structure', fontsize=20)
g.set_ylabel('Volume', fontsize=20)
g.tick_params(labelsize=20)
plt.tight_layout()
plt.savefig(os.path.join(path, 'boxplots major brain divisions volume by line.pdf'), 
            bbox_inches='tight', pad_inches=0.3, format='pdf', transparent = True, dpi=1000)

In [None]:
# `pltdat` is rebound here: it now holds, for each coarse structure, every non-control
# experiment's volume as a percent of the control mean.
pltdat = get_relative_structure_data(coarse_structure_ids)
pltdat.head()

In [None]:
# Swarm plot of structure volume relative to control, one point per experiment, saved as a PDF.
fig, ax = plt.subplots(1, figsize = (15, 5))
# x/y are passed by keyword because recent seaborn releases no longer accept them positionally.
g = sns.swarmplot(x='structure', y='volume', hue='group', data=pltdat, ax=ax, s=10)
# Reference line at 100 % of control. xmin/xmax are axes fractions in [0, 1],
# so the full width is xmax=1 (the original passed 100, outside that range).
ax.axhline(y=100, xmin=0, xmax=1, color = 'grey', linestyle = 'dashed', zorder = -1)
g.set_title('Volume of major structure divisions in 12 mo mice relative to control', 
            fontsize=20)
g.set_xlabel('Structure', fontsize=20)
g.set_ylabel('Volume (percent of control)', fontsize=20)
g.tick_params(labelsize=20)
# NOTE(review): tight_layout() below recomputes the subplot parameters, so this
# adjustment likely has no visible effect - confirm before removing it.
plt.subplots_adjust(top=1.2)
plt.legend(loc=4)
plt.tight_layout()
plt.savefig(os.path.join(path, 'boxplots major brain divisions relative volume by line.pdf'), 
            bbox_inches='tight', pad_inches=0.3, format='pdf', transparent = True, dpi=1000)