In [None]:
import os
import pandas as pd
import seaborn as sns
sns.set_context('poster')
sns.set_style('white')
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib as mpl
from matplotlib.backends.backend_pdf import PdfPages
mpl.rcParams['pdf.fonttype'] = 42

from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache

In [None]:
# Root folder holding the structure-volume CSVs read below. Set the
# PLAQUE_DATA_DIR environment variable to point the notebook at another
# location; without it the original Dropbox folder is used.
path = os.environ.get(
    'PLAQUE_DATA_DIR',
    r'/Users/jenniferwh/Dropbox (Allen Institute)/Mesoscale Connectome Papers in Progress/2018 Plaque')

In [None]:
# Read the two structure-volume CSVs (control and APP) stored under `path`.
control = pd.read_csv(os.path.join(path, 'app_control_structure_volumes.csv'))
app = pd.read_csv(os.path.join(path, 'app_structure_volumes.csv'))

In [None]:
# Peek at the first rows of the APP table.
app.head()

In [None]:
# Count the distinct Structure_ID values in the APP table.
len(app['Structure_ID'].unique())

In [None]:
# Open the connectivity cache from its manifest and fetch the structure tree.
mcc = MouseConnectivityCache(manifest_file = '../connectivity/mouse_connectivity_manifest.json')
st = mcc.get_structure_tree()
# Structures belonging to structure set 167587189.
# NOTE(review): presumably the Allen "summary structures" set — confirm against the AllenSDK structure-set listing.
ss = st.get_structures_by_set_id([167587189])
# IDs of those structures; later cells filter on membership in this list.
ss_ids = [structure['id'] for structure in ss]

In [None]:
# Number of structures in ss.
len(ss)

In [None]:
# Look up the 'Isocortex' structure and the descendant IDs the tree reports for it.
iso = st.get_structures_by_acronym(['Isocortex'])[0]
iso_d = st.descendant_ids([iso['id']])[0]
# Keep only the descendants whose ID also appears in ss_ids.
iso_ss = list(filter(lambda structure_id: structure_id in ss_ids, iso_d))
print(len(iso_ss))

In [None]:
# Look up the 'HPF' structure and the descendant IDs the tree reports for it.
hipp = st.get_structures_by_acronym(['HPF'])[0]
hipp_d = st.descendant_ids([hipp['id']])[0]
# Keep only the descendants whose ID also appears in ss_ids.
hipp_ss = list(filter(lambda structure_id: structure_id in ss_ids, hipp_d))
print(len(hipp_ss))

In [None]:
# Lookup table from the structure tree, and its inverse. ai_map is indexed by
# Structure_ID further down to obtain the acronym, so ia_map is the
# acronym -> ID direction.
ia_map = st.get_id_acronym_map()
# dict.items() works on Python 2 and 3; dict.iteritems() exists only on Python 2.
ai_map = {value: key for key, value in ia_map.items()}

In [None]:
# Fixed x-axis order of structure acronyms, passed as `order=` to the isocortex plots.
plot_order = ['FRP', 'MOp', 'MOs', 'SSp-n', 'SSp-bfd', 'SSp-ll', 'SSp-m', 'SSp-ul', 'SSp-tr', 'SSp-un',
              'SSs', 'GU', 'VISC', 'AUDd', 'AUDp', 'AUDpo', 'AUDv', 'VISal', 'VISam', 'VISl', 'VISp', 'VISpl',
              'VISpm', 'VISli', 'VISpor', 'ACAd', 'ACAv', 'PL', 'ILA', 'ORBl', 'ORBm', 'ORBvl', 'AId', 'AIp',
              'AIv', 'RSPagl', 'RSPd', 'RSPv', 'VISa', 'VISrl', 'TEa', 'PERI', 'ECT']

In [None]:
# Stack the APP and control tables into one frame. ignore_index=True gives the
# combined frame a fresh, unique row index instead of repeating the row labels
# of each input table.
dat = pd.concat([app, control], ignore_index=True)
# Label every row with the acronym of its structure (raises KeyError for an
# ID that is missing from ai_map).
dat['structure acronym'] = [ai_map[structure] for structure in dat['Structure_ID']]

In [None]:
# List the genotype groups present in the combined table.
dat['Genotype group'].unique()

In [None]:
# Keep only rows whose genotype group is APP/PS1, J20, Tg2576 or control.
dat = dat[dat['Genotype group'].isin(['APP/PS1', 'J20', 'Tg2576', 'control'])]
# Alternative filter without Tg2576 (disabled):
#dat = dat[dat['Genotype group'].isin(['APP/PS1', 'J20', 'control'])]

In [None]:
# Plot palette. The plots pair it with hue_order ['APP/PS1', 'J20', 'Tg2576', 'control'],
# so the colors are listed in that order (the last entry, black, is control).
mouse_line_colors = ['#8da0cb', '#fc8d62', '#66c2a5', '#000000']

In [None]:
# Opens an empty figure/axes pair.
# NOTE(review): within this notebook both names are reassigned by later cells
# before they are read, so this looks like leftover scratch — confirm and delete.
fig, ax = plt.subplots()

In [None]:
import matplotlib.lines as mlines
def get_mouse_line_legend():
    """Build the legend handles shared by the per-line plots.

    Returns a list of four empty ``Line2D`` proxies in the order control,
    APP/PS1, hAPP-J20, Tg2576. Control is drawn in black ('k'); the other
    three take the first three entries of ``mouse_line_colors``.
    """
    label_and_color = [
        ('control', 'k'),
        ('APP/PS1', mouse_line_colors[0]),
        ('hAPP-J20', mouse_line_colors[1]),
        ('Tg2576', mouse_line_colors[2]),
    ]
    return [mlines.Line2D([], [], color=shade, label=name)
            for name, shade in label_and_color]

In [None]:
# Figures are written to the 'figures' subfolder of the data folder. Deriving
# it from `path` avoids repeating the absolute root in two places.
savepath = os.path.join(path, 'figures')

In [None]:
# Isocortex structure volumes, one figure per age group: control mean with
# error bars (point plot) overlaid with the individual non-control animals
# (strip plot).
ages = ['4 mo', '6 mo', '9 mo', '12 mo', '15 mo', '18 mo']
rows = len(ages)
hue_levels = ['APP/PS1', 'J20', 'Tg2576', 'control']
pdf_file = os.path.join(savepath, 'isocortex region volume by age and line.pdf')
# catplot is a figure-level function and opens a new figure on every call, so a
# single plt.savefig after the loop would only capture the last age group.
# Each age is therefore written as its own page of one multi-page PDF.
with PdfPages(pdf_file) as pdf:
    for i in range(rows):
        pltdat = dat[(dat['Structure_ID'].isin(iso_ss)) &
                     (dat['Age group'] == ages[i])]
        # catplot replaces the deprecated factorplot (same arguments).
        sns.catplot(x="structure acronym", y="Volume", hue="Genotype group", capsize=.2,
                    palette=mouse_line_colors, height=4, aspect=3, kind="point",
                    data=pltdat[pltdat['Genotype group'] == 'control'],
                    order=plot_order, hue_order=hue_levels, alpha=0.3,
                    legend=False, zorder=-1)
        # No ax given: the strip plot lands on the axes catplot just created.
        g = sns.stripplot(x="structure acronym", y="Volume", hue="Genotype group",
                          palette=mouse_line_colors,
                          data=pltdat[pltdat['Genotype group'] != 'control'], s=15,
                          order=plot_order, hue_order=hue_levels, alpha=0.7)
        # Drop the automatic legend; a shared one is added to the first figure only.
        if g.legend_ is not None:
            g.legend_.remove()
        plt.xticks(rotation=-90)
        plt.title(ages[i])
        if i == 0:
            plt.legend(handles=get_mouse_line_legend(), loc=1, bbox_to_anchor=(1, 1.4))
        # PdfPages always writes PDF, so no format argument is passed here.
        pdf.savefig(plt.gcf(), bbox_inches='tight', pad_inches=0.3,
                    transparent=True, dpi=300)

In [None]:
# Hippocampal structure volumes, one figure per age group: control mean with
# error bars (point plot) overlaid with the individual non-control animals
# (strip plot).
ages = ['4 mo', '6 mo', '9 mo', '12 mo', '15 mo', '18 mo']
rows = len(ages)
hue_levels = ['APP/PS1', 'J20', 'Tg2576', 'control']
# The two layers are drawn from different subsets of the data. Without an
# explicit order each layer would derive its own category order, and the layers
# could disagree on x positions. Fix one order for both layers and all ages.
structure_order = list(dat[dat['Structure_ID'].isin(hipp_ss)]['structure acronym'].unique())
pdf_file = os.path.join(savepath, 'hippocampus region volume by age and line.pdf')
# catplot is a figure-level function and opens a new figure on every call, so a
# single plt.savefig after the loop would only capture the last age group.
# Each age is therefore written as its own page of one multi-page PDF.
with PdfPages(pdf_file) as pdf:
    for i in range(rows):
        pltdat = dat[(dat['Structure_ID'].isin(hipp_ss)) &
                     (dat['Age group'] == ages[i])]
        # catplot replaces the deprecated factorplot (same arguments).
        sns.catplot(x="structure acronym", y="Volume", hue="Genotype group", capsize=.2,
                    palette=mouse_line_colors, height=4, aspect=3, kind="point",
                    data=pltdat[pltdat['Genotype group'] == 'control'],
                    order=structure_order, hue_order=hue_levels, alpha=0.3,
                    legend=False, zorder=-1)
        # No ax given: the strip plot lands on the axes catplot just created.
        g = sns.stripplot(x="structure acronym", y="Volume", hue="Genotype group",
                          palette=mouse_line_colors,
                          data=pltdat[pltdat['Genotype group'] != 'control'], s=15,
                          order=structure_order, hue_order=hue_levels, alpha=0.7)
        # Drop the automatic legend; a shared one is added to the first figure only.
        if g.legend_ is not None:
            g.legend_.remove()
        plt.xticks(rotation=-90)
        plt.title(ages[i])
        if i == 0:
            plt.legend(handles=get_mouse_line_legend(), loc=1, bbox_to_anchor=(1, 1.4))
        # PdfPages always writes PDF, so no format argument is passed here.
        pdf.savefig(plt.gcf(), bbox_inches='tight', pad_inches=0.3,
                    transparent=True, dpi=300)

In [None]:
# Isocortex structure volumes per genotype group, one facet row per sex.
pltdat = dat[dat['Structure_ID'].isin(iso_ss)]
# catplot replaces the deprecated factorplot (same arguments).
sns.catplot(x="structure acronym", y="Volume", hue="Genotype group", row="sex", capsize=.2,
            palette=mouse_line_colors, height=5, aspect=3, kind="point", data=pltdat,
            hue_order=['APP/PS1', 'J20', 'Tg2576', 'control'], alpha=0.5,
            legend=False)
# pyplot calls act on the current axes only (the last facet drawn).
plt.xticks(rotation=-90)
plt.legend(loc=1)

In [None]:
# Hippocampal structure volumes per genotype group, one facet row per sex.
pltdat = dat[dat['Structure_ID'].isin(hipp_ss)]
# catplot replaces the deprecated factorplot (same arguments).
sns.catplot(x="structure acronym", y="Volume", hue="Genotype group", row="sex", capsize=.2,
            palette=mouse_line_colors, height=5, aspect=3, kind="point", data=pltdat,
            hue_order=['APP/PS1', 'J20', 'Tg2576', 'control'], alpha=0.5,
            legend=False)
# pyplot calls act on the current axes only (the last facet drawn).
plt.xticks(rotation=-90)
plt.legend(loc=1)

In [None]:
# Preview the first rows of dat.
dat.head()

In [None]:
# Mean control 'Volume' for every combination of age group and structure ID
# that occurs in dat, collected into one long-format table.
ages, structures, means = [], [], []
is_control = dat['Genotype group'] == 'control'
for age in dat['Age group'].unique():
    in_age = dat['Age group'] == age
    for structure in dat['Structure_ID'].unique():
        in_structure = dat['Structure_ID'] == structure
        ages.append(age)
        structures.append(structure)
        means.append(np.mean(dat.loc[in_age & in_structure & is_control, 'Volume']))
meanc = pd.DataFrame({'age': ages, 'structure': structures, 'mean': means})

In [None]:
# Preview the control-mean table.
meanc.head()

In [None]:
# Spot check: control mean for the (age, structure) pair left in scope by the
# loops that built meanc.
pair_mask = (meanc['age'] == age) & (meanc['structure'] == structure)
meanc[pair_mask]['mean'].values

In [None]:
# Display the full list of structure IDs in ss_ids.
ss_ids

In [None]:
# Restrict to the structures listed in ss_ids. The .copy() makes the new column
# below land on an independent frame rather than on a slice of the previous dat.
dat = dat[dat['Structure_ID'].isin(ss_ids)].copy()
# Express each volume as a percentage of the control mean for the SAME age
# group and the SAME structure. meanc is renamed so its key columns match dat.
control_lookup = meanc.rename(columns={'age': 'Age group',
                                       'structure': 'Structure_ID',
                                       'mean': 'control_mean'})
# Only the two key columns of dat take part in the merge, so no other column
# can collide with the lookup's names.
matched = dat[['Age group', 'Structure_ID']].merge(control_lookup, how='left',
                                                   on=['Age group', 'Structure_ID'])
# A left merge keeps the row order of dat. meanc holds one row per
# (age, structure) pair, so the result has exactly one row per row of dat.
assert len(matched) == len(dat), 'meanc contains duplicate (age, structure) rows'
# Positional (.values) arithmetic: `matched` carries a fresh index, not dat's.
dat['percent_control'] = dat['Volume'].values / matched['control_mean'].values * 100

In [None]:
# Show the last rows of dat, including the new percent_control column.
dat.tail()

In [None]:
# Summary statistics for the columns of dat.
dat.describe()

In [None]:
# Isocortex volumes as percent of control, one figure per age group: group mean
# with error bars (point plot) overlaid with the individual animals (strip
# plot). Both layers use the non-control rows only.
ages = ['4 mo', '6 mo', '9 mo', '12 mo', '15 mo', '18 mo']
rows = len(ages)
hue_levels = ['APP/PS1', 'J20', 'Tg2576', 'control']
pdf_file = os.path.join(savepath, 'isocortex region volume by age and line percent control.pdf')
# catplot is a figure-level function and opens a new figure on every call, so a
# single plt.savefig after the loop would only capture the last age group.
# Each age is therefore written as its own page of one multi-page PDF.
with PdfPages(pdf_file) as pdf:
    for i in range(rows):
        pltdat = dat[(dat['Structure_ID'].isin(iso_ss)) &
                     (dat['Age group'] == ages[i])]
        # catplot replaces the deprecated factorplot (same arguments).
        sns.catplot(x="structure acronym", y="percent_control", hue="Genotype group", capsize=.2,
                    palette=mouse_line_colors, height=4, aspect=3, kind="point",
                    data=pltdat[pltdat['Genotype group'] != 'control'],
                    order=plot_order, hue_order=hue_levels, alpha=0.3,
                    legend=False, zorder=-1)
        # No ax given: the strip plot lands on the axes catplot just created.
        g = sns.stripplot(x="structure acronym", y="percent_control", hue="Genotype group",
                          palette=mouse_line_colors,
                          data=pltdat[pltdat['Genotype group'] != 'control'], s=15,
                          order=plot_order, hue_order=hue_levels, alpha=0.7)
        # Drop the automatic legend; a shared one is added to the first figure only.
        if g.legend_ is not None:
            g.legend_.remove()
        plt.xticks(rotation=-90)
        plt.title(ages[i])
        if i == 0:
            plt.legend(handles=get_mouse_line_legend(), loc=1, bbox_to_anchor=(1, 1.4))
        # PdfPages always writes PDF, so no format argument is passed here.
        pdf.savefig(plt.gcf(), bbox_inches='tight', pad_inches=0.3,
                    transparent=True, dpi=300)

In [None]:
# Hippocampal structure volumes, one figure per sex: control mean with error
# bars (point plot) overlaid with the individual non-control animals (strip plot).
pltdat = dat[dat['Structure_ID'].isin(hipp_ss)]
hue_levels = ['APP/PS1', 'J20', 'Tg2576', 'control']
# The two layers are drawn from different subsets of the data; give both the
# same explicit category order so their x positions always agree.
structure_order = list(pltdat['structure acronym'].unique())
for sex_code in ['M', 'F']:
    sex_dat = pltdat[pltdat['sex'] == sex_code]
    # catplot creates its own figure, so no separate plt.figure() is needed.
    grid = sns.catplot(x="structure acronym", y="Volume", hue="Genotype group", capsize=.2,
                       palette=mouse_line_colors, height=5, aspect=2, kind="point",
                       data=sex_dat[sex_dat['Genotype group'] == 'control'],
                       order=structure_order, hue_order=hue_levels, alpha=0.5,
                       legend=False)
    sns.stripplot(x="structure acronym", y="Volume", hue="Genotype group",
                  palette=mouse_line_colors,
                  data=sex_dat[sex_dat['Genotype group'] != 'control'],
                  order=structure_order, hue_order=hue_levels, s=15, alpha=0.5,
                  ax=grid.ax)
    # Replace the automatic legend with the shared mouse-line legend.
    if grid.ax.legend_ is not None:
        grid.ax.legend_.remove()
    # Decorate each figure through its own axes so the male and female plots
    # both get rotated tick labels, a legend, and a title naming the sex.
    plt.setp(grid.ax.get_xticklabels(), rotation=-90)
    grid.ax.set_title(sex_code)
    grid.ax.legend(handles=get_mouse_line_legend(), loc=1)

In [None]:
# Look up the 'TH' structure and the descendant IDs the tree reports for it.
th = st.get_structures_by_acronym(['TH'])[0]
th_d = st.descendant_ids([th['id']])[0]
# Keep only the descendants whose ID also appears in ss_ids.
th_ss = list(filter(lambda structure_id: structure_id in ss_ids, th_d))
print(len(th_ss))

In [None]:
# Volumes of the structures in th_ss per genotype group, one figure per age group.
ages = ['4 mo', '6 mo', '9 mo', '12 mo', '15 mo', '18 mo']
rows = len(ages)
hue_levels = ['APP/PS1', 'J20', 'control']
# mouse_line_colors has four entries but only three hue levels are plotted here.
# A list palette is assigned by position, which would draw 'control' in the
# third (Tg2576) color. Keying the colors by genotype keeps control black.
line_palette = dict(zip(['APP/PS1', 'J20', 'Tg2576', 'control'], mouse_line_colors))
for i in range(rows):
    # catplot opens its own figure for each age, so no subplot is added to a
    # figure left over from an earlier cell.
    pltdat = dat[(dat['Structure_ID'].isin(th_ss)) &
                 (dat['Age group'] == ages[i])]
    # catplot replaces the deprecated factorplot (same arguments).
    sns.catplot(x="structure acronym", y="Volume", hue="Genotype group", capsize=.2,
                palette=line_palette, height=5, aspect=3, kind="point", data=pltdat,
                hue_order=hue_levels, alpha=0.5,
                legend=False)
    plt.xticks(rotation=-90)
    plt.title(ages[i])
    if i == 0:
        plt.legend(loc=1)

In [None]:
# Structures belonging to structure set 687527670 and their IDs.
# NOTE(review): presumably the Allen coarse-level structure set, going by the variable name — confirm.
coarse_structures = st.get_structures_by_set_id([687527670])
coarse_structure_ids = [item['id'] for item in coarse_structures]

In [None]:
# Coarse-structure volumes per genotype group (APP/PS1, J20, control), one
# figure per age group.
ages = ['4 mo', '6 mo', '9 mo', '12 mo', '15 mo', '18 mo']
rows = len(ages)
hue_levels = ['APP/PS1', 'J20', 'control']
# mouse_line_colors has four entries but only three hue levels are plotted here.
# A list palette is assigned by position, which would draw 'control' in the
# third (Tg2576) color. Keying the colors by genotype keeps control black.
line_palette = dict(zip(['APP/PS1', 'J20', 'Tg2576', 'control'], mouse_line_colors))
for i in range(rows):
    # catplot opens its own figure for each age, so no subplot is added to a
    # figure left over from an earlier cell.
    pltdat = dat[(dat['Structure_ID'].isin(coarse_structure_ids)) &
                 (dat['Age group'] == ages[i])]
    # catplot replaces the deprecated factorplot (same arguments).
    sns.catplot(x="structure acronym", y="Volume", hue="Genotype group", capsize=.2,
                palette=line_palette, height=5, aspect=3, kind="point", data=pltdat,
                hue_order=hue_levels, alpha=0.5,
                legend=False)
    plt.xticks(rotation=-90)
    plt.title(ages[i])
    if i == 0:
        plt.legend(loc=1)

In [None]:
# Repeats the genotype filter and the palette definition from earlier cells
# (same four groups, same four colors) before the four-line coarse plot below.
dat = dat[dat['Genotype group'].isin(['APP/PS1', 'J20', 'Tg2576', 'control'])]
mouse_line_colors = ['#8da0cb', '#fc8d62', '#66c2a5', '#000000']

In [None]:
# Coarse-structure volumes per genotype group (all four groups), one figure per
# age group.
ages = ['4 mo', '6 mo', '9 mo', '12 mo', '15 mo', '18 mo']
rows = len(ages)
for i in range(rows):
    # catplot opens its own figure for each age, so no subplot is added to a
    # figure left over from an earlier cell.
    pltdat = dat[(dat['Structure_ID'].isin(coarse_structure_ids)) &
                 (dat['Age group'] == ages[i])]
    # catplot replaces the deprecated factorplot (same arguments).
    sns.catplot(x="structure acronym", y="Volume", hue="Genotype group", capsize=.2,
                palette=mouse_line_colors, height=5, aspect=3, kind="point", data=pltdat,
                hue_order=['APP/PS1', 'J20', 'Tg2576', 'control'], alpha=0.5,
                legend=False)
    plt.xticks(rotation=-90)
    plt.title(ages[i])
    if i == 0:
        plt.legend(loc=1)

In [None]:
# Preview the first rows of dat.
dat.head()

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Worked example on the Star98 dataset that ships with statsmodels. The frame
# has to be loaded before it is sliced; `star98` is not defined anywhere else
# in this notebook.
star98 = sm.datasets.star98.load_pandas().data
formula = 'SUCCESS ~ LOWINC + PERASIAN + PERBLACK + PERHISP + PCTCHRT + \
           PCTYRRND + PERMINTE*AVYRSEXP*AVSALK + PERSPENK*PTRATIO*PCTAF'
# .copy() so the pop / del / assignment below modify an independent frame
# rather than a slice of star98.
dta = star98[['NABOVE', 'NBELOW', 'LOWINC', 'PERASIAN', 'PERBLACK', 'PERHISP',
              'PCTCHRT', 'PCTYRRND', 'PERMINTE', 'AVYRSEXP', 'AVSALK',
              'PERSPENK', 'PTRATIO', 'PCTAF']].copy()
# Response = NABOVE / (NABOVE + NBELOW); the two count columns are then removed.
endog = dta['NABOVE'] / (dta['NABOVE'] + dta.pop('NBELOW'))
del dta['NABOVE']
dta['SUCCESS'] = endog

In [None]:
def fit_glm(categorical_var, mouse_line, age, plaque_volume):
    '''Fit one ordinary-least-squares model per experiment.

    Despite the name, the model fitted is ``sm.OLS``. For experiment ``exp``
    the design matrix has the columns
    ``[mouse_line[exp], age[exp], <dummy columns>, constant]`` and the
    response is ``plaque_volume[exp]``.

    inputs
    1. categorical_var: categorical labels (e.g. sex). They are dummy-coded
       once and the first level is dropped as the reference category.
    2. mouse_line: sequence indexed by experiment; first regressor
    3. age: sequence indexed by experiment; second regressor
    4. plaque_volume: sequence indexed by experiment; response (plaque volume
       per region). Its length sets the number of models fitted.

    NOTE(review): the columns are stacked with np.column_stack, so every
    per-experiment entry presumably has one value per label in
    categorical_var — confirm against the caller.

    returns
    coeff1: list with the fitted coefficient on mouse_line[exp] per experiment
    coeff2: list with the fitted coefficient on age[exp] per experiment
    tvals, pvals: lists holding the t-values / p-values of each fit
    '''
    coeff1 = []  # coefficient on mouse_line
    coeff2 = []  # coefficient on age
    tvals = []
    pvals = []

    # The dummy coding does not depend on the experiment, so build it once.
    # pd.get_dummies replaces the deprecated sm.categorical; both order the
    # levels by sorted value.
    groups = np.array(categorical_var)
    dummy = pd.get_dummies(groups, dtype=float).values

    # One fit per response vector. The loop bound previously referred to an
    # undefined global name.
    for exp in range(len(plaque_volume)):
        x1 = mouse_line[exp]
        x2 = age[exp]

        # drop reference category
        X = np.column_stack((x1, x2, dummy[:, 1:]))
        # constant goes last, so params[0] and params[1] belong to x1 and x2
        X = sm.add_constant(X, prepend=False)

        y = plaque_volume[exp]

        # fit
        fit = sm.OLS(y, X).fit()

        # add coeff
        coeff1 += [fit.params[0]]
        coeff2 += [fit.params[1]]
        tvals += [fit.tvalues]
        pvals += [fit.pvalues]
    return coeff1, coeff2, tvals, pvals