In [None]:
import os
import pickle
import sqlite3

import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from matplotlib import pyplot as plt
from scipy import stats

%matplotlib inline

In [None]:
# Global plot styling: white seaborn style, and the 12-colour "Paired" palette as the default colour cycle.
sns.set(style="white",color_codes=True)
pal = sns.color_palette("Paired", 12)
sns.set_palette(pal)

## Load Datasets

There are three datasets to look at: the average template mice, the app control group, and the app+ mice.

### Average Template Mice

In [None]:
# Open the SQLite database that holds the average-template table (hard-coded network path).
conn = sqlite3.connect('//allen/aibs/ccf/Maitham/volume_measurement/structure_analysis.db')

In [None]:
# Load the whole avg_temp_finished table into a DataFrame.
avg_temp = pd.read_sql_query('select * from avg_temp_finished',conn)

In [None]:
# Done with the average-template database; `conn` is reassigned for the next database below.
conn.close()

In [None]:
# Sort rows by image series, then structure, then side.
# NOTE(review): later cells pick "left"/"right" from the order in which unique
# Structure_Index values appear, which presumably depends on this sort — confirm.
avg_temp = avg_temp.sort_values(by=['Image_Series_ID','Structure_ID','side'])

In [None]:
# Preview the first rows of the sorted average-template table.
avg_temp.head()

### app control group

In [None]:
# Open the SQLite database that holds both the control and app tables (hard-coded network path).
conn = sqlite3.connect('//allen/aibs/ccf/Maitham/Alzheimer_Data/jennifer_structure_data.db')

In [None]:
# Load the whole control_total_finished table into a DataFrame.
control = pd.read_sql_query('select * from control_total_finished',conn)

In [None]:
# Preview the first rows of the control table.
control.head()

### app+

In [None]:
# Load the whole app_total_finished table (same connection as the control table).
app = pd.read_sql_query('select * from app_total_finished',conn)

In [None]:
# Both tables are loaded; release the database connection.
conn.close()

In [None]:
# Preview the first rows of the app table.
app.head()

## Whole Brain

The first thing to compare is the whole brain volume and variability.

### Volume

In [None]:
# Volume column of the rows with Structure_ID 997 (the structure this
# "Whole Brain" section analyses), one Series per dataset.
whole_brain_id = 997
wb_vol_avg_temp = avg_temp.loc[avg_temp.Structure_ID == whole_brain_id, 'Volume']
wb_vol_control = control.loc[control.Structure_ID == whole_brain_id, 'Volume']
wb_vol_app = app.loc[app.Structure_ID == whole_brain_id, 'Volume']

In [None]:
# Overlay the whole-brain volume distributions of the three datasets.
# Labels and a legend are included so the colours can be told apart.
sns.distplot(wb_vol_avg_temp,color='blue',label='avg temp')
sns.distplot(wb_vol_control,color='green',label='control')
sns.distplot(wb_vol_app,color='red',label='app+')
plt.legend()

### Variability Model

The variability model uses surface area to volume ratio (sa2v) to predict the coefficient of variation (cov).

mcov_sqrd ~ p*sa2v, where p = 7.0466

In [None]:
# Row labels of the Structure_ID 997 (whole brain) rows in each dataset.
wb_index_avg_temp = avg_temp.index[(avg_temp.Structure_ID == 997).values]
wb_index_control = control.index[(control.Structure_ID == 997).values]
wb_index_app = app.index[(app.Structure_ID == 997).values]

In [None]:
def _resid_over_sa2v(frame, row_labels):
    """Return the 'resid' column divided by the 'sa2v' column for the given rows."""
    rows = frame.loc[row_labels]
    return rows['resid'] / rows['sa2v']

# Whole-brain residuals normalized to sa2v, one Series per dataset.
wb_resid_avg_temp = _resid_over_sa2v(avg_temp, wb_index_avg_temp)
wb_resid_control = _resid_over_sa2v(control, wb_index_control)
wb_resid_app = _resid_over_sa2v(app, wb_index_app)

In [None]:
# Overlay the whole-brain normalized-residual distributions of the three datasets.
# Labels and a legend are included so the colours can be told apart.
sns.distplot(wb_resid_avg_temp,color='blue',label='avg temp')
sns.distplot(wb_resid_control,color='green',label='control')
sns.distplot(wb_resid_app,color='red',label='app+')
plt.legend()

## Coarse level structures

In [None]:
# Coarse structure IDs
# Pickle files must be opened in binary mode: text mode fails on Python 3 and
# can corrupt binary pickles on Windows.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted location.
with open('//allen/aibs/ccf/Maitham/Alzheimer_Data/coarse', 'rb') as f:
    coarse = pickle.load(f)

### Average Template

In [None]:
# Structure indices for right structures.
# For each coarse Structure_ID, take the distinct Structure_Index values in row
# order: the second one when there is more than one, otherwise the only one.
coarse_structure_right_index = []
for structure_id in coarse:
    side_indices = avg_temp.loc[avg_temp.Structure_ID == structure_id, 'Structure_Index'].unique().tolist()
    chosen = side_indices[1] if len(side_indices) > 1 else side_indices[0]
    coarse_structure_right_index.append(chosen)

In [None]:
# Structure indices for left structures.
# The first distinct Structure_Index (in row order) is used whether a
# structure has one or several.
coarse_structure_left_index = []
for structure_id in coarse:
    side_indices = avg_temp.loc[avg_temp.Structure_ID == structure_id, 'Structure_Index'].unique().tolist()
    coarse_structure_left_index.append(side_indices[0])

In [None]:
# DataFrame row labels of every avg_temp row belonging to a right coarse structure
coarse_right_index_avg_temp = [
    row_label
    for structure_index in coarse_structure_right_index
    for row_label in avg_temp.index[(avg_temp.Structure_Index == structure_index).values].tolist()
]

In [None]:
# DataFrame row labels of every avg_temp row belonging to a left coarse structure
coarse_left_index_avg_temp = [
    row_label
    for structure_index in coarse_structure_left_index
    for row_label in avg_temp.index[(avg_temp.Structure_Index == structure_index).values].tolist()
]

In [None]:
# Right coarse-structure rows of avg_temp, and their residuals divided by sa2v
coarse_right_avg_temp = avg_temp.loc[coarse_right_index_avg_temp]
coarse_right_resid_avg_temp = coarse_right_avg_temp['resid'] / coarse_right_avg_temp['sa2v']

In [None]:
# Left coarse-structure rows of avg_temp, and their residuals divided by sa2v
coarse_left_avg_temp = avg_temp.loc[coarse_left_index_avg_temp]
coarse_left_resid_avg_temp = coarse_left_avg_temp['resid'] / coarse_left_avg_temp['sa2v']

In [None]:
# Right vs. left normalized residuals for the average template.
for side_resid, side_colour, side_label in (
        (coarse_right_resid_avg_temp, 'blue', 'right'),
        (coarse_left_resid_avg_temp, 'red', 'left')):
    sns.distplot(side_resid, color=side_colour, label=side_label)
plt.legend()

#### Breakdown by structure

In [None]:
# Row labels of the right-side avg_temp rows, keyed by coarse Structure_ID
coarse_right_index_dict = {
    structure_id: coarse_right_avg_temp.index[
        (coarse_right_avg_temp.Structure_ID == structure_id).values].tolist()
    for structure_id in coarse
}

# Normalized residual (resid / sa2v) of those rows, keyed the same way
coarse_right_resid_dict = {}
for structure_id in coarse:
    structure_rows = coarse_right_avg_temp.loc[coarse_right_index_dict[structure_id]]
    coarse_right_resid_dict[structure_id] = structure_rows['resid'] / structure_rows['sa2v']

In [None]:
# Row labels of the left-side avg_temp rows, keyed by coarse Structure_ID
coarse_left_index_dict = {
    structure_id: coarse_left_avg_temp.index[
        (coarse_left_avg_temp.Structure_ID == structure_id).values].tolist()
    for structure_id in coarse
}

# Normalized residual (resid / sa2v) of those rows, keyed the same way
coarse_left_resid_dict = {}
for structure_id in coarse:
    structure_rows = coarse_left_avg_temp.loc[coarse_left_index_dict[structure_id]]
    coarse_left_resid_dict[structure_id] = structure_rows['resid'] / structure_rows['sa2v']

In [None]:
# Average template, right side: one distribution per coarse structure, labelled by Structure_ID.
for structure_id in coarse:
    structure_resid = coarse_right_resid_dict[structure_id]
    sns.distplot(structure_resid, label=str(structure_id))
plt.legend()
plt.title('Right')

In [None]:
# Average template, left side: one distribution per coarse structure, labelled by Structure_ID.
for structure_id in coarse:
    structure_resid = coarse_left_resid_dict[structure_id]
    sns.distplot(structure_resid, label=str(structure_id))
plt.legend()
plt.title('Left')

### Control

In [None]:
# Structure indices for right structures (control).
# The second distinct Structure_Index (in row order) is used when a structure
# has more than one, otherwise the only one.
# NOTE(review): unlike avg_temp, `control` is not sorted by side anywhere in this
# notebook, so which index comes second depends on the database row order — confirm.
coarse_structure_right_index_control = []
for structure_id in coarse:
    side_indices = control.loc[control.Structure_ID == structure_id, 'Structure_Index'].unique().tolist()
    chosen = side_indices[1] if len(side_indices) > 1 else side_indices[0]
    coarse_structure_right_index_control.append(chosen)

In [None]:
# Structure indices for left structures (control).
# The first distinct Structure_Index (in row order) is used in every case.
# NOTE(review): `control` is not sorted by side in this notebook, so "first"
# depends on the database row order — confirm it is the left side.
coarse_structure_left_index_control = []
for structure_id in coarse:
    side_indices = control.loc[control.Structure_ID == structure_id, 'Structure_Index'].unique().tolist()
    coarse_structure_left_index_control.append(side_indices[0])

In [None]:
# DataFrame row labels of every control row belonging to a right coarse structure
coarse_right_index_control = [
    row_label
    for structure_index in coarse_structure_right_index_control
    for row_label in control.index[(control.Structure_Index == structure_index).values].tolist()
]

In [None]:
# DataFrame row labels of every control row belonging to a left coarse structure
coarse_left_index_control = [
    row_label
    for structure_index in coarse_structure_left_index_control
    for row_label in control.index[(control.Structure_Index == structure_index).values].tolist()
]

In [None]:
# Right coarse-structure rows of control, and their residuals divided by sa2v
coarse_right_control = control.loc[coarse_right_index_control]
coarse_right_resid_control = coarse_right_control['resid'] / coarse_right_control['sa2v']

In [None]:
# Left coarse-structure rows of control, and their residuals divided by sa2v
coarse_left_control = control.loc[coarse_left_index_control]
coarse_left_resid_control = coarse_left_control['resid'] / coarse_left_control['sa2v']

In [None]:
# Right vs. left normalized residuals for the control group.
for side_resid, side_colour, side_label in (
        (coarse_right_resid_control, 'blue', 'right'),
        (coarse_left_resid_control, 'red', 'left')):
    sns.distplot(side_resid, color=side_colour, label=side_label)
plt.legend()

#### Breakdown by structure

In [None]:
# Row labels of the right-side control rows, keyed by coarse Structure_ID.
# (This rebinds coarse_right_index_dict, which earlier held the avg_temp labels.)
coarse_right_index_dict = {
    structure_id: coarse_right_control.index[
        (coarse_right_control.Structure_ID == structure_id).values].tolist()
    for structure_id in coarse
}

# Normalized residual (resid / sa2v) of those rows, keyed the same way
control_coarse_right_resid_dict = {}
for structure_id in coarse:
    structure_rows = coarse_right_control.loc[coarse_right_index_dict[structure_id]]
    control_coarse_right_resid_dict[structure_id] = structure_rows['resid'] / structure_rows['sa2v']

In [None]:
# Row labels of the left-side control rows, keyed by coarse Structure_ID.
# (This rebinds coarse_left_index_dict, which earlier held the avg_temp labels.)
coarse_left_index_dict = {
    structure_id: coarse_left_control.index[
        (coarse_left_control.Structure_ID == structure_id).values].tolist()
    for structure_id in coarse
}

# Normalized residual (resid / sa2v) of those rows, keyed the same way
control_coarse_left_resid_dict = {}
for structure_id in coarse:
    structure_rows = coarse_left_control.loc[coarse_left_index_dict[structure_id]]
    control_coarse_left_resid_dict[structure_id] = structure_rows['resid'] / structure_rows['sa2v']

In [None]:
# Control, right side: one distribution per coarse structure, labelled by Structure_ID.
for structure_id in coarse:
    structure_resid = control_coarse_right_resid_dict[structure_id]
    sns.distplot(structure_resid, label=str(structure_id))
plt.legend()
plt.title('Right')

In [None]:
# Control, left side: one distribution per coarse structure, labelled by Structure_ID.
for structure_id in coarse:
    structure_resid = control_coarse_left_resid_dict[structure_id]
    sns.distplot(structure_resid, label=str(structure_id))
plt.legend()
plt.title('Left')

In [None]:
# 2x2 grid of per-structure distributions: average template on the top row,
# control on the bottom row; left side in the first column, right in the second.
grid_panels = [
    (coarse_left_resid_dict, 'Left - Avg Temp'),
    (coarse_right_resid_dict, 'Right - Avg Temp'),
    (control_coarse_left_resid_dict, 'Left - Control'),
    (control_coarse_right_resid_dict, 'Right - Control'),
]
for panel_number, (resid_by_structure, panel_title) in enumerate(grid_panels, 1):
    plt.subplot(2, 2, panel_number)
    for structure_id in coarse:
        sns.distplot(resid_by_structure[structure_id], label=str(structure_id))
    plt.title(panel_title)


### app+

In [None]:
# Structure indices for right structures (app+).
# The second distinct Structure_Index (in row order) is used when a structure
# has more than one, otherwise the only one.
# NOTE(review): unlike avg_temp, `app` is not sorted by side anywhere in this
# notebook, so which index comes second depends on the database row order — confirm.
coarse_structure_right_index_app = []
for structure_id in coarse:
    side_indices = app.loc[app.Structure_ID == structure_id, 'Structure_Index'].unique().tolist()
    chosen = side_indices[1] if len(side_indices) > 1 else side_indices[0]
    coarse_structure_right_index_app.append(chosen)

In [None]:
# Structure indices for left structures (app+).
# The first distinct Structure_Index (in row order) is used in every case.
# NOTE(review): `app` is not sorted by side in this notebook, so "first"
# depends on the database row order — confirm it is the left side.
coarse_structure_left_index_app = []
for structure_id in coarse:
    side_indices = app.loc[app.Structure_ID == structure_id, 'Structure_Index'].unique().tolist()
    coarse_structure_left_index_app.append(side_indices[0])

In [None]:
# DataFrame row labels of every app row belonging to a right coarse structure
coarse_right_index_app = [
    row_label
    for structure_index in coarse_structure_right_index_app
    for row_label in app.index[(app.Structure_Index == structure_index).values].tolist()
]

In [None]:
# DataFrame row labels of every app row belonging to a left coarse structure
coarse_left_index_app = [
    row_label
    for structure_index in coarse_structure_left_index_app
    for row_label in app.index[(app.Structure_Index == structure_index).values].tolist()
]

In [None]:
# Right coarse-structure rows of app, and their residuals divided by sa2v
coarse_right_app = app.loc[coarse_right_index_app]
coarse_right_resid_app = coarse_right_app['resid'] / coarse_right_app['sa2v']

In [None]:
# Left coarse-structure rows of app, and their residuals divided by sa2v
coarse_left_app = app.loc[coarse_left_index_app]
coarse_left_resid_app = coarse_left_app['resid'] / coarse_left_app['sa2v']

In [None]:
# Right vs. left normalized residuals for the app+ group.
for side_resid, side_colour, side_label in (
        (coarse_right_resid_app, 'blue', 'right'),
        (coarse_left_resid_app, 'red', 'left')):
    sns.distplot(side_resid, color=side_colour, label=side_label)
plt.legend()

#### Breakdown by structure

In [None]:
# Row labels of the right-side app rows, keyed by coarse Structure_ID.
# (This rebinds coarse_right_index_dict, which earlier held other datasets' labels.)
coarse_right_index_dict = {
    structure_id: coarse_right_app.index[
        (coarse_right_app.Structure_ID == structure_id).values].tolist()
    for structure_id in coarse
}

# Normalized residual (resid / sa2v) of those rows, keyed the same way
app_coarse_right_resid_dict = {}
for structure_id in coarse:
    structure_rows = coarse_right_app.loc[coarse_right_index_dict[structure_id]]
    app_coarse_right_resid_dict[structure_id] = structure_rows['resid'] / structure_rows['sa2v']

In [None]:
# Row labels of the left-side app rows, keyed by coarse Structure_ID.
# (This rebinds coarse_left_index_dict, which earlier held other datasets' labels.)
coarse_left_index_dict = {
    structure_id: coarse_left_app.index[
        (coarse_left_app.Structure_ID == structure_id).values].tolist()
    for structure_id in coarse
}

# Normalized residual (resid / sa2v) of those rows, keyed the same way
app_coarse_left_resid_dict = {}
for structure_id in coarse:
    structure_rows = coarse_left_app.loc[coarse_left_index_dict[structure_id]]
    app_coarse_left_resid_dict[structure_id] = structure_rows['resid'] / structure_rows['sa2v']

In [None]:
# App+, right side: one distribution per coarse structure, labelled by Structure_ID.
for structure_id in coarse:
    structure_resid = app_coarse_right_resid_dict[structure_id]
    sns.distplot(structure_resid, label=str(structure_id))
plt.legend()
plt.title('Right')

In [None]:
# App+, left side: one distribution per coarse structure, labelled by Structure_ID.
for structure_id in coarse:
    structure_resid = app_coarse_left_resid_dict[structure_id]
    sns.distplot(structure_resid, label=str(structure_id))
plt.legend()
plt.title('Left')

### Coarse-level Comparison of Avg Template, Control, and App+

In [None]:
# Right-side coarse normalized residuals of the three datasets, overlaid.
# Labels and a legend are included so the colours can be told apart.
sns.distplot(coarse_right_resid_avg_temp,color='blue',label='Avg Temp')
sns.distplot(coarse_right_resid_control,color='green',label='Control')
sns.distplot(coarse_right_resid_app,color='red',label='App+')
plt.legend()

#### Standardized distributions

In order to compare these distributions, we standardize using their respective means and standard deviations.

In [None]:
# z-score of the average-template right-side residuals (own mean and standard deviation)
a = (coarse_right_resid_avg_temp - coarse_right_resid_avg_temp.mean()) / coarse_right_resid_avg_temp.std()

In [None]:
# z-score of the control right-side residuals (own mean and standard deviation)
b = (coarse_right_resid_control - coarse_right_resid_control.mean()) / coarse_right_resid_control.std()

In [None]:
# z-score of the app+ right-side residuals (own mean and standard deviation)
c = (coarse_right_resid_app - coarse_right_resid_app.mean()) / coarse_right_resid_app.std()

In [None]:
# Overlay the three standardized distributions (a: avg template, b: control, c: app+).
for standardized, group_colour, group_label in (
        (a, 'blue', 'Avg Temp'),
        (b, 'green', 'Control'),
        (c, 'red', 'App+')):
    sns.distplot(standardized, color=group_colour, label=group_label)
plt.legend()

We can also view each coarse structure separately, standardized with the overall mean and standard deviation of its own dataset:

In [None]:
# Per-structure residuals standardized with the mean/std of the WHOLE right-side
# distribution of the same dataset (not the structure's own mean/std).
avg_temp_mean, avg_temp_std = coarse_right_resid_avg_temp.mean(), coarse_right_resid_avg_temp.std()
control_mean, control_std = coarse_right_resid_control.mean(), coarse_right_resid_control.std()
app_mean, app_std = coarse_right_resid_app.mean(), coarse_right_resid_app.std()

std_coarse_right_resid = {}
std_coarse_right_resid_control = {}
std_coarse_right_resid_app = {}

for structure_id in coarse:
    std_coarse_right_resid[structure_id] = (
        (coarse_right_resid_dict[structure_id] - avg_temp_mean) / avg_temp_std)
    std_coarse_right_resid_control[structure_id] = (
        (control_coarse_right_resid_dict[structure_id] - control_mean) / control_std)
    std_coarse_right_resid_app[structure_id] = (
        (app_coarse_right_resid_dict[structure_id] - app_mean) / app_std)

In [None]:
# Standardized right-side residuals per coarse structure, one panel per dataset
# (top: average template, middle: control, bottom: app+), all on the same x range.
plt.subplot(3,1,1)
for i in coarse:
    sns.distplot(std_coarse_right_resid[i],label=str(i))
plt.xlim((-4,4))

plt.subplot(3,1,2)
for i in coarse:
    sns.distplot(std_coarse_right_resid_control[i],label=str(i))
plt.xlim((-4,4))

plt.subplot(3,1,3)
for i in coarse:
    ax = sns.distplot(std_coarse_right_resid_app[i],label=str(i))
plt.xlim((-4,4))
# One shared legend built from the lines of the last panel. The labels are passed
# as a real list (map() is a lazy iterator on Python 3) and the location by keyword.
plt.figlegend(ax.get_lines(), [str(i) for i in coarse], loc='right')

#### Thalamus and Hypothalamus

This is an example where the average template and control groups show similar variability, and both are less variable than the app+ group.

In [None]:
# Two stacked panels comparing the three datasets for Structure_ID 549
# (titled Thalamus) and Structure_ID 1097 (titled Hypothalamus).
dataset_series = (
    (std_coarse_right_resid, 'blue', 'avg_temp'),
    (std_coarse_right_resid_control, 'green', 'control'),
    (std_coarse_right_resid_app, 'red', 'app+'),
)
for panel_number, (i, panel_title) in enumerate([(549, 'Thalamus'), (1097, 'Hypothalamus')], 1):
    plt.subplot(2, 1, panel_number)
    for resid_by_structure, group_colour, group_label in dataset_series:
        sns.distplot(resid_by_structure[i], color=group_colour, label=group_label)
    plt.title(panel_title)
    plt.xlim((-4,2))
    plt.legend()

#### Striatum

This is an example of where the avg template and app+ show similar variability, and are both more variable than the control group.

In [None]:
# Compare the three datasets for Structure_ID 477 (titled Striatum).
i = 477
for resid_by_structure, group_colour, group_label in (
        (std_coarse_right_resid, 'blue', 'avg_temp'),
        (std_coarse_right_resid_control, 'green', 'control'),
        (std_coarse_right_resid_app, 'red', 'app+')):
    sns.distplot(resid_by_structure[i], color=group_colour, label=group_label)
plt.title('Striatum')
plt.legend()

## Summary Level Analysis

In [None]:
# Summary structure IDs.
# Pickle files must be opened in binary mode: text mode fails on Python 3 and
# can corrupt binary pickles on Windows.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted location.
with open('//allen/aibs/ccf/Maitham/Alzheimer_Data/summary', 'rb') as f:
    summary = pickle.load(f)

### Average Template

In [None]:
# Structure indices for right structures (summary level, average template).
# The second distinct Structure_Index (in row order) is used when a structure
# has more than one, otherwise the only one.
summary_structure_right_index = []
for structure_id in summary:
    side_indices = avg_temp.loc[avg_temp.Structure_ID == structure_id, 'Structure_Index'].unique().tolist()
    chosen = side_indices[1] if len(side_indices) > 1 else side_indices[0]
    summary_structure_right_index.append(chosen)

In [None]:
# DataFrame row labels of every avg_temp row belonging to a right summary structure
summary_right_index_avg_temp = [
    row_label
    for structure_index in summary_structure_right_index
    for row_label in avg_temp.index[(avg_temp.Structure_Index == structure_index).values].tolist()
]

In [None]:
# Right summary-structure rows of avg_temp, and their residuals divided by sa2v
summary_right_avg_temp = avg_temp.loc[summary_right_index_avg_temp]
summary_right_resid_avg_temp = summary_right_avg_temp['resid'] / summary_right_avg_temp['sa2v']

In [None]:
# Distribution of the right-side summary-level normalized residuals (average template).
sns.distplot(summary_right_resid_avg_temp)

### Control

In [None]:
# Structure indices for right structures (summary level, control).
# The second distinct Structure_Index (in row order) is used when a structure
# has more than one, otherwise the only one.
# NOTE(review): `control` is not sorted by side in this notebook, so which index
# comes second depends on the database row order — confirm.
summary_structure_right_index_control = []
for structure_id in summary:
    side_indices = control.loc[control.Structure_ID == structure_id, 'Structure_Index'].unique().tolist()
    chosen = side_indices[1] if len(side_indices) > 1 else side_indices[0]
    summary_structure_right_index_control.append(chosen)

In [None]:
# DataFrame row labels of every control row belonging to a right summary structure
summary_right_index_control = [
    row_label
    for structure_index in summary_structure_right_index_control
    for row_label in control.index[(control.Structure_Index == structure_index).values].tolist()
]

In [None]:
# Right summary-structure rows of control, and their residuals divided by sa2v
summary_right_control = control.loc[summary_right_index_control]
summary_right_resid_control = summary_right_control['resid'] / summary_right_control['sa2v']

In [None]:
# Distribution of the right-side summary-level normalized residuals (control).
sns.distplot(summary_right_resid_control)

### App+

In [None]:
# Structure indices for right structures (summary level, app+).
# The second distinct Structure_Index (in row order) is used when a structure
# has more than one, otherwise the only one.
# NOTE(review): `app` is not sorted by side in this notebook, so which index
# comes second depends on the database row order — confirm.
summary_structure_right_index_app = []
for structure_id in summary:
    side_indices = app.loc[app.Structure_ID == structure_id, 'Structure_Index'].unique().tolist()
    chosen = side_indices[1] if len(side_indices) > 1 else side_indices[0]
    summary_structure_right_index_app.append(chosen)

In [None]:
# DataFrame row labels of every app row belonging to a right summary structure
summary_right_index_app = [
    row_label
    for structure_index in summary_structure_right_index_app
    for row_label in app.index[(app.Structure_Index == structure_index).values].tolist()
]

In [None]:
# Right summary-structure rows of app, and their residuals divided by sa2v
summary_right_app = app.loc[summary_right_index_app]
summary_right_resid_app = summary_right_app['resid'] / summary_right_app['sa2v']

In [None]:
# Distribution of the right-side summary-level normalized residuals (app+).
sns.distplot(summary_right_resid_app)

### Summary structure comparison

In [None]:
# Summary-level normalized residuals of the three datasets, overlaid.
for group_resid, group_colour, group_label in (
        (summary_right_resid_avg_temp, 'blue', 'avg temp'),
        (summary_right_resid_control, 'green', 'control'),
        (summary_right_resid_app, 'red', 'app+')):
    sns.distplot(group_resid, color=group_colour, label=group_label)
plt.legend()

In [None]:
# z-score each dataset's summary-level residuals with its own mean and standard deviation.
std_summary_avg_temp = (
    (summary_right_resid_avg_temp - summary_right_resid_avg_temp.mean())
    / summary_right_resid_avg_temp.std())

std_summary_control = (
    (summary_right_resid_control - summary_right_resid_control.mean())
    / summary_right_resid_control.std())

std_summary_app = (
    (summary_right_resid_app - summary_right_resid_app.mean())
    / summary_right_resid_app.std())

In [None]:
# Standardized summary-level distributions of the three datasets, overlaid.
for standardized, group_colour, group_label in (
        (std_summary_avg_temp, 'blue', 'avg temp'),
        (std_summary_control, 'green', 'control'),
        (std_summary_app, 'red', 'app+')):
    sns.distplot(standardized, color=group_colour, label=group_label)
plt.legend()

In [None]:
# Preview the first standardized average-template values.
std_summary_avg_temp.head()

## Jen starting here

In [None]:
# Preview the app table again before joining it with the metadata.
app.head()

In [None]:
# Per-image-series metadata table.
# NOTE(review): hard-coded absolute path into one user's local Dropbox folder; the notebook only runs on that machine as written.
meta = pd.read_csv(r'C:\Users\jenniferwh\Dropbox (Allen Institute)\Mesoscale Connectome Papers in Progress\2018 Plaque\AD_structure_volume_GLM_data.csv')

In [None]:
# Preview the metadata table.
meta.head()

In [None]:
# Distinct age groups present in the metadata.
meta['Age group'].unique()

In [None]:
# app rows whose Image_Series_ID does not appear in the metadata table.
not_in_master_data = (app[~app['Image_Series_ID'].isin(meta['Image_Series_ID'])])

In [None]:
# Which image series those unmatched rows belong to.
not_in_master_data['Image_Series_ID'].unique()

In [None]:
# control rows whose Image_Series_ID does not appear in the metadata table.
# This reuses the name not_in_master_data, replacing the app result computed earlier.
not_in_master_data = (control[~control['Image_Series_ID'].isin(meta['Image_Series_ID'])])

In [None]:
# Preview the control table.
control.head()

In [None]:
# Distinct values of the side column in the control table.
control['side'].unique()

In [None]:
# Keep only the app rows whose image series has a metadata entry.
appdat = app[app['Image_Series_ID'].isin(meta['Image_Series_ID'])]

In [None]:
# Keep only rows whose side is 'r' or 'm'.
appdat = appdat[appdat['side'].isin(['r', 'm'])]

In [None]:
# Preview the metadata columns before merging.
meta.head()

In [None]:
# Attach genotype group, sex and age group to every row, matching on Image_Series_ID
# (DataFrame.merge defaults to an inner join).
appdat = appdat.merge(meta[['Image_Series_ID', 'Genotype group', 'sex', 'Age group']], on='Image_Series_ID')

In [None]:
# Output directory for the exported tables. It is defined here because this is the
# first cell that needs it; a later cell assigns the same value again. Without this,
# a fresh Restart & Run All fails with NameError on `path`.
path = r'C:\Users\jenniferwh\Dropbox (Allen Institute)\Mesoscale Connectome Papers in Progress\2018 Plaque'
# Save the app rows (with metadata columns) as CSV.
appdat.to_csv(os.path.join(path, 'app_structure_volumes.csv'))

In [None]:
# Keep only rows whose side is 'r' or 'm'.
# NOTE(review): this overwrites `control` itself, so rows with any other side are gone
# from here on; re-running earlier cells that use `control` afterwards gives different results.
control = control[control['side'].isin(['r', 'm'])]

In [None]:
# Attach genotype group, sex and age group to every control row, matching on Image_Series_ID
# (DataFrame.merge defaults to an inner join).
controldat = control.merge(meta[['Image_Series_ID', 'Genotype group', 'sex', 'Age group']], on='Image_Series_ID')

In [None]:
# Save the control rows (with metadata columns) as CSV; `path` must already be defined when this cell runs.
controldat.to_csv(os.path.join(path, 'app_control_structure_volumes.csv'))

In [None]:
# Number of distinct image series in appdat.
len(appdat['Image_Series_ID'].unique())

In [None]:
# Raw array of Structure_ID values in appdat (displays the whole column).
appdat['Structure_ID'].values

In [None]:
# Distinct genotype groups present in the metadata.
meta['Genotype group'].unique()

In [None]:
# NOTE(review): c_distribution is first assigned inside the t-test cell further down,
# so on a fresh top-to-bottom run this cell raises NameError. It looks like a leftover
# debugging check — consider moving it below the t-test cell or removing it.
np.mean(c_distribution)

In [None]:
# T-tests for structure volume
#
# For every age group and every summary structure, the control values are compared
# with each transgenic line using an independent two-sample t-test
# (scipy.stats.ttest_ind). Comparisons whose p-value is NaN are skipped. All kept
# p-values are then FDR-corrected together (sm.stats.fdrcorrection, alpha=0.05).
measure = 'Volume'  # column being compared

ages = []
mouse_lines = []
structures = []
ttest_results = []
pvals = []
difference_magnitude = []

# Only rows whose side is 'r' or 'm' are analysed.
controldat = control[control['side'].isin(['r', 'm'])]
appdat = app[app['side'].isin(['r', 'm'])]

for age in meta['Age group'].unique():
    dataset = meta[meta['Age group'] == age]
    control_ids = dataset[dataset['Genotype group'] == 'control']['Image_Series_ID']
    app_ids = dataset[dataset['Genotype group'] == 'APP/PS1']['Image_Series_ID']
    j20_ids = dataset[dataset['Genotype group'] == 'J20']['Image_Series_ID']
    Tg2576_ids = dataset[dataset['Genotype group'] == 'Tg2576']['Image_Series_ID']
    # The three lines are tested in this fixed order for each structure.
    comparisons = [('APP/PS1', app_ids), ('J20', j20_ids), ('Tg2576', Tg2576_ids)]
    for structure in summary:
        c_distribution = controldat.loc[
            controldat['Image_Series_ID'].isin(control_ids) & (controldat['Structure_ID'] == structure),
            measure]
        for line_name, line_ids in comparisons:
            line_distribution = appdat.loc[
                appdat['Image_Series_ID'].isin(line_ids) & (appdat['Structure_ID'] == structure),
                measure]
            t, p = stats.ttest_ind(c_distribution, line_distribution)
            if np.isnan(p):
                # No usable test result for this comparison.
                continue
            ages.append(age)
            mouse_lines.append(line_name)
            structures.append(structure)
            ttest_results.append(t)
            pvals.append(p)
            # Positive when the control mean is larger than the line mean.
            difference_magnitude.append(np.mean(c_distribution) - np.mean(line_distribution))

results = pd.DataFrame({'Age': ages, 'Mouse Line': mouse_lines, 'Structure': structures,
                        't_test results': ttest_results,
                        'fdr_corrected_pvalues': sm.stats.fdrcorrection(pvals, alpha=0.05, method='indep')[1],
                        'difference magnitude': difference_magnitude})

In [None]:
# Build a Structure_ID -> acronym lookup from the structure tree.
# NOTE(review): MouseConnectivityCache is not imported anywhere in the visible notebook;
# presumably `from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache`
# — confirm and add it to the import cell.
mcc = MouseConnectivityCache(manifest_file = '../connectivity/mouse_connectivity_manifest.json')
st = mcc.get_structure_tree()
ia_map = st.get_id_acronym_map()
# Invert the map. .items() works on Python 2 and 3; .iteritems() exists only on Python 2 dicts.
ai_map = {value:key for key, value in ia_map.items()}

In [None]:
# Look up the acronym of every structure in the results table (KeyError if an ID is unknown).
results['structure acronym'] = [ai_map[structure_id] for structure_id in results['Structure'].tolist()]

In [None]:
# Comparisons that remain significant after FDR correction.
results[results['fdr_corrected_pvalues'] < 0.05]

In [None]:
# Table of structure volumes keyed by acronym (columns 'structure' and 'volume' are used below).
# NOTE(review): hard-coded absolute path into one user's local Dropbox folder.
ipsi_volumes = pd.read_csv(r'C:\Users\jenniferwh\Dropbox (Allen Institute)\Mesoscale Connectome Papers in Progress\2018 Plaque\ipsi_volumes.csv')

In [None]:
# Preview the structure-volume table.
ipsi_volumes.head()

In [None]:
def _first_volume(acronym):
    """Return the first 'volume' listed for acronym in ipsi_volumes, or NaN if it is absent."""
    matches = ipsi_volumes.loc[ipsi_volumes['structure'] == acronym, 'volume'].values
    return matches[0] if len(matches) else np.nan

# One volume per results row, in row order
volumes = [_first_volume(acronym) for acronym in results['structure acronym']]

In [None]:
# Add the looked-up volumes as a column (same row order as results).
results['Structure Volume'] = volumes

In [None]:
# Preview the results table.
results.head()

In [None]:
# Output directory for exported CSV files.
# NOTE(review): cells earlier in the notebook already use `path`; this definition belongs in a config cell near the top.
path = r'C:\Users\jenniferwh\Dropbox (Allen Institute)\Mesoscale Connectome Papers in Progress\2018 Plaque'

In [None]:
# Save the volume t-test results table as CSV.
results.to_csv(os.path.join(path, 'ss_volume_differences.csv'))

In [None]:
# log10 of the strictly positive differences only (rows with a difference <= 0 are dropped)
positive_differences = results.loc[results['difference magnitude'] > 0, 'difference magnitude']
diffmag = list(np.log10(positive_differences.values + 1e-10))

In [None]:
# Distribution of the log10 positive differences.
sns.distplot(diffmag,color='blue')

In [None]:
# Preview appdat (as reassigned by the t-test cell above).
appdat.head()

In [None]:
# T-tests for surface area
#
# For every age group and every summary structure, the control values are compared
# with each transgenic line using an independent two-sample t-test
# (scipy.stats.ttest_ind). Comparisons whose p-value is NaN are skipped. All kept
# p-values are then FDR-corrected together (sm.stats.fdrcorrection, alpha=0.05).
# Note that this replaces the `results` table produced by the previous analysis.
measure = 'Surface_Area'  # column being compared

ages = []
mouse_lines = []
structures = []
ttest_results = []
pvals = []
difference_magnitude = []

# Only rows whose side is 'r' or 'm' are analysed.
controldat = control[control['side'].isin(['r', 'm'])]
appdat = app[app['side'].isin(['r', 'm'])]

for age in meta['Age group'].unique():
    dataset = meta[meta['Age group'] == age]
    control_ids = dataset[dataset['Genotype group'] == 'control']['Image_Series_ID']
    app_ids = dataset[dataset['Genotype group'] == 'APP/PS1']['Image_Series_ID']
    j20_ids = dataset[dataset['Genotype group'] == 'J20']['Image_Series_ID']
    Tg2576_ids = dataset[dataset['Genotype group'] == 'Tg2576']['Image_Series_ID']
    # The three lines are tested in this fixed order for each structure.
    comparisons = [('APP/PS1', app_ids), ('J20', j20_ids), ('Tg2576', Tg2576_ids)]
    for structure in summary:
        c_distribution = controldat.loc[
            controldat['Image_Series_ID'].isin(control_ids) & (controldat['Structure_ID'] == structure),
            measure]
        for line_name, line_ids in comparisons:
            line_distribution = appdat.loc[
                appdat['Image_Series_ID'].isin(line_ids) & (appdat['Structure_ID'] == structure),
                measure]
            t, p = stats.ttest_ind(c_distribution, line_distribution)
            if np.isnan(p):
                # No usable test result for this comparison.
                continue
            ages.append(age)
            mouse_lines.append(line_name)
            structures.append(structure)
            ttest_results.append(t)
            pvals.append(p)
            # Positive when the control mean is larger than the line mean.
            difference_magnitude.append(np.mean(c_distribution) - np.mean(line_distribution))

results = pd.DataFrame({'Age': ages, 'Mouse Line': mouse_lines, 'Structure': structures,
                        't_test results': ttest_results,
                        'fdr_corrected_pvalues': sm.stats.fdrcorrection(pvals, alpha=0.05, method='indep')[1],
                        'difference magnitude': difference_magnitude})

In [None]:
# Add acronyms to the surface-area results, then show the comparisons that stay
# significant after FDR correction.
results['structure acronym'] = [ai_map[structure_id] for structure_id in results['Structure'].tolist()]
significant_mask = results['fdr_corrected_pvalues'] < 0.05
results[significant_mask]

In [None]:
# Look up each structure's volume by acronym (NaN when the acronym is not listed).
volumes = []
for structure in results['structure acronym']:
    if structure in ipsi_volumes['structure'].values:
        volumes.append(ipsi_volumes[ipsi_volumes['structure'] == structure]['volume'].values[0])
    else:
        volumes.append(np.nan)
results['Structure Volume'] = volumes
# These are the surface-area results, so they get their own file. Writing to
# 'ss_volume_differences.csv' here overwrote the volume results saved earlier.
results.to_csv(os.path.join(path, 'ss_surface_area_differences.csv'))

In [None]:
# log10 of the strictly positive surface-area differences, and their distribution
positive_differences = results.loc[results['difference magnitude'] > 0, 'difference magnitude']
diffmag = list(np.log10(positive_differences.values + 1e-10))
sns.distplot(diffmag)

In [None]:
# T-tests for surface area:V ratio
#
# For every age group and every summary structure, the control values are compared
# with each transgenic line using an independent two-sample t-test
# (scipy.stats.ttest_ind). Comparisons whose p-value is NaN are skipped. All kept
# p-values are then FDR-corrected together (sm.stats.fdrcorrection, alpha=0.05).
# Note that this replaces the `results` table produced by the previous analysis.
measure = 'sa2v'  # column being compared

ages = []
mouse_lines = []
structures = []
ttest_results = []
pvals = []
difference_magnitude = []

# Only rows whose side is 'r' or 'm' are analysed.
controldat = control[control['side'].isin(['r', 'm'])]
appdat = app[app['side'].isin(['r', 'm'])]

for age in meta['Age group'].unique():
    dataset = meta[meta['Age group'] == age]
    control_ids = dataset[dataset['Genotype group'] == 'control']['Image_Series_ID']
    app_ids = dataset[dataset['Genotype group'] == 'APP/PS1']['Image_Series_ID']
    j20_ids = dataset[dataset['Genotype group'] == 'J20']['Image_Series_ID']
    Tg2576_ids = dataset[dataset['Genotype group'] == 'Tg2576']['Image_Series_ID']
    # The three lines are tested in this fixed order for each structure.
    comparisons = [('APP/PS1', app_ids), ('J20', j20_ids), ('Tg2576', Tg2576_ids)]
    for structure in summary:
        c_distribution = controldat.loc[
            controldat['Image_Series_ID'].isin(control_ids) & (controldat['Structure_ID'] == structure),
            measure]
        for line_name, line_ids in comparisons:
            line_distribution = appdat.loc[
                appdat['Image_Series_ID'].isin(line_ids) & (appdat['Structure_ID'] == structure),
                measure]
            t, p = stats.ttest_ind(c_distribution, line_distribution)
            if np.isnan(p):
                # No usable test result for this comparison.
                continue
            ages.append(age)
            mouse_lines.append(line_name)
            structures.append(structure)
            ttest_results.append(t)
            pvals.append(p)
            # Positive when the control mean is larger than the line mean.
            difference_magnitude.append(np.mean(c_distribution) - np.mean(line_distribution))

results = pd.DataFrame({'Age': ages, 'Mouse Line': mouse_lines, 'Structure': structures,
                        't_test results': ttest_results,
                        'fdr_corrected_pvalues': sm.stats.fdrcorrection(pvals, alpha=0.05, method='indep')[1],
                        'difference magnitude': difference_magnitude})

In [None]:
# Add acronyms to the sa2v results, then show the comparisons that stay
# significant after FDR correction.
results['structure acronym'] = [ai_map[structure_id] for structure_id in results['Structure'].tolist()]
significant_mask = results['fdr_corrected_pvalues'] < 0.05
results[significant_mask]

In [None]:
# Look up each structure's volume by acronym (NaN when the acronym is not listed).
volumes = []
for structure in results['structure acronym']:
    if structure in ipsi_volumes['structure'].values:
        volumes.append(ipsi_volumes[ipsi_volumes['structure'] == structure]['volume'].values[0])
    else:
        volumes.append(np.nan)
results['Structure Volume'] = volumes
# These are the surface-area-to-volume results, so they get their own file. Writing to
# 'ss_volume_differences.csv' here overwrote the results saved by the earlier analyses.
results.to_csv(os.path.join(path, 'ss_sa2v_differences.csv'))