In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import nrrd
import json
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache
from allensdk.api.queries.ontologies_api import OntologiesApi
import requests
from anatomy.anatomy_api import AnatomyApi
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.stats.anova import anova_lm
from scipy.optimize import curve_fit
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from statsmodels.graphics import utils
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import MultiComparison

import seaborn as sns
sns.set_context('paper')
sns.set_style('white')

%matplotlib inline
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

In [2]:
# Atlas handles: summary-structure data and the connectivity cache's structure tree.
aapi = AnatomyApi()
ss = aapi.get_summary_structure_data('id')
mcc = MouseConnectivityCache(
    manifest_file = '../connectivity/mouse_connectivity_manifest.json')
structure_tree = mcc.get_structure_tree()


def _first_by_acronym(acronym):
    """Return the first structure the tree reports for a single acronym."""
    return structure_tree.get_structures_by_acronym([acronym])[0]


def _summary_descendants(root):
    """Return the descendant ids of `root` that are also found in `ss`."""
    descendant_ids = structure_tree.descendant_ids([root['id']])[0]
    return [structure_id for structure_id in descendant_ids if structure_id in ss]


isocortex = _first_by_acronym('Isocortex')
cla = _first_by_acronym('CLA')['id']
HPF = _first_by_acronym('HPF')
iso = _summary_descendants(isocortex)
hipp = _summary_descendants(HPF)

# ai_map is ia_map with keys and values swapped; it is indexed by structure id below.
ia_map = structure_tree.get_id_acronym_map()
ai_map = dict(zip(ia_map.values(), ia_map.keys()))
ctx_strs = [ai_map[structure_id] for structure_id in iso]
hipp_strs = [ai_map[structure_id] for structure_id in hipp]
# Isocortex acronyms only: hipp_strs is not appended and CLA is left out.
valid_strs = ctx_strs

In [4]:
# Input / output locations (relative to the notebook's working directory).
basepath = r'../../../'
datpath = r'../data_files'
savepath = os.path.join(basepath, '_Neuron_final', '_final_figures', 'Figure_4')

# Load the target-defined dataset and apply all three row filters in one mask:
# flagged for inclusion, source in valid_strs, and target polygon not 'POST'.
td_dataset = pd.read_csv(os.path.join(datpath, 'target_defined_dataset.csv'))
keep_rows = (
    (td_dataset['include'] == 'yes')
    & td_dataset['source'].isin(valid_strs)
    & (td_dataset['target_by_polygon'] != 'POST')
)
td_dataset = td_dataset[keep_rows]
print(len(td_dataset))
print(td_dataset['source'].nunique(dropna=False))

121
25


In [5]:
def _read_and_count(filename):
    """Read `filename` from datpath as CSV, print its row count, return the frame."""
    frame = pd.read_csv(os.path.join(datpath, filename))
    print(len(frame))
    return frame


# Three correlation tables; each row count is printed right after its file is read.
c_by_source = _read_and_count('good_wt_correlations.csv')
alldat = _read_and_count('good_td_wt_correlations.csv')
td_dat = _read_and_count('good_td_td_correlations.csv')

627
586
241


In [9]:
# Parse the matched-set JSON and wrap it in a DataFrame.
matched_sets_path = os.path.join(datpath, 'matched_sets.json')
with open(matched_sets_path, 'r') as jsonfile:
    groups = pd.DataFrame(json.load(jsonfile))

In [10]:
# The td_sets loaded from the JSON file are badly formatted (many of the nested
# lists are subsets of larger ones), so print each source with its sets here
# and correct them by hand.
for source_name, nested_sets in zip(groups['source'], groups['td_sets']):
    print(source_name)
    print(nested_sets)

VISp
[[485553574, 495346667, 501711996, 501786400, 501787135, 501837158, 502592260, 515920693, 526784559, 539511058, 546389260, 561918178, 563352720, 565146821, 574637452], [485553574, 501711996, 501786400, 501787135, 501837158, 502592260, 515920693, 526784559, 539511058, 546389260, 561918178, 563352720, 565146821, 574637452], [485553574, 501711996, 501786400, 501787135, 501837158, 502592260, 515920693, 526784559, 546389260, 561918178, 563352720, 565146821, 574637452], [485553574, 501786400, 501787135, 501837158, 502592260, 515920693, 526784559, 546389260, 563352720, 574637452]]
VISl
[[501785691, 501883865, 502590301, 502955689, 502956560, 504176074, 518013943, 523180728, 531397136, 552543088, 553080579, 560965104, 571653937], [501785691, 501883865, 502590301, 502955689, 502956560, 504176074, 518013943, 523180728, 531397136, 552543088, 553080579, 560965104, 571653937, 572388976], [501785691, 501883865, 502590301, 502955689, 502956560, 504176074, 518013943, 531397136, 552543088, 5530805

In [7]:
# Hand-curated experiment ids for each source region, one flat list per source.
VISp_td = [
    485553574, 495346667, 501711996, 501786400, 501787135,
    501837158, 502592260, 515920693, 526784559, 539511058,
    546389260, 561918178, 563352720, 565146821, 574637452,
]
VISl_td = [
    501785691, 501883865, 502590301, 502955689, 502956560,
    504176074, 518013943, 523180728, 531397136, 552543088,
    553080579, 560965104, 571653937, 572388976,
]
RSPagl_td = [604100536, 617901499]
ACAd_td = [
    475829896, 528741104, 571401645, 607059419, 607321130,
    609475867,
]
RSPv_td = [
    521255975, 569904687, 592522663, 592724077, 623838656,
    664716091, 666090944, 868641659,
]
ORBvl_td = [479115470, 617898760, 617900105]
ORBl_td = [571816813, 572390577, 601804603, 636799953]
VISam_td = [
    478678606, 560045081, 561986735, 591168591, 613898292,
    651703553,
]
PL_td = [575683857, 606260719, 609157409]
RSPd_td = [518605181, 529129011]

In [8]:
# Overwrite each source's td_sets entry with its hand-curated id list.
curated_td_sets = {
    'VISp': VISp_td,
    'VISl': VISl_td,
    'RSPagl': RSPagl_td,
    'ACAd': ACAd_td,
    'RSPv': RSPv_td,
    'ORBvl': ORBvl_td,
    'ORBl': ORBl_td,
    'VISam': VISam_td,
    'PL': PL_td,
    'RSPd': RSPd_td,
}
for region, curated_ids in curated_td_sets.items():
    # The list is wrapped in another list so it is stored as one cell value.
    groups.loc[groups['source'] == region, 'td_sets'] = [curated_ids]

In [9]:
# Show the column labels of the groups table.
groups.columns

Index(['source', 'td_sets', 'wt_sets'], dtype='object')

In [10]:
# For every source, find the match ids tied to its curated TD experiments and
# keep only those that occur often enough, then store them in 'wt_sets'.
for source in groups['source'].unique():
    source_rows = groups['source'] == source
    print(source)
    td_exps = groups.loc[source_rows, 'td_sets'].values[0]
    n_td = len(td_exps)
    print('TD', n_td)

    # Rows of alldat whose image_series_id is one of this source's TD experiments.
    wt = alldat[alldat['image_series_id'].isin(td_exps)]
    wt_matches = wt['match_id'].unique()
    print(len(wt_matches))

    # A match id is dropped when fewer than n_td - 1 of those rows carry it.
    under_matched = [
        match_id for match_id in wt_matches
        if len(wt[wt['match_id'] == match_id]) < n_td - 1
    ]
    for match_id in under_matched:
        wt_matches = wt_matches[wt_matches != match_id]
    print(len(wt_matches))

    groups.loc[source_rows, 'wt_sets'] = [wt_matches]

VISp
TD 15
38
5
VISl
TD 14
10
1
RSPagl
TD 2
6
6
ACAd
TD 6
9
6
RSPv
TD 8
2
1
ORBvl
TD 3
5
4
ORBl
TD 4
4
2
VISam
TD 6
5
5
PL
TD 3
2
2
RSPd
TD 2
2
2


In [11]:
# Write the curated table as CSV (without the index column) into savepath.
curated_csv_path = os.path.join(savepath, 'curated_matches.csv')
groups.to_csv(curated_csv_path, index = False)

In [12]:
# Write the same curated table as JSON into savepath.
curated_json_path = os.path.join(savepath, 'curated_matches.json')
groups.to_json(curated_json_path)

In [13]:
# Flatten the per-source id collections into one set of unique ids per column.
all_td_exps = {exp_id for td_set in groups['td_sets'].values for exp_id in td_set}
print(len(all_td_exps))
all_wt_exps = {exp_id for wt_set in groups['wt_sets'].values for exp_id in wt_set}
print(len(all_wt_exps))

63
34


In [14]:
# Show the source column of the curated table.
groups.loc[:, 'source']

0      VISp
1      VISl
2    RSPagl
3      ACAd
4      RSPv
5     ORBvl
6      ORBl
7     VISam
8        PL
9      RSPd
Name: source, dtype: object