In [None]:
from common_preamble import *
from scipy import stats

In [None]:
# Read the two input tables from HDF5 files under ../data.
# `pd` and `os` are not imported in this notebook directly; presumably they
# arrive through `from common_preamble import *` -- TODO confirm.
arbor_ais_df = pd.read_hdf('../data/arbor_ais_data.h5', 'v185')  # stored under key 'v185'
# NOTE(review): only this path is wrapped in os.path.abspath; the other reads
# in the notebook use plain relative paths.
ais_synapse_data_all = pd.read_hdf(os.path.abspath('../data/ais_synapse_data_all_v185.h5'), 'data')

#### Set the AIS length to analyze

In [None]:
from ais_synapse_utils import aggregate_ais_dataframes
# NOTE(review): `ais_synapse_data` and `min_ais_len` are not defined anywhere in
# the visible notebook (the load cell creates `ais_synapse_data_all`). Unless
# `common_preamble` provides them, this cell fails on a fresh kernel -- confirm
# and define them explicitly under the "Set the AIS length" heading.
complete_ais_ids = np.unique(ais_synapse_data['post_pt_root_id'])
# Same value as `complete_ais_ids`; not referenced again in the visible cells.
ais_id_to_analyze = np.unique(ais_synapse_data['post_pt_root_id'])

# Keep only synapses whose `d_top_skel` value is below the analysed AIS length.
ais_synapse_filter = ais_synapse_data['d_top_skel'] < min_ais_len
ais_synapse_data_f = ais_synapse_data[ais_synapse_filter]

# Aggregate the filtered synapses per AIS, then drop rows whose `ais_len` is
# shorter than the analysed length.
aggregated_ais_syn_df = aggregate_ais_dataframes(complete_ais_ids, ais_synapse_data_f)
aggregated_ais_syn_df = aggregated_ais_syn_df[aggregated_ais_syn_df['ais_len'] >= min_ais_len].reset_index()

### Plotting

In [None]:
# Column name -> axis label used by the plotting helpers.
# Labels containing LaTeX are raw strings: in a normal string literal '\m' is an
# invalid escape sequence (a warning today, an error in future Python versions).
# The resulting label text is unchanged.
plot_label_lookup = {'syn_net_chc': '# ChC Syn.',
                     'syn_net_non': '# Non-ChC Syn.',
                     'size_net_chc': 'Net ChC Syn. Size',
                     'soma_y_adj': r'Soma Depth ($\mu m$)',
                     'soma_y_um': r'Soma Depth ($\mu m$)',
                     'soma_x_um': r'Soma Mediolateral Pos. ($\mu m$)',
                     'n_syn_soma': '# Syn Soma',
                     'soma_synapses': '# Syn Soma',
                     'soma_area': r'Soma Area ($\mu m^2$)',
                     'soma_syn_density': r'# Syn Soma/($\mu m^2$)',
                     'num_cells_chc': '# ChC Connections',
                     'syn_mean_chc': '# Syn/Connection',
                     'conn_frac': 'Connectivity Fraction',
                     'num_potential': '# Potential ChC',
                     'size_mean_chc': 'Mean ChC Syn Size',
                     # NOTE(review): these component names are fixed by hand; they
                     # only stay meaningful while the decomposition cell below
                     # yields the same components in the same order.
                     'pca_2': 'Soma Size Comp.',
                     'pca_0': 'Soma Depth Comp.',
                     'pca_1': 'Soma Inhibition Comp.',
                     'pca_3': 'PC 3',
                     'pca_4': 'PC 4',
                     'ais_radius': 'AIS Radius',
                     'syn_max_chc': 'Max Syn ChC',
                    }

In [None]:
def nan_pearsonr(x,y):
    """Pearson correlation over the pairs where both values are finite.

    Parameters
    ----------
    x, y : array-like of numbers, same length
        Lists, NumPy arrays and pandas Series are all accepted; values are
        paired by position.

    Returns
    -------
    The result of ``scipy.stats.pearsonr`` on the retained pairs, which can be
    unpacked as ``(r, p_value)``.
    """
    # Convert up front so plain sequences can be boolean-indexed below.
    x_arr = np.asarray(x, dtype=float)
    y_arr = np.asarray(y, dtype=float)
    # isfinite rejects NaN and +/-inf in one pass.
    good_data = np.isfinite(x_arr) & np.isfinite(y_arr)
    return stats.pearsonr(x_arr[good_data], y_arr[good_data])

In [None]:
# Rows with `label` > 1 and a non-NaN `soma_area`; reused by later cells.
# NOTE(review): this line reads the 'label' column unconditionally, while the
# `if 'label' in ...` guard a few lines below allows for it being absent.
row_filter = (arbor_ais_df['label']>1) & (~np.isnan(arbor_ais_df['soma_area']))

figsize = (2, 2)

# `soma_y_adj` is a plain copy of `soma_y_um` (adds a column to arbor_ais_df).
arbor_ais_df['soma_y_adj'] = arbor_ais_df['soma_y_um']
common_y = ['soma_y_adj', 'n_syn_soma', 'soma_x_um']
if 'label' in arbor_ais_df.columns:
    common_y += ['soma_synapses', 'soma_area', 'soma_syn_density', 'ais_radius']

# Key: column passed as `y` to make_scatterplot; value: columns passed as `x`.
col_pairs = {'syn_net_chc': ['syn_net_non', 'num_cells_chc', 'syn_mean_chc'] + common_y,
             'syn_net_non': ['syn_net_chc'] + common_y,
             'syn_mean_chc': ['num_cells_chc'] + common_y,
             'num_cells_chc': ['syn_mean_chc'] + common_y,
             }
# Not used by any visible cell.
invert_columns = ['soma_y_adj']
# Columns listed here are passed xtick_int/ytick_int=False; all others default to True.
tickintdict = {'soma_syn_density': False, 'syn_mean_chc': False}

# Per-column tick arrays handed to make_scatterplot.
tick_dict = {'syn_net_chc': np.arange(0,27,5),
             'syn_net_non': np.arange(0,27,5),
             'soma_y_adj': np.arange(0, 121, 20),
             'soma_x_um': np.arange(0, 251, 50),
             'n_syn_soma': np.arange(60,161,20),
             'soma_synapses': np.arange(40, 121, 20),
             'soma_area': np.arange(450, 801, 100),
             'soma_syn_density': np.arange(0.06, 0.181, 0.02),
             'num_cells_chc': np.arange(0,10,2),
             'syn_mean_chc': np.arange(0,8.1,2),
             'ais_radius': np.arange(175, 376, 50)}

# Passed as make_scatterplot's `xprecision` (1 for columns not listed).
xprecision = {'soma_syn_density': 2}

# Pairs involving these columns are plotted from the `row_filter` subset only.
# NOTE(review): 'ais_radius' is added under the same 'label' guard above but is
# not listed here -- confirm that is intended.
needs_label = ['soma_synapses', 'soma_area', 'soma_syn_density']
# NOTE: `x` and `y` are kernel globals and keep their last values after the loop.
for y in col_pairs:
    for x in col_pairs.get(y):
        if x in needs_label or y in needs_label:
            use_df = arbor_ais_df[row_filter]
        else:
            use_df = arbor_ais_df
        fig, ax = make_scatterplot(x, y, use_df,
                                   figsize, plot_label_lookup, tick_dict,
                                   xtick_int=tickintdict.get(x, True),
                                   ytick_int=tickintdict.get(y, True),
                                   xprecision=xprecision.get(x, 1))
        # Title shows Pearson R over the finite pairs of the plotted frame.
        ax.set_title(f'R = {nan_pearsonr(use_df[x], use_df[y])[0]:.2f}')
#         fig.savefig(fname=plot_dir+'/scatterplots_{y}_vs_{x}.pdf'.format(x=x, y=y), bbox_inches="tight")

In [None]:
# Variables entering the correlation matrix; 'syn_net_chc' is prepended so it
# occupies row/column 0.
corr_matrix_columns = ['soma_synapses', 'soma_area', 'soma_syn_density',
                       'syn_net_non', 'ais_radius', 'soma_y_um']
extra_corr_matrix_columns = ['syn_net_chc', *corr_matrix_columns]

# Restrict to the rows selected by `row_filter`, then to the columns of interest.
filtered_rows = arbor_ais_df[row_filter]
ais_item_data = filtered_rows[extra_corr_matrix_columns]

# Pairwise Pearson r and p-value for every ordered pair (diagonal included).
n_vars = len(extra_corr_matrix_columns)
corr_mat = np.zeros((n_vars, n_vars))
corr_mat_p = np.zeros((n_vars, n_vars))
for row_idx, row_name in enumerate(extra_corr_matrix_columns):
    for col_idx, col_name in enumerate(extra_corr_matrix_columns):
        pair_result = nan_pearsonr(ais_item_data[row_name], ais_item_data[col_name])
        corr_mat[row_idx, col_idx] = pair_result[0]
        corr_mat_p[row_idx, col_idx] = pair_result[1]

In [None]:
from statsmodels.stats import multitest

# Correct the p-values of the strictly-lower-triangle pairs for multiple
# comparisons (multipletests with its default settings), then collect the
# (row, col) index pairs that remain significant.
tri_inds = np.tril_indices_from(corr_mat, k=-1)
is_sig, corr_p, _, _ = multitest.multipletests(corr_mat_p[tri_inds])

# Boolean-index the stacked index pairs so `put_star` is always shaped (k, 2).
# Building it from a Python list gave a 1-D empty array when nothing was
# significant, which broke the `put_star[:, 0]` indexing in the plotting cell.
put_star = np.column_stack(tri_inds)[is_sig]

In [None]:
do_plot = False

# Hide the lower triangle and the diagonal; only the upper triangle is drawn.
# The builtin `bool` is used because the `np.bool` alias was removed from NumPy.
mask = np.zeros_like(corr_mat, dtype=bool)
mask[np.tril_indices_from(mask)] = True

cmap = sns.color_palette('RdBu', n_colors=31)
fig, ax = plt.subplots(figsize=(6,6))
sns.heatmap(corr_mat, mask=mask, cmap=cmap, vmax=.3, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
# `put_star` holds lower-triangle (row, col) pairs; plotting row as x and col as
# y mirrors each star onto the visible upper triangle (the matrix is symmetric).
# Skip the call when there are no significant pairs.
if len(put_star):
    ax.plot(put_star[:,0]+0.5, put_star[:,1]+0.5, 'k*')
# The last variable has no row and the first has no column in the upper
# triangle, hence the [:-1] / [1:] label slices.
ax.set_yticks(np.arange(0.5, len(extra_corr_matrix_columns)-1+0.5))
_=ax.set_yticklabels([plot_label_lookup[x] for x in extra_corr_matrix_columns[:-1]], rotation=0)

ax.set_xticks(np.arange(1.5, len(extra_corr_matrix_columns)+0.5))
_=ax.set_xticklabels([plot_label_lookup[x] for x in extra_corr_matrix_columns[1:]], rotation=45)
ax.xaxis.tick_top()
if do_plot:
    fig.savefig(f'{plot_dir}/variable_correlation_structure.pdf', bbox_inches='tight')

In [None]:
from sklearn.decomposition import *

# Decompose the z-scored structural variables (the ChC synapse count itself is
# not included) into three components.
# NOTE(review): only `soma_area` is NaN-filtered by `row_filter`; a NaN in any
# other column would propagate through zscore -- confirm the inputs are clean.
ais_item_data = arbor_ais_df[row_filter][corr_matrix_columns]
Xdat = ais_item_data.values
Xz = stats.zscore(Xdat, axis=0)

# Despite the `pca` / 'PC' naming used from here on, the model is FastICA.
pca = FastICA(n_components=3, random_state=1004 )

Xz_pca = pca.fit_transform(Xz)

# Sign convention: make each component's largest-magnitude loading positive,
# flipping the matching column of scores so the two stay consistent.
for ii in range(pca.n_components):
    dominant = np.argmax(np.abs(pca.components_[ii, :]))
    if pca.components_[ii, dominant] < 0:
        pca.components_[ii, :] = -1 * pca.components_[ii, :]
        Xz_pca[:,ii] = -1 * Xz_pca[:,ii]

do_plot = False
# Axis extents follow the number of input variables instead of assuming six.
n_features = pca.components_.shape[1]
for ii in range(pca.n_components):
    fig, ax = plt.subplots(figsize=(1,2))
    ax.barh(np.arange(n_features), pca.components_[ii,:], height=0.5, color='k')
    ax.vlines(0, -1, n_features, linewidth=1)
    ax.set_ylim((-0.5, n_features - 0.5))
    ax.set_yticks(np.arange(n_features))
    ax.set_yticklabels([plot_label_lookup[x] for x in corr_matrix_columns])
    ax.invert_yaxis()
    # Symmetric x-range around zero, scaled to this component's largest loading.
    maxval=np.max(np.abs(pca.components_[ii,:]))
    ax.set_xlim((-maxval, maxval))
    ax.set_xticks((-maxval, 0, maxval))
    ax.set_xticklabels((f'{-maxval:0.2f}', '0', f'{maxval:0.2f}'))
    sns.despine(ax=ax, offset=5, trim=True)
    ax.set_title(f'PC {ii}')
    if do_plot:
        fig.savefig(f'{plot_dir}/factor_component_{ii}.pdf', bbox_inches='tight')

In [None]:
# Take an explicit copy: adding columns to a boolean-mask slice of
# `arbor_ais_df` triggers pandas' SettingWithCopyWarning and leaves it unclear
# which frame is being written to.
arbor_ais_df_use = arbor_ais_df[row_filter].copy()
# Rows of `Xz_pca` come from the same `row_filter` selection, so they line up
# positionally with `arbor_ais_df_use`.
for ii in range(pca.n_components):
    arbor_ais_df_use[f'pca_{ii}'] = Xz_pca[:,ii]

In [None]:
# Boolean mask over `arbor_ais_df_use`, passed as the row filter to the OLS helpers below.
# NOTE(review): presumably selects every row (root ids assumed positive) -- confirm.
row_filter_true = arbor_ais_df_use['post_pt_root_id']>0

In [None]:
do_save= False
ls_prefix = {True: 'rls', False: 'ols'}

# Predictors: the component scores added to `arbor_ais_df_use` above.
base_variables = [f'pca_{ii}' for ii in range(pca.n_components)]

# (response column, colour) for each fit, in the order they are produced.
ols_targets = [('syn_net_chc', chc_color),
               ('size_net_chc', chc_color),
               ('syn_mean_chc', syn_per_conn_color),
               ('num_cells_chc', num_conn_color)]

# One loop replaces four copy-pasted stanzas. Those had drifted apart (a
# hard-coded response name, `columns_chc` reused for other responses) and
# applied a no-op `.format(y_col)` to strings that were already f-strings.
for use_robust in [True, False]:
    for y_col, bar_color in ols_targets:
        # Predictors are passed in reverse order, as in the original stanzas.
        fig, ax, res_df, _ = ols_analysis_single(arbor_ais_df_use, row_filter_true, y_col, base_variables[::-1], bar_color, robust=use_robust, plot_label_lookup=plot_label_lookup, xticks=np.arange(-0.75,.76,0.25))
        if do_save:
            out_stem = f'{plot_dir}/{ls_prefix[use_robust]}_fit_factor_analysis_{y_col}'
            res_df.to_csv(f'{out_stem}.csv', index=False)
            fig.savefig(f'{out_stem}.pdf', bbox_inches="tight")
            residual_scatterplots(y_col, base_variables, row_filter_true, arbor_ais_df_use, 'exact', plot_dir, plot_label_lookup=plot_label_lookup, robust=use_robust)


In [None]:
# One regression panel per (response, component, estimator) combination.
# The title reports the squared Pearson correlation of the finite pairs; it is
# the same for the robust and non-robust panels of a given pair.
scatter_style = {'s':8, 'color':(0.3, 0.3, 0.3)}
for ycol in ('syn_net_chc', 'syn_mean_chc', 'num_cells_chc'):
    for xcol in base_variables:
        for use_robust in (True, False):
            fig, ax = plt.subplots(figsize=(3,3))
            sns.regplot(
                x=xcol,
                y=ycol,
                data=arbor_ais_df_use,
                ax=ax,
                marker='o',
                color='k',
                scatter_kws=scatter_style,
                robust=use_robust,
                n_boot=100,
            )
            sns.despine(ax=ax, offset=5)
            x_label = plot_label_lookup[xcol]
            ax.set_xlabel(x_label)
            y_label = plot_label_lookup[ycol]
            ax.set_ylabel(y_label)
            r = nan_pearsonr(arbor_ais_df_use[xcol], arbor_ais_df_use[ycol])[0]
            r_squared = r*r
            ax.set_title(f'R$^2$={r_squared:.2f}')
            # fig.savefig(f'{plot_dir}/linear_plot_robust_{use_robust}_{ycol}_vs_{xcol}.pdf')

---

---
### Spatial considerations

In [None]:
# Distance thresholds; each value is used as a key into `is_potential_ds` and
# as the suffix of the `d_<value>` columns of the mask-fraction table below.
# NOTE(review): units are not stated anywhere in view -- confirm.
d_vec = [5000, 7500, 10000, 15000]

# Rebinds the global `tick_dict` for the spatial section. Compared with the
# earlier definition it changes several ranges, adds 'conn_frac' and
# 'num_potential', and no longer contains 'ais_radius'.
tick_dict = {'syn_net_chc': np.arange(0,27,5),
             'syn_net_non': np.arange(0,27,5),
             'soma_y_adj': np.arange(0, 121, 20),
             'soma_x_um': np.arange(0, 251, 50),
             'n_syn_soma': np.arange(60,161,20),
             'soma_synapses': np.arange(40, 121, 20),
             'soma_area': np.arange(500, 901, 100),
             'soma_syn_density': np.arange(0.05, 0.15, 0.02),
             'num_cells_chc': np.arange(0,10,2.5),
             'syn_mean_chc': np.arange(0,8,2),
             'conn_frac': np.arange(0,1.01,0.2),
             'num_potential': np.arange(0, 21, 5),
             }


In [None]:
# Load the pickled lookup that later cells index by distance threshold.
# `pickle` is not imported in this notebook directly; presumably it comes from
# `common_preamble` -- TODO confirm.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# with trusted, locally generated data.
with open('../data/in/is_potential_ds.pkl', 'rb') as f:
    is_potential_ds = pickle.load(f)

In [None]:
# Build the potential-connectivity table for a single distance threshold.
fraction_cutoff = 0.1
d_max = 7500  # key into is_potential_ds and suffix of the mask-fraction column
is_potential = is_potential_ds[d_max]

# NOTE(review): `is_potential` is indexed by position 0..len(complete_ais_ids)-1,
# which assumes its entries are ordered like `complete_ais_ids` -- confirm.
# Each entry's `.values()` are summed to give the count.
num_pot_df = pd.DataFrame({'pyc_root_id': complete_ais_ids, 'num_potential':[sum(is_potential[ii].values()) for ii in range(len(complete_ais_ids))]})
# Left merge on the root id; the merged frame gets a fresh positional index.
arbor_ais_df_pot = arbor_ais_df_use.merge(num_pot_df, left_on='post_pt_root_id', right_on='pyc_root_id', how='left').drop(columns=['pyc_root_id'])
# Ratio of num_cells_chc to num_potential; a zero denominator yields inf/NaN,
# which nan_pearsonr later ignores.
arbor_ais_df_pot['conn_frac'] = arbor_ais_df_pot['num_cells_chc'] / arbor_ais_df_pot['num_potential']
# NOTE(review): the version is hard-coded here ('v185'), while later cells build
# the same file name from `data_version`.
mask_frac_df = pd.read_hdf('../data/mask_fraction_data_v185.hdf')
# Keep only cells whose `d_<d_max>` value is below `fraction_cutoff`.
ais_oid_within_limits = mask_frac_df[mask_frac_df['d_{}'.format(d_max)] < fraction_cutoff]['root_id']
within_unmasked = np.isin(arbor_ais_df_pot['post_pt_root_id'], ais_oid_within_limits)
arbor_ais_df_pot = arbor_ais_df_pot[within_unmasked]

In [None]:
use_df = arbor_ais_df_pot

# (x column, y column) for each panel. A single loop replaces three
# copy-pasted stanzas and uses the same frame for both the plot and the R
# value (previously `arbor_ais_df_pot` was plotted but `use_df` was correlated).
# NOTE: `x` and `y` stay kernel globals holding the last pair, as before.
for x, y in [('num_potential', 'syn_net_chc'),
             ('conn_frac', 'syn_net_chc'),
             ('conn_frac', 'num_potential')]:
    fig, ax = make_scatterplot(x, y, use_df,
                               figsize, plot_label_lookup, tick_dict,
                               xtick_int=tickintdict.get(x, True),
                               ytick_int=tickintdict.get(y, True),
                               xprecision=xprecision.get(x, 1))
    # Title shows Pearson R over the finite pairs.
    ax.set_title(f'R = {nan_pearsonr(use_df[x], use_df[y])[0]:.2f}')
    # fig.savefig(fname=f'{plot_dir}/scatterplots_{y}_vs_{x}.pdf', bbox_inches="tight")

In [None]:
fraction_cutoff = 0.1
figsize = (2, 2)

n_pot_dist = {}
base_variables = ['pca_0', 'pca_1', 'pca_2']
data_dir = '../data'

# The mask-fraction table does not depend on d_max, so read it once.
mask_frac_df = pd.read_hdf(data_dir + '/mask_fraction_data_v{}.hdf'.format(data_version))

# Only the first distance threshold is analysed in this cell.
for d_max in d_vec[0:1]:

    is_potential = is_potential_ds[d_max]

    # NOTE(review): `is_potential` is indexed by position, which assumes its
    # entries are ordered like `complete_ais_ids` -- confirm.
    num_pot_df = pd.DataFrame({'pyc_root_id': complete_ais_ids, 'num_potential':[sum(is_potential[ii].values()) for ii in range(len(complete_ais_ids))]})
    arbor_ais_df_pot = arbor_ais_df_use.merge(num_pot_df, left_on='post_pt_root_id', right_on='pyc_root_id', how='left').drop(columns=['pyc_root_id'])
    arbor_ais_df_pot['conn_frac'] = arbor_ais_df_pot['num_cells_chc'] / arbor_ais_df_pot['num_potential']

    # Cells whose `d_<d_max>` value is below the cutoff.
    ais_oid_within_limits = mask_frac_df[mask_frac_df['d_{}'.format(d_max)] < fraction_cutoff]['root_id']
    within_unmasked = np.isin(arbor_ais_df_pot['post_pt_root_id'], ais_oid_within_limits)

    # Histogram of connectivity fraction for the retained cells.
    fig, ax = plt.subplots(figsize=figsize)
    ax.hist(arbor_ais_df_pot[within_unmasked]['conn_frac'], bins=np.arange(0,1.01,0.1), edgecolor='w', color='k')
    ax.grid(True, axis='y')
    ax.set_axisbelow(True)
    sns.despine(offset=2, trim=False, ax=ax)
    ax.set_xlabel('Connectivity fraction')
    ax.set_yticks(np.arange(0,31,5))
    ax.set_ylabel('# AISes')

    # fig.savefig(fname=plot_dir+'/connectivity_fraction_histogram_dmax_{}.pdf'.format(d_max), bbox_inches="tight")

    conn_fract_described_df = arbor_ais_df_pot[within_unmasked]['conn_frac'].describe(percentiles=[0.5])
    # conn_fract_described_df.to_csv(plot_dir + '/connectivity_fraction_summary_v{}_dmax_{}.csv'.format(data_version, d_max))

    # Per-threshold summary with column names suffixed by d_max.
    n_pot_dist[d_max] = arbor_ais_df_pot[['post_pt_root_id', 'num_potential', 'num_cells_chc', 'size_net_chc']].rename(columns={'num_potential':'num_potential_{}'.format(d_max),
                                                                                             'num_cells_chc': 'num_cells_chc_{}'.format(d_max),
                                                                                             'size_net_chc': 'size_net_chc_{}'.format(d_max),
                                                                                             'post_pt_root_id': 'post_pt_root_id_{}'.format(d_max)})
    n_pot_dist[d_max]['within_data_{}'.format(d_max)] = within_unmasked

    # Not consumed in this cell. The guarded extension used to repeat
    # 'n_syn_soma'; it now matches the other cells, which add 'soma_synapses'.
    common_y = ['soma_y_adj', 'n_syn_soma', 'syn_net_non', 'soma_x_um']
    if 'label' in arbor_ais_df.columns:
        common_y += ['soma_synapses', 'soma_syn_density']

    new_col_pairs = {}
    new_col_pairs['conn_frac'] = common_y
    new_col_pairs['num_potential'] = common_y

    tickintdict['conn_frac'] = False

    #### OLS plots

    for use_robust in [True, False]:
        for y_col, bar_color in [('conn_frac', conn_frac_color), ('num_potential', num_pot_color)]:
            fig, ax, res_df, _ = ols_analysis_single(arbor_ais_df_pot, within_unmasked, y_col, base_variables[::-1], bar_color,
                                                  robust=use_robust, xticks=np.arange(-0.75,.76,0.25), plot_label_lookup=plot_label_lookup)
            # res_df.to_csv(f'{plot_dir}/{ls_prefix[use_robust]}_fit_factor_analysis_{y_col}_d{d_max}.csv', index=False)
            # fig.savefig(f'{plot_dir}/{ls_prefix[use_robust]}_fit_factor_analysis_{y_col}_d{d_max}.pdf', bbox_inches="tight")

    #### Per-component regression panels
    # This block used to sit inside the `use_robust` loop above while looping
    # over `use_robust` again itself, so every panel and every
    # residual_scatterplots call ran twice. It now runs once per combination.
    # NOTE(review): panels use all rows of `arbor_ais_df_pot`, not only the
    # `within_unmasked` subset used for the fits above -- confirm intent.
    # NOTE(review): the residual_scatterplots call does not depend on `xcol`,
    # so it repeats identically for each component.
    for ycol in ['conn_frac', 'num_potential']:
        for xcol in base_variables:
            for use_robust in [True, False]:
                fig, ax = plt.subplots(figsize=(3,3))
                sns.regplot(x=xcol, y=ycol, data=arbor_ais_df_pot, ax=ax, marker='o', color='k', scatter_kws={'s':8, 'color':(0.3, 0.3, 0.3)}, robust=use_robust, n_boot=300)
                sns.despine(ax=ax, offset=5)
                ax.set_xlabel(plot_label_lookup[xcol])
                ax.set_ylabel(plot_label_lookup[ycol])
                r=nan_pearsonr(arbor_ais_df_pot[xcol], arbor_ais_df_pot[ycol])[0]
                ax.set_title(f'R$^2$={r*r:.2f}')
                # fig.savefig(f'{plot_dir}/linear_plot_robust_{use_robust}_{ycol}_vs_{xcol}.pdf')
                residual_scatterplots(ycol, base_variables, arbor_ais_df_pot['post_pt_root_id']>0, arbor_ais_df_pot, 'exact', plot_dir, plot_label_lookup=plot_label_lookup, robust=use_robust)


In [None]:
# Read the table stored under key 'arbor_synapse_df'.
# NOTE(review): `base_dir` is not defined in the visible notebook (other cells
# use relative '../data' paths); presumably it comes from `common_preamble`.
dend_df = pd.read_hdf(base_dir+'/data/in/spatial_arbor_synapses_v185.h5', 'arbor_synapse_df')

In [None]:
# Quick visual check of `dend_syn` against `dend_area` before forming their ratio.
sns.scatterplot(x='dend_syn', y='dend_area', data=dend_df)

In [None]:
# Ratio of `dend_syn` to `dend_area` (adds a column to dend_df in place).
# A zero `dend_area` would give inf/NaN here.
dend_df['dend_syn_density'] = dend_df['dend_syn'] / dend_df['dend_area']

In [None]:
# Inner join: only cells present in both tables are kept. The result carries
# both id columns ('post_pt_root_id' and 'pt_root_id').
arbor_ais_density_df = arbor_ais_df_pot.merge(dend_df[['pt_root_id', 'dend_syn_density']], left_on='post_pt_root_id', right_on='pt_root_id', how='inner')

In [None]:
# Raw string: '\m' is an invalid escape in a normal literal (same label text).
plot_label_lookup['dend_syn_density'] = r'Dend Syn. Density ($1/\mu m^2$)'
tick_dict['dend_syn_density'] = np.arange(0.19, 0.35, 0.03)

# Name the plotted columns explicitly. The tick setting used to be looked up
# through a global `y` left over from an earlier cell, so it depended on which
# cell had run last.
x_col, y_col = 'dend_syn_density', 'syn_net_chc'
fig, ax = make_scatterplot(x_col, y_col, arbor_ais_density_df,
                           (2,2), plot_label_lookup, tick_dict,
                           xtick_int=False,
                           ytick_int=tickintdict.get(y_col, True),
                           xprecision=2)
# NOTE(review): this save is unconditional, unlike the guarded or commented-out
# saves elsewhere in the notebook.
fig.savefig(plot_dir + '/scatterplots_{}_v_{}.pdf'.format(x_col, y_col), bbox_inches='tight')

In [None]:
# Use the notebook's finite-pairs helper, like every other correlation here:
# the density is a ratio and can contain inf/NaN, which would otherwise make
# the result NaN or raise.
nan_pearsonr(arbor_ais_density_df['dend_syn_density'], arbor_ais_density_df['syn_net_chc'])

In [None]:
# Raw string: '\m' is an invalid escape in a normal literal (same label text).
plot_label_lookup['dend_syn_density'] = r'Dend Syn. Density ($1/\mu m^2$)'
# Narrower tick range than the ChC panel; rebinding it here affects any later
# plot of this column.
tick_dict['dend_syn_density'] = np.arange(0.22, 0.32, 0.03)

# Name the plotted columns explicitly. The tick setting used to be looked up
# through a global `y` left over from an earlier cell.
x_col, y_col = 'dend_syn_density', 'syn_net_non'
fig, ax = make_scatterplot(x_col, y_col, arbor_ais_density_df,
                           (2,2), plot_label_lookup, tick_dict,
                           xtick_int=False,
                           ytick_int=tickintdict.get(y_col, True),
                           xprecision=2)
# NOTE(review): this save is unconditional, unlike the guarded or commented-out
# saves elsewhere in the notebook.
fig.savefig(plot_dir + '/scatterplots_{}_v_{}.pdf'.format(x_col, y_col), bbox_inches='tight')

In [None]:
# Inspection only: displays the full synapse table loaded at the top of the notebook.
# NOTE(review): leftover exploration; consider `.head()` or removing the cell.
ais_synapse_data_all

In [None]:
# Inspection only: list the columns available after the dendrite-density merge.
arbor_ais_density_df.columns

---

In [None]:
# Inspection only: list the columns of the base table.
arbor_ais_df.columns

In [None]:
# Overrides the earlier 'AIS Radius' label. Raw string: '\m' is an invalid
# escape in a normal literal (same label text).
# NOTE(review): the ticks configured earlier for this column run 175-375, which
# looks more like nanometres than micrometres -- confirm the unit in the label.
plot_label_lookup['ais_radius'] = r'AIS rad ($\mu m$)'

In [None]:
fraction_cutoff = 0.1
figsize = (2, 2)
use_robust = False

n_pot_dist = {}

# The mask-fraction table does not depend on d_max, so read it once.
mask_frac_df = pd.read_hdf(data_dir + '/mask_fraction_data_v{}.hdf'.format(data_version))

# Predictors shared by every OLS fit below; each fit prepends one soma variable.
shared_predictors = ['soma_y_um', 'soma_x_um', 'syn_net_non', 'ais_radius', 'size_mean_chc']
ols_responses = [('conn_frac', conn_frac_color), ('num_potential', num_pot_color)]

for d_max in d_vec:

    is_potential = is_potential_ds[d_max]

    # NOTE(review): `is_potential` is indexed by position, which assumes its
    # entries are ordered like `complete_ais_ids` -- confirm.
    num_pot_df = pd.DataFrame({'pyc_root_id': complete_ais_ids, 'num_potential':[sum(is_potential[ii].values()) for ii in range(len(complete_ais_ids))]})
    # NOTE(review): `arbor_ais_df_r` is not defined anywhere in the visible
    # notebook; unless `common_preamble` provides it, this fails on a fresh kernel.
    arbor_ais_df_pot = arbor_ais_df_r.merge(num_pot_df, left_on='post_pt_root_id', right_on='pyc_root_id', how='left').drop(columns=['pyc_root_id'])
    arbor_ais_df_pot['conn_frac'] = arbor_ais_df_pot['num_cells_chc'] / arbor_ais_df_pot['num_potential']

    # Cells whose `d_<d_max>` value is below the cutoff.
    ais_oid_within_limits = mask_frac_df[mask_frac_df['d_{}'.format(d_max)] < fraction_cutoff]['root_id']
    within_unmasked = np.isin(arbor_ais_df_pot['post_pt_root_id'], ais_oid_within_limits)

    # Histogram of connectivity fraction for the retained cells.
    fig, ax = plt.subplots(figsize=figsize)
    ax.hist(arbor_ais_df_pot[within_unmasked]['conn_frac'], bins=np.arange(0,1.01,0.1), edgecolor='w', color='k')
    ax.grid(True, axis='y')
    ax.set_axisbelow(True)
    sns.despine(offset=2, trim=False, ax=ax)
    ax.set_xlabel('Connectivity fraction')
    ax.set_yticks(np.arange(0,31,5))
    ax.set_ylabel('# AISes')

    fig.savefig(fname=plot_dir+'/connectivity_fraction_histogram_dmax_{}.pdf'.format(d_max), bbox_inches="tight")

    conn_fract_described_df = arbor_ais_df_pot[within_unmasked]['conn_frac'].describe(percentiles=[0.5])
    conn_fract_described_df.to_csv(plot_dir + '/connectivity_fraction_summary_v{}_dmax_{}.csv'.format(data_version, d_max))

    # Per-threshold summary with column names suffixed by d_max.
    n_pot_dist[d_max] = arbor_ais_df_pot[['post_pt_root_id', 'num_potential', 'num_cells_chc', 'size_net_chc']].rename(columns={'num_potential':'num_potential_{}'.format(d_max),
                                                                                             'num_cells_chc': 'num_cells_chc_{}'.format(d_max),
                                                                                             'size_net_chc': 'size_net_chc_{}'.format(d_max),
                                                                                             'post_pt_root_id': 'post_pt_root_id_{}'.format(d_max)})
    n_pot_dist[d_max]['within_data_{}'.format(d_max)] = within_unmasked

    has_label = 'label' in arbor_ais_df.columns

    common_y = ['soma_y_adj', 'n_syn_soma', 'syn_net_non', 'soma_x_um']
    if has_label:
        common_y += ['soma_synapses', 'soma_syn_density']

    new_col_pairs = {}
    new_col_pairs['conn_frac'] = common_y
    new_col_pairs['num_potential'] = common_y

    tickintdict['conn_frac'] = False

    # Here `x` is the column drawn on the vertical axis and `y` the one on the
    # horizontal axis (note the swapped argument order in the call).
    for x in new_col_pairs:
        for y in new_col_pairs.get(x):
            fig, ax = make_scatterplot(y, x, arbor_ais_df_pot[within_unmasked],
                                       figsize, plot_label_lookup, tick_dict,
                                       xtick_int=tickintdict.get(y, True),
                                       ytick_int=tickintdict.get(x, True),
                                       xprecision=xprecision.get(y, 1))
            fig.savefig(fname=plot_dir+'/scatterplots_{x}_vs_{y}_dmax_{d}.pdf'.format(x=x, y=y, d=d_max), bbox_inches="tight")

    #### OLS plots
    # (output file stem, soma predictor) per model family. The two soma-specific
    # families are fitted only when the 'label' column exists.
    ols_specs = [('ols_fit', 'n_syn_soma')]
    if has_label:
        ols_specs += [('ols_fit_soma_exact', 'soma_synapses'),
                      ('ols_fit_soma_density', 'soma_syn_density')]

    for file_stem, soma_predictor in ols_specs:
        columns = [soma_predictor] + shared_predictors
        for y_col, bar_color in ols_responses:
            # Every other call site in this notebook unpacks four return values
            # from ols_analysis_single; unpacking three raised ValueError.
            # Slicing keeps this working for either arity.
            # NOTE(review): `row_filter` was built on `arbor_ais_df`, not on the
            # merged `arbor_ais_df_pot`; the other spatial cell passes
            # `within_unmasked` here instead -- confirm which is intended.
            fig, ax, res_df = ols_analysis_single(arbor_ais_df_pot, row_filter, y_col, columns[::-1], bar_color,
                                                  robust=use_robust, xticks=np.arange(-0.75,.76,0.25))[:3]
            res_df.to_csv(plot_dir + '/{}_{}_dmax_{}.csv'.format(file_stem, y_col, d_max), index=False)
            fig.savefig(plot_dir + '/{}_{}_dmax_{}.pdf'.format(file_stem, y_col, d_max), bbox_inches="tight")

    if not has_label:
        print('No exact soma data...')

In [None]:
# Inspection only: check whether the merged table carries the 'ais_radius' column.
'ais_radius' in arbor_ais_df_pot.columns