In [1]:
import numpy as np
import pandas as pd 
import bokeh.io
import bokeh.plotting
import scikit_posthocs as posthoc

bokeh.io.output_notebook()

import urllib.request
# urllib.request.urlretrieve('https://raw.githubusercontent.com/charlesincharge/Caltech-CS155-2022/main/sets/set3/boosting_helper.py', 'boosting_helper.py')

# Analysis to make plots in Figure 3

## Figure 3b

In [46]:
df_imaging_quant = pd.read_csv("raw-data/fig3b.csv")
# df_rm_quant["animal id"] = df_rm_quant["animal ID"].astype(str)
df_imaging_quant["percent positive"] = df_imaging_quant["double pos"] / df_imaging_quant["reporter pos"] *100

# Add animal ID if separating means between animals
df_mean = df_imaging_quant.groupby(["region", "marker"])[["percent positive"]].agg(mean_percentage=("percent positive", "mean"), 
                                                                                      std_percentage=("percent positive", "std"), 
                                                                                      n=("percent positive", "count")).reset_index()
# Add animal ID if separating means between animals
df_imaging_quant = pd.merge(df_imaging_quant, df_mean, on=["region", "marker"])
df_imaging_quant["error"] = df_imaging_quant["std_percentage"] /  df_imaging_quant["n"].apply(np.sqrt)

df_imaging_quant["upper"] = df_imaging_quant.apply(lambda x: (x["mean_percentage"] + x["error"] if x["n"] > 2 else -1), axis=1) 
df_imaging_quant["lower"] = df_imaging_quant.apply(lambda x: (x["mean_percentage"] - x["error"] if x["n"] > 2 else -1), axis=1) 
df_imaging_quant.head()

rng = np.random.default_rng()
df_imaging_quant['jitter'] = rng.normal(0, 0.025, len(df_imaging_quant)) 
df_imaging_quant['cats'] = df_imaging_quant.apply(lambda x: (x['region'], x['jitter']), axis=1)
df_imaging_quant.head()

df_imaging_quant.head()

Unnamed: 0,file,variant,animal id,region,marker,reporter pos,double pos,marker pos,percent positive,mean_percentage,std_percentage,n,error,upper,lower,jitter,cats
0,49845_neun12_par-ctx_10x_2022-09-06_1,CAP-Mac,RM-009,Parietal lobe,neun,61,48,6076,78.688525,60.370285,11.567831,18,2.726564,63.096849,57.643721,-0.012921,"(Parietal lobe, -0.012920711567475915)"
1,49845_neun12_par-ctx_10x_2022-09-06_2,CAP-Mac,RM-009,Parietal lobe,neun,66,46,9039,69.69697,60.370285,11.567831,18,2.726564,63.096849,57.643721,-0.026976,"(Parietal lobe, -0.02697623006920712)"
2,49845_neun12_par-ctx_10x_2022-09-06_3,CAP-Mac,RM-009,Parietal lobe,neun,27,19,10728,70.37037,60.370285,11.567831,18,2.726564,63.096849,57.643721,-0.037355,"(Parietal lobe, -0.037354753524065795)"
3,49845_neun12_par-ctx_10x_2022-09-06_4,CAP-Mac,RM-009,Parietal lobe,neun,70,43,12340,61.428571,60.370285,11.567831,18,2.726564,63.096849,57.643721,0.024278,"(Parietal lobe, 0.024278234763781024)"
4,49845_neun12_par-ctx_10x_2022_07_20__11_52_37,CAP-Mac,RM-009,Parietal lobe,neun,102,63,8388,61.764706,60.370285,11.567831,18,2.726564,63.096849,57.643721,-0.000791,"(Parietal lobe, -0.0007910135871199462)"


In [47]:
x_range_brain = ['Frontal lobe', 'Parietal lobe', 'Temporal lobe', 'Caudate' , 'Putamen', 'Thalamus']
stain_color = bokeh.transform.factor_cmap('marker', palette=bokeh.palettes.Pastel1[3], factors=['s100', 'neun'])

animal_id = ['RM-009', 'RM-010']
markers = ['square', 'diamond'] 
marker_color=list(bokeh.palettes.Colorblind[8][-2:])

############### PLOT SETTINGS ###################
figure_width = 400
figure_height = 300
width = 0.35
dodge = width/2

marker_size = 5
error_size=4
error_line_width=0.5
############### PLOT SETTINGS ###################

p = bokeh.plotting.figure(x_range=x_range_brain, height=figure_height, width=figure_width)

############### PLOT SETTINGS ###################
p.xgrid.visible=False
p.axis.minor_tick_line_width=0
p.xaxis.major_label_orientation=45
p.axis.major_tick_in = 0
p.axis.major_label_text_color = "#000000"
p.axis.major_label_text_font_size = "7pt"
p.axis.major_label_standoff = 0
p.axis.axis_label_text_align = "right"
p.add_layout(bokeh.models.Legend(), "right")
############### PLOT SETTINGS ###################

p.vbar(source=df_imaging_quant.loc[df_imaging_quant['marker']=='neun'], x=bokeh.transform.dodge('region', -dodge, range=p.x_range), 
       top='mean_percentage', width=width, fill_color=stain_color, line_color='black', legend_label="NeuN")
p.vbar(source=df_imaging_quant.loc[df_imaging_quant['marker']=='s100'], x=bokeh.transform.dodge('region', dodge, range=p.x_range), 
       top='mean_percentage', width=width, fill_color=stain_color, line_color='black', legend_label="s100β")

source_error_s100 = bokeh.models.ColumnDataSource(data=df_imaging_quant.loc[df_imaging_quant['marker']=='s100'])
source_error_neun = bokeh.models.ColumnDataSource(data=df_imaging_quant.loc[df_imaging_quant['marker']=='neun'])

w_s100 = bokeh.models.Whisker(source=source_error_s100, base=bokeh.transform.dodge('region', dodge, range=p.x_range), 
                              upper='upper', lower='lower', level='overlay', line_width=error_line_width)
w_neun = bokeh.models.Whisker(source=source_error_neun, base=bokeh.transform.dodge('region', -dodge, range=p.x_range), 
                            upper='upper', lower='lower', level='overlay', line_width=error_line_width)

w_s100.upper_head.line_width=error_line_width
w_s100.upper_head.size=error_size
w_s100.lower_head.line_width=error_line_width
w_s100.lower_head.size=error_size

w_neun.upper_head.line_width=error_line_width
w_neun.upper_head.size=error_size
w_neun.lower_head.line_width=error_line_width
w_neun.lower_head.size=error_size

p.add_layout(w_s100)
p.add_layout(w_neun)
p.y_range = bokeh.models.Range1d(0, 110)

p.scatter(x=bokeh.transform.dodge('cats', dodge, range=p.x_range), y='percent positive', 
          source=df_imaging_quant.loc[df_imaging_quant['marker']=='s100'], marker=bokeh.transform.factor_mark('animal id', markers, animal_id), size=marker_size,
          color=bokeh.transform.factor_cmap('animal id', marker_color, animal_id), line_color='black', line_width=0.25, legend_field="animal id")

p.scatter(x=bokeh.transform.dodge('cats', -dodge, range=p.x_range), y='percent positive', 
          source=df_imaging_quant.loc[df_imaging_quant['marker']=='neun'], marker=bokeh.transform.factor_mark('animal id', markers, animal_id), size=marker_size,
          color=bokeh.transform.factor_cmap('animal id', marker_color, animal_id), line_color='black', line_width=0.25)


bokeh.io.show(p)

Print numbers including percent of all neurons 

## Figure 3d

In [49]:
df_imaging_quant = pd.read_csv("raw-data/fig3d.csv")
# df_rm_quant["animal id"] = df_rm_quant["animal ID"].astype(str)
df_imaging_quant["percent positive"] = df_imaging_quant["double pos"] / df_imaging_quant["reporter pos"] *100

df_mean = df_imaging_quant.groupby(["region", "variant", "marker"])[['percent positive']].agg(mean_percentage=("percent positive", "mean"), std_percentage=("percent positive", "std"), n=("percent positive", "count")).reset_index()

df_imaging_quant = pd.merge(df_imaging_quant, df_mean, on=["region", "variant", "marker"])
df_imaging_quant['error'] = df_imaging_quant['std_percentage'] / df_imaging_quant['n'].apply(np.sqrt)

df_imaging_quant['upper'] = df_imaging_quant.apply(lambda x: (x['mean_percentage'] + x['error'] if x['n'] > 2 else -1), axis=1) 
df_imaging_quant['lower'] = df_imaging_quant.apply(lambda x: (x['mean_percentage'] - x['error'] if x['n'] > 2 else -1), axis=1) 

rng = np.random.default_rng()
df_imaging_quant['jitter'] = rng.normal(0, 0.05, len(df_imaging_quant)) 
df_imaging_quant['cats'] = df_imaging_quant.apply(lambda x: (x['region'], x['jitter']), axis=1)
df_imaging_quant.head()

Unnamed: 0,file,variant,animal id,region,marker,reporter pos,double pos,marker pos,percent positive,mean_percentage,std_percentage,n,error,upper,lower,jitter,cats
0,C010.7.2a_Cd,CAP-Mac,C010,Caudate,neun,40,24,1044,60.0,43.699656,17.457563,11,5.263653,48.963309,38.436003,0.002147,"(Caudate, 0.00214728627737086)"
1,C010.7.2b_Cd,CAP-Mac,C010,Caudate,neun,41,21,1074,51.219512,43.699656,17.457563,11,5.263653,48.963309,38.436003,0.022426,"(Caudate, 0.02242612998579697)"
2,C010.7.3a_Cd,CAP-Mac,C010,Caudate,neun,57,29,987,50.877193,43.699656,17.457563,11,5.263653,48.963309,38.436003,0.053415,"(Caudate, 0.0534152675559632)"
3,C010.7.3b_Cd,CAP-Mac,C010,Caudate,neun,48,20,1004,41.666667,43.699656,17.457563,11,5.263653,48.963309,38.436003,0.078337,"(Caudate, 0.07833701305884762)"
4,C010.7.4a_Cd,CAP-Mac,C010,Caudate,neun,44,13,1097,29.545455,43.699656,17.457563,11,5.263653,48.963309,38.436003,-0.066136,"(Caudate, -0.06613614543600221)"


In [52]:
animal_id = ['C002', 'C016', 'C017', 'C010']
markers = ['circle', 'triangle', 'square', 'diamond'] 
marker_color=list(bokeh.palettes.Colorblind[6][2:])

############### PLOT SETTINGS ###################
figure_width = 400
figure_height = 300
width = 0.35
dodge = width/2

marker_size = 5
error_size=4
error_line_width=0.5
############### PLOT SETTINGS ###################

p = bokeh.plotting.figure(x_range=x_range_brain, height=figure_height, width=figure_width)

############### PLOT SETTINGS ###################
p.xgrid.visible=False
p.axis.minor_tick_line_width=0
p.xaxis.major_label_orientation=45
p.axis.major_tick_in = 0
p.axis.major_label_text_color = "#000000"
p.axis.major_label_text_font_size = "7pt"
p.axis.major_label_standoff = 0
p.axis.axis_label_text_align = "right"
p.add_layout(bokeh.models.Legend(), "right")
############### PLOT SETTINGS ###################

p.vbar(source=df_imaging_quant.loc[df_imaging_quant['marker']=='neun'], x=bokeh.transform.dodge('region', -dodge, range=p.x_range), 
       top='mean_percentage', width=width, fill_color=stain_color, line_color='black', legend_label="NeuN")
p.vbar(source=df_imaging_quant.loc[df_imaging_quant['marker']=='s100'], x=bokeh.transform.dodge('region', dodge, range=p.x_range), 
       top='mean_percentage', width=width, fill_color=stain_color, line_color='black', legend_label="s100β")

source_error_s100 = bokeh.models.ColumnDataSource(data=df_imaging_quant.loc[df_imaging_quant['marker']=='s100'])
source_error_neun = bokeh.models.ColumnDataSource(data=df_imaging_quant.loc[df_imaging_quant['marker']=='neun'])

w_s100 = bokeh.models.Whisker(source=source_error_s100, base=bokeh.transform.dodge('region', dodge, range=p.x_range), 
                              upper='upper', lower='lower', level='overlay', line_width=error_line_width)
w_neun = bokeh.models.Whisker(source=source_error_neun, base=bokeh.transform.dodge('region', -dodge, range=p.x_range), 
                            upper='upper', lower='lower', level='overlay', line_width=error_line_width)

w_s100.upper_head.line_width=error_line_width
w_s100.upper_head.size=error_size
w_s100.lower_head.line_width=error_line_width
w_s100.lower_head.size=error_size

w_neun.upper_head.line_width=error_line_width
w_neun.upper_head.size=error_size
w_neun.lower_head.line_width=error_line_width
w_neun.lower_head.size=error_size

p.add_layout(w_s100)
p.add_layout(w_neun)
p.y_range = bokeh.models.Range1d(0, 110)

p.scatter(x=bokeh.transform.dodge('cats', dodge, range=p.x_range), y='percent positive', 
          source=df_imaging_quant.loc[df_imaging_quant['marker']=='s100'], marker=bokeh.transform.factor_mark('animal id', markers, animal_id), size=marker_size,
          color=bokeh.transform.factor_cmap('animal id', marker_color, animal_id), line_color='black', line_width=0.25, legend_field="animal id")

p.scatter(x=bokeh.transform.dodge('cats', -dodge, range=p.x_range), y='percent positive', 
          source=df_imaging_quant.loc[df_imaging_quant['marker']=='neun'], marker=bokeh.transform.factor_mark('animal id', markers, animal_id), size=marker_size,
          color=bokeh.transform.factor_cmap('animal id', marker_color, animal_id), line_color='black', line_width=0.25)

bokeh.io.show(p)

## Figure 3e

In [35]:
df_imaging_quant = pd.read_csv("raw-data/fig3e.csv")
df_imaging_quant["percent positive"] = df_imaging_quant["double pos"] / df_imaging_quant["reporter pos"] *100

df_mean = df_imaging_quant.groupby(["region", "variant", "marker"])[['percent positive']].agg(mean_percentage=("percent positive", "mean"), std_percentage=("percent positive", "std"), n=("percent positive", "count")).reset_index()

df_imaging_quant = pd.merge(df_imaging_quant, df_mean, on=["region", "variant", "marker"])
df_imaging_quant['error'] = df_imaging_quant['std_percentage'] / df_imaging_quant['n'].apply(np.sqrt)

df_imaging_quant['upper'] = df_imaging_quant.apply(lambda x: (x['mean_percentage'] + x['error'] if x['n'] > 2 else -1), axis=1) 
df_imaging_quant['lower'] = df_imaging_quant.apply(lambda x: (x['mean_percentage'] - x['error'] if x['n'] > 2 else -1), axis=1) 

rng = np.random.default_rng()
df_imaging_quant['jitter'] = rng.normal(0, 0.05, len(df_imaging_quant)) 
df_imaging_quant['cats'] = df_imaging_quant.apply(lambda x: (x['region'], x['jitter']), axis=1)
df_imaging_quant.head()

Unnamed: 0,file,variant,animal id,region,marker,reporter pos,double pos,marker pos,percent positive,mean_percentage,std_percentage,n,error,upper,lower,jitter,cats
0,C002.7.14a_Cd,AAV9,C002,Caudate,neun,18,7,874,38.888889,29.925554,6.953591,9,2.317864,32.243418,27.60769,-0.011045,"(Caudate, -0.011045447858439084)"
1,C002.7.15a_Cd,AAV9,C002,Caudate,neun,26,4,927,15.384615,29.925554,6.953591,9,2.317864,32.243418,27.60769,-0.025929,"(Caudate, -0.02592916624511145)"
2,C002.7.16a_Cd,AAV9,C002,Caudate,neun,16,5,2678,31.25,29.925554,6.953591,9,2.317864,32.243418,27.60769,-0.10249,"(Caudate, -0.10249006071868405)"
3,C016.7.2a_Cd,AAV9,C016,Caudate,neun,51,14,783,27.45098,29.925554,6.953591,9,2.317864,32.243418,27.60769,0.03368,"(Caudate, 0.03368034735773055)"
4,C016.7.2b_Cd,AAV9,C016,Caudate,neun,43,14,873,32.55814,29.925554,6.953591,9,2.317864,32.243418,27.60769,-0.081886,"(Caudate, -0.08188575463790088)"


In [53]:
animal_id = ['C002', 'C016', 'C017', 'C010']
markers = ['circle', 'triangle', 'square', 'diamond'] 
marker_color=list(bokeh.palettes.Colorblind[6][2:])

############### PLOT SETTINGS ###################
figure_width = 400
figure_height = 300
width = 0.35
dodge = width/2

marker_size = 5
error_size=4
error_line_width=0.5
############### PLOT SETTINGS ###################

p = bokeh.plotting.figure(x_range=x_range_brain, height=figure_height, width=figure_width)

############### PLOT SETTINGS ###################
p.xgrid.visible=False
p.axis.minor_tick_line_width=0
p.xaxis.major_label_orientation=45
p.axis.major_tick_in = 0
p.axis.major_label_text_color = "#000000"
p.axis.major_label_text_font_size = "7pt"
p.axis.major_label_standoff = 0
p.axis.axis_label_text_align = "right"
p.add_layout(bokeh.models.Legend(), "right")
############### PLOT SETTINGS ###################

p.vbar(source=df_imaging_quant.loc[df_imaging_quant['marker']=='neun'], x=bokeh.transform.dodge('region', -dodge, range=p.x_range), 
       top='mean_percentage', width=width, fill_color=stain_color, line_color='black', legend_label="NeuN")
p.vbar(source=df_imaging_quant.loc[df_imaging_quant['marker']=='s100'], x=bokeh.transform.dodge('region', dodge, range=p.x_range), 
       top='mean_percentage', width=width, fill_color=stain_color, line_color='black', legend_label="s100β")

source_error_s100 = bokeh.models.ColumnDataSource(data=df_imaging_quant.loc[df_imaging_quant['marker']=='s100'])
source_error_neun = bokeh.models.ColumnDataSource(data=df_imaging_quant.loc[df_imaging_quant['marker']=='neun'])

w_s100 = bokeh.models.Whisker(source=source_error_s100, base=bokeh.transform.dodge('region', dodge, range=p.x_range), 
                              upper='upper', lower='lower', level='overlay', line_width=error_line_width)
w_neun = bokeh.models.Whisker(source=source_error_neun, base=bokeh.transform.dodge('region', -dodge, range=p.x_range), 
                            upper='upper', lower='lower', level='overlay', line_width=error_line_width)

w_s100.upper_head.line_width=error_line_width
w_s100.upper_head.size=error_size
w_s100.lower_head.line_width=error_line_width
w_s100.lower_head.size=error_size

w_neun.upper_head.line_width=error_line_width
w_neun.upper_head.size=error_size
w_neun.lower_head.line_width=error_line_width
w_neun.lower_head.size=error_size

p.add_layout(w_s100)
p.add_layout(w_neun)
p.y_range = bokeh.models.Range1d(0, 110)

p.scatter(x=bokeh.transform.dodge('cats', dodge, range=p.x_range), y='percent positive', 
          source=df_imaging_quant.loc[df_imaging_quant['marker']=='s100'], marker=bokeh.transform.factor_mark('animal id', markers, animal_id), size=marker_size,
          color=bokeh.transform.factor_cmap('animal id', marker_color, animal_id), line_color='black', line_width=0.25, legend_field="animal id")

p.scatter(x=bokeh.transform.dodge('cats', -dodge, range=p.x_range), y='percent positive', 
          source=df_imaging_quant.loc[df_imaging_quant['marker']=='neun'], marker=bokeh.transform.factor_mark('animal id', markers, animal_id), size=marker_size,
          color=bokeh.transform.factor_cmap('animal id', marker_color, animal_id), line_color='black', line_width=0.25)

bokeh.io.show(p)

## Figure 3f

In [54]:
df_agm_biod = pd.read_csv("raw-data/fig3f.csv")

rng = np.random.default_rng()
df_agm_biod['jitter'] = rng.normal(0, 0.025, len(df_agm_biod)) 
df_agm_biod['cats'] = df_agm_biod.apply(lambda x: (x['region'], x['jitter']), axis=1)

df_agm_mean = df_agm_biod.groupby(['region', 'variant']).agg(mean_moi=("moi", "mean"), mean_vg=("vg/ug dna", "mean")).reset_index()
df_agm_biod = pd.merge(df_agm_biod, df_agm_mean, on=['region','variant'])
df_agm_biod.head()

Unnamed: 0,variant,animal id,tissue,region,moi,vg/ug dna,jitter,cats,mean_moi,mean_vg
0,AAV9,C002,Brain,Caudate,0.674905,112439.2194,0.021935,"(Caudate, 0.02193536434750775)",0.487271,81179.41114
1,AAV9,C016,Brain,Caudate,0.299637,49919.60288,-0.038968,"(Caudate, -0.038967729903750414)",0.487271,81179.41114
2,AAV9,C016,Brain,Cerebellum,0.085913,14313.10278,-0.017856,"(Cerebellum, -0.01785575308283403)",0.075749,12619.748875
3,AAV9,C002,Brain,Cerebellum,0.065585,10926.39497,0.019664,"(Cerebellum, 0.01966388646682602)",0.075749,12619.748875
4,AAV9,C002,Brain,Frontal lobe,0.915763,152566.1492,-0.000924,"(Frontal lobe, -0.0009244281607290817)",0.555924,92616.895735


In [69]:
brain = ['Frontal lobe', 'Parietal lobe', 'Temporal lobe', 'Occipital lobe', 
         'Hippocampus', 'Caudate' , 'Putamen', 'Thalamus' , 'Hypothalamus',
         'Medulla','Cerebellum']

x_range_brain = brain
variant_color = bokeh.transform.factor_cmap('variant', palette=["gray", "white"], factors=["AAV9", "CAP-Mac"])

animal_id = ['C002', 'C016', 'C017', 'C010']
markers = ['circle', 'triangle', 'square', 'diamond'] 
marker_color=list(bokeh.palettes.Colorblind[6][2:])

figure_width = 1000
figure_height = 400
width = 0.3
dodge = 0.15
marker_size = 4
error_size=5
error_line_width=0.25
marker_line_width=1

p = bokeh.plotting.figure(x_range=x_range_brain, height=figure_height, width=figure_width)


p.xgrid.visible=False
p.axis.minor_tick_line_width=0
p.xaxis.major_label_orientation=45
p.axis.major_tick_in = 0
p.axis.major_label_text_font_size = "7pt"

p.vbar(source=df_agm_biod.loc[df_agm_biod['variant']=='AAV9'], x=bokeh.transform.dodge('region', -dodge, range=p.x_range), 
       top='mean_vg', width=width, fill_color=variant_color, line_color='black', legend_label="AAV9")
p.vbar(source=df_agm_biod.loc[df_agm_biod['variant']=='CAP-Mac'], x=bokeh.transform.dodge('region', dodge, range=p.x_range), 
       top='mean_vg', width=width, fill_color=variant_color, line_color='black', legend_label="CAP-Mac")

p.scatter(x=bokeh.transform.dodge('cats', -dodge, range=p.x_range), y='vg/ug dna', 
          source=df_agm_biod.loc[df_agm_biod['variant']=='AAV9'], marker=bokeh.transform.factor_mark('animal id', markers, animal_id), size=marker_size,
          color=bokeh.transform.factor_cmap('animal id', marker_color, animal_id), line_color='black', legend_field="animal id")

p.scatter(x=bokeh.transform.dodge('cats', dodge, range=p.x_range), y='vg/ug dna', 
          source=df_agm_biod.loc[df_agm_biod['variant']=='CAP-Mac'], marker=bokeh.transform.factor_mark('animal id', markers, animal_id), size=marker_size,
          color=bokeh.transform.factor_cmap('animal id', marker_color, animal_id), line_color='black',  legend_field="animal id")


p.y_range = bokeh.models.Range1d(0, np.ceil(df_agm_biod['vg/ug dna'].max()*1.1))
bokeh.io.show(p)

ERROR:bokeh.core.validation.check:E-1006 (NON_MATCHING_DATA_SOURCES_ON_LEGEND_ITEM_RENDERERS): LegendItem.label is a field, but renderer data sources don't match: LegendItem(id='p63601', ...)
