In [1]:
import numpy as np
import scipy
import pandas as pd 
import bokeh.io
import bokeh.plotting
import scikit_posthocs as posthoc

# Direct Bokeh output to the notebook so that bokeh.io.show() renders figures inline.
bokeh.io.output_notebook()

# Extended Data Figure 3: CAG-XFP expression in non-brain tissue of Old World primates treated with AAV.

In [2]:
# Load the raw biodistribution measurements (columns seen in the output include
# "variant", "animal id", "tissue", "region" and "vg/ug dna").
df_agm_biod = pd.read_csv("https://github.com/GradinaruLab/CAP-Mac/raw/main/raw-data/extended-data-fig3a.csv")

# Calculate summary statistics for every (tissue, variant) pair: mean, sample
# standard deviation and number of measurements.
df_agm_mean = (
    df_agm_biod
    .groupby(["tissue", "variant"])
    .agg(mean_vg=("vg/ug dna", "mean"),
         error=("vg/ug dna", "std"),
         n=("vg/ug dna", "count"))
    .reset_index()
)

# Attach the group-level statistics to every individual measurement row.
df_tissue_biod = pd.merge(df_agm_biod, df_agm_mean, on=["tissue", "variant"])

# Turn the standard deviation into the standard error of the mean (std / sqrt(n)).
df_tissue_biod["error"] = df_tissue_biod["error"] / np.sqrt(df_tissue_biod["n"])

######## Make some changes for plotting neatness ############
# Note: we only calculate error bars on samples that have >2 measurements.
# Groups with fewer measurements get the sentinel -1 for both whisker ends.
has_error_bar = df_tissue_biod["n"] > 2
df_tissue_biod["upper"] = (df_tissue_biod["mean_vg"] + df_tissue_biod["error"]).where(has_error_bar, -1)
df_tissue_biod["lower"] = (df_tissue_biod["mean_vg"] - df_tissue_biod["error"]).where(has_error_bar, -1)

# Horizontal jitter for the per-sample markers. The generator is seeded so the
# figure is identical on every "Restart & Run All"; the vector length is taken
# from the frame it is assigned to rather than from the pre-merge frame.
JITTER_SEED = 0
rng = np.random.default_rng(JITTER_SEED)
df_tissue_biod["jitter"] = rng.normal(0, 0.025, len(df_tissue_biod))

# (tissue, offset) tuples used as x-coordinates of the markers on the
# categorical axis.
df_tissue_biod["cats"] = list(zip(df_tissue_biod["tissue"], df_tissue_biod["jitter"]))
######## Make some changes for plotting neatness ############

df_tissue_biod.head()

Unnamed: 0,variant,animal id,tissue,region,moi,vg/ug dna,mean_vg,error,n,upper,lower,jitter,cats
0,AAV9,C002,Brain,Caudate,0.674905,112439.2194,54145.527911,7421.540812,22,61567.068723,46723.987099,0.002734,"(Brain, 0.002734087304108627)"
1,AAV9,C016,Brain,Caudate,0.299637,49919.60288,54145.527911,7421.540812,22,61567.068723,46723.987099,0.017431,"(Brain, 0.017430759457662325)"
2,AAV9,C016,Brain,Cerebellum,0.085913,14313.10278,54145.527911,7421.540812,22,61567.068723,46723.987099,0.002061,"(Brain, 0.002061191800785939)"
3,AAV9,C002,Brain,Cerebellum,0.065585,10926.39497,54145.527911,7421.540812,22,61567.068723,46723.987099,-0.006637,"(Brain, -0.006636523884852554)"
4,AAV9,C002,Brain,Frontal lobe,0.915763,152566.1492,54145.527911,7421.540812,22,61567.068723,46723.987099,-0.041755,"(Brain, -0.04175467668306986)"


In [10]:
############### BOKEH FIGURE SETTINGS ###################
# Tissues shown in this panel, in the order they appear on the categorical x-axis.
x_range = ["Brain", "Spinal cord", "DRG", "Kidney", "Heart", "Skeletal muscle"]

# Bar fill keyed on the "variant" column: AAV9 -> gray, CAP-Mac -> white.
variant_color = bokeh.transform.factor_cmap("variant", palette=["gray", "white"], factors=["AAV9", "CAP-Mac"])

# Animals, marker shapes and marker colors are matched by list position.
animal_id = ["C002", "C016", "C017", "C010"]
markers = ["circle", "triangle", "square", "diamond"] 
# Drop the first two entries of the six-color Colorblind palette, leaving four colors.
marker_color=list(bokeh.palettes.Colorblind[6][2:])

figure_width = 700
figure_height = 350
bar_width = .35
bar_fill_color="gray"      # NOTE(review): not referenced by the plotting cell that follows
jitter_width = bar_width/2 # horizontal shift applied to each variant's bar and markers
marker_size = 10
bar_line_width = 1         # NOTE(review): not referenced by the plotting cell that follows

error_size=10
error_line_width=1
marker_line_width=1

# Title typo fixed ("biodistibution" -> "biodistribution").
p = bokeh.plotting.figure(x_range=bokeh.models.FactorRange(*x_range), 
                          title="DNA biodistribution in 8-month old green monkey (intravenous delivery)", 
                          height=figure_height, width=figure_width)

# Axis cosmetics: no vertical grid, no minor ticks, slanted black 12pt labels.
p.xgrid.visible=False
p.axis.minor_tick_line_width=0
p.xaxis.major_label_orientation=np.pi/4
p.axis.major_tick_in = 0
p.axis.major_label_text_color="#000000"
p.axis.major_label_text_font_size="12pt"
p.xaxis.group_text_font_size="12pt"
p.xaxis.group_text_color="#000000"
p.axis.major_label_standoff=5
# Label the y-axis with the plotted quantity (the "vg/ug dna" column).
p.yaxis.axis_label = "vg/µg DNA"
# Place the legend outside the plot area, on the right.
p.add_layout(bokeh.models.Legend(), "right")
############### BOKEH FIGURE SETTINGS ###################

In [11]:
# Add bar and scatter plots
# Each variant is shifted left/right of the tissue's tick so the two bars of a
# tissue sit side by side.
variant_offset = {"AAV9": -jitter_width, "CAP-Mac": jitter_width}

# All measurement rows of each variant (one marker per row).
variant_samples = {name: df_tissue_biod.loc[df_tissue_biod["variant"] == name]
                   for name in variant_offset}
# mean_vg / upper / lower are identical on every row of a (tissue, variant)
# group, so one row per tissue is enough to draw the bars and the whiskers.
variant_summary = {name: rows.drop_duplicates(subset="tissue")
                   for name, rows in variant_samples.items()}

# Bars first so that the markers are rendered on top of them.
for name, offset in variant_offset.items():
    p.vbar(source=variant_summary[name], x=bokeh.transform.dodge("tissue", offset, range=p.x_range),
           top="mean_vg", bottom=1e-6, width=bar_width, fill_color=variant_color, line_color="black", legend_label=name)

# Individual measurements: marker shape and color identify the animal.
for name, offset in variant_offset.items():
    p.scatter(x=bokeh.transform.dodge("cats", offset, range=p.x_range), y="vg/ug dna",
              source=variant_samples[name], marker=bokeh.transform.factor_mark("animal id", markers, animal_id), size=marker_size,
              color=bokeh.transform.factor_cmap("animal id", marker_color, animal_id), line_color="black", line_width=marker_line_width, legend_group="animal id")

############### ADD ERROR BARS ###################
source_error_AAV9 = bokeh.models.ColumnDataSource(data=variant_summary["AAV9"])
# Bug fix: this source used to filter on "CAP-C1", a variant name that is not
# used anywhere else in this notebook, so it was empty and the CAP-Mac error
# bars were never drawn.
source_error_C1 = bokeh.models.ColumnDataSource(data=variant_summary["CAP-Mac"])

w_AAV9 = bokeh.models.Whisker(source=source_error_AAV9, base=bokeh.transform.dodge("tissue", variant_offset["AAV9"], range=p.x_range),
                              upper="upper", lower="lower", level="overlay", line_width=error_line_width)
w_C1 = bokeh.models.Whisker(source=source_error_C1, base=bokeh.transform.dodge("tissue", variant_offset["CAP-Mac"], range=p.x_range),
                            upper="upper", lower="lower", level="overlay", line_width=error_line_width)

# Style both whisker caps identically, then attach the whiskers to the figure.
for whisker in (w_AAV9, w_C1):
    for head in (whisker.upper_head, whisker.lower_head):
        head.line_width = error_line_width
        head.size = error_size
    p.add_layout(whisker)
############### ADD ERROR BARS ###################

# Leave 10% headroom above the largest measurement among the displayed tissues.
# The y-range starts at 1, so whiskers carrying the -1 "no error bar" sentinel
# fall outside the visible range.
y_max = np.ceil(df_tissue_biod.loc[df_tissue_biod["tissue"].isin(x_range), "vg/ug dna"].max()*1.10)
p.y_range = bokeh.models.Range1d(1, y_max)
bokeh.io.show(p)

In [5]:
############### BOKEH FIGURE SETTINGS ###################
# Tissues shown in this panel, in the order they appear on the categorical x-axis.
x_range = ["Liver", "Adrenal gland"]
# Bar fill keyed on the "variant" column: AAV9 -> gray, CAP-Mac -> white.
variant_color = bokeh.transform.factor_cmap("variant", palette=["gray", "white"], factors=["AAV9", "CAP-Mac"])

# Animals, marker shapes and marker colors are matched by list position.
animal_id = ["C002", "C016", "C017", "C010"]
markers = ["circle", "triangle", "square", "diamond"] 
# Drop the first two entries of the six-color Colorblind palette, leaving four colors.
marker_color=list(bokeh.palettes.Colorblind[6][2:])

figure_width = 700
figure_height = 350
bar_width = .35
bar_fill_color="gray"      # NOTE(review): not referenced by the plotting cell that follows
jitter_width = bar_width/2 # horizontal shift applied to each variant's bar and markers
marker_size = 10
bar_line_width = 1         # NOTE(review): not referenced by the plotting cell that follows

error_size=5
error_line_width=.25
marker_line_width=1

# Title typo fixed ("biodistibution" -> "biodistribution").
p = bokeh.plotting.figure(x_range=bokeh.models.FactorRange(*x_range),
                          title="DNA biodistribution in 8-month old green monkey (intravenous delivery)", 
                          height=figure_height, width=figure_width)

# Axis cosmetics: no vertical grid, no minor ticks, slanted black 12pt labels.
p.xgrid.visible=False
p.axis.minor_tick_line_width=0
p.xaxis.major_label_orientation=np.pi/4
p.axis.major_tick_in = 0
p.axis.major_label_text_color="#000000"
p.axis.major_label_text_font_size="12pt"
p.xaxis.group_text_font_size="12pt"
p.xaxis.group_text_color="#000000"
p.axis.major_label_standoff=5
# Label the y-axis with the plotted quantity (the "vg/ug dna" column).
p.yaxis.axis_label = "vg/µg DNA"
# Place the legend outside the plot area, on the right.
p.add_layout(bokeh.models.Legend(), "right")
############### BOKEH FIGURE SETTINGS ###################

In [6]:
# Add bar and scatter plots
# Each variant is shifted left/right of the tissue's tick so the two bars of a
# tissue sit side by side.
variant_offset = {"AAV9": -jitter_width, "CAP-Mac": jitter_width}

# All measurement rows of each variant (one marker per row).
variant_samples = {name: df_tissue_biod.loc[df_tissue_biod["variant"] == name]
                   for name in variant_offset}
# mean_vg / upper / lower are identical on every row of a (tissue, variant)
# group, so one row per tissue is enough to draw the bars and the whiskers.
variant_summary = {name: rows.drop_duplicates(subset="tissue")
                   for name, rows in variant_samples.items()}

# Bars first so that the markers are rendered on top of them.
for name, offset in variant_offset.items():
    p.vbar(source=variant_summary[name], x=bokeh.transform.dodge("tissue", offset, range=p.x_range),
           top="mean_vg", bottom=1e-6, width=bar_width, fill_color=variant_color, line_color="black", legend_label=name)

# Individual measurements: marker shape and color identify the animal.
for name, offset in variant_offset.items():
    p.scatter(x=bokeh.transform.dodge("cats", offset, range=p.x_range), y="vg/ug dna",
              source=variant_samples[name], marker=bokeh.transform.factor_mark("animal id", markers, animal_id), size=marker_size,
              color=bokeh.transform.factor_cmap("animal id", marker_color, animal_id), line_color="black", line_width=marker_line_width, legend_group="animal id")

############### ADD ERROR BARS ###################
source_error_AAV9 = bokeh.models.ColumnDataSource(data=variant_summary["AAV9"])
# Bug fix: this source used to filter on "CAP-C1", a variant name that is not
# used anywhere else in this notebook, so it was empty and the CAP-Mac error
# bars were never drawn.
source_error_C1 = bokeh.models.ColumnDataSource(data=variant_summary["CAP-Mac"])

w_AAV9 = bokeh.models.Whisker(source=source_error_AAV9, base=bokeh.transform.dodge("tissue", variant_offset["AAV9"], range=p.x_range),
                              upper="upper", lower="lower", level="overlay", line_width=error_line_width)
w_C1 = bokeh.models.Whisker(source=source_error_C1, base=bokeh.transform.dodge("tissue", variant_offset["CAP-Mac"], range=p.x_range),
                            upper="upper", lower="lower", level="overlay", line_width=error_line_width)

# Style both whisker caps identically, then attach the whiskers to the figure.
for whisker in (w_AAV9, w_C1):
    for head in (whisker.upper_head, whisker.lower_head):
        head.line_width = error_line_width
        head.size = error_size
    p.add_layout(whisker)
############### ADD ERROR BARS ###################

# Leave 10% headroom above the largest measurement among the displayed tissues.
# The y-range starts at 1, so whiskers carrying the -1 "no error bar" sentinel
# fall outside the visible range.
y_max = np.ceil(df_tissue_biod.loc[df_tissue_biod["tissue"].isin(x_range), "vg/ug dna"].max()*1.10)
p.y_range = bokeh.models.Range1d(1, y_max)

bokeh.io.show(p)

In [7]:
############### BOKEH FIGURE SETTINGS ###################
# Tissue shown in this panel on the categorical x-axis.
x_range = ["Lung"]
# Bar fill keyed on the "variant" column: AAV9 -> gray, CAP-Mac -> white.
variant_color = bokeh.transform.factor_cmap("variant", palette=["gray", "white"], factors=["AAV9", "CAP-Mac"])

# Animals, marker shapes and marker colors are matched by list position.
animal_id = ["C002", "C016", "C017", "C010"]
markers = ["circle", "triangle", "square", "diamond"] 
# Drop the first two entries of the six-color Colorblind palette, leaving four colors.
marker_color=list(bokeh.palettes.Colorblind[6][2:])

figure_width = 700
figure_height = 350
bar_width = .35
bar_fill_color="gray"      # NOTE(review): not referenced by the plotting cell that follows
jitter_width = bar_width/2 # horizontal shift applied to each variant's bar and markers
marker_size = 10
bar_line_width = 1         # NOTE(review): not referenced by the plotting cell that follows

error_size=5
error_line_width=.25
marker_line_width=1

# Title typo fixed ("biodistibution" -> "biodistribution").
p = bokeh.plotting.figure(x_range=bokeh.models.FactorRange(*x_range), title="DNA biodistribution in 8-month old green monkey (intravenous delivery)", 
                          height=figure_height, width=figure_width)

# Axis cosmetics: no vertical grid, no minor ticks, slanted black 12pt labels.
p.xgrid.visible=False
p.axis.minor_tick_line_width=0
p.xaxis.major_label_orientation=np.pi/4
p.axis.major_tick_in = 0
p.axis.major_label_text_color="#000000"
p.axis.major_label_text_font_size="12pt"
p.xaxis.group_text_font_size="12pt"
p.xaxis.group_text_color="#000000"
p.axis.major_label_standoff=5
# Label the y-axis with the plotted quantity (the "vg/ug dna" column).
p.yaxis.axis_label = "vg/µg DNA"
# Place the legend outside the plot area, on the right.
p.add_layout(bokeh.models.Legend(), "right")
############### BOKEH FIGURE SETTINGS ###################

In [8]:
# Add bar and scatter plots
# Each variant is shifted left/right of the tissue's tick so the two bars of a
# tissue sit side by side.
variant_offset = {"AAV9": -jitter_width, "CAP-Mac": jitter_width}

# All measurement rows of each variant (one marker per row).
variant_samples = {name: df_tissue_biod.loc[df_tissue_biod["variant"] == name]
                   for name in variant_offset}
# mean_vg / upper / lower are identical on every row of a (tissue, variant)
# group, so one row per tissue is enough to draw the bars and the whiskers.
variant_summary = {name: rows.drop_duplicates(subset="tissue")
                   for name, rows in variant_samples.items()}

# Bars first so that the markers are rendered on top of them.
for name, offset in variant_offset.items():
    p.vbar(source=variant_summary[name], x=bokeh.transform.dodge("tissue", offset, range=p.x_range),
           top="mean_vg", bottom=1e-6, width=bar_width, fill_color=variant_color, line_color="black", legend_label=name)

# Individual measurements: marker shape and color identify the animal.
for name, offset in variant_offset.items():
    p.scatter(x=bokeh.transform.dodge("cats", offset, range=p.x_range), y="vg/ug dna",
              source=variant_samples[name], marker=bokeh.transform.factor_mark("animal id", markers, animal_id), size=marker_size,
              color=bokeh.transform.factor_cmap("animal id", marker_color, animal_id), line_color="black", line_width=marker_line_width, legend_group="animal id")

############### ADD ERROR BARS ###################
source_error_AAV9 = bokeh.models.ColumnDataSource(data=variant_summary["AAV9"])
# Bug fix: this source used to filter on "CAP-C1", a variant name that is not
# used anywhere else in this notebook, so it was empty and the CAP-Mac error
# bars were never drawn.
source_error_C1 = bokeh.models.ColumnDataSource(data=variant_summary["CAP-Mac"])

w_AAV9 = bokeh.models.Whisker(source=source_error_AAV9, base=bokeh.transform.dodge("tissue", variant_offset["AAV9"], range=p.x_range),
                              upper="upper", lower="lower", level="overlay", line_width=error_line_width)
w_C1 = bokeh.models.Whisker(source=source_error_C1, base=bokeh.transform.dodge("tissue", variant_offset["CAP-Mac"], range=p.x_range),
                            upper="upper", lower="lower", level="overlay", line_width=error_line_width)

# Style both whisker caps identically, then attach the whiskers to the figure.
for whisker in (w_AAV9, w_C1):
    for head in (whisker.upper_head, whisker.lower_head):
        head.line_width = error_line_width
        head.size = error_size
    p.add_layout(whisker)
############### ADD ERROR BARS ###################

# Leave 10% headroom above the largest measurement among the displayed tissues.
# The y-range starts at 1, so whiskers carrying the -1 "no error bar" sentinel
# fall outside the visible range.
y_max = np.ceil(df_tissue_biod.loc[df_tissue_biod["tissue"].isin(x_range), "vg/ug dna"].max()*1.10)
p.y_range = bokeh.models.Range1d(1, y_max)

bokeh.io.show(p)

## Statistics for DNA tissue biodistribution

In [9]:
# Compare AAV9 against CAP-Mac within each tissue using a two-sample t-test
# that does not assume equal variances (equal_var=False).
tissue = []
p_val = []

for tissue_name, group in df_tissue_biod.groupby("tissue"):
    a = group.loc[group["variant"] == "AAV9", "vg/ug dna"].to_numpy()
    b = group.loc[group["variant"] == "CAP-Mac", "vg/ug dna"].to_numpy()

    # Only test tissues with more than two measurements for both variants.
    if len(a) > 2 and len(b) > 2:
        tissue.append(tissue_name)
        p_val.append(scipy.stats.ttest_ind(a, b, equal_var=False).pvalue)

df_biod_stats = pd.DataFrame({"tissue": tissue, "p value": p_val})

# Index the p-values by tissue so each lookup yields a scalar. Formatting a
# one-element Series with "%e" relies on implicit float(Series) conversion,
# which pandas has deprecated.
p_by_tissue = df_biod_stats.set_index("tissue")["p value"]

# (label printed in the report, tissue name used in the data)
reported_tissues = [
    ("Brain", "Brain"),
    ("Spinal cord", "Spinal cord"),
    ("Dorsal root ganglia", "DRG"),
    ("Kidney", "Kidney"),
]
for label, key in reported_tissues:
    # Raises KeyError if the tissue was skipped above for having too few measurements.
    print("%s: AAV9 vs. AAV.CAP-Mac, P = %.2e." % (label, p_by_tissue[key]))

Brain: AAV9 vs. AAV.CAP-Mac, P = 2.29e-05.
Spinal cord: AAV9 vs. AAV.CAP-Mac, P = 2.46e-01.
Dorsal root ganglia: AAV9 vs. AAV.CAP-Mac, P = 5.56e-01.
Kidney: AAV9 vs. AAV.CAP-Mac, P = 2.30e-02.
