In [1]:
import numpy as np
import scipy
import pandas as pd 
import bokeh.io
import bokeh.plotting
import scikit_posthocs as posthoc

# Direct Bokeh output to the notebook so bokeh.io.show() renders figures inline.
bokeh.io.output_notebook()

# Extended Data Fig. 6: CAP-Mac tropism in adult common marmoset compared to AAV9.

In [2]:
# Load the raw measurements for Extended Data Fig. 6 from the project repository.
df_marm = pd.read_csv("https://github.com/GradinaruLab/CAP-Mac/raw/main/raw-data/extended-data-fig6.csv")

# Calculate summary statistics (mean, sample std, count) per tissue/variant group
df_marm["points"] = df_marm["copies/microgram"]
df_plot = df_marm.groupby(["tissue", "variant"])[["points"]].agg(mean=("points", "mean"),
                                                                        std=("points", "std"),
                                                                        n=("points", "count")).reset_index()
# Standard error of the mean: std / sqrt(n)
df_plot["error"] = df_plot["std"] / np.sqrt(df_plot["n"])
# Attach the group statistics to every raw measurement (one row per data point)
df_plot = pd.merge(df_marm[["tissue", "variant", "points"]], df_plot, on=["tissue", "variant"])

######## Make some changes for plotting neatness ############
# Note: we only calculate error bars on samples that have >2 measurements.
# All other groups get the sentinel value -1 for both error-bar ends.
has_error_bar = df_plot["n"] > 2
df_plot["upper"] = np.where(has_error_bar, df_plot["mean"] + df_plot["error"], -1)
df_plot["lower"] = np.where(has_error_bar, df_plot["mean"] - df_plot["error"], -1)

# Fixed seed so the horizontal jitter (and therefore the rendered figure) is
# identical on every Restart & Run All.
rng = np.random.default_rng(0)
df_plot["jitter"] = rng.normal(0, 0.05, len(df_plot))

# (category, offset) tuples: the categorical coordinate with a per-point offset
df_plot["cats"] = list(zip(df_plot["variant"], df_plot["jitter"]))
######## Make some changes for plotting neatness ############

df_plot.head()

Unnamed: 0,tissue,variant,points,mean,std,n,error,upper,lower,jitter,cats
0,brain,AAV9,129982500.0,119892300.0,66387350.0,11,20016540.0,139908800.0,99875760.0,0.083891,"(AAV9, 0.08389086106702538)"
1,brain,AAV9,102710400.0,119892300.0,66387350.0,11,20016540.0,139908800.0,99875760.0,-0.015181,"(AAV9, -0.015181061307259467)"
2,brain,AAV9,103318200.0,119892300.0,66387350.0,11,20016540.0,139908800.0,99875760.0,0.187615,"(AAV9, 0.18761494441271737)"
3,brain,AAV9,101978600.0,119892300.0,66387350.0,11,20016540.0,139908800.0,99875760.0,0.02171,"(AAV9, 0.021710002258574876)"
4,brain,AAV9,124415100.0,119892300.0,66387350.0,11,20016540.0,139908800.0,99875760.0,-0.036149,"(AAV9, -0.03614895623519607)"


## DNA quantification in adult marmoset brain (intravenous delivery)

In [3]:
############### BOKEH FIGURE SETTINGS ###################
# Categories on the x-axis, in display order.
x_range = ["AAV9", "CAP-Mac"]

# Bar fill keyed on the "variant" column: AAV9 -> gray, CAP-Mac -> white.
variant_color = bokeh.transform.factor_cmap(
    "variant",
    palette=["gray", "white"],
    factors=["AAV9", "CAP-Mac"],
)

# Marker presets (not referenced by the plotting cells visible in this notebook).
markers = ["circle_x", "plus"]
marker_color = ["white", "black"]

# Canvas size in pixels.
figure_width, figure_height = 500, 300

# Bar width (in category units) and horizontal dodge applied to each series.
width, dodge = .75, 0

# Glyph sizing for the raw data points and the error bars.
marker_size, error_size, error_line_width = 5, 10, 1

p = bokeh.plotting.figure(
    x_range=x_range,
    height=figure_height,
    width=figure_width,
    title="DNA quantification in adult marmoset brain (intravenous delivery)",
)

# Styling shared by both axes.
p.axis.minor_tick_line_width = 0
p.axis.major_tick_in = 0
p.axis.major_label_text_color = "#000000"
p.axis.major_label_text_font_size = "12pt"
p.axis.axis_label_text_align = "right"

# x-axis only: rotated category labels, no vertical grid lines.
p.xaxis.major_label_orientation = 45
p.xgrid.visible = False

# Empty legend placed outside the plot area, to the right.
p.add_layout(bokeh.models.Legend(), "right")
############### BOKEH FIGURE SETTINGS ###################

In [4]:
# df_plot carries one row per raw measurement, with the group statistics repeated
# on every row. Feeding it directly to vbar/Whisker draws the same bar and error
# bar once per data point, stacked on top of each other, so reduce it to a single
# summary row per (tissue, variant) for those glyphs.
df_summary = df_plot.drop_duplicates(subset=["tissue", "variant"])[
    ["tissue", "variant", "mean", "std", "n", "error", "upper", "lower"]
]

# (variant, horizontal dodge) for each series, in legend order
variant_offsets = [("AAV9", -dodge), ("CAP-Mac", dodge)]

# Add bar and scatter plots (all bars first so the points are drawn on top of them)
for variant, offset in variant_offsets:
    p.vbar(source=df_summary.loc[df_summary["variant"] == variant],
           x=bokeh.transform.dodge("variant", offset, range=p.x_range),
           top="mean", width=width, fill_color=variant_color, line_color="black",
           legend_label=variant)

# Raw measurements, spread horizontally by the (variant, jitter) tuples in "cats"
for variant, offset in variant_offsets:
    p.scatter(x=bokeh.transform.dodge("cats", offset, range=p.x_range), y="points",
              source=df_plot.loc[df_plot["variant"] == variant], marker="circle", alpha=0.65,
              size=marker_size, color="black", line_width=0)

############### ADD ERROR BARS ###################
for variant, offset in variant_offsets:
    source_error = bokeh.models.ColumnDataSource(data=df_summary.loc[df_summary["variant"] == variant])
    whisker = bokeh.models.Whisker(source=source_error,
                                   base=bokeh.transform.dodge("variant", offset, range=p.x_range),
                                   upper="upper", lower="lower", level="overlay",
                                   line_width=error_line_width)

    # Same cap styling on both ends of the error bar
    for head in (whisker.upper_head, whisker.lower_head):
        head.line_width = error_line_width
        head.size = error_size

    p.add_layout(whisker)
############### ADD ERROR BARS ###################

# y-axis starts at 0 and leaves 10% headroom above the tallest error bar or data point
y_max = max(df_plot["upper"].max(), df_plot["points"].max())
p.y_range = bokeh.models.Range1d(0, y_max * 1.1)
bokeh.io.show(p)

In [5]:
# Welch's t-test (unequal variances) comparing AAV9 and CAP-Mac within each tissue.
# A tissue is only tested when both variants have more than 2 measurements.
tissue = []
p_val = []

for tissue_name, group in df_marm.groupby("tissue"):
    a = group.loc[group["variant"] == "AAV9", "copies/microgram"].to_numpy()
    b = group.loc[group["variant"] == "CAP-Mac", "copies/microgram"].to_numpy()

    if len(a) > 2 and len(b) > 2:
        tissue.append(tissue_name)
        p_val.append(scipy.stats.ttest_ind(a, b, equal_var=False).pvalue)

df_marm_stats = pd.DataFrame({"tissue": tissue, "p value": p_val})

# Extract the brain P value as a scalar before formatting. Passing the Series
# itself to "%.5f" relies on implicit float(Series) conversion, which newer
# pandas versions deprecate and which fails outright when brain was not tested.
brain_p = df_marm_stats.loc[df_marm_stats["tissue"] == "brain", "p value"]
if brain_p.empty:
    print("DNA: AAV9 vs. AAV.CAP-Mac, brain not tested (needs >2 measurements per variant).")
else:
    print("DNA: AAV9 vs. AAV.CAP-Mac, P = %.5f." % brain_p.iloc[0])

DNA: AAV9 vs. AAV.CAP-Mac, P = 0.00981.
