# scRNAseq

In [None]:
ENV["Code"] = "../../Code"
for folder in readdir(ENV["Code"]); push!(LOAD_PATH, normpath(ENV["Code"], folder));end

using Seaborn, PyPlot, PyCall, DataFrames
using NoLongerProblems_Pandas, Pandas
using MultipleTesting, HypothesisTests 
using SingleCellExperiment
using CSV, ProgressMeter,PrettyPlotting, ScikitLearn
@sk_import linear_model: LogisticRegression

include("../Databases/Cuartero2018.jl")
include("../Databases/Bhatt2012.jl")
include("../Databases/MF_SingleCell.jl")

include("Load_scRNAseqData.jl")

sce = SingleCellExperiment.fit_mu_std_alpha(sce, splitdataby = :Sample, assay = "CPM")
sce = SingleCellExperiment.fit_single_cell_logistic_regression(sce, assay = "CPM")
bhattgenes = Bhatt2012.inducible_genes_figure3()[!,:GeneSymbol];
sce = SingleCellExperiment.Shalek2014_module_score(collect(bhattgenes), sce,fitparameter = "mu", modulescore_name = :BhattGenesScore, untreated_pattern = "UT",comparedtothissample = "WT", assay = "CPM")

In [7]:
sce = SingleCellExperiment.fit_mu_std_alpha(sce, splitdataby = :Sample, assay = "lnCPMplus1")

bhattgenes= DataFrames.DataFrame(
    "GeneID" => Bhatt2012.inducible_genes_figure3()[!,:GeneSymbol], 
    "Class"=>Bhatt2012.inducible_genes_figure3()[!,:Class]
    );
sceBhatt = innerjoin(sce.rowData, bhattgenes,on = :GeneID)

Unnamed: 0_level_0,GeneID,TotalCounts,TotalCells,FractionCells,WT_UT__mu,WT_UT__std,WT_UT__alpha
Unnamed: 0_level_1,String31,Float64,Int64,Float64,Any,Any,Any
1,Ccnd2,279.0,164,0.00889902,7.17715,0.604247,0.0376569
2,Slfn4,117.0,88,0.00477508,7.40551,0.651266,0.0334728
3,Lhx2,80.0,63,0.00341853,7.35961,0.312568,0.0083682
4,Mx1,552.0,260,0.0141082,7.49252,0.680389,0.0753138
5,Fap,75.0,64,0.00347279,7.51507,0.522559,0.0292887
6,Itga5,1022.0,513,0.0278366,7.34853,0.426283,0.217573
7,Icosl,149.0,84,0.00455803,7.31429,0.18186,0.0292887
8,Il12rb1,30.0,26,0.00141082,7.40171,0.326519,0.0209205
9,Mmp14,392.0,182,0.00987574,6.88524,1.12759,0.0083682
10,Ccl3,35750.0,1215,0.0659287,8.14453,0.700594,0.707113


In [8]:
println("ln CPM plus 1 in expressing cells")
println("WT UT vs Rad21KO UT")

t = HypothesisTests.SignedRankTest([ii for ii in sceBhatt[!,"WT_UT__mu"]], [ii for ii in sceBhatt[!,"RAD21_UT__mu"]])
println(t)
p1 = pvalue(t)


println("Fraction Expressing cells")
println("WT UT vs Rad21KO UT")

t = HypothesisTests.SignedRankTest([ii for ii in sceBhatt[!,"WT_UT__alpha"]], [ii for ii in sceBhatt[!,"RAD21_UT__alpha"]])
println(t)
p1a = pvalue(t)

ln CPM plus 1 in expressing cells
WT UT vs Rad21KO UT
Approximate Wilcoxon signed rank test
-------------------------------------
Population details:
    parameter of interest:   Location parameter (pseudomedian)
    value under h_0:         0
    point estimate:          0.170879
    95% confidence interval: (0.1542, 0.1995)

Test summary:
    outcome with 95% confidence: reject h_0
    two-sided p-value:           <1e-37

Details:
    number of observations:      497
    Wilcoxon rank-sum statistic: 97837.0
    rank sums:                   [97837.0, 18566.0]
    adjustment for ties:         0.0
    normal approximation (μ, σ): (39635.5, 3059.53)

Fraction Expressing cells
WT UT vs Rad21KO UT
Approximate Wilcoxon signed rank test
-------------------------------------
Population details:
    parameter of interest:   Location parameter (pseudomedian)
    value under h_0:         0
    point estimate:          0.0155742
    95% confidence interval: (0.0196, 0.0297)

Test summary:
    out

5.046706485254621e-28

In [9]:
println("ln CPM plus 1 in expressing cells")
println("WT 2H vs Rad21KO 2H")

t = HypothesisTests.SignedRankTest([ii for ii in sceBhatt[!,"WT_2H__mu"]], [ii for ii in sceBhatt[!,"RAD21_2H__mu"]])
println(t)
p2 = pvalue(t)

println("Fraction Expressing cells")
println("WT 2H vs Rad21KO 2H")

t = HypothesisTests.SignedRankTest([ii for ii in sceBhatt[!,"WT_2H__alpha"]], [ii for ii in sceBhatt[!,"RAD21_2H__alpha"]])
println(t)
p2a = pvalue(t)




ln CPM plus 1 in expressing cells
WT 2H vs Rad21KO 2H
Approximate Wilcoxon signed rank test
-------------------------------------
Population details:
    parameter of interest:   Location parameter (pseudomedian)
    value under h_0:         0
    point estimate:          0.0330481
    95% confidence interval: (-0.0054, 0.0544)

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.1074

Details:
    number of observations:      497
    Wilcoxon rank-sum statistic: 66770.0
    rank sums:                   [66770.0, 56486.0]
    adjustment for ties:         0.0
    normal approximation (μ, σ): (5142.0, 3193.66)

Fraction Expressing cells
WT 2H vs Rad21KO 2H
Approximate Wilcoxon signed rank test
-------------------------------------
Population details:
    parameter of interest:   Location parameter (pseudomedian)
    value under h_0:         0
    point estimate:          0.0376344
    95% confidence interval: (0.043, 0.0591)

Test summary:

2.2779482647126536e-39

In [10]:
println("ln CPM plus 1 in expressing cells")
println("WT 8H vs Rad21KO 8H")
using HypothesisTests
t = HypothesisTests.SignedRankTest([ii for ii in sceBhatt[!,"WT_8H__mu"]], [ii for ii in sceBhatt[!,"RAD21_8H__mu"]])
println(t)
p3 = pvalue(t)
println("Fraction Expressing cells")

println("WT 8H vs Rad21KO 8H")
using HypothesisTests
t = HypothesisTests.SignedRankTest([ii for ii in sceBhatt[!,"WT_8H__alpha"]], [ii for ii in sceBhatt[!,"RAD21_8H__alpha"]])
println(t)
p3a = pvalue(t)


ln CPM plus 1 in expressing cells
WT 8H vs Rad21KO 8H
Approximate Wilcoxon signed rank test
-------------------------------------
Population details:
    parameter of interest:   Location parameter (pseudomedian)
    value under h_0:         0
    point estimate:          0.34752
    95% confidence interval: (0.3302, 0.384)

Test summary:
    outcome with 95% confidence: reject h_0
    two-sided p-value:           <1e-66

Details:
    number of observations:      497
    Wilcoxon rank-sum statistic: 116375.0
    rank sums:                   [116375.0, 6385.0]
    adjustment for ties:         0.0
    normal approximation (μ, σ): (54995.0, 3184.01)

Fraction Expressing cells
WT 8H vs Rad21KO 8H
Approximate Wilcoxon signed rank test
-------------------------------------
Population details:
    parameter of interest:   Location parameter (pseudomedian)
    value under h_0:         0
    point estimate:          0.0384241
    95% confidence interval: (0.049, 0.0738)

Test summary:
    outco

3.842026142569548e-27

In [11]:
figure(figsize = (7, 6))


subplot(1,2,2)
y = "ln(CPM + 1) \n cells with transcripts detected"
pd = Pandas.DataFrame(sort!(sceBhatt, :Class))

pd = Pandas.melt(pd, value_vars  = ["WT_UT__mu", "RAD21_UT__mu" ,"WT_2H__mu", "RAD21_2H__mu", "WT_8H__mu", "RAD21_8H__mu"], value_name = y, id_vars = ["GeneID","Class"] )
pd["Time after LPS (h)"] = [replace(replace(replace(split(ii, "_")[2], "UT" => 0), "2H" => 2), "8H" => 8) for ii in pd["variable"]]
pd["Genotype"] = [replace(split(ii, "_")[1], "RAD21" =>"Rad21KO") for ii in pd["variable"]]

Seaborn.boxplot(data = pd, y = y,x = "Time after LPS (h)", hue = "Genotype",showfliers = false,  palette = ["darkgray", "red"])
pretty_axes2()

hs = [8.2, 8.7, 9.0]
ps = adjust([p1,p2,p3], Bonferroni())

for ii in 1:length(hs)
plot([-0.25+ii-1, 0.25+ii-1], [hs[ii], hs[ii]], lw = 0.75, c= "black")
annotate("P = "*string(round(ps[ii], sigdigits = 3)),xy = [ii-1, hs[ii]+0.1], va = "center", ha = "center")
end
squareplot()

ax = gca()
for line in ax.get_lines()
    line.set_color("black")
end
subplot(1,2,1)



y = "Fraction of cells \n with transcripts detected"
pd = Pandas.DataFrame(sort!(sceBhatt, :Class))

pd = Pandas.melt(pd, value_vars  = ["WT_UT__alpha", "RAD21_UT__alpha" ,"WT_2H__alpha", "RAD21_2H__alpha", "WT_8H__alpha", "RAD21_8H__alpha"], value_name = y, id_vars = ["GeneID","Class"] )
pd["Time after LPS (h)"] = [replace(replace(replace(split(ii, "_")[2], "UT" => 0), "2H" => 2), "8H" => 8) for ii in pd["variable"]]
pd["Genotype"] = [replace(split(ii, "_")[1], "RAD21" =>"Rad21KO") for ii in pd["variable"]]

Seaborn.boxplot(data = pd, y = y,x = "Time after LPS (h)", hue = "Genotype",showfliers = false, palette = ["darkgray", "red"])
pretty_axes2()

hs = [0.6, 1.05, 1.05]
ps = adjust([p1a,p2a,p3a], Bonferroni())

for ii in 1:length(hs)
plot([-0.25+ii-1, 0.25+ii-1], [hs[ii], hs[ii]], lw = 0.75, c= "black")
annotate("P = "*string(round(ps[ii], sigdigits = 2)),xy = [ii-1, hs[ii]+0.05], va = "center", ha = "center")
end

legend_removal()

ylim(-0.05, 1.1)
squareplot()
ax = gca()
for line in ax.get_lines()
    line.set_color("black")
end
plt.tight_layout()
savefigwithtext("figures/scRNAseq_mu_alpha_bhattgenes_WTRad21KO.svg")



LoadError: UndefVarError: plot not defined

In [12]:
col_colors_dict = Dict(
    "WT_UT" => "#f0f0f0",
    "WT_2H" => "#bdbdbd",
    "WT_8H" => "#636363",
    "RAD21_UT" => "#fee0d2",
    "RAD21_2H" => "#fc9272",
    "RAD21_8H" => "#de2d26",
    "RAD21" => "red",
    "WT" => "darkgrey",
    0 => "#deebf7",
    2 => "#9ecae1",
    8 => "#3182bd",
    ) 


sce.colData[!,"LPS"] = [replace(replace(replace(split(ii, "_")[2], "UT" => 0), "2H" => 2), "8H" => 8) for ii in sce.colData[!,"Sample"]]


sce = sort_cells!(sce, cols = [ :LPS, :Genotype,:BhattGenesScore], rev = [false, true, false])
subsce = select_these_genes(bhattgenes[!,:GeneID], sce)

g = Seaborn.clustermap(subsce.assays["lnCPMplus1"], figsize = (7, 4),yticklabels=false,xticklabels=false, col_cluster = false, col_colors = [col_colors_dict[ii] for ii in subsce.colData[!,:Genotype]], cmap="coolwarm")

ax = g.ax_heatmap
ax.set_ylabel("LPS inducible genes")
ax.set_xlabel("Cells")
ax.tick_params(axis="both", which="both", length=0)




savefigwithtext("figures/scRNAseq_heatmap_bhattgenes_WTRad21KO.pdf")




LoadError: PyError ($(Expr(:escape, :(ccall(#= /Users/irr15/.julia/packages/PyCall/BD546/src/pyeval.jl:38 =# @pysym(:PyEval_EvalCode), PyPtr, (PyPtr, PyPtr, PyPtr), o, globals, locals))))) <class 'FileNotFoundError'>
FileNotFoundError(2, 'No such file or directory')
  File "/Users/irr15/.julia/packages/PyCall/BD546/src/pyeval.jl", line 8, in <module>
    get!(_namespaces, m) do
  File "/Users/irr15/.julia/conda/3/lib/python3.8/site-packages/matplotlib/pyplot.py", line 859, in savefig
    res = fig.savefig(*args, **kwargs)
  File "/Users/irr15/.julia/conda/3/lib/python3.8/site-packages/matplotlib/figure.py", line 2311, in savefig
    self.canvas.print_figure(fname, **kwargs)
  File "/Users/irr15/.julia/conda/3/lib/python3.8/site-packages/matplotlib/backend_bases.py", line 2210, in print_figure
    result = print_method(
  File "/Users/irr15/.julia/conda/3/lib/python3.8/site-packages/matplotlib/backend_bases.py", line 1639, in wrapper
    return func(*args, **kwargs)
  File "/Users/irr15/.julia/conda/3/lib/python3.8/site-packages/matplotlib/backends/backend_pdf.py", line 2586, in print_pdf
    file = PdfFile(filename, metadata=metadata)
  File "/Users/irr15/.julia/conda/3/lib/python3.8/site-packages/matplotlib/backends/backend_pdf.py", line 562, in __init__
    fh, opened = cbook.to_filehandle(filename, "wb", return_opened=True)
  File "/Users/irr15/.julia/conda/3/lib/python3.8/site-packages/matplotlib/cbook/__init__.py", line 408, in to_filehandle
    fh = open(fname, flag, encoding=encoding)
