# InducibleGenes down in Rad21KO and the enhancers in the same TAD


SUBSETS:
- Gene subset = 723 genes inducible in WT (RNAseq WT 2hLPS vs WT UT) that are downregulated in the Rad21KO after 2h LPS
    - 341 associate to an inducible enhancer in the same TAD.
- Enhancer subset = 1,203 intergenic otsuni enhancers upregulated by LPS (GROseq WT 1hLPS vs WT UT), 
    - 1087 of them are in the same TAD as an inducible gene.


In [1]:
# Point ENV["Code"] at the shared code directory (relative to this notebook) and
# append every entry found there to LOAD_PATH, presumably so the project-local
# packages loaded with `using` below can be resolved.
ENV["Code"] = "../../Code"
for folder in readdir(ENV["Code"])
    push!(LOAD_PATH, normpath(ENV["Code"], folder))
end

using DataFrames,CSV,Seaborn,Statistics
using NoLongerProblems, PrettyPlotting, NoLongerProblems_Pandas
using PyCall, RCall
import Pandas 

include(ENV["Code"]*"/../Code_Paper/Databases/Cuartero2018.jl")
include(ENV["Code"]*"/../Code_Paper/Code/GenomicTracks.jl")
include("Functions.jl")

┌ Info: Installing pyqt package to avoid buggy tkagg backend.
└ @ PyPlot /Users/irene/.julia/packages/PyPlot/XaELc/src/init.jl:118


maketablesforanalysis (generic function with 1 method)

### Generate BED file with WT otsuni intergenic inducible enhancers 

In [2]:
# Otsuni enhancers: attach a "chr<chr>:<start>-<end>" coordinate label to every
# enhancer and export the full set as a BED file.
enhancers = Cuartero2018.otsuni_enhancers()
enhancers[!, :coordinates] = string.(
    "chr", enhancers[!, :chr], ":", enhancers[!, :start], "-", enhancers[!, :end]
)
bed_writer_cols(enhancers, "../../Code_Paper/Databases/Cuartero2018/OtsuniEnhancers.bed", chr = :chr, start_ = :start, end_ = :end)

# GRO-seq differential expression at the enhancers for the "WT1" vs "WT0" comparison
# (WT 1hLPS vs WT UT according to the notebook header).
gro_enh = Cuartero2018.get_DESEq_OTSUNIENH("WT1", "WT0")

# Step 1: keep intergenic enhancers only, and export them.
is_intergenic = gro_enh[!, :Inter_Intragenic] .== "Intergenic"
gro_enh = gro_enh[is_intergenic, :]
bed_writer_cols(gro_enh, "../../Code_Paper/Databases/Cuartero2018/Transcribed_OtsuniEnhancersWT.bed", chr = :chr, start_ = :start, end_ = :end);

# Step 2: keep enhancers with a positive log2 fold change after LPS ...
is_up = gro_enh[!, :log2FoldChange_Enh] .> 0
gro_enh = gro_enh[is_up, :]
# Step 3: ... that are also significant (padj < 0.05), and export the inducible set.
is_significant = gro_enh[!, :padj_Enh] .< 0.05
gro_enh = gro_enh[is_significant, :]
bed_writer_cols(gro_enh, "../../Code_Paper/Databases/Cuartero2018/IntergenicGROseqInducible_OtsuniEnhancersWT.bed", chr = :chr, start_ = :start, end_ = :end);

# "FL1" vs "WT1" comparison (Rad21KO 1hLPS vs WT, as labelled in the printout below),
# restricted to the inducible intergenic enhancers selected above.
gro_enh_wtKO = Cuartero2018.get_DESEq_OTSUNIENH("FL1", "WT1")
test = innerjoin(gro_enh[:, [:coordinates]], gro_enh_wtKO, on = :coordinates)
n = nrow(test)

# Significant enhancers in that comparison, split by the sign of the fold change.
testde = test[test[!, :padj_Enh] .< 0.05, :]
nde = nrow(testde)
testdeup = testde[testde[!, :log2FoldChange_Enh] .> 0, :]
nup = nrow(testdeup)
testdown = testde[testde[!, :log2FoldChange_Enh] .< 0, :]
ndown = nrow(testdown)

# One BED file per subset: all DE, down, up.
bed_writer_cols(testde, "../../Code_Paper/Databases/Cuartero2018/IntergenicGROseqInducible_OtsuniEnhancers_DE_Rad21KO1HLPS.bed", chr = :chr, start_ = :start, end_ = :end)
bed_writer_cols(testdown, "../../Code_Paper/Databases/Cuartero2018/IntergenicGROseqInducible_OtsuniEnhancers_down_Rad21KO1HLPS.bed", chr = :chr, start_ = :start, end_ = :end)
bed_writer_cols(testdeup, "../../Code_Paper/Databases/Cuartero2018/IntergenicGROseqInducible_OtsuniEnhancers_up_Rad21KO1HLPS.bed", chr = :chr, start_ = :start, end_ = :end)

# Summary of the subset sizes.
println("""
    enhancers considered (Rad21KO 1hLPS vs WT): $n
\t    n DE = $nde
\t    n up = $nup
\t    n down = $ndown
    """)

    enhancers considered (Rad21KO 1hLPS vs WT): 1112
	    n DE = 67
	    n up = 3
	    n down = 64
    


### Generate Gene Sets

In [3]:
# Genes significantly lower in "FL2" than in "WT2" (log2FC < 0, padj < 0.05);
# per the notebook header this is Rad21KO vs WT after 2h LPS.
rnawtko = Cuartero2018.Cuartero2018Deseq(sample1 = "FL2", sample2 = "WT2")
ko_down = (rnawtko[!, :log2FoldChange] .< 0) .* (rnawtko[!, :padj] .< 0.05)
rnawtko = rnawtko[ko_down, :]

# "WT2" vs "WTUT" table. It is loaded once and serves two purposes:
#   1. selecting the LPS-inducible genes (log2FC > 0, padj < 0.05) for `geneset`;
#   2. after renaming, acting as the WT gene table for the figures.
# (Previously the same table was fetched twice from Cuartero2018Deseq.)
rnawt = Cuartero2018.Cuartero2018Deseq(sample1 = "WT2", sample2 = "WTUT")
wt_induced = (rnawt[!, :log2FoldChange] .> 0) .* (rnawt[!, :padj] .< 0.05)

# Gene set: inducible in WT AND down in the Rad21KO comparison above.
geneset = intersect(rnawt[wt_induced, :GeneSymbol], rnawtko[:, :GeneSymbol])

# Generate RNAseq tables: suffix the DESeq columns with _Gene so they are
# distinguishable from the enhancer columns (*_Enh).
rename!(rnawt, :log2FoldChange => :log2FoldChange_Gene, :padj => :padj_Gene)

rnako = Cuartero2018.Cuartero2018Deseq(sample1 = "FL2", sample2 = "FLUT")
rename!(rnako, :log2FoldChange => :log2FoldChange_Gene, :padj => :padj_Gene)

# Generate enhancer tables, make sure we are considering the same enhancers.
# NOTE(review): `gro_enhWT` aliases whatever `gro_enh` currently points to. The
# "Make tables for figures" cell rebinds `gro_enh`, so re-running this cell after
# that one would pick up the wrong table; re-run the enhancer cell first.
gro_enhWT = gro_enh
gro_enhRad21KO = Cuartero2018.get_DESEq_OTSUNIENH("FL1", "FL0")
gro_enhRad21KO = innerjoin(gro_enhRad21KO, gro_enhWT[!, [:coordinates]], on = :coordinates);


### Make tables for figures

In [4]:
# Build one gene/enhancer table per genotype with `maketablesforanalysis`
# (defined in Functions.jl). The four arguments are bound to globals first,
# exactly as before, so the same variables remain defined after the cell runs.

# WT: WT gene table + WT inducible enhancer table
gro_gene, gro_enh = rnawt, gro_enhWT
filename, genotype = "GeneRNAseqEnhGROseqlog2FC_WT.csv", "WT"
tbwt = maketablesforanalysis(gro_gene, gro_enh, filename, genotype)

# Rad21KO: KO gene table + KO enhancer table (restricted to the same enhancers)
gro_gene, gro_enh = rnako, gro_enhRad21KO
filename, genotype = "GeneRNAseqEnhGROseqlog2FC_Rad21KO.csv", "Rad21KO"
tbko = maketablesforanalysis(gro_gene, gro_enh, filename, genotype);

LoadError: UndefVarError: ProcessedData_mm9 not defined



### Analysis using the mean Enhlog2FoldChange per gene 

In [5]:
using RCall

# Summary statistic applied to the fold changes of each gene.
f = mean

# ---- WT ----
# For every distinct gene, summarise its gene log2FC and the log2FC of all the
# enhancers paired with it in the table.
tb = tbwt
genes = unique(tb.GeneSymbol)
FC_gene = map(g -> f(tb[tb.GeneSymbol .== g, :Genelog2FoldChange]), genes)
FC_enh = map(g -> f(tb[tb.GeneSymbol .== g, :Enhlog2FoldChange]), genes)

# `x` and `y` are interpolated into the R call below.
x, y = FC_gene, FC_enh

println("WT correlation test (mean Enhlog2FoldChange per gene)")
println(R"""
cor.test($x, $y,)
""")

# One row per gene, tagged with the genotype.
wttb = DataFrames.DataFrame(
    GeneSymbol = genes,
    Genelog2FoldChange = FC_gene,
    Enhlog2FoldChange = FC_enh,
    Genotype = fill("WT", length(FC_enh)),
);

# ---- Rad21KO ----
# Same per-gene summary on the knock-out table.
tb = tbko
genes = unique(tb.GeneSymbol)
FC_gene = map(g -> f(tb[tb.GeneSymbol .== g, :Genelog2FoldChange]), genes)
FC_enh = map(g -> f(tb[tb.GeneSymbol .== g, :Enhlog2FoldChange]), genes)

x, y = FC_gene, FC_enh

println("Rad21KO correlation test (mean Enhlog2FoldChange per gene)")
println(R"""
cor.test($x, $y,)
""")

kotb = DataFrames.DataFrame(
    GeneSymbol = genes,
    Genelog2FoldChange = FC_gene,
    Enhlog2FoldChange = FC_enh,
    Genotype = fill("Rad21KO", length(FC_enh)),
);




LoadError: UndefVarError: tbwt not defined

In [6]:
# Stack the per-gene WT and Rad21KO summaries into one long table, hand it to R,
# fit the linear model with `lm`, and print the `aov` summary of that fit.
t = [wttb; kotb]
println("2-way ANOVA (Genelog2FoldChange~Enhlog2FoldChange*Genotype+GeneSymbol)")
anova_summary = R"""
t = $t
aov.s = lm(Genelog2FoldChange~Enhlog2FoldChange*Genotype+GeneSymbol, data = t)
summary(aov(aov.s))
"""
println(anova_summary)

LoadError: UndefVarError: wttb not defined

In [7]:
import Pandas
using NoLongerProblems_Pandas

# Convert the combined table to a Pandas data frame for Seaborn, then draw one
# regression line per genotype (gray / red) over small scatter points.
pd = Pandas.DataFrame(t)
Seaborn.lmplot(;
    data = pd,
    x = "Enhlog2FoldChange",
    y = "Genelog2FoldChange",
    hue = "Genotype",
    palette = ["gray", "red"],
    scatter_kws = Dict("s" => 5),
)
# Same 0-6 window on both axes, then the project plotting helpers
# (presumably from PrettyPlotting) to square the axes and save the figure.
xlim(0, 6)
ylim(0, 6)
squareplot()
savefigwithtext("GeneFCvsEnhFC_GenevsMeanTADEnh.svg")

LoadError: UndefVarError: t not defined

### Analysis using all Enhlog2FoldChange per gene 
(unique gene-enh pairs, but their values duplicate if there are several genes or several enhancers in a TAD)

In [8]:
using RCall

# ---- WT ----
# Drop rows whose gene log2FC is NaN, then correlate gene vs enhancer log2FC
# over every remaining gene-enhancer row.
tb = tbwt[map(!isnan, tbwt[!, :Genelog2FoldChange]), :]
x = tb.Genelog2FoldChange
y = tb.Enhlog2FoldChange

println("WT correlation test with duplicated values but unique pairs")
println(R"""
cor.test($x, $y,)
""")

# ---- Rad21KO ----
# Same filtering and test on the knock-out table.
tb = tbko[map(!isnan, tbko[!, :Genelog2FoldChange]), :]
x = tb.Genelog2FoldChange
y = tb.Enhlog2FoldChange

println("Rad21KO correlation test with duplicated values but unique pairs")
println(R"""
cor.test($x, $y,)
""")

LoadError: UndefVarError: tbwt not defined

In [9]:
# Stack the full WT and Rad21KO gene-enhancer tables (the unfiltered `tbwt` /
# `tbko`), hand them to R, fit the linear model with `lm`, and print the `aov`
# summary of that fit.
t = [tbwt; tbko]

println("2-way ANOVA (Genelog2FoldChange~Enhlog2FoldChange*Genotype+GeneSymbol)")
anova_summary = R"""
t = $t
aov.s = lm(Genelog2FoldChange~Enhlog2FoldChange*Genotype+GeneSymbol, data = t)
summary(aov(aov.s))
"""
println(anova_summary)

LoadError: UndefVarError: tbwt not defined

In [10]:
import Pandas
using NoLongerProblems_Pandas


# Convert the combined table to a Pandas data frame for Seaborn, then draw one
# regression line per genotype (gray / red) over small scatter points.
pd = Pandas.DataFrame(t)
Seaborn.lmplot(;
    data = pd,
    x = "Enhlog2FoldChange",
    y = "Genelog2FoldChange",
    hue = "Genotype",
    palette = ["gray", "red"],
    scatter_kws = Dict("s" => 5),
)
# Same 0-6 window on both axes; the figure is squared but not saved in this cell.
xlim(0, 6)
ylim(0, 6)
squareplot()

LoadError: UndefVarError: t not defined

In [11]:
# Print the packages of the active environment, to record the versions used.
import Pkg
Pkg.status()

[32m[1m      Status[22m[39m `~/.julia/environments/v1.6/Project.toml`
 [90m [336ed68f] [39m[37mCSV v0.10.2[39m
 [90m [a93c6f00] [39m[37mDataFrames v1.3.2[39m
 [90m [31c24e10] [39m[37mDistributions v0.25.48[39m
 [90m [c2308a5c] [39m[37mFASTX v1.2.0[39m
 [90m [53c48c17] [39m[37mFixedPointNumbers v0.8.4[39m
 [90m [09f84164] [39m[37mHypothesisTests v0.10.6[39m
 [90m [7073ff75] [39m[37mIJulia v1.23.2[39m
 [90m [6a3955dd] [39m[37mImageFiltering v0.7.1[39m
 [90m [6218d12a] [39m[37mImageMagick v1.2.2[39m
 [90m [86fae568] [39m[37mImageView v0.10.15[39m
 [90m [916415d5] [39m[37mImages v0.25.1[39m
 [90m [96684042] [39m[37mInformationMeasures v0.3.1[39m
 [90m [4138dd39] [39m[37mJLD v0.13.1[39m
 [90m [093fc24a] [39m[37mLightGraphs v1.3.5[39m
 [90m [10e44e05] [39m[37mMATLAB v0.8.2[39m
 [90m [f8716d33] [39m[37mMultipleTesting v0.5.0[39m
 [90m [eadc2687] [39m[37mPandas v1.5.3[39m
 [90m [91a5bcdd] [39m[37mPlots v1.25.8[39m
 [9