# MeanCounts in my smFISH data validation set vs RNAseq FPKM

- Cells: BMD Macrophages
- Genotypes: WT, Rad21KO
- Timepoints: 0h, 2h, 8h (LPS, 10 ng/ml)
- Replicates: 3
- Genes: 
 - Set1: Actb, Il12b, Stat1
 - Set2: Tnf, Cd40, Atf3

In [None]:
# Point ENV["Code"] at the shared code directory, append each of its entries to
# LOAD_PATH, then include the smFISH validation database script.
ENV["Code"] = "../../Code"
for entry in readdir(ENV["Code"])
    push!(LOAD_PATH, string(ENV["Code"], "/", entry))
end
include("../Databases/smFISH_validation.jl")

using DataFrames
using Seaborn
using NoLongerProblems_Pandas
using NoLongerProblems
using CSV
using RCall
import Pandas
using PrettyPlotting
using NoLongerProblems_Pandas
using PyPlot
import Pandas, PyCall
using Statistics

# Mean smFISH count per sample and replicate.
# The two datasets are loaded through the keys "actb" and "atf3" (presumably one per
# gene set listed in the header — confirm against smFISH_validation.smFISHdata),
# summarised with Statistics.mean into a "MeanCounts" column, merged on their common
# columns and written to disk.
apply_func_per_sample_replicate = smFISH_validation.apply_func_per_sample_replicate
tb_set1, tb_set2 = map(["actb", "atf3"]) do dataset
    apply_func_per_sample_replicate(
        smFISH_validation.smFISHdata(dataset), Statistics.mean, "MeanCounts"
    )
end
tb = join_in_all_common_columns(tb_set1, tb_set2)
CSV.write("smFISH_MEAN.csv", tb)


# Add RNAseq FPKM to table
# Collapses the per-replicate smFISH means to one value per sample/gene pair and
# attaches the matching mean FPKM looked up in the Cuartero2018 table.
include("../Databases/Cuartero2018.jl")

fpkm = Cuartero2018.get_mean_FPKM();
tab = CSV.read("smFISH_MEAN.csv", DataFrames.DataFrame)
tab[!, :Sample_Gene] = tab[!, :Sample] .* "__" .* tab[!, :Gene]

# One row per sample/gene key; MeanCounts is averaged over all rows sharing the key.
meantab = DataFrames.DataFrame(Sample_Gene = unique(tab[!, :Sample_Gene]))
meantab[!, :MeanCounts] = [
    mean(tab[tab[!, :Sample_Gene] .== key, :MeanCounts]) for key in meantab[!, :Sample_Gene]
]
meantab[!, :Sample] = [split(key, "__")[1] for key in meantab[!, :Sample_Gene]]
meantab[!, :Gene] = [split(key, "__")[2] for key in meantab[!, :Sample_Gene]]

# The sample label is used below as the FPKM column name, so columns prefixed "FL"
# are renamed to the "Rad21KO" prefix. `count = 1` restricts the replacement to the
# prefix that `startswith` just checked, leaving any later "FL" in the name alone.
for name in names(fpkm)
    oldname = string(name)
    if startswith(oldname, "FL")
        newname = replace(oldname, "FL" => "Rad21KO"; count = 1)
        rename!(fpkm, Symbol(oldname) => Symbol(newname))
    end
end

# Look up the FPKM of each gene in the column named after its sample. `map` infers a
# concrete element type for the result (the original accumulated into an untyped `[]`).
fpkms = map(meantab[!, :Sample], meantab[!, :Gene]) do sample, gene
    hits = fpkm[fpkm[!, :GeneSymbol] .== gene, Symbol(sample)]
    # Fail with the offending gene/sample instead of a bare BoundsError from `[1]`.
    isempty(hits) && error("No FPKM entry for gene $(gene) in column $(sample)")
    first(hits)
end

meantab[!, :MeanFPKM] = fpkms
meantab[!, :log2MeanCounts] = log2.(meantab[!, :MeanCounts])
meantab[!, :log2FPKMplus1] = log2.(meantab[!, :MeanFPKM] .+ 1)
# NOTE: this replaces the per-replicate table written earlier under the same file name;
# from here on "smFISH_MEAN.csv" holds the aggregated table built above.
CSV.write("smFISH_MEAN.csv", meantab)

# Plot Data

# Plots log2MeanCounts against log2FPKMplus1 from `meantab` and saves the figure as SVG.
# The table is converted to a pandas DataFrame so it can be interpolated ($pd) into the
# Python snippet below.
pd = Pandas.DataFrame(meantab)
figure(figsize = (3, 3))
# regplot draws the regression in grey over all rows; scatterplot then overlays the
# points coloured by the Gene column.
py"""
import seaborn as sns
sns.regplot(data = $pd, x = "log2FPKMplus1", y = "log2MeanCounts", color = "grey")
sns.scatterplot(data = $pd, x = "log2FPKMplus1", y = "log2MeanCounts", hue = "Gene", s = 50, linewidth = 0)
"""
# pretty_axes2 and legend_out_of_plot are not defined in this notebook; presumably they
# come from PrettyPlotting, like savefigwithtext — TODO confirm.
pretty_axes2()
legend_out_of_plot()
PrettyPlotting.savefigwithtext("log2FPKM_log2MeanCounts.svg")

### Correlation test (called from R)

In [None]:
# Correlation test between log2 mean smFISH counts and log2(FPKM + 1), run in R on the
# Julia table `meantab`. The FPKM column is referenced by its full name,
# `log2FPKMplus1`: the shorter `tb$log2FPKM` only resolved through R's partial
# matching of `$` on data frames and would silently return NULL on a tibble or if
# another column sharing that prefix were added.
R"""
tb <- $meantab
cor.test(tb$log2MeanCounts, tb$log2FPKMplus1)
"""

### Correlation between replicates

In [None]:
# Build one table per gene set with a column per replicate (Rep1, Rep2, Rep3) of the
# per-replicate MeanCounts, and write them to CSV for the correlation-matrix plots.
new_df = DataFrames.DataFrame()  # kept from the original cell; not used in this cell

# The per-replicate means are taken from `tb` (built in the first code cell) rather
# than from "smFISH_MEAN.csv": that file is overwritten in the first cell with the
# replicate-averaged table, which has no Rep column, so filtering the file on
# tab[!, :Rep] fails. The table is round-tripped through an in-memory CSV so that column
# types are parsed exactly as they would be when reading the table from disk.
buf = IOBuffer()
CSV.write(buf, tb)
tab = CSV.read(take!(buf), DataFrame)
tab[!, :Sample_Gene] = tab[!, :Sample] .* "__" .* tab[!, :Gene]

# One two-column table per replicate, with MeanCounts renamed to Rep1 / Rep2 / Rep3.
rep1, rep2, rep3 = map(1:3) do r
    rename!(
        tab[tab[!, :Rep] .== r, [:Sample_Gene, :MeanCounts]],
        :MeanCounts => Symbol("Rep", r),
    )
end

# Inner joins keep only the sample/gene pairs measured in all three replicates.
reps = innerjoin(innerjoin(rep1, rep2, on = :Sample_Gene), rep3, on = :Sample_Gene)

reps[!, :Gene] = [split(key, "__")[2] for key in reps[!, :Sample_Gene]]

set1 = ["Actb", "Il12b", "Stat1"]

# Genes in `set1` go to the first table; every other gene goes to the second.
inset1 = in.(reps[!, :Gene], Ref(set1))
exps1 = reps[inset1, :]
exps2 = reps[.!inset1, :]

CSV.write("cormatset1.csv", exps1[!, ["Rep1", "Rep2", "Rep3"]])
CSV.write("cormatset2.csv", exps2[!, ["Rep1", "Rep2", "Rep3"]])


In [None]:
# Correlation matrix between replicates for gene set 1, drawn with GGally::ggcorr and
# saved as SVG. The plot object is passed to ggsave explicitly so the saved figure
# does not depend on ggplot2's implicit "last plot" state.
R"""
library(devtools)
library(ggplot2)
library(GGally)

set1<-read.csv("cormatset1.csv")

g<- ggcorr(data = set1,label= 1)

ggsave("cormatset1.svg", plot = g, width =2.5, height = 2.5)
g
"""

In [None]:
# Correlation matrix between replicates for gene set 2, drawn with GGally::ggcorr and
# saved as SVG. The cell loads the libraries it uses so it can run on its own, stores
# the data under `set2` (the original reused the name `set1` for the set-2 file), and
# passes the plot object to ggsave explicitly instead of relying on the implicit
# "last plot".
R"""
library(ggplot2)
library(GGally)

set2<-read.csv("cormatset2.csv")

g<- ggcorr(data = set2,label= 1)

ggsave("cormatset2.svg", plot = g, width =2.5, height = 2.5)
g
"""

In [None]:
# Print the status of the active package environment for the record.
import Pkg
Pkg.status()