# 1_Measure_rawintensities_Sertad2_exonchannel


In [1]:
include("../../Code/TSSs/src/TSSs.jl")
using ProgressMeter
using DataFrames
using Distances
using CSV
using RCall


# Make every entry of the shared code folder loadable with `using`.
# Entries that are already on LOAD_PATH are skipped, so re-running this cell
# does not keep appending duplicates.
ENV["Code"] = "../../Code"
for folder in readdir(ENV["Code"])
    code_path = normpath(ENV["Code"], folder)
    code_path in LOAD_PATH || push!(LOAD_PATH, code_path)
end

using CSV
using NoLongerProblems_FileHandling
using NoLongerProblems
using NoLongerProblems_Pandas
using DataFrames
using FQfiles
using HypothesisTests
using MultipleTesting
using Seaborn
import Pandas

using PyPlot
using PrettyPlotting


# Return the probe name (text before the first "(") of the single entry in
# `probes` that contains `tag`, or "NA" when zero or several entries match.
function _single_probe_of_type(probes, tag)
    hits = findall(p -> occursin(tag, p), probes)
    length(hits) == 1 || return "NA"
    return String(split(probes[hits[1]], "(")[1])
end

"""
    add_probetypes(e)

Add the columns `:type1`, `:type4` and `:type6` to the table `e` (modified in
place) and return `e`.

Each value of the `Symbol("Probe (type)")` column is read as a comma-separated
list of probe entries; spaces are removed before splitting. For each of the
tags `(1)`, `(4)` and `(6)` the corresponding new column holds the text before
the first `(` of the one entry containing that tag, or `"NA"` when no entry or
more than one entry contains it.

Columns are accessed with `e[!, col]`; the bare `e[col]` form is no longer
accepted by current DataFrames releases.
"""
function add_probetypes(e)
    col = Symbol("Probe (type)")
    type1 = String[]
    type4 = String[]
    type6 = String[]

    for entry in e[!, col]
        probes = split(replace(entry, " " => ""), ",")
        push!(type1, _single_probe_of_type(probes, "(1)"))
        push!(type4, _single_probe_of_type(probes, "(4)"))
        push!(type6, _single_probe_of_type(probes, "(6)"))
    end

    e[!, :type1] = type1
    e[!, :type4] = type4
    e[!, :type6] = type6
    return e
end

# Shared implementation of `CellInfo` and `NuInfo`, which differ only in the
# suffix of the per-object table they load (`objects_suffix`).
function _cp_object_info(dir, exp_df, objects_suffix)
    image_file = get_files_ending_with(dir, "Image.csv")
    object_file = get_files_ending_with(dir, objects_suffix)

    # Exactly one file of each kind must be present, otherwise it is ambiguous
    # which table to load.
    if length(object_file) != 1 || length(image_file) != 1
        error("Image.csv or Cell file not found")
    end
    image_path = normpath(dir, image_file[1])
    object_path = normpath(dir, object_file[1])

    ima = CSV.read(image_path, DataFrames.DataFrame)[:, [:FileName_DAPI, :ImageNumber]]
    dapi_names = ima[!, :FileName_DAPI]

    # Image name: the DAPI file name cut at "_C1", then at "_MAX".
    ima[!, :Image] = [split(split(name, "_C1")[1], "_MAX")[1] for name in dapi_names]
    # Constant column; the original code overwrote an intermediate value with 1s.
    ima[!, :WELL] = fill(1, nrow(ima))
    # Well id: the text between "S 0_" and "_X" in the DAPI file name.
    ima[!, :Well] = [split(split(name, "S 0_")[2], "_X")[1] for name in dapi_names]

    # NOTE: this rewrites `exp_df.Well` in the caller's table (text from " ("
    # onwards is dropped) so that it matches the well ids derived above.
    exp_df[!, :Well] = [split(well, " (")[1] for well in exp_df[!, :Well]]

    images = DataFrames.innerjoin(exp_df, ima, on = :Well, makeunique = true)
    objects = CSV.read(object_path, DataFrames.DataFrame)
    cells = DataFrames.innerjoin(images, objects, on = :ImageNumber)

    cells[!, :Image_Cell] = string.(cells[!, :Image], "__Cell_CP_", cells[!, :ObjectNumber])
    return dropmissing(cells)
end

"""
    CellInfo(dir, exp_df)

Load the single `*Image.csv` and the single `*TrueCells.csv` found in `dir`,
join them with the experiment table `exp_df` (on `:Well`, then on
`:ImageNumber`) and return the joined table with rows containing missing
values dropped. An `:Image_Cell` column of the form
`<Image>__Cell_CP_<ObjectNumber>` is added.

Throws an error unless exactly one file of each kind is found.
Side effect: the `:Well` column of `exp_df` is replaced by its text before `" ("`.
"""
function CellInfo(dir, exp_df)
    return _cp_object_info(dir, exp_df, "TrueCells.csv")
end


"""
    NuInfo(dir, exp_df)

Same as `CellInfo`, but the per-object table is the single `*TrueNuclei.csv`
found in `dir`.

Throws an error unless exactly one file of each kind is found.
Side effect: the `:Well` column of `exp_df` is replaced by its text before `" ("`.
"""
function NuInfo(dir, exp_df)
    return _cp_object_info(dir, exp_df, "TrueNuclei.csv")
end



"""
    FQ_summary_MATURE(dir)

Read the single file in `dir` whose name contains `"summary_MATURE"` as a
tab-delimited table (header on line 5, data from line 6), rename `:CELL` to
`:Cell` and `:FILE` to `:Image`, then pass it through
`FQfiles.fix_image_name` and `column_fusion(df, :Image, :Cell)` and return the
result.

Throws an error when zero or more than one matching file is found.
"""
function FQ_summary_MATURE(dir)
    matches = get_files_containing(dir, "summary_MATURE")
    n = length(matches)
    n == 1 || error("ERROR: $n files found")

    # joinpath instead of plain concatenation, so `dir` works with or without
    # a trailing path separator.
    filename = joinpath(dir, matches[1])

    df = CSV.read(filename, DataFrames.DataFrame; delim = '\t', header = 5, skipto = 6)
    rename!(df, :CELL => :Cell)
    rename!(df, :FILE => :Image)

    df = FQfiles.fix_image_name(df)
    return column_fusion(df, :Image, :Cell)
end

"""
    cells_per_sample(df)

Return a two-column table (`:Sample`, `:N_Cells`) with one row per distinct
value of `df`'s `:Name` column, in order of first appearance, and the number of
rows of `df` carrying that value.
"""
function cells_per_sample(df)
    sample_names = df[!, :Name]
    sams = unique(sample_names)
    # isequal rather than ==, so a missing name is counted instead of making
    # `count` fail on a non-Boolean comparison result.
    ncells = [count(isequal(sam), sample_names) for sam in sams]
    return DataFrames.DataFrame(:Sample => sams, :N_Cells => ncells)
end


"""
    assign_red_green_labels(REP, max_green_for_red, min_red_for_red,
                            max_red_for_green, min_green_for_green, nrep)

Label every row of `REP` as `"Green"`, `"Red"` or `"NotKnown"` from its
upper-quartile green/red intensities, store the labels in `REP[!, :Label]`,
print the number of red and green rows, and draw a two-panel scatter plot of
the intensities with the two gating rectangles overlaid. Returns `REP`
(modified in place).

A row is `"Green"` when green > `min_green_for_green` and red < `max_red_for_green`;
otherwise `"Red"` when red > `min_red_for_red` and green < `max_green_for_red`;
otherwise `"NotKnown"`.

Besides `:Label`, the columns `:UpperQuartileIntensity_Green` and
`:UpperQuartileIntensity_Red` are added as aliases of the
`:Intensity_UpperQuartileIntensity_Filtered*` columns.

`nrep` is not used by the body; it is kept so existing calls keep working.
"""
function assign_red_green_labels(REP, max_green_for_red, min_red_for_red, max_red_for_green, min_green_for_green, nrep)
    # Outline the "Red" gate (red lines) and the "Green" gate (green lines) on
    # the current axes. Both rectangles extend to 1 on their open side.
    function DrawRectangles()
        topcorner = 1
        plot([max_green_for_red, max_green_for_red], [min_red_for_red, topcorner], c = "red")
        plot([0, 0], [min_red_for_red, topcorner], c = "red")
        plot([0, max_green_for_red], [min_red_for_red, min_red_for_red], c = "red")
        plot([0, max_green_for_red], [topcorner, topcorner], c = "red")

        plot([min_green_for_green, 1], [max_red_for_green, max_red_for_green], c = "green")
        plot([1, 1], [0, max_red_for_green], c = "green")
        plot([min_green_for_green, min_green_for_green], [max_red_for_green, 0], c = "green")
        plot([min_green_for_green, 1], [0, 0], c = "green")
    end

    green = REP[!, :Intensity_UpperQuartileIntensity_FilteredGreen]
    red = REP[!, :Intensity_UpperQuartileIntensity_FilteredRed]

    # Everything starts as "NotKnown"; the green gate takes precedence over red.
    labels = fill("NotKnown", length(red))
    for (ii, (r, g)) in enumerate(zip(red, green))
        if g > min_green_for_green && r < max_red_for_green
            labels[ii] = "Green"
        elseif r > min_red_for_red && g < max_green_for_red
            labels[ii] = "Red"
        end
    end
    REP[!, :Label] = labels

    redcount = count(==("Red"), labels)
    println("Red = $redcount")

    gcount = count(==("Green"), labels)
    println("Green = $gcount")

    REP[!, :UpperQuartileIntensity_Green] = green
    REP[!, :UpperQuartileIntensity_Red] = red

    y = :UpperQuartileIntensity_Red
    x = :UpperQuartileIntensity_Green

    # Scatter the rows whose :Cells value equals `sam` on the current axes.
    function scatter_sample(sam, colour)
        df = REP[REP[!, :Cells] .== sam, :]
        scatter(df[!, x], df[!, y], s = 1, label = sam, c = colour)
    end

    figure(figsize = (10, 5))

    # Left panel: the two single-population samples.
    subplot(1, 2, 1)
    scatter_sample("WT", "blue")
    scatter_sample("Rad21KO", "orange")
    DrawRectangles(); pretty_axes2(); ylabel(y); xlabel(x)

    # Right panel: the mixed sample.
    subplot(1, 2, 2)
    scatter_sample("Rad21KO+WT", "purple")
    DrawRectangles(); pretty_axes2(); ylabel(y); xlabel(x)

    return REP
end


# Root folder of experiment 1; the other input paths in this notebook are built
# from it by plain string concatenation (e.g. `root1 * "Segmentation_C2/..."`),
# so the trailing "/" is required.
root1 = "/Volumes/lymphdev\$/IreneR/Confocal/SertadSpredTSSchange/Exp1/"

# List the folder contents (the notebook displays the result).
readdir(root1)

21-element Vector{String}:
 ".DS_Store"
 "CP_results"
 "IbidiChamberSlide_SertadSpredTSS.csv"
 "IbidiChamberSlide_SertadSpredTSS1.csv"
 "IbidiChamberSlide_SertadSpredTSS1.xlsx"
 "Segmentation_C2"
 "Segmentation_C3"
 "Segmentation_C4"
 "Segmentation_cells"
 "Segmentation_nucleous"
 "Untitled.cpproj"
 "lif"
 "lif2"
 "tiff2D"
 "tiff3D"
 "type1_sertad2"
 "type1_spred2"
 "type4_sertad2"
 "type4_spred2"
 "type6_sertad2"
 "type6_spred2"

In [2]:
"""
    TSSs.TSS_raw_quant(t2, tss_folder, image_folder, n; xy = 0.189, zx = 0.5)

For every image pattern returned by `TSSs.get_image_patterns(t2)`, look up the
outline in `tss_folder`, build the per-cell TSS dictionary, measure it against
the images in `image_folder` with `TSSs.meassure_tss(…, n; xy, zx)`, and
convert the result to a table with `TSSs.analysis_singlechannel`
(`tss_name = :TSS2`). The per-image tables are combined with
`TSSs.join_in_all_common_columns` and returned.

`n`, `xy` and `zx` are passed to `TSSs.meassure_tss` unchanged; their meaning
is defined there (NOTE(review): `n` looks like a channel index and `xy`/`zx`
like voxel sizes — confirm against TSSs).

This adds a method to the `TSSs` module from outside the module.
"""
function TSSs.TSS_raw_quant(t2, tss_folder, image_folder, n; xy = 0.189, zx = 0.5)
    images_pat = TSSs.get_image_patterns(t2)

    p = Progress(length(images_pat), 1)
    dfs = []

    for pat in images_pat
        # Get all the TSS in the images
        outline = TSSs.find_outline(tss_folder, pat)
        tss = TSSs.cell_tss_dict(outline)
        # Use the `image_folder` argument; the body previously read a global
        # named `imagesfolder` and silently ignored this parameter.
        tss = TSSs.meassure_tss(tss, image_folder, pat, n, xy = xy, zx = zx)

        df = TSSs.analysis_singlechannel(tss; image = pat, tss_name = :TSS2)
        push!(dfs, df)

        # Advance only once the image has actually been processed.
        next!(p)
    end

    return TSSs.join_in_all_common_columns(dfs...)
end

In [3]:
# Quantify raw Sertad2 TSS intensities for the three segmentations and write
# one CSV per probe set to TSS_raw/<gene>_exp1.csv.

# Kept as a global under its existing name so that code referring to
# `imagesfolder` keeps working. `root1` already ends with "/", so the value
# contains a doubled separator; it is left as is so the string handed to TSSs
# does not change.
imagesfolder = root1 * "/tiff3D"

# Create the output folder once; mkpath does nothing if it already exists.
mkpath("TSS_raw")

# (output name, segmentation folder, 4th argument of TSS_raw_quant).
# NOTE(review): the number matches the C2/C3/C4 suffix, presumably the image
# channel to measure — confirm.
for (gene, segmentation, n) in (
        ("Sertad2TSS_exon", "Segmentation_C2", 2),
        ("Sertad2TSS_intron2", "Segmentation_C3", 3),
        ("Sertad2TSS_intron1", "Segmentation_C4", 4),
    )
    tss_dir = root1 * segmentation * "/_FQ_outline/sertad2/_TS_detect"

    # The same folder supplies both the image patterns and the outlines.
    quant = TSSs.TSS_raw_quant(tss_dir, tss_dir, imagesfolder, n; xy = 0.189, zx = 0.5)

    CSV.write("TSS_raw/" * gene * "_exp1.csv", quant)
end


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:13:20[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:12:55[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:13:06[39m


"TSS_raw/Sertad2TSS_intron1_exp1.csv"

In [4]:
# Quantify raw Spred2 TSS intensities for the three segmentations and write
# one CSV per probe set to TSS_raw/<gene>_exp1.csv.

# Kept as a global under its existing name so that code referring to
# `imagesfolder` keeps working. `root1` already ends with "/", so the value
# contains a doubled separator; it is left as is so the string handed to TSSs
# does not change.
imagesfolder = root1 * "/tiff3D"

# Create the output folder once; mkpath does nothing if it already exists.
mkpath("TSS_raw")

# (output name, segmentation folder, 4th argument of TSS_raw_quant).
# NOTE(review): the number matches the C2/C3/C4 suffix, presumably the image
# channel to measure — confirm.
for (gene, segmentation, n) in (
        ("Spred2TSS_exon", "Segmentation_C2", 2),
        ("Spred2TSS_intron2", "Segmentation_C3", 3),
        ("Spred2TSS_intron1", "Segmentation_C4", 4),
    )
    tss_dir = root1 * segmentation * "/_FQ_outline/spred2/_TS_detect"

    # The same folder supplies both the image patterns and the outlines.
    quant = TSSs.TSS_raw_quant(tss_dir, tss_dir, imagesfolder, n; xy = 0.189, zx = 0.5)

    CSV.write("TSS_raw/" * gene * "_exp1.csv", quant)
end


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:13:56[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:14:03[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:13:16[39m


"TSS_raw/Spred2TSS_intron1_exp1.csv"

In [5]:

# Divide the raw TSS intensity of every gene by the brightest-pixel intensity
# of that probe set's averaged mRNA spot image (radius 1 and radius 2) and
# write the result to TSS_avgdot/<gene>_exp1.csv.

# Create the output folder once; mkpath does nothing if it already exists.
mkpath("TSS_avgdot")

# gene => folder under `root1` holding that probe set's averaged spot image.
# A vector of pairs (not a Dict) keeps the processing order fixed.
for (gene, probe_dir) in [
        "Sertad2TSS_exon" => "type6_sertad2",
        "Sertad2TSS_intron1" => "type1_sertad2",
        "Sertad2TSS_intron2" => "type4_sertad2",
        "Spred2TSS_exon" => "type4_spred2",
        "Spred2TSS_intron1" => "type6_spred2",
        "Spred2TSS_intron2" => "type1_spred2",
    ]
    tss1 = CSV.read("TSS_raw/" * gene * "_exp1.csv", DataFrame)

    # Read the averaged spot image once and reuse it for both radii.
    # NOTE(review): assumes TSSs.int_brightest_pixel does not modify the image
    # it is given — confirm.
    avg_dot = TSSs.read_tiff_as_gray(joinpath(root1, probe_dir, "_mRNA_AVG_ns.tif"))
    dot1_r1 = TSSs.int_brightest_pixel(avg_dot; radious = 1)
    dot1_r2 = TSSs.int_brightest_pixel(avg_dot; radious = 2)

    tss1[!, :TSS1_r1] = tss1[!, :locus1_int2_TSS2] ./ dot1_r1
    tss1[!, :TSS1_r2] = tss1[!, :locus1_int2_TSS2] ./ dot1_r2

    CSV.write("TSS_avgdot/" * gene * "_exp1.csv", tss1)
end


