In [None]:
using Revise

using Kate

### Test Environment

In [None]:
# Run Kate's test entry point before the analysis starts.
# NOTE(review): what exactly gets checked is defined inside Kate, not in this notebook.
Kate.test()

### Set Parameters

In [None]:
# Name of the sample directory; it is appended to the input and output roots below.
sa = "human_rna_bulk_brain/"

# Read layout: "pe" (paired end) or "se" (single end).
st = "pe"

# Fragment length. Per the author's note, not needed for paired-end reads.
fr = 51

# Standard deviation of the read length (author's wording).
# Per the author's note, not needed for paired-end reads.
sd = 0.05

# Reference transcriptome.
paf = "../input/human_reference_transcriptome/Homo_sapiens.GRCh38.cdna.all.fa.gz"

# Table that maps transcript IDs to gene names (read in the gene-by-sample step).
pat = "../input/human_reference_transcriptome/enst_gene_name.tsv"

# Presumably the number of parallel jobs; it is handed to Kate.count_transcript.
n_jo = 2

# Input and output directories of this sample.
pas, pao = (joinpath(ro, sa) for ro in ("../input/", "../output/"))

### Count transcripts

In [None]:
# Count transcripts for every R1 FASTQ found in `pas`.
#
# For each file whose name contains "R1", the mate file is derived by replacing
# "R1" with "R2" in the file name, and the results go to
# `<pao>/kallisto/<file name up to ".fastq">`.
#
# NOTE(review): "R1" is matched anywhere in the file name, so a name that contains
# "R1" more than once (e.g. an accession such as "SRR1...") has every occurrence
# rewritten when building the R2 path.
for fq1 in readdir(pas; join = true)

    println(fq1)

    # Work on the file name only, so an "R1" in a parent directory is never
    # matched or rewritten.
    fi1 = basename(fq1)

    if occursin("R1", fi1)

        fq2 = joinpath(dirname(fq1), replace(fi1, "R1" => "R2"))

        # Sample name: file name up to the first ".fastq". Using the base name
        # keeps this independent of how deep `pas` is nested.
        na = string(split(fi1, ".fastq")[1])

        paco = joinpath(pao, "kallisto/", na)

        println(paco)

        # `st` is the read-layout flag set with the other parameters; the
        # previously passed name `ty` is not defined anywhere in this notebook.
        Kate.count_transcript(paf, paco, n_jo, fq1, fq2, st, fr, sd)

    end

end

### Make transcript by sample

In [None]:
using CSV

using DataFrames

# Stack the abundance table of every sample directory under `<pao>/kallisto/`
# into one long table and save it as `transcript_x_sample.tsv`.
tpm_ = DataFrame()

for di in readdir(joinpath(pao, "kallisto/"); join = true)

    # Finder metadata is not a sample.
    if occursin("DS_Store", di)

        continue

    end

    pa = joinpath(di, "abundance.tsv")

    # A directory without an abundance table (e.g. an unfinished run) is
    # reported and skipped instead of aborting the whole loop.
    if !isfile(pa)

        @warn "Skipping entry without abundance.tsv" di

        continue

    end

    println(pa)

    ab = DataFrame(CSV.File(pa; delim = '\t'))

    # Tag each row with its sample so that the stacked rows stay attributable.
    ab[!, :sample] .= basename(di)

    append!(tpm_, ab)

end

# Fail with a clear message rather than with a missing-column error from rename!.
if ncol(tpm_) == 0

    error("No abundance.tsv found under $(joinpath(pao, "kallisto/"))")

end

rename!(tpm_, :target_id => :id)

# CSV.write defaults to a comma delimiter; the file is named .tsv, so write tabs.
CSV.write(joinpath(pao, "transcript_x_sample.tsv"), tpm_; delim = '\t')

### Make gene by sample

In [None]:
using Statistics

# Read the transcript-to-gene table and give its columns the names used below.
tr_ge = DataFrame(CSV.File(pat; delim = '\t'))

rename!(tr_ge, Dict("Transcript stable ID version" => :id, "Gene name" => :gene))

# Map transcript to gene name.
# The right join keeps every row of the mapping table, so a transcript that is
# absent from `tpm_` ends up with a missing tpm.
co_tr_ge = sort!(rightjoin(tpm_, tr_ge; on = :id), :gene)

co_tr_ge = co_tr_ge[:, [:id, :tpm, :gene]]

# Mean tpm of each gene, taken over all rows that carry that gene name.
gr = groupby(co_tr_ge, :gene)

# Aggregate the grouped frame `gr`; the name `grouped` used before was never defined.
tp_ge_sa = combine(gr, :tpm => mean)

# Save gene by sample.
# CSV.write defaults to a comma delimiter; the file is named .tsv, so write tabs.
CSV.write(joinpath("../output/", sa, "gene_x_sample.tsv"), tp_ge_sa; delim = '\t')