In [None]:
using CSV
using Tables
using DataFrames
using Plots

In [None]:
# import patient info
# Read pat_OS_numMut.csv into a DataFrame. The code in this file uses its `numMut`
# column (below), its second column as a list of simulation folder names, and its
# first three columns for the final join on `patient_ID`.
pat_OS_numMut = DataFrame(CSV.File("pat_OS_numMut.csv"))

In [None]:
# Drop the patients with no mutations: only rows whose `numMut` is non-zero are kept.
# The trailing `;` suppresses the notebook output of the cell.
pat_OS_numMut = pat_OS_numMut[(pat_OS_numMut.numMut .!= 0), :];

In [None]:
# Build the list of simulation folders to load: the names stored in the second
# column of the patient table, every sub-folder of the individual-parameter
# simulation directory, and the wild-type ("WT") run.
filesToPlot = String.(pat_OS_numMut[!, 2])
indivParamsDir = "SSDoutputs/allMutSims/indivParams"
# `readdir` returns bare entry names, so each name must be joined back onto the
# directory before it is tested (a bare name would be resolved against the current
# working directory instead). Only directories are kept because the loading code
# reads `<entry>/mut/treeStructure.csv`, so stray files must not end up in the list.
moreFilesToPlot = filter(name -> isdir(joinpath(indivParamsDir, name)), readdir(indivParamsDir))
append!(filesToPlot, moreFilesToPlot)
push!(filesToPlot, "WT")

# the same folder can come from more than one source; keep the first occurrence only
filesToPlot = unique(filesToPlot)
filesToPlot

In [None]:
# Root folder of the simulation outputs; the loading loop appends
# "<folder name>/.../treeStructure.csv" to this prefix (note the trailing slash).
pathToPlot="SSDoutputs/allMutSims/"

In [None]:
# Number of time steps in the time course (60*120 = 7200). The plotting code puts a
# tick every 60*24 steps and labels it in hours, so one step is presumably one
# minute and the time course 120 h -- confirm against the simulation settings.
maxTimeTC=float(60*120)
# Number of generation rows allocated when counting cells per generation.
maxGens=8;

In [None]:
"""
    treeStructurePath(pathToPlot, file)

Return the path of the `treeStructure.csv` file for the simulation folder `file`.

Folders whose name starts with "DLBCL" keep the file under `all_mut/`, the "WT"
folder keeps it at its top level, and every other folder keeps it under `mut/`,
either directly below `pathToPlot` or, when that file does not exist, below
`pathToPlot * "indivParams/"`.
"""
function treeStructurePath(pathToPlot, file)
    if startswith(file, "DLBCL")
        return pathToPlot * file * "/all_mut/treeStructure.csv"
    elseif file == "WT"
        return pathToPlot * file * "/treeStructure.csv"
    end
    topLevelFile = pathToPlot * file * "/mut/treeStructure.csv"
    # Test for the file explicitly rather than catching whatever CSV.File throws,
    # so a genuine read/parse error is reported instead of triggering the fallback.
    isfile(topLevelFile) && return topLevelFile
    return pathToPlot * "indivParams/" * file * "/mut/treeStructure.csv"
end

"""
    cellCountsOverTime(graphingArray, maxGens, numTimePoints)

Return a `maxGens` x `numTimePoints` matrix with the number of cells present in
each generation at each time step.

For every row of `graphingArray` the last column is read as the number of
generations reached and columns `1:generations` as the time at which each
generation ends (presumably one row per founder cell -- confirm against the
simulation output format). Generation `g` contributes `2^(g-1)` cells from the end
of the previous generation (time step 1 for the first) up to the step before its
own end time.

Time ranges are clipped to `1:numTimePoints`, so events that fall outside the
recorded time course are truncated instead of raising a `BoundsError`.
"""
function cellCountsOverTime(graphingArray, maxGens, numTimePoints)
    cellCountArray = zeros(maxGens, numTimePoints)
    for i in axes(graphingArray, 1)
        thisCellMaxGen = Int64(graphingArray[i, end])
        genStart = 1
        for thisGen in 1:thisCellMaxGen
            genEnd = Int64(round(graphingArray[i, thisGen]))
            firstStep = max(genStart, 1)
            lastStep = min(genEnd - 1, numTimePoints)
            # an empty range (lastStep < firstStep) simply adds nothing
            cellCountArray[thisGen, firstStep:lastStep] .+= 2^(thisGen - 1)
            genStart = genEnd
        end
    end
    return cellCountArray
end

# One row per simulation folder, one column per time step; each row holds the total
# cell number (summed over generations) of that simulation.
allData = zeros(length(filesToPlot), Int64(maxTimeTC))
for (fileIndex, file) in enumerate(filesToPlot)
    treeArrayFile = treeStructurePath(pathToPlot, file)
    graphingArray = CSV.File(treeArrayFile, types=Float64, silencewarnings=true) |> Tables.matrix
    cellCountArray = cellCountsOverTime(graphingArray, maxGens, size(allData, 2))
    allData[fileIndex, :] = vec(sum(cellCountArray, dims=1))
end


In [None]:
# Turn the (simulation x time) matrix into a DataFrame with one column per simulation
# folder (named after the folder) and one row per time step, write it to
# outputFile.csv and display it. The functions defined later in this file read the
# cell numbers from this global `df`.
df=DataFrame(allData',Symbol.(filesToPlot))
CSV.write("outputFile.csv",df)
df

# Now let's plot patient data

In [None]:
#### Select the patients: the unique column names of `df` that start with "DLBCL"
patients=unique(filter(n->startswith(n,"DLBCL"),names(df)))

In [None]:
# 20-colour colour-blind-friendly palette (the plotting functions in this file build
# their own local palette of the same name)
cA=palette(:seaborn_colorblind,20)
#### Mut/Param data: table read from muts2ParamsNEW.csv; the code in this file uses
#### its GENE, ALTERATION, PARAMETER and AMOUNT columns
muts2Params = DataFrame(CSV.File("muts2ParamsNEW.csv"))
# concatenate gene and alteration into a "GENE:ALTERATION" key column
muts2Params[!, "GeneAlt"] = string.(muts2Params[!, "GENE"], ":", muts2Params[!, "ALTERATION"])
## remove the rows whose PARAMETER is the string "0"
muts2Params = muts2Params[(muts2Params.PARAMETER .!= "0"), :];
#### Load combined output data (next cell)

In [None]:
# Table read from combinedOutput2_noDupes.csv; the code in this file uses its
# PATIENT, GENE and ALTERATION columns
CombOp = DataFrame(CSV.File("combinedOutput2_noDupes.csv"))
# remove missing data: keep only the rows without any missing value
CombOp = CombOp[completecases(CombOp), :]
# concatenate gene and alteration into the same "GENE:ALTERATION" key as above
CombOp[!, "GeneAlt"] = string.(CombOp[!, "GENE"], ":", CombOp[!, "ALTERATION"]);

In [None]:
"""
    GenConds(muts)

Return a DataFrame with columns `Param` and `Val` holding, for every entry of `muts`
that is a key of the global `param_mut` dictionary, the parameter stored in
`param_mut` and the value stored in the global `param_vals` dictionary.

Entries of `muts` that are not keys of `param_mut` are skipped; the rows keep the
order of `muts`.
"""
function GenConds(muts)
    Conds = DataFrame(Param = Any[], Val = Float64[])
    for mut in muts
        # only mutations present in the dictionary contribute a row
        haskey(param_mut, mut) || continue
        val = param_vals[mut]
        param = param_mut[mut]
        append!(Conds, DataFrame(Param=param, Val=val))
    end
    return Conds
end

"""
    GenConds2(muts_count)

Return the tuple `(params, values)` built from the rows of `muts_count`: `params`
collects the first column of every row and `values` the second column raised to the
power of the third column.

Both results are `Vector{Any}` and follow the row order of `muts_count`.
"""
function GenConds2(muts_count)
    paramNames = Any[]
    combinedVals = Any[]
    for row in eachrow(muts_count)
        push!(paramNames, row[1])
        # second column (value) to the power of the third column (count)
        push!(combinedVals, row[2]^row[3])
    end
    return (paramNames, combinedVals)
end

# Lookup tables built from muts2Params in a single pass over its rows:
#   param_mut  : "GENE:ALTERATION" key => PARAMETER
#   param_vals : "GENE:ALTERATION" key => AMOUNT
#   gene_alt   : PARAMETER => "GENE:ALTERATION" key
# When a key occurs in several rows the last row wins.
param_mut = Dict()
param_vals = Dict()
gene_alt = Dict()
for row in eachrow(muts2Params)
    param_mut[row.GeneAlt] = row.PARAMETER
    param_vals[row.GeneAlt] = row.AMOUNT
    gene_alt[row.PARAMETER] = row.GeneAlt
end

In [None]:
"""
    getPatConds(patient)

Return `(condsToPlot, GeneAlt)` for `patient`: the names of the simulation
conditions to plot and a matching vector of legend labels.

The rows of the global `CombOp` table belonging to `patient` are reduced to the
mutations that are keys of the global `param_mut` dictionary and grouped by
parameter and value. `condsToPlot` is `"WT"`, then one `"<param>_<value^count>"`
entry per group, then the patient name. `GeneAlt` is `"WT"`, then for each group
the mutations of that group joined with `"+"`, then `"all_muts"`.

When the patient has no mutation known to `param_mut`, a message is printed and
`(["WT", patient], ["WT", "all_muts"])` is returned.
"""
function getPatConds(patient)
    patient_df = subset(CombOp, :PATIENT => ByRow(==(patient)))

    # we only need the mutations we think do something, so filter out the rest
    filter!(:GeneAlt => n -> haskey(param_mut, n), patient_df)

    # explicit emptiness check instead of catching the error of indexing row 1
    if isempty(patient_df)
        println("patient: "*patient*" has no mutations.")
        return (["WT",patient],["WT","all_muts"])
    end

    # NOTE(review): the patient name is re-read from the first cell of the table, as
    # before; this assumes the first column of CombOp is PATIENT -- confirm.
    patient = patient_df[1, 1]

    # one (Param, Val) row per mutation, in the same order as `muts`
    muts = Vector(patient_df[!, :GeneAlt])
    conds = GenConds(muts)
    muts_count = combine(groupby(conds, [:Param, :Val]), nrow => :count)
    (params, paramValues) = GenConds2(muts_count)

    condsToPlot = Any["WT"]
    GeneAlt = Any["WT"]
    for (i, group) in enumerate(eachrow(muts_count))
        push!(condsToPlot, string(params[i], "_", paramValues[i]))

        # Label the condition with exactly the mutations of this (Param, Val) group.
        # Matching on Param alone would give two conditions that share a parameter
        # but differ in value the same, merged label.
        inGroup = isequal.(conds.Param, Ref(group.Param)) .& isequal.(conds.Val, group.Val)
        push!(GeneAlt, join(string.(muts[inGroup]), "+"))
    end
    push!(condsToPlot, patient)
    push!(GeneAlt, "all_muts")

    return (condsToPlot, GeneAlt)
end

In [None]:
"""
    plotCellNumbers(thisPatient)

Return a plot of the cell numbers over time for every condition of `thisPatient`
(as given by `getPatConds`), read from the columns of the global `df`.

The first condition (WT) and the last one (all mutations) are drawn with a thick
line, the last one in black; the conditions in between use the colour-blind palette
and a thinner line.
"""
function plotCellNumbers(thisPatient)
    gr()
    # get conditions and names for conditions
    (conditionsForPat, namesForPatConds) = getPatConds(thisPatient)
    p1 = plot()
    numConditions = length(conditionsForPat)
    cA = palette(:seaborn_colorblind, numConditions)

    for (conditionIndex, condition) in enumerate(conditionsForPat)
        isAllMuts = conditionIndex == numConditions
        # the all-mutations simulation is always black, the others follow the palette
        thisLineColor = isAllMuts ? :black : cA[conditionIndex]
        thisLineWidth = (isAllMuts || conditionIndex == 1) ? 6 : 3
        plot!(p1, df[!,Symbol(condition)], label=namesForPatConds[conditionIndex], linecolor=thisLineColor, linealpha=0.75, linewidth=thisLineWidth)
    end

    # formatting: one tick every 24 h, placed every 60*24 time steps. Positions and
    # labels are derived from the same range so their lengths always match (the
    # previous label range 0:24:maxTimeTC had far more entries than tick positions).
    hourTicks = 0:24:floor(Int, maxTimeTC / 60)
    plot!(p1, xticks = (60 .* hourTicks, string.(hourTicks)))
    plot!(p1, ylabel="cell number")
    plot!(p1, xlabel="time (h)")
    plot!(p1, legend=:right, dpi=300, margin=10Plots.mm)

    return p1
end

In [None]:
"""
    plotSynergyOldWay(thisPatient)

Return a plot comparing, relative to the WT simulation, the summed effect of the
individual conditions of `thisPatient` ("additive") with the effect of the
all-mutations simulation ("all_muts").

Every curve is a difference to the WT column of the global `df`. The area between
the two curves is shaded red where the all-mutations effect is above the additive
effect and blue where it is below.
"""
function plotSynergyOldWay(thisPatient)
    gr()
    (conditionsForPat, namesForPatConds) = getPatConds(thisPatient)
    p1 = plot()

    # the first condition is the WT, so store it
    WTCellNumbers = df[!,Symbol(conditionsForPat[1])]

    # sum of the cells added (or removed) by each single condition compared to WT;
    # the first (WT) and last (all muts) conditions are excluded
    additiveEffect = zeros(size(WTCellNumbers))
    for condition in conditionsForPat[2:end-1]
        additiveEffect = additiveEffect .+ (df[!,Symbol(condition)] .- WTCellNumbers)
    end
    plot!(p1, additiveEffect, label="additive", linecolor=:red, linealpha=0.75, linewidth=6)

    # effect of all mutations over the WT
    allMutsEffect = df[!,thisPatient] .- WTCellNumbers
    plot!(p1, allMutsEffect, label="all_muts", linecolor=:black, linealpha=0.75, linewidth=6)

    # the synergy region to shade lies between the additive effect and the all-muts effect
    lower = minimum(hcat(allMutsEffect, additiveEffect), dims=2)
    upper = maximum(hcat(allMutsEffect, additiveEffect), dims=2)
    # filling down to `lower` colours the part above the additive effect (red),
    # filling up to `upper` colours the part below it (blue)
    plot!(p1, [allMutsEffect allMutsEffect], fillrange=[lower upper], fillalpha=0.35, c = [:red :blue], label = ["> additive (synergistic)" "<= additive"], linecolor=:black, linealpha=0.75, linewidth=3)

    # one tick every 24 h, placed every 60*24 time steps. Positions and labels come
    # from the same range so their lengths always match (the previous label range
    # 0:24:maxTimeTC had far more entries than tick positions).
    hourTicks = 0:24:floor(Int, maxTimeTC / 60)
    plot!(p1, xticks = (60 .* hourTicks, string.(hourTicks)))
    plot!(p1, ylabel="cell number")
    plot!(p1, xlabel="time (h)")
    plot!(p1, legend=:right, dpi=300, margin=10Plots.mm)

    return p1
end

In [None]:
"""
    plotSynergy(thisPatient)

Return a plot comparing the all-mutations simulation of `thisPatient` ("all_muts")
with the additive expectation ("additive"): the WT cell numbers plus the summed
differences to WT of every individual condition.

All cell numbers are read from the global `df`. The area between the two curves is
shaded red where the all-mutations simulation is above the additive expectation
and blue where it is below.
"""
function plotSynergy(thisPatient)
    gr()
    (conditionsForPat, namesForPatConds) = getPatConds(thisPatient)
    p1 = plot()

    # the first condition is the WT, so store it
    WTCellNumbers = df[!,Symbol(conditionsForPat[1])]

    # sum of the cells added (or removed) by each single condition compared to WT;
    # the first (WT) and last (all muts) conditions are excluded
    currentTotal = zeros(size(WTCellNumbers))
    for condition in conditionsForPat[2:end-1]
        currentTotal = currentTotal .+ (df[!,Symbol(condition)] .- WTCellNumbers)
    end

    # additive expectation: the summed single-condition effects on top of the WT
    additiveEffect = currentTotal .+ WTCellNumbers
    plot!(p1, additiveEffect, label="additive", linecolor=:red, linealpha=0.75, linewidth=6)

    # this is the all muts sim
    allMutsCells = df[!,thisPatient]
    plot!(p1, allMutsCells, label="all_muts", linecolor=:black, linealpha=0.75, linewidth=6)

    # the synergy region to shade lies between the additive expectation and the all-muts sim
    lower = minimum(hcat(allMutsCells, additiveEffect), dims=2)
    upper = maximum(hcat(allMutsCells, additiveEffect), dims=2)
    # filling down to `lower` colours the part above the additive expectation (red),
    # filling up to `upper` colours the part below it (blue)
    plot!(p1, [allMutsCells allMutsCells], fillrange=[lower upper], fillalpha=0.35, c = [:red :blue], label = ["> additive (synergistic)" "<= additive"], linecolor=:black, linealpha=0.75, linewidth=3)

    # one tick every 24 h, placed every 60*24 time steps. Positions and labels come
    # from the same range so their lengths always match (the previous label range
    # 0:24:maxTimeTC had far more entries than tick positions).
    hourTicks = 0:24:floor(Int, maxTimeTC / 60)
    plot!(p1, xticks = (60 .* hourTicks, string.(hourTicks)))
    plot!(p1, ylabel="cell number")
    plot!(p1, xlabel="time (h)")
    plot!(p1, legend=:right, dpi=300, margin=10Plots.mm)

    return p1
end

In [None]:
"""
    calculateMetrics(thisPatient)

Return the tuple
`(peakCN, AUCCN, peakEIA, AUCEIA, peakSyn, AUCSyn, sumOnlyPosSyn, normalisedSynergy)`
for `thisPatient`, computed from the columns of the global `df`.

The additive expectation is the WT cell numbers plus the summed differences to WT
of every individual condition; "synergy" is the all-mutations simulation minus
that expectation.

- `peakCN`, `AUCCN`: maximum and sum of the all-mutations simulation
- `peakEIA`, `AUCEIA`: maximum and sum of the additive expectation
- `peakSyn`, `AUCSyn`: maximum and sum of the synergy
- `sumOnlyPosSyn`: sum of the synergy with negative values replaced by zero
- `normalisedSynergy`: `AUCSyn / AUCEIA`
"""
function calculateMetrics(thisPatient)
    gr()
    (conditionsForPat, namesForPatConds) = getPatConds(thisPatient)

    # the first condition is the WT
    WTCellNumbers = df[!,Symbol(conditionsForPat[1])]

    # accumulate the difference to WT of every condition except the first (WT) and
    # the last (all muts)
    summedEffects = zeros(size(WTCellNumbers))
    for condition in conditionsForPat[2:end-1]
        summedEffects = summedEffects .+ (df[!,Symbol(condition)] .- WTCellNumbers)
    end
    additiveEffect = summedEffects .+ WTCellNumbers

    allMutsCells = df[!,thisPatient]
    synergy = allMutsCells .- additiveEffect

    peakCN = maximum(allMutsCells)
    AUCCN = sum(allMutsCells)
    peakEIA = maximum(additiveEffect)
    AUCEIA = sum(additiveEffect)
    peakSyn = maximum(synergy)
    AUCSyn = sum(synergy)
    # keep only the positive part of the synergy before summing
    onlyPosSyn = map(s -> s < 0 ? zero(s) : s, synergy)
    sumOnlyPosSyn = sum(onlyPosSyn)
    normalisedSynergy = AUCSyn/AUCEIA

    return (peakCN,AUCCN,peakEIA,AUCEIA,peakSyn,AUCSyn,sumOnlyPosSyn,normalisedSynergy)
end

In [None]:
"""
    plotAllPlots(patient)

Draw the three plots of `patient` (cell numbers, synergy relative to WT, synergy in
absolute cell numbers) side by side, display the combined figure and save it as
`plots/<patient>.png`. The `plots` folder must already exist.
"""
function plotAllPlots(patient)
    panels = (plotCellNumbers(patient), plotSynergyOldWay(patient), plotSynergy(patient))
    combined = plot(panels..., layout = (1,3), size=(3000,600))
    display(combined)
    savefig(combined, "plots/"*patient*".png")
end

In [None]:
#put plots in a folder
mkpath("plots")

#patientsToRun
patientsToRun = patients

#names of the metrics, in the order returned by calculateMetrics
metricNames = ["peakCN","AUCCN","peakEIA","AUCEIA","peakSyn","AUCSyn","sumOnlyPosSyn","normalisedSyn"]

#fill this array with all the metrics for all patients (one row per patient)
allCalculatedMetrics = zeros(length(patientsToRun), length(metricNames))

#loop through every patient
for (patientIndex, patient) in enumerate(patientsToRun)
    println(patient)

    #calculate all metrics for the patient and print them
    metrics = calculateMetrics(patient)
    (peakCN,AUCCN,peakEIA,AUCEIA,peakSyn,AUCSyn,sumOnlyPosSyn,normalisedSyn) = metrics

    println("PeakCN: "*string(peakCN))
    println("AUCCN: "*string(AUCCN))
    println("peakEIA: "*string(peakEIA))
    println("AUCEIA: "*string(AUCEIA))
    println("peakSyn: "*string(peakSyn))
    println("AUCSyn: "*string(AUCSyn))
    println("Normalised Synergy: "*string(normalisedSyn))

    println("AUCOnlyPosSyn (AUC of red area only): "*string(sumOnlyPosSyn))

    #save all the calculated metrics to the array to write out afterwards
    allCalculatedMetrics[patientIndex, :] = collect(metrics)

    #print the plots for the patient to check the metrics make sense.
    plotAllPlots(patient)
end

# The metrics table gets its own name: the global `df` holds the simulated cell
# numbers that the plotting and metric functions read, so overwriting it here would
# break any later call to those functions.
metricsDf = DataFrame(allCalculatedMetrics, Symbol.(metricNames))
metricsDf[!, :patientName] = patientsToRun
# put the patient name first, followed by the metrics in their original order
metricsDf = metricsDf[!, [:patientName; Symbol.(metricNames)]]
CSV.write("outputFileMetrics.csv", metricsDf)

In [None]:
# Keep the first three columns of the (filtered) patient table; the join below
# requires `patient_ID` to be one of them.
OS_data = pat_OS_numMut[!, 1:3]

In [None]:
# Read back the per-patient metrics written to outputFileMetrics.csv.
synData = DataFrame(CSV.File("outputFileMetrics.csv"))

In [None]:
# Rename the patient column so that it matches the key column of OS_data.
rename!(synData, :patientName => :patient_ID)

In [None]:
# Inner join: only patients present in both tables are kept.
patAllDat = innerjoin(OS_data, synData, on=:patient_ID)

In [None]:
# Write the combined patient + metrics table to disk.
CSV.write("pat_OS_Syn_res.csv", patAllDat)