In [None]:
using HTTP
using JSON3
using CSV
using DataFrames
using ProgressMeter

In [None]:
### Read the single nucleotide variants from snvs.csv into a DataFrame
snvs = CSV.File("snvs.csv") |> DataFrame

In [None]:
## Each SNV should be in the form:
# single-letter code of the original amino acid, position of the SNV, single-letter code of the new amino acid
# e.g. L123R
# OncoKB might tolerate other formats, but it is best to stick to this format if possible.

In [None]:
# Create an empty DataFrame with the columns SNV, Annotation and Effect.
# The columns are left untyped because the values read from the API response are
# pushed as they come.
outputDF = DataFrame([[], [], []], ["SNV", "Annotation", "Effect"])

# The endpoint and the request headers are identical for every row, so build them once.
# The API token is taken from the ONCOKB_API_TOKEN environment variable when it is set;
# otherwise the placeholder below is used and has to be replaced with a real key.
oncokb_url = "https://www.oncokb.org/api/v1/annotate/mutations/byProteinChange"
oncokb_token = get(ENV, "ONCOKB_API_TOKEN", "*********YOURKEYHERE*********")
oncokb_headers = [
    "Accept" => "application/json",
    "Authorization" => "Bearer " * oncokb_token,
]

# For every row in the input, query OncoKB and record the annotation.
@showprogress dt=1 desc="Getting Annotations..." for row in eachrow(snvs)
    # The "SNV Name" column is split at ":" into gene and alteration.
    gene, alteration = split(row."SNV Name", ":")
    println("gene, alt = ", gene, ", ", alteration)
    # Strip a leading "p." only when it is actually there, so names with and
    # without the prefix are both handled without editing the code.
    if startswith(alteration, "p.")
        alteration = alteration[3:end]
    end
    # Send the query with the appropriate headers and API key.
    res = HTTP.get(
        oncokb_url,
        oncokb_headers;
        query=["hugoSymbol" => gene, "alteration" => alteration],
    )

    # Parse the JSON body of the response.
    result = JSON3.read(res.body)
    # Output label of the form GENENAME:ALTERATION (without the "p." prefix).
    outputString = gene * ":" * alteration
    # Append one row: label, oncogenic call, known mutation effect.
    push!(outputDF, [outputString, result.oncogenic, result.mutationEffect.knownEffect])
end

In [None]:
# Save the annotated table to annotatedSNVs.csv
outputDF |> CSV.write("annotatedSNVs.csv")

### potentially useful code to modify snv names

In [None]:
# Three-letter amino acid codes and, at the same index, their single-letter codes.
TL_code = ["Ala","Arg","Asn","Asp","Cys","Glu","Gln","Gly","His","Ile","Leu","Lys","Met","Phe","Pro","Ser",
            "Thr","Trp","Tyr","Val"]
SL_code = ["A","R","N","D","C","E","Q","G","H","I","L","K","M","F","P","S","T","W","Y","V"]
# zip would silently truncate to the shorter list, so check the pairing explicitly.
length(TL_code) == length(SL_code) ||
    throw(DimensionMismatch("TL_code and SL_code must have the same length"))
# Lookup table: three-letter code => single-letter code.
aa_single = Dict{String,String}(zip(TL_code, SL_code))


# Convert the leading three-letter amino acid code of each entry in SVs to its
# single-letter code. When the first three characters are not a key of aa_single,
# those characters are stored unchanged.
# first(SV, 3) is used instead of SV[1:3] so that entries shorter than three
# characters yield the whole entry instead of raising a BoundsError.
SV_SLcode = []
for SV in SVs
    TL = first(SV, 3)
    push!(SV_SLcode, get(aa_single, TL, TL))
end
SV_SLcode

# For each entry in SVs collect two values:
#   aa2_list - the last three characters, converted to a single-letter code when
#              they are a key of aa_single, otherwise kept as they are
#   pos_list - everything before those last three characters
# Entries of three characters or fewer, and entries of ten or more characters that
# contain "Ter", are stored unchanged in both lists.
aa2_list = []
pos_list = []
for SV in SVs
    println("processing "*SV)
    n = length(SV)
    if n < 10
        if n > 3
            # first/last count characters, matching length(SV); byte-range
            # indexing could fail on non-ASCII input.
            aa2 = last(SV, 3)
            push!(aa2_list, get(aa_single, aa2, aa2))
            push!(pos_list, first(SV, n - 3))
        else
            push!(aa2_list, SV)
            push!(pos_list, SV)
        end
    elseif occursin("Ter", SV)
        # `"Ter" in SV` raises an error for strings; occursin is the substring test.
        push!(aa2_list, SV)
        push!(pos_list, SV)
    else
        # Nothing is stored for these entries, so both lists end up shorter than
        # SVs; report it instead of dropping the entry silently.
        @warn "Skipping entry with 10 or more characters and no \"Ter\"" SV
    end
end