# In [502]:
"""  
    cleanNoteExpress(entry)

Convert an entry in NoteExpress format into an intermediate form: a string in which
each attribute is written as a name/value pair joined by an equals sign "=".
In NoteExpress each attribute is displayed as `{AttributeName}: value`; the
cleaned-up form is `attributeName = value`.
"""
function cleanNoteExpress(entry)
    # NoteExpress field marker => BibTeX-style "name = " prefix. Every prefix except
    # the one for URL begins with ",\n", which then acts as the separator between
    # consecutive attributes in the returned string.
    markerToPrefix = Dict(
        "{URL}: " => "URL = ",
        "{Author}: " => ",\nauthor = ",
        "{Title}: " => ",\ntitle = ",
        "{Date}: " => ",\ndate = ",
        "{Journal}: " => ",\njournaltitle = ",
        "{Keywords}: " => ",\nkeywords = ",
        "{Abstract}: " => ",\nabstract = ",
        "{Year}: " => ",\nyear = ",
        "{DOI}: " => ",\ndoi = ",
        "{Pages}: " => ",\npages = ",
        "{Reference Type}: " => ",\ntype = ",
        "{Issue}: " => ",\nnumber = ",
    )
    # Apply the substitutions one after another. No prefix contains a marker, so the
    # (unspecified) iteration order of the Dict does not affect the result.
    renamed = foldl((text, rule) -> replace(text, rule), markerToPrefix; init = entry)
    # A fixed language attribute is appended to every entry.
    return string(renamed, ",\nlangid = 中文")
end

"""  
    addBracesToAttribute(entry)

For a string returned by `cleanNoteExpress`, enclose each attribute value in curly braces.

"""
function addBracesToAttribute(entry)
    # `entry` is expected to be a sequence of "name = value" attributes separated by
    # ",\n". The result has every attribute rewritten as "name = {value},\n" and
    # concatenated in the original order.
    attributes = split(entry, ",\n")
    braced = map(attributes) do attribute
        # `limit = 2` splits at the first " = " only, so a value that itself contains
        # " = " (a URL query string, a formula in an abstract, ...) is kept whole
        # instead of being cut off at its second " = ".
        # A piece that contains no " = " at all still raises an error here, as before.
        key, val = split(attribute, " = "; limit = 2)
        string(key, " = {", val, "},\n")
    end
    return join(braced)
end

"""  
    addHead(entry)

For a string returned by `addBracesToAttribute`, use regular expressions to extract the type of
the entry (journal article or thesis) and prepend the matching BibTeX entry head. The citation
key in the head is built from the start of the author field followed by the year.

"""
function addHead(entry)
    # `entry` is the braced attribute list ("name = {value},\n" ...). The result is
    # `entry` wrapped as "@thesis{key,\n" ... "}" or "@article{key,\n" ... "}".

    # Return the named capture `group` of `pattern` in `entry`; fail with a readable
    # message (rather than an error about indexing `nothing`) when it is absent.
    function requiredField(pattern, group)
        found = match(pattern, entry)
        found === nothing &&
            throw(ArgumentError("entry has no braced `$(group)` attribute"))
        return found[group]
    end

    entryType = requiredField(r"type = \{(?<typeName>\w*)\}", :typeName)
    # Only the first one to three word characters of the author field go into the key.
    author = requiredField(r"author = \{(?<author>\w{1,3}).*\}", :author)
    year = requiredField(r"year = \{(?<year>\w*)\}", :year)
    refKey = string(author, year)

    headOfEntry = if entryType == "学位论文"
        "@thesis{$(refKey),\n"
    else
        # Any type other than thesis/journal used to abort with a KeyError. It is now
        # emitted as @article, the same default `entrydict2bib` uses, with a warning
        # so the entry can be corrected by hand.
        if entryType != "期刊"
            @warn "Unknown entry type; falling back to @article" entryType refKey
        end
        "@article{$(refKey),\n"
    end
    return string(headOfEntry, entry, "}")
end

"""
    cnki2NoteExpress2bib(file)

Convert the NoteExpress export at path `file` to BibTeX and write the result to
`string(file, ".bib")`.

All lines of the file are concatenated, the text is split into entries at every
`{URL}` marker, and each entry is passed through `cleanNoteExpress`,
`addBracesToAttribute` and `addHead`. Entries are written separated by blank lines.
Returns `nothing`.
"""
function cnki2NoteExpress2bib(file)
    # `readlines(path)` opens and closes the file itself; the handle previously
    # obtained with `open(file)` was never closed.
    text = join(readlines(file))

    # Splitting at "{URL}" drops the marker, so it is put back in front of every
    # non-empty piece to restore a complete entry.
    entries = ["{URL}" * piece for piece in split(text, "{URL}") if !isempty(piece)]

    bibEntries = [
        addHead(addBracesToAttribute(cleanNoteExpress(entry))) for entry in entries
    ]

    open(string(file, ".bib"), "w") do io
        for bibEntry in bibEntries
            write(io, bibEntry, "\n\n\n")
        end
    end
    return nothing
end

# cnki2NoteExpress2bib (generic function with 1 method)

# In [506]:
# Convert the NoteExpress export; the BibTeX output is written to
# "termStructureNoteExpress.eln.bib" (input path with ".bib" appended).
cnki2NoteExpress2bib("termStructureNoteExpress.eln");

# In [501]:
"""
    xmlEntry2Dict(xmlentry)

Turn the child elements of one XML entry into a `Dict{String,String}` keyed by
BibTeX-style field names.

`xmlentry` is any iterable; each item is converted with `string` and matched against
`<tag>text</tag>`. Items that do not match are ignored. Only a fixed set of tags is
kept, each renamed to its target field (for example `source` becomes `journaltitle`).
"""
function xmlEntry2Dict(xmlentry)
    elementPattern = r"\<(?<attr>\w+)\>(?<val>.*)</\w+>"

    # tag => text for every element that looks like <tag>text</tag>; a tag that occurs
    # more than once keeps its last value.
    tagValues = Dict{AbstractString,AbstractString}()
    for element in xmlentry
        found = match(elementPattern, string(element))
        found === nothing && continue
        tagValues[found[:attr]] = found[:val]
    end

    # Source tag => output field, in processing order. "PageRange" and "Page" both
    # feed "pages"; "Page" is processed later, so it wins when both are present.
    # NOTE(review): lookups are case-sensitive; confirm the tag spellings
    # (e.g. "PageRange", "keyWord", "summary") against a real export.
    tagToField = (
        "title" => "title",
        "author" => "author",
        "year" => "year",
        "source" => "journaltitle",
        "Roll" => "volume",
        "issue" => "number",
        "PageRange" => "pages",
        "Issn" => "issn",
        "Page" => "pages",
        "summary" => "abstract",
        "SrcDataBase" => "SrcDataBase",
        "uri" => "url",
        "keyWord" => "keywords",
    )

    fields = Dict{String,String}()
    for (tag, field) in tagToField
        if haskey(tagValues, tag)
            fields[field] = tagValues[tag]
        end
    end
    return fields
end

"""
    entrydict2bib(entrydict)

Render a dictionary of BibTeX fields as the text of one BibTeX entry.

`entrydict` must contain `"author"` and `"year"`; their concatenation is the citation
key. If `"SrcDataBase"` equals `"学位论文"` the entry is a `@thesis`, otherwise (including
when the key is absent) an `@article`. `"SrcDataBase"` itself is not written to the
entry. Fields appear in the iteration order of `entrydict`, and `entrydict` is left
unmodified.
"""
function entrydict2bib(entrydict)
    # NOTE(review): the whole author string goes into the key here, whereas `addHead`
    # uses only its first few characters — confirm which form is wanted.
    refKey = string(entrydict["author"], entrydict["year"])
    entryType = get(entrydict, "SrcDataBase", "")
    entryhead = entryType == "学位论文" ? "@thesis{$(refKey),\n" : "@article{$(refKey),\n"

    io = IOBuffer()
    print(io, entryhead)
    for (key, val) in entrydict
        # Skip the type marker rather than deleting it from the caller's dictionary:
        # deleting made a second call on the same dictionary fail with a KeyError.
        key == "SrcDataBase" && continue
        print(io, key, " = {", val, "},\n")
    end
    print(io, "}")
    return String(take!(io))
end

"""
    cnkixml2bib0(file)

Convert the XML export at path `file` to BibTeX using LightXML and write the result to
`string(file, ".bib")`.

Each child element of the document root is treated as one entry: its own child
elements are passed to `xmlEntry2Dict`, and the resulting dictionary is rendered with
`entrydict2bib`. Returns `nothing`.

Requires LightXML (`parse_file`, `root`, `child_elements`, `free`) to be loaded.
"""
function cnkixml2bib0(file)
    xdoc = parse_file(file)
    entryArrayDicts = try
        entryArrayXmlNodes = collect(child_elements(root(xdoc)))
        # Iterate the nodes parsed from `file`. The previous code indexed `ces`, a name
        # not defined in this function, and called `cleanEntry`, which is not defined
        # alongside it; `xmlEntry2Dict` produces the dictionary `entrydict2bib` expects.
        [xmlEntry2Dict(collect(child_elements(node))) for node in entryArrayXmlNodes]
    finally
        # LightXML documents are not garbage-collected; `xmlEntry2Dict` has already
        # copied everything it needs into plain strings.
        free(xdoc)
    end
    entryArraybib = [entrydict2bib(entrydict) for entrydict in entryArrayDicts]

    open(string(file, ".bib"), "w") do io
        for bibEntry in entryArraybib
            write(io, bibEntry, "\n\n\n")
        end
    end
    return nothing
end

"""
    cnkixml2bib(file)

Convert the XML export at path `file` to BibTeX using regular expressions only, and
write the result to `string(file, ".bib")`.

All lines of the file are concatenated. Every `<DATA>...</DATA>` span is one entry;
each `<tag>...</tag>` element directly inside it is passed to `xmlEntry2Dict`, and the
resulting dictionary is rendered with `entrydict2bib`. Returns `nothing`.
"""
function cnkixml2bib(file)
    # `readlines(path)` closes the file itself; the handle from `open(file)` used to
    # be leaked.
    xmlText = join(readlines(file))

    # The previous code extracted the <DATA> entries and then ignored them, reading
    # from `ces` / `cleanEntry` instead, which are not defined in this function. The
    # extracted entries are now the ones that get converted.
    entryArrayDicts = [
        xmlEntry2Dict([
            child.match for child in eachmatch(r"<(\w+)>.*?</\1>", entryMatch[:body])
        ]) for entryMatch in eachmatch(r"<DATA>(?<body>.*?)</DATA>", xmlText)
    ]
    entryArraybib = [entrydict2bib(entrydict) for entrydict in entryArrayDicts]

    open(string(file, ".bib"), "w") do io
        for bibEntry in entryArraybib
            write(io, bibEntry, "\n\n\n")
        end
    end
    return nothing
end

# cnkixml2bib (generic function with 1 method)

# In [504]:
# Convert the CNKI XML export; the BibTeX output is written to
# "termStructureNoteExpressInCnki.eln.bib" (input path with ".bib" appended).
cnkixml2bib("termStructureNoteExpressInCnki.eln")