Skip to content

Commit

Permalink
updating function comments
Browse files Browse the repository at this point in the history
  • Loading branch information
mcmcgrath13 committed May 14, 2018
1 parent e949b7f commit dcee09d
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 12 deletions.
2 changes: 1 addition & 1 deletion docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ Depth = 6 -->
| Function | Description |
| :------- | :---------- |
|[BioServices.jl](https://github.com/BioJulia/BioServices.jl)| Interface to EUtils and UMLS APIs|
|[PubMedMiner.jl](https://github.com/bcbi/PubMedMiner.jl) | Examples of comorbidity studies using PubMed artciles|
|[PubMedMiner.jl](https://github.com/bcbi/PubMedMiner.jl) | Examples of comorbidity studies using PubMed articles|
31 changes: 22 additions & 9 deletions src/Processes/medline_load.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@ using DataFrames
load_medline(db_con, output_dir; start_file = 1, end_file = 928, year=2018, test=false)
Given a MySQL connection and optionally the start and end files, fetches the medline files, parses the xml, and loads into a MySQL DB (assumes tables already exist). The raw (xml.gz) and parsed (csv) files will be stored in the output_dir.
### Arguments
* db_con: a MySQL connection to a db (tables must already be created - see PubMed.create_tables!)
* output_dir: root directory where the raw and parsed files should be stored
* start_file: which medline file the loading should start at (default is 1)
* end_file: which medline file the loading should end at (default is 928, the last file in the 2018 baseline)
* year: which year's medline baseline to load (default is 2018)
* test: if true, a sample file will be downloaded, parsed, and loaded instead of the baseline files
"""
function load_medline(db_con::MySQL.Connection, output_dir::String; start_file::Int = 1, end_file::Int = 928, year::Int=2018, test::Bool = false)

Expand All @@ -17,6 +26,10 @@ function load_medline(db_con::MySQL.Connection, output_dir::String; start_file::
set_innodb_checks!(db_con,0,0,0)
drop_mysql_keys!(db_con)

if test
start_file = 1
end_file = 1
end

info("Getting files from Medline")
@sync for n = start_file:end_file
Expand Down Expand Up @@ -47,9 +60,9 @@ function load_medline(db_con::MySQL.Connection, output_dir::String; start_file::
end

"""
init(mysql_host::String, mysql_user::String, mysql_pwd::String, mysql_db::String, overwrite::bool)
init_medline(output_dir, test=false)
Sets up environment (folders), and connects to MySQL DB and FTP Server returns these connections.
Sets up the environment (folders), connects to the medline FTP server, and returns the connection.
"""
function init_medline(output_dir::String, test::Bool=false)
## SET UP ENVIRONMENT
Expand Down Expand Up @@ -79,8 +92,8 @@ end


"""
get_file_name(fnum::Int, year::Int = 2018)
Returns the medline file name given the file number.
get_file_name(fnum::Int, year::Int = 2018, test = false)
Returns the medline file name given the file number and year.
"""
function get_file_name(fnum::Int, year::Int, test::Bool=false)
nstr = lpad(fnum,4,0) # pad iterator with leading zeros so total length is 4
Expand All @@ -92,9 +105,9 @@ function get_file_name(fnum::Int, year::Int, test::Bool=false)
end

"""
get_ml_file(fname::String, conn::ConnContext)
get_ml_file(fname::String, conn::ConnContext, output_dir)
Retrieves the file with fname /files. Returns the HTTP response.
Retrieves the file named fname and puts it in medline/raw_files. Returns the HTTP response.
"""
function get_ml_file(fname::String, conn::ConnContext, output_dir::String)
println("Getting file: ", fname)
Expand All @@ -113,7 +126,7 @@ end


"""
get_ftp_con()
get_ftp_con(test = false)
Get an FTP connection
"""
function get_ftp_con(test::Bool = false)
Expand All @@ -127,9 +140,9 @@ function get_ftp_con(test::Bool = false)
end

"""
parse_ml_file(fname::String)
parse_ml_file(fname::String, output_dir::String)
Parses the medline xml file into a dictionary of dataframes
Parses the medline xml file into a dictionary of dataframes. Saves the resulting CSV files to medline/parsed_files.
"""
function parse_ml_file(fname::String, output_dir::String)
println("Parsing file: ", fname)
Expand Down
4 changes: 2 additions & 2 deletions src/PubMed/pubmed_to_csv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,8 @@ end

# Note: If needed, it could be further refactored so that author and journal are each a type
"""
PubMedArticle
Type that matches the NCBI-XML contents for a PubMedArticle
parse(xml::EzXML.Node)
Parses a PubMedArticleSet that matches the NCBI-XML format
"""
#Constructor from EzXML article element
function parse(xml::EzXML.Node)
Expand Down

0 comments on commit dcee09d

Please sign in to comment.