Skip to content

Commit

Permalink
adding csv column to db column check
Browse files Browse the repository at this point in the history
  • Loading branch information
mcmcgrath13 committed May 14, 2018
1 parent dcee09d commit 288886a
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 10 deletions.
15 changes: 13 additions & 2 deletions src/DBUtils/DBUtils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,24 @@ end

"""
col_match(con, tablename, data_values)
Checks if each column in the dataframe has a matching column in the table
Checks if each column in the dataframe has a matching column in the table.
"""
function col_match(con, tablename::String, data_values::DataFrame)
    # Convenience method: pull the column names out of the data frame, convert
    # them to strings, and delegate to the `col_match` method that takes the
    # names as a `Vector{String}`. The result of that call is returned as-is.
    #
    # `names` is the public DataFrames accessor for column names. Reading the
    # `colindex` field directly depends on DataFrames internals and is taken as
    # a lookup of a column called `colindex` on releases where `df.name` means
    # column access. Broadcasting `String` handles both Symbol and String names.
    cols = String.(names(data_values))
    return col_match(con, tablename, cols)
end

"""
col_match(con, tablename, col_names)
Checks if each column in the csv/data frame has a matching column in the table.
"""

function col_match(con, tablename::String, col_names::Vector{String})
all_match = true

table_cols = select_columns(con, tablename)
for col in data_values.colindex.names

for col in col_names
this_match = false
for tc in table_cols
if tc == string(col)
Expand Down
17 changes: 9 additions & 8 deletions src/PubMed/pubmed_sql_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ function db_insert!(db::MySQL.Connection, articles::Dict{String,DataFrame}, csv_

# check if column names all exist in mysql table
if !col_match(db, table, df)
error("each DataFrame column must match the name of a table column")
error("Each DataFrame column must match the name of a table column. $table had mismatches.")
end

path = joinpath(csv_path, "$(csv_prefix)$(table).csv")
Expand Down Expand Up @@ -458,14 +458,15 @@ function db_insert!(db::MySQL.Connection, csv_path::String = pwd(), csv_prefix::
path = joinpath(csv_path, "$(csv_prefix)$(table).csv")
drop_csv && push!(paths,path)

headers = CSV.read(path, rows = 1, datarow=1)
headers = CSV.read(path, rows = 2)
# return headers

cols_string = ""
for i = 1:length(headers)
cols_string *= headers[1,i]*","
cols = String.(headers.colindex.names)
if !col_match(db, table, cols)
error("Each CSV column must match the name of a table column. $table had mismatches.")
end
cols_string = cols_string[1:end-1]

cols_string = join(cols, ",")

# Save article data (MySQL.stream from df)
ins_sql = """LOAD DATA LOCAL INFILE '$path' INTO TABLE $table CHARACTER SET latin1 FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' IGNORE 1 LINES ($cols_string)"""
Expand All @@ -492,7 +493,7 @@ function db_insert!(db::MySQL.Connection, pmid::Int64, articles::Dict{String,Dat
if ismatch(r"$mesh*", table)
# check if column names all exist in mysql table
if !col_match(db, table, df)
error("each DataFrame column must match the name of a table column")
error("Each DataFrame column must match the name of a table column. $table had mismatches.")
end

path = joinpath(csv_path, "$(csv_prefix)$(table).csv")
Expand Down Expand Up @@ -522,7 +523,7 @@ function db_insert!(db::SQLite.DB, articles::Dict{String,DataFrame}, csv_path::S

# check if column names all exist in the SQLite table
if !col_match(db, table, df)
error("each DataFrame column must match the name of a table column")
error("Each DataFrame column must match the name of a table column. $table had mismatches.")
end

for i = 1:size(df)[1]
Expand Down

0 comments on commit 288886a

Please sign in to comment.