From 288886ad60abfabb39d6e95128134dbf9fa26236 Mon Sep 17 00:00:00 2001 From: mcmcgrath13 Date: Mon, 14 May 2018 13:51:22 -0400 Subject: [PATCH] adding csv column to db column check --- src/DBUtils/DBUtils.jl | 15 +++++++++++++-- src/PubMed/pubmed_sql_utils.jl | 17 +++++++++-------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/DBUtils/DBUtils.jl b/src/DBUtils/DBUtils.jl index abc55ae..389b122 100644 --- a/src/DBUtils/DBUtils.jl +++ b/src/DBUtils/DBUtils.jl @@ -152,13 +152,24 @@ end """ col_match(con, tablename, data_values) -Checks if each column in the dataframe has a matching column in the table +Checks if each column in the dataframe has a matching column in the table. """ function col_match(con, tablename::String, data_values::DataFrame) + cols = String.(data_values.colindex.names) + col_match(con, tablename, cols) +end + +""" + col_match(con, tablename, col_names) +Checks if each column in the csv/data frame has a matching column in the table. +""" + +function col_match(con, tablename::String, col_names::Vector{String}) all_match = true table_cols = select_columns(con, tablename) - for col in data_values.colindex.names + + for col in col_names this_match = false for tc in table_cols if tc == string(col) diff --git a/src/PubMed/pubmed_sql_utils.jl b/src/PubMed/pubmed_sql_utils.jl index c22e998..d44d155 100644 --- a/src/PubMed/pubmed_sql_utils.jl +++ b/src/PubMed/pubmed_sql_utils.jl @@ -423,7 +423,7 @@ function db_insert!(db::MySQL.Connection, articles::Dict{String,DataFrame}, csv_ # check if column names all exist in mysql table if !col_match(db, table, df) - error("each DataFrame column must match the name of a table column") + error("Each DataFrame column must match the name of a table column. $table had mismatches.") end path = joinpath(csv_path, "$(csv_prefix)$(table).csv") @@ -458,14 +458,15 @@ function db_insert!(db::MySQL.Connection, csv_path::String = pwd(), csv_prefix:: path = joinpath(csv_path, "$(csv_prefix)$(table).csv") drop_csv && push!(paths,path) - headers = CSV.read(path, rows = 1, datarow=1) + headers = CSV.read(path, rows = 2) # return headers - cols_string = "" - for i = 1:length(headers) - cols_string *= headers[1,i]*"," + cols = String.(headers.colindex.names) + if !col_match(db, table, cols) + error("Each CSV column must match the name of a table column. $table had mismatches.") end - cols_string = cols_string[1:end-1] + + cols_string = join(cols, ",") # Save article data (MySQL.stream from df) ins_sql = """LOAD DATA LOCAL INFILE '$path' INTO TABLE $table CHARACTER SET latin1 FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' IGNORE 1 LINES ($cols_string)""" @@ -492,7 +493,7 @@ function db_insert!(db::MySQL.Connection, pmid::Int64, articles::Dict{String,Dat if ismatch(r"$mesh*", table) # check if column names all exist in mysql table if !col_match(db, table, df) - error("each DataFrame column must match the name of a table column") + error("Each DataFrame column must match the name of a table column. $table had mismatches.") end path = joinpath(csv_path, "$(csv_prefix)$(table).csv") @@ -522,7 +523,7 @@ function db_insert!(db::SQLite.DB, articles::Dict{String,DataFrame}, csv_path::S # check if column names all exist in mysql table if !col_match(db, table, df) - error("each DataFrame column must match the name of a table column") + error("Each DataFrame column must match the name of a table column. $table had mismatches.") end for i = 1:size(df)[1]