Skip to content

Commit

Permalink
Improved the verbatim label parser to extract partial dates (month and year, without a day).
Browse files (browse the repository at this point in the history)
  • Loading branch information
proceps committed Aug 5, 2020
1 parent 88f7351 commit 8aba07e
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 1 deletion.
27 changes: 27 additions & 0 deletions lib/utilities/dates.rb
Expand Up @@ -762,6 +762,13 @@ def self.date_regex_from_verbatim_label(text)
date[:end_date_day] = matchdata1[3]
date[:end_date_month] = matchdata1[1]
date[:end_date_year] = matchdata1[4]
# June - July 1947 Jun - Jul 1947 June - July, 1947 VI-X 1947 (month range within a single year, no days)
elsif matchdata1 = text.match(/\W(january|february|march|april|may|june|july|august|september|october|november|december|jan|feb|mar|apr|jun|jul|aug|sep|sept|oct|nov|dec|viii|vii|iv|vi|v|ix|xi|xii|x|iii|ii|i)\.?\s?[-–]\s?(january|february|march|april|may|june|july|august|september|october|november|december|jan|feb|mar|apr|jun|jul|aug|sep|sept|oct|nov|dec|viii|vii|iv|vi|v|ix|xi|xii|x|iii|ii|i)\.?[,-–\/]?\s?(\d{4}|['´`ʹʼˊ]?\s?\d{2})\D/)
date[:verbatim_date] = matchdata1[0].strip
date[:start_date_month] = matchdata1[1]
date[:start_date_year] = matchdata1[3]
date[:end_date_month] = matchdata1[2]
date[:end_date_year] = matchdata1[3]
# Jun 29 1947 Jun 29, 1947 June 29, 1947 VI-29-1947 X.25.2000 Jun 29, '47 June 29, '47 VI-4-08
elsif matchdata1 = text.match(/\W(january|february|march|april|may|june|july|august|september|october|november|december|jan|feb|mar|apr|jun|jul|aug|sep|sept|oct|nov|dec|viii|vii|iv|vi|v|ix|xii|xi|x|iii|ii|i)\.?\s?[-–_,\/]?\s?(\d\d?)[\.;,]?\s?[-–_\/\.',\s]\s?(\d{4}|['´`ʹʼˊ]?\s?\d{2})\D/)
date[:verbatim_date] = matchdata1[0].strip
Expand All @@ -786,6 +793,11 @@ def self.date_regex_from_verbatim_label(text)
date[:start_date_day] = matchdata1[2]
date[:start_date_month] = matchdata1[1]
date[:start_date_year] = matchdata1[3]
# Jun 1947 June 1947 June, 1947 VI 1947 VI-1947 X.2000 (month and year only, no day)
elsif matchdata1 = text.match(/\W(january|february|march|april|may|june|july|august|september|october|november|december|jan|feb|mar|apr|jun|jul|aug|sep|sept|oct|nov|dec|viii|vii|iv|vi|v|ix|xii|xi|x|iii|ii|i)[\.,]?\s?[-–,]?\s?(\d{4}|['´`ʹʼˊ]?\s?\d{2})\D/)
date[:verbatim_date] = matchdata1[0].strip
date[:start_date_month] = matchdata1[1]
date[:start_date_year] = matchdata1[2]
end

return {} if date[:verbatim_date].blank?
Expand Down Expand Up @@ -923,6 +935,21 @@ def self.date_regex_from_verbatim_label(text)
else
return {}
end
elsif date[:start_date_day].nil? &&
Date.valid_date?(date[:start_date_year].to_i, date[:start_date_month].to_i, 1) &&
Date.parse(date[:start_date_year].to_s + '-' + date[:start_date_month].to_s + '-1') <= Date.today &&
date[:start_date_year].to_s + '-' + date[:start_date_month].to_s + '-1' > '1700-01-01'
if date[:end_date_day].nil? && date[:end_date_year]
Date.valid_date?(date[:end_date_year].to_i, date[:end_date_month].to_i, 1) &&
Date.parse(date[:end_date_year].to_s + '-' + date[:end_date_month].to_s + '-1') <= Date.today &&
Date.parse(date[:end_date_year].to_s + '-' + date[:end_date_month].to_s + '-1') >= Date.parse(date[:start_date_year].to_s + '-' + date[:start_date_month].to_s + '-1') &&
date[:end_date_year].to_s + '-' + date[:end_date_month].to_s + '-1' > '1700-01-01'
return date
elsif date[:end_date_year].nil?
return date
else
return {}
end
else
return {}
end
Expand Down
9 changes: 8 additions & 1 deletion spec/lib/utilities/dates_spec.rb
Expand Up @@ -257,7 +257,14 @@
'text, 6/29/47, text' => '29/6/1947///',
"text, 6/29/'47, text" => '29/6/1947///',
"text, 7.10.94, text" => '10/7/1894///',
"text, 5-17-97, text" => '17/5/1897///', }
"text, 5-17-97, text" => '17/5/1897///',
'text, Jun - Jul 1947, text' => '/6/1947//7/1947',
'text, June - July, 1947, text' => '/6/1947//7/1947',
'text, VI-X 1947, text' => '/6/1947//10/1947',
'text, Jun 1947, text' => '/6/1947///',
'text, June, 1947, text' => '/6/1947///',
'text, VI 1947, text' => '/6/1947///',
}

@entry = 0

Expand Down

0 comments on commit 8aba07e

Please sign in to comment.