Skip to content

Commit

Permalink
Merge pull request #327 from FamilySearch/extract-labels
Browse files: browse the repository at this point in the history
Extract labels into YAML
  • Loading branch information
tychonievich committed Aug 3, 2023
2 parents aec2392 + b0e3471 commit 0bcefcd
Show file tree
Hide file tree
Showing 223 changed files with 460 additions and 3 deletions.
19 changes: 16 additions & 3 deletions build/uri-def.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def find_cat_tables(txt, g7, tagsets):
sect = txt[i:j].replace('(Latter-Day Saint Ordinance)','`ord`') ## <- hack for ord-STAT
for entry in re.finditer(r'`([A-Z0-9_]+)` *\| *(.*?) *[|\n]', sect):
enum, meaning = entry.groups()
label = None
h1 = sect.find('\n|',sect.rfind('\n#',0,entry.start()))+1
h2 = sect.find('\n', h1)
header = [_.strip() for _ in sect[h1:h2].strip('| ').split('|')]
Expand Down Expand Up @@ -92,6 +93,7 @@ def find_cat_tables(txt, g7, tagsets):
enums.setdefault(key,[]).append(pfx)
elif 'month-' in pfx:
yamltype = 'month'
label = re.sub(r'(,| \().*', '', meaning[0])
k1 = sect.find('`', sect.find('URI for this calendar is', entry.start()))+1
k2 = sect.find('`', k1)
cal = sect[k1:k2]
Expand All @@ -109,15 +111,22 @@ def find_cat_tables(txt, g7, tagsets):
if pfx.startswith('g7:'):
if pfx[3:] in g7:
raise Exception(pfx+' defined as an enumeration and a '+g7[pfx[3:]][0])
g7[pfx[3:]] = (yamltype, meaning)
if label:
g7[pfx[3:]] = (yamltype, meaning, None, label)
else:
g7[pfx[3:]] = (yamltype, meaning)
return enums, calendars

def find_calendars(txt, g7):
    """Looks for sections defining a `g7:cal-` URI.

    Scans ``txt`` for a heading line (one or more ``#``, a space, then a
    backtick-quoted token) whose following text contains
    ``is `g7:cal-...` `` and records one entry per match in ``g7``.

    Parameters:
        txt: the specification text to search.
        g7:  dict mutated in place; keys are the URI without its ``g7:``
             prefix (e.g. ``cal-GREGORIAN``).

    Each stored value is a tuple
    ``('calendar', [section_text], epoch_markers)``. When the section text
    opens with "The <Name> calendar" (one or more capitalised words), a
    fourth element holding ``<Name>`` is appended to serve as the label.

    Returns None.
    """
    section_re = (
        r'#+ `[^`]*`[^\n]*\n+'
        r'((?:\n+(?!#)|[^\n])*is `g7:(cal-[^`]*)`(?:\n+(?!#)|[^\n#])*)'
    )
    for bit in re.finditer(section_re, txt):
        body, key = bit.groups()

        # Epoch markers are listed in prose, e.g. "The epoch marker `BCE` is
        # permitted"; pull every tag-like token out of that list.
        m = re.search(r'The epoch markers? ([`_A-Z0-9, and]+) (is|are) permitted', body)
        marker = re.findall(r'[A-Z0-9_]+', m[1]) if m else []

        entry = ('calendar', [body], marker)

        # The label is the run of capitalised words between "The " and
        # "calendar" at the very start of the section. Each word in the
        # capture is followed by a space, so drop the final one.
        m = re.match(r'The ((?:[A-Z][A-Za-z]* )+)calendar', body)
        if m:
            entry += (m.group(1).rstrip(' '),)

        g7[key] = entry


def joint_card(c1,c2):
Expand Down Expand Up @@ -215,7 +224,7 @@ def find_descriptions(txt, g7, ssp):
if uri not in ssp:
raise Exception('Found section for '+uri+' but no gedstruct')
if uri.startswith('g7:'):
g7.setdefault(uri[3:],('structure',[],ssp[uri]))[1].extend((
g7.setdefault(uri[3:],('structure',[],ssp[uri],name.strip()))[1].extend((
name.strip(),
desc.strip()
))
Expand Down Expand Up @@ -352,6 +361,10 @@ def expand_prefix(txt, prefixes):
print('\nspecification:', file=fh)
for desc in g7[tag][1]:
print(yaml_str_helper(' - ', desc), file=fh)

if len(g7[tag]) > 3:
print('\nlabel:',repr(g7[tag][3]), file=fh)

if g7[tag][0] == 'structure':
d = g7[tag][2]
payload = expand_prefix(d['pay'],prefixes) if d['pay'] is not None else 'null'
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/ABBR
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ specification:
- A short name of a title, description, or name used for sorting, filing, and
retrieving records.

label: 'Abbreviation'

payload: http://www.w3.org/2001/XMLSchema#string

substructures: {}
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/ADDR
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ specification:
Duplicating information bloats files and introduces the potential for
self-contradiction. ADR1, ADR2, and ADR3 should not be added to new files.

label: 'Address'

payload: http://www.w3.org/2001/XMLSchema#string

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/ADOP
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ specification:
- Creation of a legally approved child-parent relationship that does not
exist biologically.

label: 'Adoption'

payload: Y|<NULL>

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/ADOP-FAMC
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ specification:
performed the adoption; or by using a FAM where the adopting individual is
the only HUSB/WIFE.

label: 'Family child'

payload: "@<https://gedcom.io/terms/v7/record-FAM>@"

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/ADR1
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ specification:

ADR1 should not be added to new files; see ADDRESS_STRUCTURE for more.

label: 'Address Line 1'

payload: http://www.w3.org/2001/XMLSchema#string

substructures: {}
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/ADR2
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ specification:

ADR2 should not be added to new files; see ADDRESS_STRUCTURE for more.

label: 'Address Line 2'

payload: http://www.w3.org/2001/XMLSchema#string

substructures: {}
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/ADR3
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ specification:

ADR3 should not be added to new files; see ADDRESS_STRUCTURE for more.

label: 'Address Line 3'

payload: http://www.w3.org/2001/XMLSchema#string

substructures: {}
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/AGE
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ specification:
- The age of the individual at the time an event occurred, or the age listed
in the document.

label: 'Age at event'

payload: https://gedcom.io/terms/v7/type-Age

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/AGNC
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ specification:
or events, or an organization responsible for creating or archiving
records.

label: 'Responsible agency'

payload: http://www.w3.org/2001/XMLSchema#string

substructures: {}
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/ALIA
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ specification:
mutually linked by symmetric pairs of ALIA pointers. A future version of
this specification may adjust the definition of ALIA.

label: 'Alias'

payload: "@<https://gedcom.io/terms/v7/record-INDI>@"

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/ANCI
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ specification:
- Indicates an interest in additional research for ancestors of this
individual. (See also DESI).

label: 'Ancestor interest'

payload: "@<https://gedcom.io/terms/v7/record-SUBM>@"

substructures: {}
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/ANUL
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ specification:
- annulment
- Declaring a marriage void from the beginning (never existed).

label: 'Annulment'

payload: Y|<NULL>

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/ASSO
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ specification:
2 ASSO @I2@
3 ROLE CLERGY

label: 'Associates'

payload: "@<https://gedcom.io/terms/v7/record-INDI>@"

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/AUTH
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ specification:
an unpublished source, this may be an individual, a government agency,
church organization, or private organization.

label: 'Author'

payload: http://www.w3.org/2001/XMLSchema#string

substructures: {}
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/BAPL
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ specification:
- The event of baptism performed at age 8 or later by priesthood authority of
The Church of Jesus Christ of Latter-day Saints. (See also BAPM)

label: 'Baptism, Latter-Day Saint'

payload: null

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/BAPM
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ specification:
- baptism
- Baptism, performed in infancy or later. (See also BAPL and CHR.)

label: 'Baptism'

payload: Y|<NULL>

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/BARM
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ specification:
- Bar Mitzvah
- The ceremonial event held when a Jewish boy reaches age 13.

label: 'Bar Mitzvah'

payload: Y|<NULL>

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/BASM
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ specification:
- The ceremonial event held when a Jewish girl reaches age 13, also known as
“Bat Mitzvah.”

label: 'Bas Mitzvah'

payload: Y|<NULL>

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/BIRT
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ specification:
- birth
- Entering into life.

label: 'Birth'

payload: Y|<NULL>

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/BLES
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ specification:
- Bestowing divine care or intercession. Sometimes given in connection with a
naming ceremony.

label: 'Blessing'

payload: Y|<NULL>

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/BURI
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ specification:
- burial
- Disposing of the mortal remains of a deceased person.

label: 'Burial'

payload: Y|<NULL>

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CALN
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ specification:
from the holdings of a repository. Despite the word “number” in the name,
may contain any character, not just digits.

label: 'Call number'

payload: http://www.w3.org/2001/XMLSchema#string

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CAST
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ specification:
based on racial or religious differences, or differences in wealth,
inherited rank, profession, or occupation.

label: 'Caste'

payload: http://www.w3.org/2001/XMLSchema#string

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CAUS
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ specification:
death event to show cause of death, such as might be listed on a death
certificate.

label: 'Cause'

payload: http://www.w3.org/2001/XMLSchema#string

substructures: {}
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CHAN
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ specification:
recent, although only the most recent change is described by the DATE
substructure.

label: 'Change'

payload: null

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CHIL
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ specification:
- The child in a family, whether biological, adopted, foster, sealed, or
other relationship.

label: 'Child'

payload: "@<https://gedcom.io/terms/v7/record-INDI>@"

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CHR
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ specification:
- christening
- Baptism or naming events for a child.

label: 'Christening'

payload: Y|<NULL>

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CHRA
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ specification:
- adult christening
- Baptism or naming events for an adult person.

label: 'Christening, adult'

payload: Y|<NULL>

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CITY
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ specification:
- City
- The name of the city used in the address. See ADDRESS_STRUCTURE for more.

label: 'City'

payload: http://www.w3.org/2001/XMLSchema#string

substructures: {}
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CONF
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ specification:
- confirmation
- Conferring full church membership.

label: 'Confirmation'

payload: Y|<NULL>

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CONL
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ specification:
- The religious event by which a person receives membership in The Church of
Jesus Christ of Latter-day Saints. (See also CONF)

label: 'Confirmation, Latter-Day Saint'

payload: null

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CONT
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ specification:
during serialization and is never present in parsed datasets. See Lines for
more.

label: 'Continued'

payload: null

substructures: {}
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/COPR
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ specification:
- A copyright statement, as appropriate for the copyright laws applicable to
this data.

label: 'Copyright'

payload: http://www.w3.org/2001/XMLSchema#string

substructures: {}
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CORP
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ specification:
- The name of the business, corporation, or person that produced or
commissioned the product.

label: 'Corporate name'

payload: http://www.w3.org/2001/XMLSchema#string

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CREA
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ specification:
to the initial creation, it should not be modified after the structure is
created.

label: 'Creation'

payload: null

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CREM
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ specification:
- cremation
- Disposal of the remains of a person’s body by fire.

label: 'Cremation'

payload: Y|<NULL>

substructures:
Expand Down
2 changes: 2 additions & 0 deletions extracted-files/tags/CROP
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ specification:
- TOP or TOP + HEIGHT exceed the image height.
- CROP applied to a non-image or image without a defined pixel unit.

label: 'Crop'

payload: null

substructures:
Expand Down
Loading

0 comments on commit 0bcefcd

Please sign in to comment.