 ## Prise en main de la librairie GEDCOM

### Conventions : 
Tag for Spouse FamiliyElement : 'FAMS'
$\\$
Tag for Childrens FamiliyElement : 'FAMC'

In [46]:
from gedcom.element.individual import IndividualElement
from gedcom.element.element import Element
from gedcom.parser import Parser
from gedcom.element.family import FamilyElement
import gedcom.tags
import numpy as np
import pandas as pd
import string
#from gedcom.element.element import Element
#from gedcom.element.file import FileElement
#from gedcom.element.object import ObjectElement

gedcom_parser = Parser()

In [47]:
file_path = 'Queen_Eliz_II.ged'
gedcom_parser = Parser()
gedcom_parser.parse_file(file_path)
root_child_elements = gedcom_parser.get_root_child_elements()

def find_child_data(first_names,last_name):
    for element in root_child_elements:
        if isinstance(element, IndividualElement):
            if element.surname_match(last_name):
                if element.given_name_match(first_names):
                    L1 = []
                    L2 = []
                    (first, last) = element.get_name()
                    print("Information about " + first + " " + last + ":")
                    data = element.get_child_elements()
                    for child in data :
                        L1 += [child]
                        L2 += [child.get_tag()]
                    return L1, L2
    return "No matching person"

In [48]:
find_child_data("Elizabeth II Alexandra Mary", "Windsor")

Information about Elizabeth II Alexandra Mary Windsor:


([<gedcom.element.element.Element at 0x1443b76a0>,
  <gedcom.element.element.Element at 0x1443b76d0>,
  <gedcom.element.element.Element at 0x1443b7700>,
  <gedcom.element.element.Element at 0x1443b7730>,
  <gedcom.element.element.Element at 0x1443b7820>,
  <gedcom.element.element.Element at 0x1443b77f0>,
  <gedcom.element.element.Element at 0x1443b7910>,
  <gedcom.element.element.Element at 0x1443b7940>],
 ['RIN', '_UID', '_UPD', 'NAME', 'SEX', 'BIRT', 'FAMS', 'FAMC'])

In [49]:
def find_IndividualElement(first_names,last_name):
    """
    Get Individual element of ged with first names and last name

    Parameters
    ---
    first_names : str
        complete first names of the individual
    last_name : str
        last names of the individual 

    Returns 
    ---
    bool 
        Tells if the individual was found
    IndividualElement
        IndividualElement corresponding to the individual in the ged

    """
    for element in root_child_elements:
        if isinstance(element, IndividualElement):
            if element.surname_match(last_name):
                return (True, element)
    return [False, element]

In [50]:
def get_Spouse_FamilyElement(Individual):
    """
    Get Spouse FamilyElements of an individual

    Parameters
    ---
    Individual : IndividualElement

    Returns
    ---
    list of FamilyElement 
    """
    return gedcom_parser.get_families(Individual,"FAMS")
    

In [51]:
def get_Children_FamilyElement(Individual):
    """
    Get Childrens FamilyElements of an individual

    Parameters
    ---
    Individual : IndividualElement

    Returns
    ---
    list of FamilyElement 
    """
    return gedcom_parser.get_families(Individual,"FAMC")

In [52]:
def get_spouse(Individual):
    """
    Doesn't work
    """
    Family = get_Spouse_FamilyElement(Individual)[0]
    return gedcom_parser.get_family_members(Family,"FAMILY_MEMBERS_TYPE_ALL")

### Tests

In [53]:
get_Spouse_FamilyElement(find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1])

[<gedcom.element.family.FamilyElement at 0x14640f310>]

In [54]:
get_Spouse_FamilyElement(find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1])[0].get_tag()

'FAM'

In [55]:
get_Children_FamilyElement(find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1])

[<gedcom.element.family.FamilyElement at 0x146406fd0>]

In [56]:
A = find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1]
B = find_IndividualElement("George VI", "Windsor")[1]
gedcom_parser.find_path_to_ancestor(A,B,None)[0].get_name()

('George V', 'Windsor')

In [57]:
C = find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1]
D = find_IndividualElement("George V", "Windsor")[1]
gedcom_parser.find_path_to_ancestor(A,B,None)[0].get_name()

('George V', 'Windsor')

In [58]:
E = find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1]
F = find_IndividualElement("Edward VII", "Wettin")[1]
gedcom_parser.find_path_to_ancestor(A,B,None)[0].get_name()

('George V', 'Windsor')

In [59]:
get_spouse(find_IndividualElement("George VI", "Windsor")[1])

[<gedcom.element.individual.IndividualElement at 0x1443ae340>,
 <gedcom.element.individual.IndividualElement at 0x1443ae8e0>,
 <gedcom.element.individual.IndividualElement at 0x1443aed90>]

In [60]:
get_spouse(find_IndividualElement("George VI", "Windsor")[1])[0].get_name()

('George V', 'Windsor')

In [61]:
get_spouse(find_IndividualElement("George VI", "Windsor")[1])[1].get_name()

('Mary of Teck (May)', '')

In [62]:
get_spouse(find_IndividualElement("George VI", "Windsor")[1])[2].get_name()

('George VI', 'Windsor')

In [63]:
get_spouse(find_IndividualElement('Mary of Teck (May)', '')[1])[0].get_name()

('SIR WILLIAM VII KNIGHT ENGLAND', 'Gascoigne')

### Fonctions Parents/Childrens

In [64]:
def get_all_Spouse_FamilyElement():
    Spouse_FamilyElements = []
    N = 0
    for element in root_child_elements:
        if isinstance(element, FamilyElement):
            if element.get_tag() == gedcom.tags.GEDCOM_TAG_FAMILY_SPOUSE :
                Spouse_FamilyElements += [element]
    return Spouse_FamilyElements


In [65]:
get_all_Spouse_FamilyElement()

[]

In [66]:
#get_Childrens(find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1])

In [67]:
gedcom_parser.get_parents(find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1])

[<gedcom.element.individual.IndividualElement at 0x1443a49a0>,
 <gedcom.element.individual.IndividualElement at 0x1443a4e80>]

### Raisonnement : 
Construction d'une Dataframe panda Individual / Family1 / Family2 $\\$
Construction d'une Dataframe panda Family / Children1 / FamilyChildren2 / ... $\\$
Construction d'une Dataframe panda Family / Mother / Father $\\$
Dataframe of Families connections with other families

Parcours du graphe : 
On part d'un des 2, on part des 2 côté avec les 2 dernières dataframe jusqu'à trouver la famille du 2e

In [68]:
def get_IndivFamily_DataFrame(file_path='Queen_Eliz_II.ged'):
    """
    Creates DataFrame of children & spouse families keys of all individuals 
    indexed by their keys, from a gedcom file.

    Parameters
    ---
    file_path : str
        path of the gedcom file

    Returns 
    ---
    pd.DataFrame 
        dataframe of children & spouse families keys of individuals
    """
    gedcom_parser.parse_file(file_path)
    root_child_elements = gedcom_parser.get_root_child_elements()
    
    T = []
    
    #Go through indivduals and get their families
    for element in root_child_elements:
        if isinstance(element, IndividualElement):
            L = [element.get_pointer()]
            for child_element in element.get_child_elements() :
                if child_element.get_tag() == gedcom.tags.GEDCOM_TAG_FAMILY_SPOUSE :
                    L += [child_element.get_value()]
                elif child_element.get_tag() == gedcom.tags.GEDCOM_TAG_FAMILY_CHILD :
                    L += [child_element.get_value()]
            T += [L]

    #Add NaN where information is missing
    full_T = [line+['NaN']*(3-len(line)) for line in T]

    #Create the DataFrame
    df = pd.DataFrame(
    {
        'INDI' : [full_T[k][0] for k in range(len(full_T))],
        'FAMS' : [full_T[k][1] for k in range(len(full_T))],
        'FAMC' : [full_T[k][2] for k in range(len(full_T))],
    })

    return df

In [115]:
def get_FamChildrens_DataFrame(df):
    """
    """
    df1 = df.set_index('INDI',inplace=False)

    df2 = pd.DataFrame(
    {
        'FAM' : df['FAMS'].drop_duplicates(),
    })
    df2['1FAMC'] = np.NaN
    df2.set_index('FAM',inplace=True)
    N_max = 1

    for family in df2.index :
        N = 0
        for indi in df[df['FAMC'] == f'{family}']['INDI'] :
            N += 1
            if N > N_max :
                N_max = N
                df2[f'{N_max}FAMC'] = np.NaN
            #df2.at[f'{family}',f'{N}FAMC'] = f'{indi}'
            df2.at[f'{family}',f'{N}FAMC'] = df1.at[f'{indi}','FAMS']
    return df2

In [116]:
#pd.set_option('display.max_rows', None)
df = get_IndivFamily_DataFrame()
get_FamChildrens_DataFrame(df)

Unnamed: 0_level_0,1FAMC,2FAMC,3FAMC,4FAMC,5FAMC
FAM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
@F285@,,,,,
@F286@,@F285@,,,,
@F78@,@F303@,,,,
@F75@,@F187@,,,,
@F76@,@F75@,,,,
...,...,...,...,...,...
@F6017@,@F6013@,,,,
@F6018@,@F6017@,,,,
@F6019@,@F6018@,,,,
@F6020@,@F6017@,,,,


In [117]:
def get_FamSpouse_DataFrame(df):
    """
    """
    df1 = df.set_index('INDI',inplace=False)

    df2 = pd.DataFrame(
    {
        'FAM' : df['FAMS'].drop_duplicates(),
    })
    df2['1FAMS'] = np.NaN
    df2.set_index('FAM',inplace=True)
    N_max = 1

    for family in df2.index :
        N = 0
        for indi in df[df['FAMS'] == f'{family}']['INDI'] :
            N += 1
            if N > N_max :
                N_max = N
                df2[f'{N_max}FAMS'] = np.NaN
            #df2.at[f'{family}',f'{N}'] = f'{indi}'
            df2.at[f'{family}',f'{N}FAMS'] = df1.at[f'{indi}','FAMC']
    return df2

In [118]:
#pd.set_option('display.max_rows', None)
df = get_IndivFamily_DataFrame()
get_FamSpouse_DataFrame(df)

Unnamed: 0_level_0,1FAMS,2FAMS
FAM,Unnamed: 1_level_1,Unnamed: 2_level_1
@F285@,@F286@,
@F286@,@F287@,
@F78@,,@F304@
@F75@,@F76@,@F209@
@F76@,@F77@,@F214@
...,...,...
@F6017@,@F6018@,@F6020@
@F6018@,@F6019@,
@F6019@,,
@F6020@,,


In [120]:
def get_FamLinks_DataFrame():
    df = get_IndivFamily_DataFrame()
    df3 = pd.concat([get_FamSpouse_DataFrame(df),get_FamChildrens_DataFrame(df)],axis=1)
    return df3

In [121]:
get_FamLinks_DataFrame()

Unnamed: 0_level_0,1FAMS,2FAMS,1FAMC,2FAMC,3FAMC,4FAMC,5FAMC
FAM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
@F285@,@F286@,,,,,,
@F286@,@F287@,,@F285@,,,,
@F78@,,@F304@,@F303@,,,,
@F75@,@F76@,@F209@,@F187@,,,,
@F76@,@F77@,@F214@,@F75@,,,,
...,...,...,...,...,...,...,...
@F6017@,@F6018@,@F6020@,@F6013@,,,,
@F6018@,@F6019@,,@F6017@,,,,
@F6019@,,,@F6018@,,,,
@F6020@,,,@F6017@,,,,
