 ## Prise en main de la librairie GEDCOM

### Conventions : 
Tag for Spouse FamiliyElement : 'FAMS'
$\\$
Tag for Childrens FamiliyElement : 'FAMC'

In [5]:
from gedcom.element.individual import IndividualElement
from gedcom.element.element import Element
from gedcom.parser import Parser
from gedcom.element.family import FamilyElement
import gedcom.tags
import numpy as np
import pandas as pd
import string
#from gedcom.element.element import Element
#from gedcom.element.file import FileElement
#from gedcom.element.object import ObjectElement

gedcom_parser = Parser()

In [30]:
file_path = 'Queen_Eliz_II.ged'
gedcom_parser = Parser()
gedcom_parser.parse_file(file_path)
root_child_elements = gedcom_parser.get_root_child_elements()

def find_child_data(first_names,last_name):
    for element in root_child_elements:
        if isinstance(element, IndividualElement):
            if element.surname_match(last_name):
                if element.given_name_match(first_names):
                    L1 = []
                    L2 = []
                    (first, last) = element.get_name()
                    print("Information about " + first + " " + last + ":")
                    data = element.get_child_elements()
                    for child in data :
                        L1 += [child]
                        L2 += [child.get_tag()]
                    return L1, L2
    return "No matching person"

In [10]:
find_child_data("Elizabeth II Alexandra Mary", "Windsor")

Information about Elizabeth II Alexandra Mary Windsor:


([<gedcom.element.element.Element at 0x10866aac0>,
  <gedcom.element.element.Element at 0x10866aaf0>,
  <gedcom.element.element.Element at 0x10866ab20>,
  <gedcom.element.element.Element at 0x10866ab50>,
  <gedcom.element.element.Element at 0x10866ac40>,
  <gedcom.element.element.Element at 0x10866ac10>,
  <gedcom.element.element.Element at 0x10866ad30>,
  <gedcom.element.element.Element at 0x10866ad60>],
 ['RIN', '_UID', '_UPD', 'NAME', 'SEX', 'BIRT', 'FAMS', 'FAMC'])

In [11]:
def find_IndividualElement(first_names,last_name):
    """
    Get Individual element of ged with first names and last name

    Parameters
    ---
    first_names : str
        complete first names of the individual
    last_name : str
        last names of the individual 

    Returns 
    ---
    bool 
        Tells if the individual was found
    IndividualElement
        IndividualElement corresponding to the individual in the ged

    """
    for element in root_child_elements:
        if isinstance(element, IndividualElement):
            if element.surname_match(last_name):
                return (True, element)
    return [False, element]

In [12]:
def get_Spouse_FamilyElement(Individual):
    """
    Get Spouse FamilyElements of an individual

    Parameters
    ---
    Individual : IndividualElement

    Returns
    ---
    list of FamilyElement 
    """
    return gedcom_parser.get_families(Individual,"FAMS")
    

In [13]:
def get_Children_FamilyElement(Individual):
    """
    Get Childrens FamilyElements of an individual

    Parameters
    ---
    Individual : IndividualElement

    Returns
    ---
    list of FamilyElement 
    """
    return gedcom_parser.get_families(Individual,"FAMC")

In [14]:
def get_spouse(Individual):
    """
    Doesn't work
    """
    Family = get_Spouse_FamilyElement(Individual)[0]
    return gedcom_parser.get_family_members(Family,"FAMILY_MEMBERS_TYPE_ALL")

### Tests

In [15]:
get_Spouse_FamilyElement(find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1])

[<gedcom.element.family.FamilyElement at 0x10a6927c0>]

In [16]:
get_Spouse_FamilyElement(find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1])[0].get_tag()

'FAM'

In [17]:
get_Children_FamilyElement(find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1])

[<gedcom.element.family.FamilyElement at 0x10a6924c0>]

In [18]:
A = find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1]
B = find_IndividualElement("George VI", "Windsor")[1]
gedcom_parser.find_path_to_ancestor(A,B,None)[0].get_name()

('George V', 'Windsor')

In [19]:
C = find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1]
D = find_IndividualElement("George V", "Windsor")[1]
gedcom_parser.find_path_to_ancestor(A,B,None)[0].get_name()

('George V', 'Windsor')

In [20]:
E = find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1]
F = find_IndividualElement("Edward VII", "Wettin")[1]
gedcom_parser.find_path_to_ancestor(A,B,None)[0].get_name()

('George V', 'Windsor')

In [21]:
get_spouse(find_IndividualElement("George VI", "Windsor")[1])

[<gedcom.element.individual.IndividualElement at 0x108661760>,
 <gedcom.element.individual.IndividualElement at 0x108661d00>,
 <gedcom.element.individual.IndividualElement at 0x10866a1f0>]

In [22]:
get_spouse(find_IndividualElement("George VI", "Windsor")[1])[0].get_name()

('George V', 'Windsor')

In [23]:
get_spouse(find_IndividualElement("George VI", "Windsor")[1])[1].get_name()

('Mary of Teck (May)', '')

In [24]:
get_spouse(find_IndividualElement("George VI", "Windsor")[1])[2].get_name()

('George VI', 'Windsor')

In [25]:
get_spouse(find_IndividualElement('Mary of Teck (May)', '')[1])[0].get_name()

('SIR WILLIAM VII KNIGHT ENGLAND', 'Gascoigne')

### Fonctions Parents/Childrens

In [26]:
def get_all_Spouse_FamilyElement():
    Spouse_FamilyElements = []
    N = 0
    for element in root_child_elements:
        if isinstance(element, FamilyElement):
            if element.get_tag() == gedcom.tags.GEDCOM_TAG_FAMILY_SPOUSE :
                Spouse_FamilyElements += [element]
    return Spouse_FamilyElements


In [27]:
get_all_Spouse_FamilyElement()

[]

In [28]:
get_Childrens(find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1])

NameError: name 'get_Childrens' is not defined

In [None]:
gedcom_parser.get_parents(find_IndividualElement("Elizabeth II Alexandra Mary", "Windsor")[1])

[<gedcom.element.individual.IndividualElement at 0x1089e0d90>,
 <gedcom.element.individual.IndividualElement at 0x1089e92b0>]

### Raisonnement : 
Construction d'une Dataframe panda Individual / Family1 / Family2 $\\$
Construction d'une Dataframe panda Family / Children1 / FamilyChildren2 / ... $\\$
Construction d'une Dataframe panda Family / Mother / Father $\\$
Dataframe of Families connections with other families

Parcours du graphe : 
On part d'un des 2, on part des 2 côté avec les 2 dernières dataframe jusqu'à trouver la famille du 2e

In [12]:
def get_IndivFamily_DataFrame(file_path='Queen_Eliz_II.ged'):
    """
    Creates DataFrame of children & spouse families keys of all individuals 
    indexed by their keys, from a gedcom file.

    Parameters
    ---
    file_path : str
        path of the gedcom file

    Returns 
    ---
    pd.DataFrame 
        dataframe of children & spouse families keys of individuals
    """
    gedcom_parser.parse_file(file_path)
    root_child_elements = gedcom_parser.get_root_child_elements()
    T = []
    for element in root_child_elements:
        if isinstance(element, IndividualElement):
            L = [element.get_pointer()]
            for child_element in element.get_child_elements() :
                if child_element.get_tag() == gedcom.tags.GEDCOM_TAG_FAMILY_SPOUSE :
                    L += [child_element.get_value()]
                elif child_element.get_tag() == gedcom.tags.GEDCOM_TAG_FAMILY_CHILD :
                    L += [child_element.get_value()]
            T += [L]

    full_T = [line+['NaN']*(3-len(line)) for line in T]

    df = pd.DataFrame(
    {
        'INDI' : [full_T[k][0] for k in range(len(full_T))],
        'FAMS' : [full_T[k][1] for k in range(len(full_T))],
        'FAMC' : [full_T[k][2] for k in range(len(full_T))],
    })

    df = df.set_index('INDI')

    return df



In [9]:
df1 = get_IndivFamily_DataFrame()

In [11]:
def is_present(string,list):
    """
    Verify if an element is already in a list or not
    """
    for i in list :
        if i == string :
            return True
    return False

In [13]:
def get_FamChildrens_DataFrame(df):
    fams = df1['FAMS'].drop_duplicates()
    fams = fams.set_index('FAMS')
    for fam in fams :
        N = 1
        for indi in df1['FAMC' == fam] :
            N += 1



IndentationError: expected an indented block (2870166054.py, line 5)