In [1]:
def data_extracting(file_in_path, file_out_path, options, print_lemmas=False):
    """
    Filter a tab-separated text file by the prefix of its third column and collect the unique values of its first column (the lemmas).

    Args:
    - file_in_path (str): Path to the input text file (read as UTF-8). Each line is stripped and split on tabs.
    - file_out_path (str): Path to the output text file (written as UTF-8, overwritten if it exists) that receives the matching lines unchanged.
    - options (str | tuple | iterable of str): Prefix or prefixes to match against the third tab-separated element of each line. For example, ('V', 'V;IND', 'V;SBJV'). Non-tuple iterables such as lists are accepted and converted to a tuple.
    - print_lemmas (bool): When True, also print the alphabetically sorted list of unique lemmas. Defaults to False, which prints only the line count.

    A line is copied to the output file when it has at least three elements and its third element starts with one of the prefixes in 'options'. The first element of every non-blank line is recorded as a lemma, whether or not the line matched.

    Returns:
    None - The function prints the number of lines written (and optionally the lemma list), but does not return any value.
    """
    # str.startswith only accepts a str or a tuple of str. Normalise up front so
    # that an unsuitable 'options' fails before the output file is truncated.
    prefixes = options if isinstance(options, str) else tuple(options)

    count = 0
    lemmas = set()  # set membership is O(1); a list made this loop quadratic

    with open(file_in_path, 'r', encoding='utf-8') as file_in, \
            open(file_out_path, 'w', encoding='utf-8') as file_out:
        for line in file_in:
            elements = line.strip().split('\t')
            # A blank line splits to [''], which is not a lemma.
            if elements[0]:
                lemmas.add(elements[0])
            if len(elements) >= 3 and elements[2].startswith(prefixes):
                file_out.write(line)
                count += 1

    print(f'lines: {count}')
    if print_lemmas:
        print('lemmas:', sorted(lemmas))

In [6]:
# Keep every finite verb form: subjunctive, indicative and imperative rows.
# NOTE: data_extracting returns None, so new_data does not hold the filtered data;
# the result is the file written to file_out_path.
options = ('V;SBJV', 'V;IND', 'V;IMP')
new_data = data_extracting(
    file_in_path='verbs.txt',
    file_out_path='indicative_subjunctive_imperative_paradigms.txt',
    options=options,
)

lines: 44466
verbs: ['-pleō', 'abarceō', 'aberceō', 'abhorrēscō', 'abnumerō', 'aborior', 'aborīscor', 'absonō', 'abstō', 'abōminor', 'abūtor', 'accieō', 'accipitrō', 'acontizō', 'acēdior', 'adbellō', 'adbītō', 'addormiō', 'addormīscō', 'addēnseō', 'adfector', 'adfleō', 'adfor', 'adformīdō', 'adfremō', 'adfriō', 'adgaudeō', 'adgeniculor', 'adgredior', 'adhortor', 'adincrēscō', 'adipīscor', 'adloquor', 'adlubēscō', 'adlūctor', 'adlūdiō', 'admeō', 'admoderor', 'admodulor', 'admurmuror', 'admētior', 'admīror', 'adnictō', 'adnītor', 'adnūbilō', 'adnūtriō', 'adnūtō', 'adopīnor', 'adorior', 'adpetissō', 'adpostulō', 'adprecor', 'adprēnsō', 'adsector', 'adsellor', 'adsentior', 'adsentor', 'adsequor', 'adsonō', 'adstipulor', 'adsībilō', 'adtestor', 'adtolerō', 'adtollō', 'adtorqueō', 'adulēscentior', 'adurgeō', 'advectō', 'adveneror', 'adversor', 'adzēlor', 'adōsculor', 'adūlor', 'adūtor', 'aeditumor', 'aegreō', 'aegrēscō', 'aemulor', 'aeruscō', 'affector', 'affleō', 'affor', 'afformīdō', 'affr

In [3]:
# Keep only the indicative and subjunctive rows.
# NOTE: data_extracting returns None, so new_data does not hold the filtered data;
# the result is the file written to file_out_path.
options = ('V;SBJV', 'V;IND')
new_data = data_extracting(
    file_in_path='verbs.txt',
    file_out_path='indicative_subjunctive_paradigms.txt',
    options=options,
)

lines: 38235


In [7]:
# Keep only the indicative rows.
# NOTE: data_extracting returns None, so new_data does not hold the filtered data;
# the result is the file written to file_out_path.
options = ('V;IND',)
new_data = data_extracting(
    file_in_path='verbs.txt',
    file_out_path='indicative_paradigms.txt',
    options=options,
)

lines: 22939
verbs: ['-pleō', 'abarceō', 'aberceō', 'abhorrēscō', 'abnumerō', 'aborior', 'aborīscor', 'absonō', 'abstō', 'abōminor', 'abūtor', 'accieō', 'accipitrō', 'acontizō', 'acēdior', 'adbellō', 'adbītō', 'addormiō', 'addormīscō', 'addēnseō', 'adfector', 'adfleō', 'adfor', 'adformīdō', 'adfremō', 'adfriō', 'adgaudeō', 'adgeniculor', 'adgredior', 'adhortor', 'adincrēscō', 'adipīscor', 'adloquor', 'adlubēscō', 'adlūctor', 'adlūdiō', 'admeō', 'admoderor', 'admodulor', 'admurmuror', 'admētior', 'admīror', 'adnictō', 'adnītor', 'adnūbilō', 'adnūtriō', 'adnūtō', 'adopīnor', 'adorior', 'adpetissō', 'adpostulō', 'adprecor', 'adprēnsō', 'adsector', 'adsellor', 'adsentior', 'adsentor', 'adsequor', 'adsonō', 'adstipulor', 'adsībilō', 'adtestor', 'adtolerō', 'adtollō', 'adtorqueō', 'adulēscentior', 'adurgeō', 'advectō', 'adveneror', 'adversor', 'adzēlor', 'adōsculor', 'adūlor', 'adūtor', 'aeditumor', 'aegreō', 'aegrēscō', 'aemulor', 'aeruscō', 'affector', 'affleō', 'affor', 'afformīdō', 'affr