In [7]:
from tf.app import use
A = use('bhsa', hoist=globals())

This is Text-Fabric 9.1.1
Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html

122 features found and 0 ignored


# Classes

## Classification Classes

In [117]:
"""
A class with data used to assign difficulty weights to passages based
on the lexical frequencies of words in the passage. 

ranks: a list of string categories for lexical frequency ranges
ranges: a 2D list of the numeric range for each rank
weights: a list of the weight penalties assigned per word for each rank
"""
class Rank:

    def __init__(self, name, ranks, ranges, weights):
        self.name = name
        self.ranks = ranks 
        self.ranges = ranges 
        self.weights = weights 

    # Auxiliary function to create a single rank_scale dictionary.
    def get_rank_dict(self):
        rank_dict = {}
        for i in range(len(self.ranks)):
            rank_dict[self.ranks[i]] = {
                'range': self.ranges[i],
                'weight': self.weights[i]
            }
        return rank_dict 

In [128]:
""" 
A class to store different ranking scales. 
"""
class Ranks:

    # Using 2-elem lists is far faster than searching entire ranges. 
    # Rather than if i in range(), check if i > l[0] and <= l[1].
    # Using this method scales runtime from ~0:04:30 to ~0:00:15.
    _3_ranks = Rank(
        "3_ranks",
        ['Frequent', 'Uncommon', 'Rare'],
        [
            [100, 51000],
            [10, 100],
            [1, 10],
        ],
        [1, 3, 7]
    )
   
    _4_ranks = Rank(
        "4_ranks",
        ['Frequent', 'Medium', 'Uncommon', 'Rare'],
        [
            [100, 51000],
            [50, 100],
            [10, 50],
            [1, 10],
        ],
        [1, 4, 5, 8]
    )

    _5_ranks_a = Rank(
        "5a_ranks",
        ['Frequent', 'Common', 'Medium', 'Uncommon', 'Rare'],
        [
            [500, 51000],
            [250, 500],
            [150, 250],
            [50, 150],
            [1, 50],
        ],
        [1, 2, 3, 5, 8]
    )

    _5_ranks_b = Rank(
        "5b_ranks",
        ['Frequent', 'Common', 'Infrequent', 'Rare', 'Scarce'],
        [
            [200, 51000],
            [100, 200],
            [50, 100],
            [20, 50],
            [1, 20],
        ],
        [1, 1.5, 3, 5, 8]
    )

    _7_ranks = Rank(
        "7_ranks",
        ['Abundant', 'Frequent', 'Common', 'Average', 'Uncommon', 'Rare', 'Scarce'],
        [
            [800, 51000],
            [400, 800],
            [200, 400],
            [100, 200],
            [50, 100],
            [15, 50],
            [1, 15],
        ],
        [1, 1.1, 1.3, 1.7, 3, 5.5, 8.5]
    )

    _9_ranks = Rank(
        "9_ranks",
        ['Abundant', 'Frequent', 'Common', 'Average', 'Uncommon', 'Rare', 'Scarce', 'Scarcer', 'Scarcest'],
        [
            [1000, 51000],
            [400, 1000],
            [200, 400],
            [100, 200],
            [50, 100],
            [30, 50],
            [20, 30],
            [10, 20],
            [1, 10]
        ],
        [1, 1.1, 1.3, 1.7, 3, 5.5, 8, 9, 10]
    )

    _10_ranks = Rank(
        "10_ranks",
        ['Abundant', 'Frequent', 'Common', 'Average', 'Uncommon', 'Rare', 'Rarer', 'Scarce', 'Scarcer', 'Scarcest'],
        [
            [1000, 51000],
            [400, 1000],
            [200, 400],
            [100, 200],
            [50, 100],
            [40, 50],
            [30, 40],
            [20, 30],
            [10, 20],
            [1, 10]
        ],
        [1, 1.1, 1.3, 1.7, 3, 5.5, 7, 8, 9, 10]
    )

    all_ranks = [
        _3_ranks,
        _4_ranks,
        _5_ranks_a,
        _5_ranks_b,
        _7_ranks,
        _9_ranks,
        _10_ranks
    ]

In [66]:
"""
A class that contains data to help assign difficulty weights to 
any portion of Hebrew text that has more than one word. 
"""
class Classify:
    """ 
    Notes on stop_words_types and other exclusion lists. 

    Most prepositions, articles, and conjunctions don't
    add any meaningul weight to a text and could thus be exlcuded.
    
    Example use:
    words = [w for w in passage if F.sp.v(w) not in stop_words_types]
    
    Note: the only Heb article is 'הַ' with 30,386 occurences. There are some 
    preps and conjs that have few occurences, so I recommend not using
    stop_words_types when weighing passages and using stop_words instead.
    """
    stop_words_types = ['prep', 'art', 'conj']
    # Check if F.voc_lex_utf8.v(word) is in this list. If
    # so it can be excluded since it occurs so often. 
    stop_words = ['אֵת', 'בְּ', 'לְ', 'הַ', 'וְ']
    # If you take verb data into account when weighing a
    # paragraph, these common types could be excluded. 
    easy_vtypes = ['perf', 'impf', 'wayq']
    easy_vstems = ['qal', 'hif', 'nif', 'piel']

## Hebrew Passage Classes

In [81]:
"""
A class that contains a Hebrew passage, consisting of paragraphs 
as marked by a petach (פ) or samech (ס) in the Masoretic Text. If 
a book like Psalms, which lacks paragaph markers, is encountered,
the passages are split at the chapter level. 
"""
class Passage:

    def __init__(self, id):
        self.id = id
        self.verses = [] # a list of verse node ints. 
        self.words = [] # a list of word node ints. 
        self.start_word = 0
        self.end_word = 0
        self.word_count = 0
        self.weight1 = 0
        self.weight2a = 0 # all words denom
        self.weight2b = 0 # unique words
        self.weight3a = 0 # all word denom
        self.weight3b = 0
        self.verb_types_present = set()
        self.verb_stems_present = set()
        self.word_ranks_data = {}
        self.start_ref = ''
        self.end_ref = ''

    paragraph_markers = {'פ': 'open', 'ס': 'closed'}

    # Returns a list of all words present in the passage.
    def get_all_words(self):
        words = []
        for verse in self.verses:
            for word in L.i(verse, otype='word'):
                words.append(word)
        return words

    # Returns a list of all words present in a specified verse in the passage.
    def get_vs_words(verse):
        verse_words = [w for w in L.i(verse, otype='word')]
        return verse_words

    # Returns a String of all the text in the passage.
    def get_text(self):
        return T.text(self.verses, fmt='text-orig-full')

    """
    get_vs_weights() returns a dictionary mapping each verse node in 
    the passage to a weight. It takes rank_scale as input, an instance
    of Classify(args).rank_scale() (see notes in Classify for instantiaion).
    """
    def get_vs_weights(self, rank_scale):
        # A dictionary mapping verse nodes to weights.
        verse_weights = {}
        # Iterate over verses in the passage.
        for verse in self.verses:
            verse_weight = 0
            words = self.get_vs_words(verse)
            # Add the scaled word weights to the verse's total weight.
            for word in words:
                if F.voc_lex_utf8.v(word) not in Classify().stop_words:
                    for rank in rank_scale.keys():
                        lex_freq = F.freq_lex.v(word)
                        range = rank_scale[rank]['range']
                        if lex_freq >= range[0] and lex_freq < range[1]:
                            verse_weight += rank_scale[rank]['weight']
            # Add the verse's weight to the dictionary at this verse's key. 
            verse_weight /= len(words)
            verse_weights[verse] = round(verse_weight, 4)
        
        return verse_weights

    def get_passage_weight1(self, rank_scale):
        total_weight = 0
        # Iterate over words in the passage.
        for word in self.words:
            if F.voc_lex_utf8.v(word) not in Classify().stop_words:
                # Iterate over the ranks present in the rank scale. 
                for rank in rank_scale.keys():
                    lex_freq = F.freq_lex.v(word)
                    range = rank_scale[rank]['range']
                    if lex_freq >= range[0] and lex_freq < range[1]:
                        # Give a half penalty for proper nouns. 
                        if F.sp.v(word) == 'nmpr': # proper noun
                            total_weight += (rank_scale[rank]['weight']) / 2
                        # Give a full penalty for other word types. 
                        else:
                            total_weight += rank_scale[rank]['weight']
        total_weight /= len(self.words)
        
        return round(total_weight, 4)

    # Only penalize once per lexical value.  
    def get_passage_weight2(self, rank_scale, div_all=True):
        total_weight = 0
        unique_words = set()
        # Iterate over words in the passage.
        for word in self.words:
            lex = F.voc_lex_utf8.v(word)
            if lex not in Classify().stop_words and lex not in unique_words:
                # Iterate over the ranks present in the rank scale. 
                for rank in rank_scale.keys():
                    lex_freq = F.freq_lex.v(word)
                    range = rank_scale[rank]['range']
                    if lex_freq >= range[0] and lex_freq < range[1]:
                        # Give a half penalty for proper nouns. 
                        if F.sp.v(word) == 'nmpr': # proper noun
                            total_weight += (rank_scale[rank]['weight']) / 2
                        # Give a full penalty for other word types. 
                        else:
                            total_weight += rank_scale[rank]['weight']
                unique_words.add(lex)
        # Compare using all words as denominator vs. unique words.
        if div_all:
            total_weight /= len(self.words)
        else:
            total_weight /= len(unique_words)
        
        return round(total_weight, 4)

    # Decrease penalty for each occurance. 
    def get_passage_weight3(self, rank_scale, div_all=True):
        word_weights = {}
        # Iterate over words in the passage.
        for word in self.words:
            lex = F.voc_lex_utf8.v(word)
            if lex not in Classify().stop_words:
                # Add partial penalty for reocurring words. 
                if lex in word_weights.keys():
                    # Only gradually decrease penalty for rarer words. 
                    # Decreases by 1 point per occurance. 
                    word_weights[lex]['count'] += 1
                    if F.freq_lex.v(word) < 100:
                        count = word_weights[lex]['count']
                        penalty = word_weights[lex]['penalty']
                        new_weight = penalty - count 
                        added_weight = new_weight if new_weight >= 1 else 1
                        word_weights[lex]['weight'] += added_weight
                    else:
                        word_weights[lex]['weight'] += word_weights[lex]['penalty']
                # Add full penalty for the first occurance. 
                else:
                    # Add word to hash table
                    word_weights[lex] = {'count':0, 'weight':0, 'penalty':0}
                    # Iterate over the ranks present in the rank scale. 
                    for rank in rank_scale.keys():
                        lex_freq = F.freq_lex.v(word)
                        range = rank_scale[rank]['range']
                        if lex_freq >= range[0] and lex_freq < range[1]:
                            # Give a half penalty for proper nouns. 
                            if F.sp.v(word) == 'nmpr': # proper noun
                                word_weights[lex]['penalty'] = (rank_scale[rank]['weight']) / 2
                            # Give a full penalty for other word types. 
                            else:
                                word_weights[lex]['penalty'] = rank_scale[rank]['weight']
                    word_weights[lex]['weight'] += word_weights[lex]['penalty']
                    word_weights[lex]['count'] += 1
        # Get the sum of all word weights. 
        total_weight = sum([w for w in [word_weights[k]['weight'] for k in word_weights.keys()]])
        # Compare using all words as denominator vs. unique words.
        if div_all:
            total_weight /= len(self.words)
        else:
            total_weight /= len(word_weights)
        
        return round(total_weight, 4)

In [68]:
"""
A class to store passages. It includes methods to sort the passages
by attributes such as word count, weight, and canonical order. It
also stores the rank scale used to create the passage list. The passages
will be stored in order_sorted by default. 
"""
class Passages:
    
    def __init__(self, rank_scale={}):
        self.rank_scale = rank_scale
        self.order_sorted = []
        self.word_count_sorted = []
        self.weight_sorted1 = {}
        self.weight_sorted2a = {}
        self.weight_sorted2b = {}
        self.weight_sorted3a = {}
        self.weight_sorted3b = {}
    
    def word_count_sort(self):
        return sorted(self.order_sorted, key=lambda p: p.word_count)

    def weight_sort1(self):
        sorted_list = sorted(self.order_sorted, key=lambda p: p.weight1)
        return {sorted_list[i]:i for i in range(len(sorted_list))}

    def weight_sort2a(self):
        sorted_list = sorted(self.order_sorted, key=lambda p: p.weight2a)
        return {sorted_list[i]:i for i in range(len(sorted_list))}

    def weight_sort2b(self):
        sorted_list = sorted(self.order_sorted, key=lambda p: p.weight2b)
        return {sorted_list[i]:i for i in range(len(sorted_list))}

    def weight_sort3a(self):
        sorted_list = sorted(self.order_sorted, key=lambda p: p.weight3a)
        return {sorted_list[i]:i for i in range(len(sorted_list))}

    def weight_sort3b(self):
        sorted_list = sorted(self.order_sorted, key=lambda p: p.weight3b)
        return {sorted_list[i]:i for i in range(len(sorted_list))}

    # A function to display the rank scale as a multi-line string. 
    def print_scale(self):
        scale = self.rank_scale
        output_text = ""
        for rank in scale.keys():
            range = scale[rank]['range'] 
            weight = scale[rank]['weight']
            output = f"w{weight} for {range[0]}-{range[1]} occ"
            output_text += f"{rank}: {output}\n"
        return output_text

# Methods

## Passage Retrieval

In [96]:
"""
Used by get_passages()

Check whether we have reached the end of a valid passage as defined by 
passage_size and paragraph markers. If we have, or if a new book (or new 
chapter in certain books like Psalms), then mark the passage as valid by
setting its value to True. 
In certain cases we will not want to add the current verse to the current
passage, so we will set add_verse to False. 
"""
def valid_passage(passage, verse, passage_size):
    is_valid = False
    add_verse = True
    # Get the string value at the end of the verse. 
    verse_ending = T.text(verse).split()[-1]
    verse_book = L.u(verse, otype='book')[0]
    verse_chapter = L.u(verse, otype='chapter')[0]
    ps_119 = 427315 # node for Psalm 119.
    # Check if we've reached a new book, if yes, end the paragraph.
    if L.u(passage.verses[-1], otype='book')[0] != verse_book:
        is_valid = True 
        add_verse = False
    # Check if the current verse is in the following books.
    # Since they lack enough paragraph markers to make meaningful passages,
    # we create passages at the chapter level. 
    elif verse_book in [T.bookNode('Ruth'), T.bookNode('Jonah'), T.bookNode('Ecclesiastes'), T.bookNode('Psalms')]:
        if verse_chapter != L.u(passage.verses[-1], otype='chapter')[0]:
            is_valid = True 
            add_verse = False
        # If Psalm 119, split up into 8 verse sections to preserve acrostic.
        elif verse_chapter == ps_119:
            if (verse-1) % 8 == 0:
                is_valid = True 
                add_verse = False
    # Otherwise check if we have reached the end of a paragraph. 
    elif verse_ending in passage.paragraph_markers.keys() \
    and len(passage.get_all_words()) >= passage_size:
        is_valid = True 

    return is_valid, add_verse

In [50]:
"""
Used by get_passages

Update all of the data of a passage instance once its end verse 
has been reached. 
"""
def update_passage_data(passage, rank_scale):
    # TODO to print nouns in red. 
    passage.word_ranks_data = {k:{'occ':0, 'words':set()} for k in rank_scale.keys()}
    passage.words = passage.get_all_words()

    passage.start_word = passage.words[0]
    passage.end_word = passage.words[-1]
    passage.word_count = len(passage.words)

    passage.weight1 = passage.get_passage_weight1(rank_scale)
    passage.weight2a = passage.get_passage_weight2(rank_scale)
    passage.weight2b = passage.get_passage_weight2(rank_scale, div_all=False)
    passage.weight3a = passage.get_passage_weight3(rank_scale)
    passage.weight3b = passage.get_passage_weight3(rank_scale, div_all=False)

    # Update the passage's word frequency and verb data.
    for word in passage.words:
        # Update the types and stems of verbs present. 
        # if F.sp.v(word) == 'verb':
        #     if F.vt.v(word) not in c.easy_vtypes:
        #         passage.verb_types_present.add(F.vt.v(word))
        #     if F.vt.v(word) not in c.easy_vstems:
        #         passage.verb_stems_present.add(F.vs.v(word))
        # Update the word_ranks_data dictionary with
        # the words in each category.
        for rank in rank_scale.keys():
            lex_freq = F.freq_lex.v(word)
            range = rank_scale[rank]['range']
            if lex_freq >= range[0] and lex_freq < range[1]:
                passage.word_ranks_data[rank]['occ'] += 1
                passage.word_ranks_data[rank]['words'].add(F.voc_lex_utf8.v(word))
                    
    # Update the passage's start and end reference.
    start_ref = T.sectionFromNode(passage.verses[0])
    end_ref = T.sectionFromNode(passage.verses[-1])
    passage.start_ref = f"{start_ref[0][:6]} {start_ref[1]}:{start_ref[2]}"
    passage.end_ref = f"{end_ref[0][:6]} {end_ref[1]}:{end_ref[2]}"

In [92]:
"""
Iterates over verses in the OT and combines them into passages. 
The function returns a list of Passage objects. 

rank_scale - a dictionary generated by Ranks().rank_scales()
    For example:
        rank_scales = Ranks().rank_scales(Ranks().all_ranks)[index]

start_node - the verse node at which get_passages will begin. 

end_node - the verse node at which get_passages will finish executing.

passage_size - the minimum words in a passage, unless a chapter is shorter
than that (e.g., Psalm 117).
"""
def get_passages(
    rank_scale, 
    start_node=0,
    end_node=len(F.otype.s('verse')), 
    passage_size=100
    ):

    # A list of all passages.
    passages = []

    # Initiate the id counter and instantiate the first passage.
    passage_id = 1
    passage = Passage(id=passage_id)

    # Iterate through all verses in the OT. 
    for verse in F.otype.s('verse')[start_node:end_node]:

        # Check if the string is a paragraph marker and if the paragraph is large enough.  
        if len(passage.verses) > 1:
            valid, add_verse = valid_passage(passage, verse, passage_size)
            if valid:

                # We have reached the end of the passage so we update all of its attribute values.
                if add_verse:
                    passage.verses.append(verse)
                update_passage_data(passage, rank_scale)
                passages.append(passage)
                # Begin a new passage. 
                passage_id += 1
                passage = Passage(id=passage_id)

                # The current verse is in a new chapter or book so we append it to the
                # verses of the newly created passage as its start verse. 
                if not add_verse:
                    passage.verses.append(verse)
            # We haven't reached a new passage yet, so add the current verse to its list. 
            else:
                passage.verses.append(verse)

        # Add the first verse to the passage. 
        else:
            passage.verses.append(verse)

    return passages

## Display and Export Passage Data

In [52]:
# A helper function to format printing output. 
# Takes a set of words as input and returns a string.
def format_output(output):
    # Sort the words by alphabetical order.
    output = sorted(list(output))
    formatted = ''
    # Add spacing between the words until the
    # last word is reached. 
    for item in output:
        if item != output[-1]:
            formatted += item + '  '
        else:
            formatted += item
    # Return a string of the formatted words. 
    return formatted

### As Xlsx

In [None]:
from collections import OrderedDict
import xlsxwriter

data = {"1":["xyz",""],"2":["abc","def"],"3":["zzz",""]}

# Use an OrderedDict to maintain the order of the columns
data = OrderedDict((k,data.get(k)) for k in sorted(data.keys()))

# Open an Excel workbook
workbook = xlsxwriter.Workbook('dict_to_excel.xlsx')

# Set up a format
book_format = workbook.add_format(properties={'bold': True, 'font_color': 'red'})

# Create a sheet
worksheet = workbook.add_worksheet('dict_data')

# Write the headers
for col_num, header in enumerate(data.keys()):
    worksheet.write(0,col_num, int(header))

# Save the data from the OrderedDict into the excel sheet
for row_num,row_data in enumerate(zip(*data.values())):
    for col_num, cell_data in enumerate(row_data):
        if cell_data ==  "xyz":
            worksheet.write(row_num+1, col_num, cell_data, book_format)
        else:
            worksheet.write(row_num+1, col_num, cell_data)

# Close the workbook
workbook.close()

### As CSV

In [100]:
# Output data to CSV
import csv

"""
passages - an instance of Passages()
_file - a file name to save to
lim - the number of rank columns to display
"""
def to_csv(passages, _file, lim):
    rank_scale = passages.rank_scale
    rr = list(rank_scale.keys())[-lim:] # rarest ranks
    with open(_file, mode='w', encoding='utf-8') as file:
        writer = csv.writer(file, delimiter=',')
        writer.writerow([
            passages.print_scale()
        ])
        headings = [
            'Rank',
            'Reference', 
            'Weight',
            'Words',
        ]
        for i in range(len(rr)):
            headings.append(
                f"{rr[i]} ({rank_scale[rr[i]]['range'][0]}-{rank_scale[rr[i]]['range'][1]})"
            )        
        writer.writerow(headings)
        for p in passages.weight_sorted1:
            row = [
                passages.weight_sorted1[p]+1,
                f"{p.start_ref} - {p.end_ref}", 
                p.weight1, 
                p.word_count,
            ]
            for i in range(len(rr)):
                row.append(
                    f"{p.word_ranks_data[rr[i]]['occ']}  {format_output(p.word_ranks_data[rr[i]]['words'])}",
                )
            writer.writerow(row)

### As DataFrame

In [None]:
import pandas as pd
from IPython.display import display, HTML

In [136]:
def weight_comparisons_df(passages, display_order, lim):
    rank_scale = passages.rank_scale.get_rank_dict()
    rr = list(rank_scale.keys())[-lim:] # rarest ranks
    df_cols = [
        'Reference',
        'Words',
        'Weight', 
        'R2a',
        'R2b',
        'R3a',
        'R3b',
    ]
    for i in range(len(rr)):
        df_cols.append(f"{rr[i]} ({rank_scale[rr[i]]['range'][0]}-{rank_scale[rr[i]]['range'][1]})")    
    row_list = []
    for p in display_order:
        row_dict = {}
        row = [
            f"{p.start_ref} - {p.end_ref}", 
            p.word_count, 
            p.weight1,
            # Go into the dict to get the ranking for that weight
            passages.weight_sorted2a[p]+1,
            passages.weight_sorted2b[p]+1,
            passages.weight_sorted3a[p]+1,
            passages.weight_sorted3b[p]+1,
        ]
        for i in range(len(rr)):
            row.append(f"{p.word_ranks_data[rr[i]]['occ']}  {format_output(p.word_ranks_data[rr[i]]['words'])}")
        for i in range(len(row)):
            row_dict[df_cols[i]] = row[i]
        row_list.append(row_dict)

    df = pd.DataFrame(row_list, columns=df_cols)
    return df

In [134]:
def custom_df(passages, display_order, weight, lim):
    rank_scale = passages.rank_scale.get_rank_dict()
    rr = list(rank_scale.keys())[-lim:] # rarest ranks
    df_cols = [
        'Rank',
        'Reference', 
        'Weight',
        'Words',
    ]
    for i in range(len(rr)):
        df_cols.append(f"{rr[i]} ({rank_scale[rr[i]]['range'][0]}-{rank_scale[rr[i]]['range'][1]})")    
    row_list = []
    for p in display_order:
        row_dict = {}
        index = eval(f"passages.{weight_dict[weight]}[p]+1")
        row = [
            index,
            f"{p.start_ref} - {p.end_ref}", 
            eval(f"p.{weight}"),
            p.word_count,
        ]
        for i in range(len(rr)):
            row.append(f"{p.word_ranks_data[rr[i]]['occ']}  {format_output(p.word_ranks_data[rr[i]]['words'])}")
        for i in range(len(row)):
            row_dict[df_cols[i]] = row[i]
        row_list.append(row_dict)

    df = pd.DataFrame(row_list, columns=df_cols)
    return df

In [107]:
weight_dict = {
    "weight1": "weight_sorted1",
    "weight2a": "weight_sorted2a",
    "weight2b": "weight_sorted2b",
    "weight3a": "weight_sorted3a",
    "weight3b": "weight_sorted3b",
}

In [131]:
import time

def all_ranks():
    start = time.time()
    rank_scales = Ranks().all_ranks[5]
    all_passage_rankings = []
    for r_s in [rank_scales]:
        rank_scale = r_s.get_rank_dict()
        all_p = Passages(rank_scale=r_s)
        all_p.order_sorted = get_passages(
            rank_scale, 
            # start_node=0,
            # end_node=100, 
            # passage_size=100
        )
        all_p.word_count_sorted = all_p.word_count_sort()
        all_p.weight_sorted1 = all_p.weight_sort1()
        all_p.weight_sorted2a = all_p.weight_sort2a()
        all_p.weight_sorted2b = all_p.weight_sort2b()
        all_p.weight_sorted3a = all_p.weight_sort3a()
        all_p.weight_sorted3b = all_p.weight_sort3b()
        all_passage_rankings.append(all_p)
        end = time.time()
        print(r_s.name, "complete", end-start)
    return all_passage_rankings

In [132]:
passages = all_ranks()

9_ranks complete 21.879775285720825


In [137]:
df = weight_comparisons_df(passages[0], passages[0].weight_sorted1, 5)
display(HTML(
    df.head(50).to_html(index=True))
)

Unnamed: 0,Reference,Words,Weight,R2a,R2b,R3a,R3b,Uncommon (50-100),Rare (30-50),Scarce (20-30),Scarcer (10-20),Scarcest (1-10)
0,2_Chro 34:29 - 2_Chro 34:33,157,0.7411,39,4,1,8,2 יֹאשִׁיָּהוּ עֵדוּת,1 קָטָן,0,1 עֹמֶד,0
1,Deuter 30:11 - Deuter 30:20,249,0.7723,40,19,2,48,9 אֹרֶךְ בְּרָכָה דבק נדח עֵבֶר פלא קָרֹוב רָחֹוק,3 ארך עוד קְלָלָה,0,0,0
2,Jeremi 26:7 - Jeremi 26:15,261,0.7764,17,21,3,267,3 חָדָשׁ מַדּוּעַ תפשׂ,4 חרב מַעֲלָל נָקִי קהל,0,1 שִׁלֹו,0
3,1_King 22:52 - 1_King 22:54,66,0.8121,197,1,5,2,3 אַחְאָב יְהֹושָׁפָט כעס,1 אֲחַזְיָהוּ,1 נְבָט,0,0
4,Exodus 14:26 - Exodus 14:31,152,0.8214,117,76,4,47,3 פָּרָשׁ שְׂמֹאל,0,0,3 אֵיתָן יַבָּשָׁה נער,0
5,Joshua 22:1 - Joshua 22:6,144,0.8302,137,31,13,15,4 אֲחֻזָּה דבק מִשְׁמֶרֶת עֵבֶר,0,0,2 גָּדִי רְאוּבֵנִי,0
6,Number 27:15 - Number 27:23,169,0.8417,20,27,9,323,3 אֶלְעָזָר,3 נוּן סמך,1 הֹוד,0,1 אוּר
7,2_Samu 19:39 - 2_Samu 19:43,138,0.8486,102,51,14,66,3 חרה מַדּוּעַ קָרֹוב,3 גִּלְגָּל גנב נשׁק,0,1 בַּרְזִלַּי,2 כִּמְהָם כִּמְהָן
8,2_Samu 19:6 - 2_Samu 19:9,148,0.8561,125,14,16,16,3 יבשׁ לין מלט,2 נְעוּרִים פִּלֶגֶשׁ,1 לוּ,0,0
9,Judges 21:23 - Judges 21:25,68,0.8596,364,18,19,4,0,1 גזל,0,1 חול,0


In [80]:
to_csv(passages, 'temp.csv', 5)

In [122]:
for w in weight_dict.keys():
    display_order = eval(f"passages.{weight_dict[w]}")
    df = custom_df(passages, display_order, w, 5)
    display(HTML(
        df.head(10).to_html(index=False))
    )

Rank,Reference,Weight,Words,Rare (40-50),Rarer (30-40),Scarce (20-30),Scarcer (10-20),Scarcest (1-10)
1,2_Chro 34:29 - 2_Chro 34:33,0.7411,157,1 קָטָן,0,0,1 עֹמֶד,0
2,Deuter 30:11 - Deuter 30:20,0.7843,249,1 עוד,2 ארך קְלָלָה,0,0,0
3,Jeremi 26:7 - Jeremi 26:15,0.7879,261,2 מַעֲלָל נָקִי,2 חרב קהל,0,1 שִׁלֹו,0
4,Exodus 14:26 - Exodus 14:31,0.8214,152,0,0,0,3 אֵיתָן יַבָּשָׁה נער,0
5,1_King 22:52 - 1_King 22:54,0.8235,66,0,1 אֲחַזְיָהוּ,1 נְבָט,0,0
6,Joshua 22:1 - Joshua 22:6,0.8302,144,0,0,0,2 גָּדִי רְאוּבֵנִי,0
7,Number 27:15 - Number 27:23,0.8462,169,2 סמך,1 נוּן,1 הֹוד,0,1 אוּר
8,2_Samu 19:39 - 2_Samu 19:43,0.8594,138,2 גִּלְגָּל גנב,1 נשׁק,0,1 בַּרְזִלַּי,2 כִּמְהָם כִּמְהָן
9,2_King 8:1 - 2_King 8:6,0.8607,191,2 סָרִיס תְּבוּאָה,0,0,2 גֵּיחֲזִי,0
10,Jeremi 32:36 - Jeremi 32:41,0.8658,139,3 בֶּטַח דֶּבֶר יִרְאָה,0,2 קֶצֶף שׂושׂ,0,0


Rank,Reference,Weight,Words,Rare (40-50),Rarer (30-40),Scarce (20-30),Scarcer (10-20),Scarcest (1-10)
1,Number 4:38 - Number 4:49,0.2722,198,2 מַשָּׂא,2 מְרָרִי,0,2 גֵּרְשֹׁון,0
2,1_King 13:11 - 1_King 13:32,0.3071,535,18 אַרְיֵה מרה נְבֵלָה,8 אֵי חבשׁ ספד,1 כחשׁ,1 אֵלָה,0
3,Genesi 5:18 - Genesi 5:27,0.3163,147,0,0,0,8 חֲנֹוךְ לֶמֶךְ,8 יֶרֶד מְתוּשֶׁלַח
4,Number 1:28 - Number 1:35,0.3191,128,4 זְבוּלֻן יִשָּׂשׂכָר,4 תֹּולֵדֹות,0,0,0
5,Number 9:15 - Number 9:23,0.3264,203,0,2 ארך,0,0,0
6,Number 1:36 - Number 1:43,0.3307,127,2 אָשֵׁר,4 תֹּולֵדֹות,0,0,0
7,Ezekie 33:1 - Ezekie 33:9,0.3536,196,0,4 צפה,8 זהר,0,0
8,Genesi 5:9 - Genesi 5:17,0.373,126,0,0,0,0,15 אֱנֹושׁ יֶרֶד מַהֲלַלְאֵל קֵינָן
9,Ezekie 39:25 - Ezekie 43:9,0.3739,2475,78 אֵילָם אָשָׁם אור בֶּטַח בדל הֵנָּה לִשְׁכָּה מַעֲלָה צֵלָע קֹומָה רחם,39 גָּבֹהַּ גָּג חַלֹּון חרד כְּלִמָּה כְּפִיר מְלֹא מעל עֻמָּה פְּנִימִי קנא,46 אַיִל חִיצֹון מַעַל מֹוצָא סַף פֶּגֶר רחב שְׁבוּת,71 גָּדֵר גָּזִית גָּלוּת גֹּבַהּ דָּרֹום זֹה כנס מְזוּזָה מִקְצֹועַ מַעֲלֶה מַרְאָה עַשְׁתֵּי עָשֹׂור פְּנִימָה פֵּשֶׁת פָּתִיל קָרֵב רבע תִּיכֹון תִּמֹרָה תַּחְתֹּון תָּא,50 אִיתֹון אַצִּיל אַתִּיק אטם אצל בִּנְיָה בִּנְיָן גְּדֶרֶת גִּזְרָה דוח הֲגִינָה זְנוּת חֹל טֹפַח כְּבָר מִבְנֶה מַהֲלָךְ מוּסָב מוּסָדָה עָב קָצוּר רִצְפָה שְׁפַתַּיִם שׁלשׁ שָׂחִיף
10,Number 29:17 - Number 29:25,0.3748,151,0,0,0,1 עַשְׁתֵּי,0


Rank,Reference,Weight,Words,Rare (40-50),Rarer (30-40),Scarce (20-30),Scarcer (10-20),Scarcest (1-10)
1,1_King 22:52 - 1_King 22:54,1.3359,66,0,1 אֲחַזְיָהוּ,1 נְבָט,0,0
2,Jeremi 35:12 - Jeremi 35:16,1.3686,142,1 מַעֲלָל,0,0,2 רֵכָב,2 יְהֹונָדָב
3,2_Chro 34:29 - 2_Chro 34:33,1.3736,157,1 קָטָן,0,0,1 עֹמֶד,0
4,Number 4:38 - Number 4:49,1.3821,198,2 מַשָּׂא,2 מְרָרִי,0,2 גֵּרְשֹׁון,0
5,Psalms 134:1 - Psalms 134:3,1.3861,33,1 מַעֲלָה,0,0,0,0
6,2_King 8:1 - 2_King 8:6,1.4356,191,2 סָרִיס תְּבוּאָה,0,0,2 גֵּיחֲזִי,0
7,Number 1:36 - Number 1:43,1.4483,127,2 אָשֵׁר,4 תֹּולֵדֹות,0,0,0
8,Number 1:28 - Number 1:35,1.4589,128,4 זְבוּלֻן יִשָּׂשׂכָר,4 תֹּולֵדֹות,0,0,0
9,Exodus 24:12 - Exodus 24:18,1.4812,143,2 ירה לוּחַ,1 סִינַי,0,1 חוּר,0
10,2_Chro 21:4 - 2_Chro 21:11,1.4829,172,3 פשׁע,0,3 יְהֹורָם,1 לִבְנָה,1 נִיר


Rank,Reference,Weight,Words,Rare (40-50),Rarer (30-40),Scarce (20-30),Scarcer (10-20),Scarcest (1-10)
1,2_Chro 34:29 - 2_Chro 34:33,0.7411,157,1 קָטָן,0,0,1 עֹמֶד,0
2,Deuter 30:11 - Deuter 30:20,0.7763,249,1 עוד,2 ארך קְלָלָה,0,0,0
3,Jeremi 26:7 - Jeremi 26:15,0.7879,261,2 מַעֲלָל נָקִי,2 חרב קהל,0,1 שִׁלֹו,0
4,Exodus 14:26 - Exodus 14:31,0.8082,152,0,0,0,3 אֵיתָן יַבָּשָׁה נער,0
5,2_King 8:1 - 2_King 8:6,0.8136,191,2 סָרִיס תְּבוּאָה,0,0,2 גֵּיחֲזִי,0
6,Joshua 8:1 - Joshua 8:17,0.8197,450,6 ארב בזז,13 עַי,3 יצת נתק,1 עָקֵב,1 מַאְרָב
7,1_King 22:52 - 1_King 22:54,0.8235,66,0,1 אֲחַזְיָהוּ,1 נְבָט,0,0
8,Number 27:15 - Number 27:23,0.8284,169,2 סמך,1 נוּן,1 הֹוד,0,1 אוּר
9,Joshua 22:1 - Joshua 22:6,0.8302,144,0,0,0,2 גָּדִי רְאוּבֵנִי,0
10,Judges 20:20 - Judges 20:29,0.8326,238,1 ארב,4 גִּבְעָה,3 פִּינְחָס צום שׁלף,0,0


Rank,Reference,Weight,Words,Rare (40-50),Rarer (30-40),Scarce (20-30),Scarcer (10-20),Scarcest (1-10)
1,Psalms 134:1 - Psalms 134:3,1.6417,33,1 מַעֲלָה,0,0,0,0
2,1_King 22:52 - 1_King 22:54,1.6984,66,0,1 אֲחַזְיָהוּ,1 נְבָט,0,0
3,Malach 3:22 - Malach 3:24,1.7339,52,0,0,1 חֵרֶם,1 חֹרֵב,1 אֵלִיָּה
4,Judges 21:23 - Judges 21:25,1.8734,68,0,1 גזל,0,1 חול,0
5,Jeremi 32:36 - Jeremi 32:41,2.0058,139,3 בֶּטַח דֶּבֶר יִרְאָה,0,2 קֶצֶף שׂושׂ,0,0
6,1_Samu 24:17 - 1_Samu 24:23,2.091,137,0,2 גמל,1 מַמְלֶכֶת,1 מְצוּדָה,0
7,2_Chro 34:29 - 2_Chro 34:33,2.1155,157,1 קָטָן,0,0,1 עֹמֶד,0
8,Deuter 27:9 - Deuter 27:14,2.1313,114,4 אָשֵׁר זְבוּלֻן יִשָּׂשׂכָר שִׁמְעֹון,1 קְלָלָה,0,0,3 גְּרִזִים סכת עֵיבָל
9,Exodus 24:12 - Exodus 24:18,2.133,143,2 ירה לוּחַ,1 סִינַי,0,1 חוּר,0
10,2_Chro 21:4 - 2_Chro 21:11,2.1664,172,3 פשׁע,0,3 יְהֹורָם,1 לִבְנָה,1 נִיר


In [121]:
weight = "weight3a"
# display_order = eval(f"passages.{weight_dict[weight]}")
display_order = passages.word_count_sorted
df = custom_df(passages, display_order, weight, 5)
display(HTML(
    df.tail(30).to_html(index=False))
)
weight = "weight3b"
display_order = eval(f"passages.{weight_dict[weight]}")
df = custom_df(passages, display_order, weight, 5)
display(HTML(
    df.head(10).to_html(index=False))
)

Rank,Reference,Weight,Words,Rare (40-50),Rarer (30-40),Scarce (20-30),Scarcer (10-20),Scarcest (1-10)
584,Genesi 2:1 - Genesi 3:15,1.238,803,19 ברא גַּן יצר עִמָּד צֵלָע,15 אֵי חבא מַאֲכָל נָחָשׁ עֵשֶׂב צמח תְּאֵנָה תֹּולֵדֹות,10 חמד כּוּשׁ נְשָׁמָה עֵזֶר פקח פרד תַּאֲוָה,16 ישׁן מטר נפח נשׁא עֵדֶן עֵירֹם עָלֶה עָקֵב עָרֹום עָרוּם פְּרָת שֹׁהַם,15 אֵד אֵיבָה בְּדֹלַח גִּיחֹון גָּחֹון חֲגֹורָה חֲוִילָה חִדֶּקֶל פִּישֹׁון קִדְמָה שׁוף שִׂיחַ תַּרְדֵּמָה תפר
380,Judges 19:1 - Judges 19:30,1.1458,804,13 אָן בֵּית לֶחֶם בלל הֵנָּה יְבוּסִי נָכְרִי רְחֹב רפה,19 גִּבְעָה חֲמִישִׁי חבשׁ חתן פִּלֶגֶשׁ רָמָה,7 בְּלִיַּעַל חָתָן יַרְכָּה נֹכַח סַף שַׁחַר,15 אַיִן יְמִינִי יאל מַחְסֹור נְבָלָה נֵתַח סעד עלל פַּת צֶמֶד תֶּבֶן,13 ארח דפק זנה יְבוּס מִסְפֹּוא מַאֲכֶלֶת מָהַהּ נתח עוץ ערב פצר רדד
772,Number 12:14 - Number 14:10,1.3376,813,13 אֶפֶס אָשֵׁר הֵנָּה זְבוּלֻן זוב חִתִּי חָלָב טַף יְבוּסִי יִשָּׂשׂכָר שִׁמְעֹון,12 חֲמָת כָּלֵב כלם מִבְצָר נוּן עֲמָלֵק רִמֹּון תְּאֵנָה,15 בַּז בָּצוּר לוּ מרד עַז תור,25 אֹודֹות בִּכּוּרִים הֹושֵׁעַ זַכּוּר יְפֻנֶּה יָלִיד לון מִיכָאֵל מִרְיָם עֲנָק עֵנָב פָּארָן צִן קָדֵשׁ רגם שָׁמֵן,38 אֲחִימַן אֶשְׁכֹּול אֶשְׁכֹּל גְּאוּאֵל גְּמַלִּי גַּדִּי גַּדִּיאֵל דִּבָּה הסה וָפְסִי זְמֹורָה חֲצֵרֹות חָגָב חֹרִי יִגְאָל ירק מָכִי מֹוט נְפִילִים נַחְבִּי סְתוּר סֹודִי סוּסִי עַמִּיאֵל פַּלְטִי צֹעַן רְחֹב רָזֶה רָפֶה רָפוּא שֵׁשַׁי שַׁמּוּעַ שָׁפָט תַּלְמַי
596,Genesi 7:1 - Genesi 8:14,1.2436,814,18 יחל נֹחַ צִפֹּור,21 גֶּשֶׁם גָּבֹהַּ זַיִת חַלֹּון חרב יֹונָה מִין מחה תְּהֹום,30 גבר גוע חסר מַעְיָן נְקֵבָה נְשָׁמָה עֲשִׂירִי תֵּבָה,19 חָם יֶפֶת כלא מִכְסֶה מַבּוּל מטר עָלֶה עֹרֵב רֶמֶשׂ רמשׂ שֵׁם שֶׁרֶץ שׁרץ,11 אֲרָרַט אֲרֻבָּה חָרָבָה חיל טָרָף יְקוּם מָנֹוחַ סכר שׁכך
703,Exodus 29:1 - Exodus 29:37,1.295,821,10 אֵפֹד בלל חגר נִיחֹחַ סמך,17 אֶצְבַּע זרק חִטָּה חבשׁ יְמָנִי כִּלְיָה כָּכָה נוף תְּנוּפָה,10 בשׁל חֹשֶׁן כֻּתֹּנֶת כהן מְעִיל מִשְׁחָה נֵזֶר נזה,28 בֹּהֶן חַלָּה חָזֶה יְסֹוד יֹתֶרֶת כְּהֻנָּה כָּבֵד מִלֻּאִים מִצְנֶפֶת מָנָה נֵתַח סַל שֹׁוק,14 אַבְנֵט אַלְיָה אפד חֵשֶׁב כְּרָעַיִם כִּפֻּרִים מִגְבָּעָה מָשְׁחָה נתח פֶּרֶשׁ רָקִיק תְּנוּךְ
654,2_King 8:25 - 2_King 9:28,1.2745,828,8 חגר מִגְדָּל מַכָּה מַעֲלָה מָתְנַיִם עצר קשׁר,24 אֲחַזְיָהוּ חֶדֶר יִזְרְעֶאל כֶּלֶב כרע מִרְמָה מֶרְכָּבָה נהג נקם צפה רָמָה,40 אִיזֶבֶל בַּעְשָׁא חֲזָאֵל חֶלְקָה חָתָן יְהֹורָם יֹורָם מַשָּׂא נְבָט נָבֹות רָמֹת,16 אֲחִיָּה אֲרַמִּי זְנוּנִים חכה יִזְרְעֵאלִי מְגִדֹּו עֲתַלְיָהוּ עָמְרִי פָּלִיט צֶמֶד קְבוּרָה שָׁלִישׁ שִׂיחַ,24 אֲחַזְיָה אֶמֶשׁ בִּדְקַר בֵּית הַגָּן גֶּרֶם חֵצִי יִבְלְעָם כֶּשֶׁף מִנְהָג מַעֲלֵה גוּר נִמְשִׁי פַּךְ רַכָּב שִׁגָּעֹון שִׁפְעָה שׁגע שׁתן
195,Leviti 16:1 - Leviti 16:34,1.0479,831,3 חגר סמך פשׁט,8 אֶצְבַּע טֻמְאָה מְלֹא,17 כַּפֹּרֶת כֻּתֹּנֶת כהן מַחְתָּה נזה פָּרֹכֶת,6 אֶזְרָח דַּק מִצְנֶפֶת סַם עָשֹׂור שַׁבָּתֹון,12 אַבְנֵט גְּזֵרָה גַּחַל חֹפֶן מִכְנָס עֲזָאזֵל עִתִּי פֶּרֶשׁ צנף
681,2_King 18:17 - 2_King 19:14,1.2872,834,13 אַיֵּה אָסָף אוּלַי דָּגָן חרשׁ כָּבֵד סָרִיס סמך קָטָן שַׂק,11 אֵי זַיִת חֲמָת חרף יְשַׁעְיָהוּ רַב תְּאֵנָה תִּירֹושׁ,8 יִצְהָר כּוּשׁ לָכִישׁ מְסִלָּה מרד פֶּחָה שְׁמוּעָה,30 אֶלְיָקִים אָמֹוץ בְּרֵכָה בִּלְעֲדֵי חִלְקִיָּה חִלְקִיָּהוּ חָרָן יֹואָח לִבְנָה מִשְׁעֶנֶת נקב נשׁא סות ערב רַב שָׁקֵה רצץ,27 אֲרָמִי אַרְפַּד בִּטָּחֹון גֹּוזָן גדף הֵנַע לֵדָה מַשְׁבֵּר נְאָצָה סְפַרְוַיִם עֶדֶן עַוָּה צֹאָה רֶצֶף שֶׁבְנָא שֶׁבְנָה תְּלַאשָּׂר תְּעָלָה תִּרְהָקָה תַּרְתָּן תֹּוכֵחָה
689,Leviti 8:1 - Leviti 8:36,1.2896,839,10 אֵפֹד חגר נִיחֹחַ סמך,19 אֶצְבַּע זרק חבשׁ יְמָנִי כִּלְיָה מוּל נוף קהל תְּנוּפָה,14 בשׁל חֹשֶׁן כִּיֹּור כֻּתֹּנֶת מְעִיל מִשְׁחָה נֵזֶר נזה,29 בֹּהֶן חַלָּה חָזֶה יְסֹוד יֹתֶרֶת כֵּן כָּבֵד מִלֻּאִים מִצְנֶפֶת מָנָה נֵתַח סַל צִיץ שֹׁוק,15 אַבְנֵט אַלְיָה אוּר אפד חֵשֶׁב כְּרָעַיִם מִגְבָּעָה נתח פֶּדֶר פֶּרֶשׁ רָקִיק תְּנוּךְ תֻּמִּים
212,Genesi 46:28 - Genesi 47:31,1.0543,878,13 אֹכֶל טַף ירה כָּבֵד נְעוּרִים נשׂג עֲבוּר עִמָּד צַוָּאר תְּבוּאָה,6 יָרֵךְ יהב כול כחד לקט,3 מִטָּה פרה שׁבר,16 גְּוִיָּה גֹּשֶׁן חֲמִישִׁית יצג מְגוּרִים מִרְעֶה נהל קְבוּרָה,11 אפס הֵא חֹמֶשׁ יָלַהּ ישׁם מִרְכֶּבֶת מֵיטָב רַעְמְסֵס שֶׁבֶר


Rank,Reference,Weight,Words,Rare (40-50),Rarer (30-40),Scarce (20-30),Scarcer (10-20),Scarcest (1-10)
1,Psalms 134:1 - Psalms 134:3,1.6417,33,1 מַעֲלָה,0,0,0,0
2,1_King 22:52 - 1_King 22:54,1.6984,66,0,1 אֲחַזְיָהוּ,1 נְבָט,0,0
3,Malach 3:22 - Malach 3:24,1.7339,52,0,0,1 חֵרֶם,1 חֹרֵב,1 אֵלִיָּה
4,Judges 21:23 - Judges 21:25,1.8734,68,0,1 גזל,0,1 חול,0
5,Jeremi 32:36 - Jeremi 32:41,2.0058,139,3 בֶּטַח דֶּבֶר יִרְאָה,0,2 קֶצֶף שׂושׂ,0,0
6,1_Samu 24:17 - 1_Samu 24:23,2.091,137,0,2 גמל,1 מַמְלֶכֶת,1 מְצוּדָה,0
7,2_Chro 34:29 - 2_Chro 34:33,2.1155,157,1 קָטָן,0,0,1 עֹמֶד,0
8,Deuter 27:9 - Deuter 27:14,2.1313,114,4 אָשֵׁר זְבוּלֻן יִשָּׂשׂכָר שִׁמְעֹון,1 קְלָלָה,0,0,3 גְּרִזִים סכת עֵיבָל
9,Exodus 24:12 - Exodus 24:18,2.133,143,2 ירה לוּחַ,1 סִינַי,0,1 חוּר,0
10,2_Chro 21:4 - 2_Chro 21:11,2.1664,172,3 פשׁע,0,3 יְהֹורָם,1 לִבְנָה,1 נִיר
