In [1]:
import collections

from Bio import pairwise2
from Bio.Seq import Seq
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd

from tf.app import use
# Load three Text-Fabric corpora one after another. Every call passes
# hoist=globals(), and the names F, T and L are re-bound right after each
# call, so the statement order matters: each corpus' handles are saved under
# corpus-specific names before the next `use` call runs.
# NOTE(review): presumably `hoist=globals()` is what (re)defines F, T, L in
# the global namespace on every call -- confirm against the Text-Fabric docs.
A = use('etcbc/bhsa', hoist=globals())
Fmt, Tmt, Lmt = F, T, L  # handles for the 'etcbc/bhsa' corpus (called 'MT' below)

B = use('dt-ucph/sp', version='3.4', hoist=globals())
Fsp, Tsp, Lsp = F, T, L  # handles for the 'dt-ucph/sp' corpus (called 'SP' below)

C = use('etcbc/dss', version='1.9', hoist=globals())
Fdss, Tdss, Ldss = F, T, L  # handles for the 'etcbc/dss' corpus

# Remove the generic names so later cells cannot accidentally use the handles
# of whichever corpus happened to be loaded last.
del F, T, L

**Locating corpus resources ...**

Name,# of nodes,# slots / node,% coverage
book,39,10938.21,100
chapter,929,459.19,100
lex,9230,46.22,100
verse,23213,18.38,100
half_verse,45179,9.44,100
sentence,63717,6.7,100
sentence_atom,64514,6.61,100
clause,88131,4.84,100
clause_atom,90704,4.7,100
phrase,253203,1.68,100


**Locating corpus resources ...**

Name,# of nodes,# slots / node,% coverage
book,5,79878.4,100
chapter,187,2135.79,100
verse,5841,68.38,100
word,114890,3.48,100
sign,399392,1.0,100


**Locating corpus resources ...**

Name,# of nodes,# slots / node,% coverage
scroll,1001,1428.81,100
lex,10450,129.14,94
fragment,11182,127.91,100
line,52895,27.04,100
clause,125,12.85,0
cluster,101099,6.68,47
phrase,315,5.1,0
word,500995,2.81,99
sign,1430241,1.0,100


In [2]:
# Tab-separated dataset; the columns used in this notebook include tf_id and scroll.
file = '../data/hiphil_triliteral.csv'

dat = pd.read_csv(file, sep='\t')
# One value per row of `dat`: for rows whose scroll is 'MT', 1 if the character
# 'I' occurs in the word's g_lex feature and 0 otherwise; '-' for all other scrolls.
# NOTE(review): presumably 'I' is the transliteration of the hireq vowel -- confirm.
# NOTE(review): the same list is recomputed in a later cell (In [23]).
has_hireq = [(1 if 'I' in Fmt.g_lex.v(w) else 0) if scr == 'MT' else '-' for w, scr in zip(dat.tf_id, dat.scroll)]


In [3]:
# Books for which an 'SP' Book object is built in addition to the 'MT' one.
PENTATEUCH_BOOKS = ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy']
# Name (first element of the section tuple) of every book node in the MT corpus.
ALL_BOOK_NAMES = [Tmt.sectionFromNode(bo)[0] for bo in Fmt.otype.s('book')]

In [4]:
class Book:
    """Consonantal text of one book in one manuscript, organised per verse.

    Parameters
    ----------
    manuscript : str
        'MT' or 'SP' select the book-based reader (`prepare_book_data`);
        any other value is treated as a scroll name and handled by
        `prepare_dss_book_data`.
    book_name : str
        Name of the book to extract.
    F, T, L :
        Text-Fabric API handles for the corpus. For backward compatibility a
        handle may also be given as a string holding the name of a global
        variable (e.g. 'Fmt'); it is resolved once at construction time.

    Attributes
    ----------
    verse_g_cons : defaultdict(list)
        (book, chapter, verse) -> list of word strings (g_cons plus separator).
    word2char : defaultdict(list)
        (book, chapter, verse) -> list with one entry per consonant of the
        verse; each entry is the node of the word the consonant belongs to.
    verse_text_dict : dict
        (book, chapter, verse) -> joined, stripped verse text.
    """

    def __init__(self, manuscript, book_name, F, T, L):
        self.manuscript = manuscript
        self.book_name = book_name
        # Keep the arguments exactly as passed, as before.
        self.F = F
        self.T = T
        self.L = L
        # Resolve each handle once, instead of eval()-ing a freshly built
        # expression string for every single feature lookup.
        self._F = self._resolve_api(F)
        self._T = self._resolve_api(T)
        self._L = self._resolve_api(L)
        if self.manuscript in {'MT', 'SP'}:
            self.verse_g_cons, self.word2char = self.prepare_book_data()
        else:
            self.verse_g_cons, self.word2char = self.prepare_dss_book_data()
        self.verse_text_dict = self.make_verse_text()

    @staticmethod
    def _resolve_api(handle):
        """Return the API object for `handle`, which is an object or the name of a global."""
        if isinstance(handle, str):
            # NOTE(review): eval is retained only so existing callers that pass
            # names such as 'Fmt' keep working; never pass untrusted strings.
            return eval(handle, globals())
        return handle

    def prepare_book_data(self):
        """Collect per-verse word strings and the consonant-to-word mapping.

        Used for manuscripts whose corpus has 'book' nodes. Returns the tuple
        (verse_g_cons, word2char) described in the class docstring.
        """
        F, T, L = self._F, self._T, self._L
        verse_g_cons = collections.defaultdict(list)
        word2char = collections.defaultdict(list)

        for book_node in F.otype.s('book'):
            if T.sectionFromNode(book_node)[0] != self.book_name:
                continue
            for w in L.d(book_node, 'word'):
                bo, ch, ve = T.sectionFromNode(w)
                # Missing values are treated as empty strings so that string
                # concatenation below cannot fail on None.
                g_cons = F.g_cons.v(w) or ''
                # Any non-empty trailer is normalised to a single space.
                separator = ' ' if F.trailer.v(w) else ''
                verse_g_cons[(bo, ch, ve)].append(g_cons + separator)
                # One entry per consonant, so an alignment position can be
                # traced back to its word node.
                word2char[(bo, ch, ve)].extend([w] * len(g_cons))
        return verse_g_cons, word2char

    def prepare_dss_book_data(self):
        """Collect the same two mappings for a scroll of the scroll-based corpus.

        Only words of the scroll named `self.manuscript` whose `book_etcbc`
        equals `self.book_name` and that have a non-empty `g_cons` are used.
        Chapter and verse values are converted with int().
        """
        F, T, L = self._F, self._T, self._L
        verse_g_cons = collections.defaultdict(list)
        word2char = collections.defaultdict(list)

        for scr in F.otype.s('scroll'):
            if T.scrollName(scr) != self.manuscript:
                continue
            for w in L.d(scr, 'word'):
                bo = F.book_etcbc.v(w)
                if bo != self.book_name:
                    continue
                g_cons = F.g_cons.v(w)
                if not g_cons:
                    continue
                section = (bo, int(F.chapter.v(w)), int(F.verse.v(w)))
                after = F.after.v(w)
                if after is None:
                    after = ''
                verse_g_cons[section].append(g_cons + after)
                word2char[section].extend([w] * len(g_cons))
        return verse_g_cons, word2char

    def make_verse_text(self):
        """Join the word strings of every verse and strip surrounding whitespace."""
        return {section: ''.join(g_conss).strip() for (section, g_conss) in self.verse_g_cons.items()}

In [5]:
class AllBooks:
    """Container for prepared books.

    `data` starts out as an empty dict; the cells that fill it use
    (manuscript, book_name) tuples as keys and Book objects as values.
    """

    def __init__(self):
        self.data = dict()
        

In [6]:
def align_verses(str_1, str_2):
    """Globally align two verse strings and return the first alignment.

    Both strings are wrapped in `Seq` and aligned with
    `pairwise2.align.globalxx`. Of the returned alignments only the first is
    used; its two aligned sequences are returned with surrounding spaces
    stripped.

    Raises IndexError when the aligner returns no alignment at all.
    """
    first_alignment = pairwise2.align.globalxx(Seq(str_1), Seq(str_2))[0]
    aligned_1 = first_alignment[0].strip(' ')
    aligned_2 = first_alignment[1].strip(' ')
    return aligned_1, aligned_2

In [7]:
def make_alignments(verse_text1, verse_text2):
    """Align every verse of the first manuscript with the same verse of the second.

    Parameters
    ----------
    verse_text1, verse_text2 : dict
        Section -> verse text, one dict per manuscript.

    Returns
    -------
    dict
        Section -> (aligned text 1, aligned text 2) for every section that
        occurs in both inputs and could be aligned.
    """
    alignments_dict = {}

    for section, text1 in verse_text1.items():
        # A verse that is absent from the second manuscript has no parallel.
        if section not in verse_text2:
            continue
        try:
            alignment1, alignment2 = align_verses(text1, verse_text2[section])
        except Exception:
            # Best effort, as before: a verse that cannot be aligned is left
            # out. Catching Exception rather than using a bare `except` lets
            # KeyboardInterrupt and SystemExit stop this long-running loop.
            continue
        alignments_dict[section] = (alignment1, alignment2)
    return alignments_dict

In [8]:
def collect_matching_words(alignments_dict, word2char1, word2char2):
    """Map words of manuscript 1 to the words their characters are aligned with.

    For every section the two aligned strings are walked in parallel. A
    character that is neither a space nor '-' consumes the next entry of that
    manuscript's word2char list for the section. Whenever both positions hold
    such a character, the current word of manuscript 2 is appended to the list
    kept for the current word of manuscript 1.

    Returns a defaultdict(list): word of manuscript 1 -> words of manuscript 2,
    one entry per aligned character pair.
    """
    skipped = (' ', '-')
    man1_man2_dict = collections.defaultdict(list)

    for section in alignments_dict:
        al1, al2 = alignments_dict[section]
        pos1 = pos2 = 0

        owners1 = word2char1[section]
        owners2 = word2char2[section]
        for char1, char2 in zip(al1, al2):
            counts1 = char1 not in skipped
            counts2 = char2 not in skipped

            if counts1:
                man1_word = owners1[pos1]
                pos1 += 1

            if counts2:
                man2_word = owners2[pos2]
                pos2 += 1

            if counts1 and counts2:
                man1_man2_dict[man1_word].append(man2_word)

    return man1_man2_dict

In [9]:
def most_frequent(List):
    """Return the element that occurs most often in `List`.

    The occurrences are counted once up front, instead of calling
    `List.count` for every distinct element. The candidates are still taken
    from `set(List)`, so the result, including which element wins a tie, is
    the same as before.

    Raises ValueError when `List` is empty.
    """
    counts = collections.Counter(List)
    return max(set(List), key=counts.__getitem__)

# Prepare MT and SP texts

Produce the dictionary `mt_sp_matches`, which has MT word nodes as keys and the matching word numbers from SP as values.

In [10]:
# Prepare the MT and SP books: one Book per MT book, plus an SP Book for every
# book listed in PENTATEUCH_BOOKS. The results are stored in all_books.data
# under (manuscript, book_name) keys.
# The API handles are passed as variable *names* (strings), because Book
# evaluates them as expressions.
# NOTE(review): MANUSCRIPTS is not referenced anywhere else in the visible notebook.
MANUSCRIPTS = ['MT', 'SP']
all_books = AllBooks()
for book_name in ALL_BOOK_NAMES:
    book = Book('MT', book_name, 'Fmt', 'Tmt', 'Lmt')
    all_books.data[('MT', book_name)] = book
    
    if book_name in PENTATEUCH_BOOKS:
        book = Book('SP', book_name, 'Fsp', 'Tsp', 'Lsp')
        all_books.data[('SP', book_name)] = book

# Match words

In [11]:
# Rows of every scroll other than MT and SP.
# .copy() makes dat_dss an independent frame rather than a slice of `dat`, so
# the columns that a later cell adds to it do not trigger pandas'
# SettingWithCopyWarning.
dat_dss = dat[~dat.scroll.isin(['MT', 'SP'])].copy()
# Unique (scroll, book) pairs; one Book object is built for each of them.
scroll_book_combinations = list(set(zip(dat_dss.scroll, dat_dss.book)))
dat_dss.shape

(650, 36)

In [12]:
# Show the first rows of the full dataset (all scrolls).
dat.head()

Unnamed: 0,tf_id,scroll,book,chapter,verse,lex,g_cons,stem,pattern,pattern_g_cons,...,column,has_prs,has_prefix,has_hloc,has_nme,rec_signs_stem,cor_signs_stem,type,vowel_letter,has_vowel_letter
0,90,MT,Genesis,1,6,BDL[,MBDJL,BDJL,CCMC,CCCMC,...,-,0,0,0,0,nnnn,nnnn,last,J,1
1,182,MT,Genesis,1,11,DC>[,TDC>,DC>,CCM,CCCM,...,-,0,0,0,0,nnn,nnn,last,,0
2,187,MT,Genesis,1,11,ZR<[,MZRJ<,ZRJ<,CCMC,CCCMC,...,-,0,0,0,0,nnnn,nnnn,last,J,1
3,210,MT,Genesis,1,12,ZR<[,MZRJ<,ZRJ<,CCMC,CCCMC,...,-,0,0,0,0,nnnn,nnnn,last,J,1
4,246,MT,Genesis,1,14,BDL[,HBDJL,BDJL,CCMC,CCCMC,...,-,0,1,0,0,nnnn,nnnn,last,J,1


In [13]:
# Build a Book for every (scroll, book) pair that occurs in the non-MT/SP rows
# and add it to the same registry as the MT and SP books.
for manuscript, book_name in scroll_book_combinations:
    book = Book(manuscript, book_name, 'Fdss', 'Tdss', 'Ldss')
    all_books.data[(manuscript, book_name)] = book

In [14]:
def make_matching_word_dict(book_name, all_books):
    """Match the words of every ordered pair of manuscripts containing a book.

    Parameters
    ----------
    book_name : str
        Book to process.
    all_books : object
        Its `data` dict maps (manuscript, book_name) to objects that expose
        `verse_text_dict` and `word2char`.

    Returns
    -------
    (all_match_dicts, matching_manuscripts)
        `matching_manuscripts` lists the manuscripts that have `book_name`, in
        the key order of `all_books.data`. `all_match_dicts` maps
        ((man1, book_name), (man2, book_name)) to a dict that assigns to each
        word of man1 the word of man2 it is most often aligned with. Both
        directions of every pair are present.
    """
    books = all_books.data
    matching_manuscripts = [scr for (scr, bo) in books.keys() if bo == book_name]

    all_match_dicts = {}
    for man1 in matching_manuscripts:
        source = books[(man1, book_name)]
        for man2 in matching_manuscripts:
            if man1 == man2:
                continue
            target = books[(man2, book_name)]

            alignments_dict = make_alignments(source.verse_text_dict, target.verse_text_dict)
            matching_words_dict = collect_matching_words(
                alignments_dict, source.word2char, target.word2char
            )

            # Reduce the per-character votes to a single best match per word.
            all_match_dicts[((man1, book_name), (man2, book_name))] = {
                man1_word: most_frequent(man2_list)
                for man1_word, man2_list in matching_words_dict.items()
            }

    return all_match_dicts, matching_manuscripts

In [15]:
def read_dataset(file):
    """Read a tab-separated file (path or file-like object) into a DataFrame."""
    dataset = pd.read_csv(file, sep='\t')
    return dataset

In [16]:
class MaterData:
    """Plain record describing one word in `man1` and its parallel word in `man2`.

    All constructor arguments are stored unchanged as attributes of the same
    name: the two manuscript names, the section and lexeme, and for each of
    the two words a mater value, a node id (`tf_id`) and its `g_cons` string.
    """

    def __init__(self, man1, man2, section, lex, mater_val1, mater_val2, tf_id1, tf_id2, g_cons1, g_cons2):
        self.man1, self.man2 = man1, man2
        self.section, self.lex = section, lex
        self.mater_val1, self.mater_val2 = mater_val1, mater_val2
        self.tf_id1, self.tf_id2 = tf_id1, tf_id2
        self.g_cons1, self.g_cons2 = g_cons1, g_cons2

In [17]:
def collect_matching_cases(matching_manuscripts, matching_book, dat):
    """Split the dataset into one sub-frame per manuscript for a single book.

    Returns a dict that maps (manuscript, matching_book) to the rows of `dat`
    whose `book` equals `matching_book` and whose `scroll` equals that
    manuscript.
    """
    return {
        (man, matching_book): dat[(dat.book == matching_book) & (dat.scroll == man)]
        for man in matching_manuscripts
    }

In [18]:
def collect_mater_data(matching_manuscripts, all_match_dicts, all_mater_datasets, matching_book):
    """Pair dataset rows of two manuscripts through the word-matching dicts.

    Every pair (man, man2) with `man` earlier than `man2` in
    `matching_manuscripts` is visited once. For each dataset row of `man` the
    matched node in `man2` is looked up; a MaterData record is created when
    `man2`'s dataset has a row with that node id, the same `lex` and the same
    `type`.

    Node ids of 'SP' rows are reduced by 100000 before the lookup, and matched
    ids are increased by 100000 when `man2` is 'SP'.
    NOTE(review): presumably the dataset stores SP node ids with an offset of
    100000 -- confirm against the code that created the dataset.

    Returns
    -------
    (manuscript_mater_match, manuscripts)
        defaultdict(list) mapping `man` to its MaterData records, and the set
        of manuscripts that take part in at least one record.
    """
    manuscript_mater_match = collections.defaultdict(list)
    manuscripts = set()

    for idx, man in enumerate(matching_manuscripts):
        for idx2, man2 in enumerate(matching_manuscripts):
            if idx >= idx2:
                continue

            matching_ids = all_match_dicts[((man, matching_book), (man2, matching_book))]
            man_data = all_mater_datasets[(man, matching_book)]
            man2_data = all_mater_datasets[(man2, matching_book)]

            for _, row in man_data.iterrows():
                tf_id = row.tf_id
                lex = row.lex
                typ = row.type
                has_vl = row.has_vowel_letter
                g_cons1 = row.g_cons
                section = (row.book, row.chapter, row.verse)
                if man == 'SP':
                    tf_id = tf_id - 100000

                matching_tf_id = matching_ids.get(tf_id, None)
                if not matching_tf_id:
                    continue
                if man2 == 'SP':
                    matching_tf_id = matching_tf_id + 100000

                same_node = man2_data.tf_id == matching_tf_id
                same_lex = man2_data.lex == lex
                same_type = man2_data.type == typ
                man2_row = man2_data[same_node & same_lex & same_type]
                if len(man2_row) == 0:
                    continue

                # Only the first matching row of man2 is used.
                has_vl2 = man2_row.has_vowel_letter.iloc[0]
                g_cons2 = man2_row.g_cons.iloc[0]

                manuscript_mater_match[man].append(
                    MaterData(man, man2, section, lex, has_vl, has_vl2,
                              tf_id, matching_tf_id, g_cons1, g_cons2)
                )
                manuscripts.add(man)
                manuscripts.add(man2)

    return manuscript_mater_match, manuscripts

In [19]:
def register_similarities_with_mt(manuscripts, mt_ids, manuscript_mater_match,
                                  man2idx=None, mt_tf2idx=None):
    """Build a manuscript x MT-word array of mater values for the MT parallels.

    Parameters
    ----------
    manuscripts : collection of str
        Manuscripts that get a row in the array.
    mt_ids : sequence
        MT node ids that get a column in the array.
    manuscript_mater_match : mapping
        Manuscript -> list of records; only the records under 'MT' are used.
        Each record provides `man2`, `tf_id1`, `mater_val1` and `mater_val2`.
    man2idx : dict, optional
        Manuscript -> row index. By default the manuscripts are numbered in
        the given order (lists/tuples) or in sorted order (e.g. sets).
    mt_tf2idx : dict, optional
        MT node id -> column index. By default the position in `mt_ids`.

    Returns
    -------
    numpy.ndarray of shape (len(manuscripts), len(mt_ids))
        1 where a vowel letter is present (mater value 1), -1 where it is
        absent (mater value 0), and 0 where nothing was registered.

    The two index mappings used to be read from global variables that are not
    defined in this notebook; they are now derived from the arguments unless
    passed explicitly. The per-record debug print was removed.
    """
    mater_value_dict = {0: -1,
                        1: 1}

    if man2idx is None:
        if isinstance(manuscripts, (list, tuple)):
            ordered_manuscripts = manuscripts
        else:
            # Sets have no stable order; sorting gives reproducible rows.
            ordered_manuscripts = sorted(manuscripts)
        man2idx = {man: idx for idx, man in enumerate(ordered_manuscripts)}
    if mt_tf2idx is None:
        mt_tf2idx = {tf_id: idx for idx, tf_id in enumerate(mt_ids)}

    mater_match_array = np.zeros((len(manuscripts), len(mt_ids)))
    mt_row = man2idx['MT'] if manuscript_mater_match.get('MT') else None

    for dat_object in manuscript_mater_match.get('MT', []):
        other_row = man2idx[dat_object.man2]
        column = mt_tf2idx[dat_object.tf_id1]

        mater_match_array[mt_row, column] = mater_value_dict[dat_object.mater_val1]
        mater_match_array[other_row, column] = mater_value_dict[dat_object.mater_val2]

    return mater_match_array

In [20]:
def get_parallels(manuscript_mater_match, hif_match_dict):
    """Record, in both directions, the node ids of parallels that involve MT.

    For every record whose `man1` or `man2` is 'MT', `hif_match_dict` gets the
    entries tf_id2 -> tf_id1 and tf_id1 -> tf_id2. The dict is updated in
    place and also returned; existing keys are overwritten.
    """
    for dat_objects in manuscript_mater_match.values():
        for dat_object in dat_objects:
            if 'MT' not in (dat_object.man1, dat_object.man2):
                continue
            first_id, second_id = dat_object.tf_id1, dat_object.tf_id2
            hif_match_dict[second_id] = first_id
            hif_match_dict[first_id] = second_id

    return hif_match_dict


In [21]:
def count_parallel_cases(mater_match_array):
    """Count, per column, how many entries equal 1 and how many equal -1.

    Returns a float array of shape (2, number of columns): row 0 holds the
    number of 1s in each column of `mater_match_array`, row 1 the number of -1s.
    """
    n_columns = mater_match_array.shape[1]
    mater_arr = np.zeros((2, n_columns))

    # Column-wise counts of the two coded values.
    mater_arr[0, :] = (mater_match_array == 1).sum(axis=0)
    mater_arr[1, :] = (mater_match_array == -1).sum(axis=0)

    return mater_arr

In [22]:
# Driver: for every MT book, match the words of all manuscripts that contain
# the book, pair the dataset rows through those matches, and accumulate the
# MT-related parallels in hif_match_dict.
# NOTE(review): all_bib_books is built with the same expression as
# ALL_BOOK_NAMES, and `dat` is re-read from the same file as in cell In [2].
file_name = '../data/hiphil_triliteral.csv'
all_bib_books = [Tmt.sectionFromNode(bo)[0] for bo in Fmt.otype.s('book')]
dat = read_dataset(file_name)

# node id -> node id of its parallel, filled in both directions by get_parallels
hif_match_dict = {}

# NOTE: the loop variable `book` is a book *name* here; it re-binds the global
# `book` that earlier cells used for Book objects.
for book in all_bib_books:

    print(book)
    all_match_dicts, matching_manuscripts = make_matching_word_dict(book, all_books)
    all_mater_datasets = collect_matching_cases(matching_manuscripts, book, dat)
    manuscript_mater_match, manuscripts = collect_mater_data(matching_manuscripts, all_match_dicts, all_mater_datasets, book)
    
    hif_match_dict = get_parallels(manuscript_mater_match, hif_match_dict)
    # cumulative number of entries after this book
    print(len(hif_match_dict))
    

Genesis
13
Exodus
37
Leviticus
133
Numbers
143
Deuteronomy
213
Joshua
219
Judges
219
1_Samuel
227
2_Samuel
241
1_Kings
241
2_Kings
241
Isaiah
835
Jeremiah
851
Ezekiel
851
Hosea
853
Joel
855
Amos
865
Obadiah
875
Jonah
886
Micah
904
Nahum
908
Habakkuk
910
Zephaniah
914
Haggai
914
Zechariah
916
Malachi
918
Psalms
1043
Job
1049
Proverbs
1057
Ruth
1059
Song_of_songs
1063
Ecclesiastes
1063
Lamentations
1067
Esther
1067
Daniel
1084
Ezra
1084
Nehemiah
1084
1_Chronicles
1084
2_Chronicles
1084


In [23]:
dat = pd.read_csv('../data/hiphil_triliteral.csv', sep='\t')
# For MT rows: 1 if 'I' occurs in the word's g_lex feature, else 0; '-' for other scrolls.
has_hireq = [(1 if 'I' in Fmt.g_lex.v(w) else 0) if scr == 'MT' else '-' for w, scr in zip(dat.tf_id, dat.scroll)]

# .copy() makes dat_mt an independent frame, so adding a column below no
# longer triggers pandas' SettingWithCopyWarning.
dat_mt = dat[dat.scroll == 'MT'].copy()
# Only MT rows received a 0/1 value, so dropping the '-' placeholders leaves
# exactly one value per row of dat_mt, in row order.
has_hireq_mt = [val for val in has_hireq if val != '-']
dat_mt['has_hireq'] = has_hireq_mt

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dat_mt['has_hireq'] = has_hireq_mt


In [24]:
# For every row of dat_dss: the node id stored for it in hif_match_dict, or '-'
# when the row's tf_id has no entry.
matching_mt_ids = [hif_match_dict.get(dss_id, '-') for dss_id in dat_dss.tf_id]

In [25]:
# has_hireq of each MT word, indexed by node id. Built once, instead of
# filtering dat_mt again for every DSS row. drop_duplicates keeps the first
# row per tf_id, which is the row the former `.iloc[0]` lookup selected.
mt_hireq_by_id = dat_mt.drop_duplicates('tf_id').set_index('tf_id').has_hireq

# Each DSS row inherits has_hireq from its matched MT word; '-' marks rows
# without a match.
has_hireq_dss = [
    mt_hireq_by_id.loc[matching_mt_id] if matching_mt_id != '-' else '-'
    for matching_mt_id in matching_mt_ids
]

# assign() returns a new frame, so no column is written into a slice of `dat`
# (no SettingWithCopyWarning) and the cell can be re-run safely.
dat_dss = dat_dss.assign(has_hireq=has_hireq_dss, mt_match=matching_mt_ids)

# SAVE HIREQ DATA

In [26]:
# Stack the MT rows and the DSS rows (both now carry has_hireq; mt_match exists
# only on the DSS rows) and write them to a tab-separated file without the index.
# NOTE(review): SP rows are in neither frame, so they are not written -- confirm intended.
dat_total_hireq = pd.concat([dat_mt, dat_dss])
dat_total_hireq.to_csv('../data/hiphil_triliteral_with_hireq.csv', sep='\t', index=False)

In [45]:
# Keep only the DSS rows that received a has_hireq value, i.e. those with a
# matched MT word ('-' marks rows without a match).
dat_dss_hir = dat_dss[dat_dss.has_hireq != '-']
dat_dss_hir

Unnamed: 0,tf_id,scroll,book,chapter,verse,lex,g_cons,stem,pattern,pattern_g_cons,...,has_prefix,has_hloc,has_nme,rec_signs_stem,cor_signs_stem,type,vowel_letter,has_vowel_letter,has_hireq,mt_match
2491,1890850,1Q3,Leviticus,22,3,QDC[,JQDJCW,QDJC,,,...,0,0,0,nnnn,nnnn,last,J,1,1,65403
2492,1893020,1Q5,Deuteronomy,29,20,BDL[,HBDJLW,BDJL,,,...,1,0,0,nnnn,nnnn,last,J,1,1,109959
2493,1893145,1Q5,Deuteronomy,31,3,CMD[,JCMJD,CMJD,,,...,0,0,0,nnnn,nnnn,last,J,1,1,110713
2494,1893499,1Q5,Deuteronomy,32,26,CBT[,>CBJT,CBJT,,,...,0,0,0,rnnn,nnnn,last,J,1,1,111792
2495,1894883,1Qisaa,Isaiah,1,2,>ZN[,H>ZJNJ,>ZJN,,,...,1,0,0,nnnn,nnnn,last,J,1,1,212094
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3136,2104434,Mas1b,Leviticus,11,7,PRS[,MPRJS,PRJS,,,...,0,0,0,nnnn,nnnn,last,J,1,1,57952
3137,2106782,Mas1e,Psalms,81,15,KN<[,>KNJ<,KNJ<,,,...,0,0,0,nnnn,nnnn,last,J,1,1,324738
3138,2106808,Mas1e,Psalms,81,17,FB<[,>FBJ<K,FBJ<,,,...,0,0,0,nnnn,nnnn,last,J,1,1,324762
3139,2106840,Mas1e,Psalms,82,3,YDQ[,HYDJQW,YDJQ,,,...,0,0,0,nnnn,nnnn,last,J,1,1,324791


In [46]:
# Matched DSS rows: vowel letter present (rows) against has_hireq (columns).
pd.crosstab(dat_dss_hir.has_vowel_letter, dat_dss_hir.has_hireq)

has_hireq,0,1
has_vowel_letter,Unnamed: 1_level_1,Unnamed: 2_level_1
0,19,53
1,3,527


In [47]:
# MT rows: vowel letter present (rows) against has_hireq (columns).
pd.crosstab(dat_mt.has_vowel_letter, dat_mt.has_hireq)

has_hireq,0,1
has_vowel_letter,Unnamed: 1_level_1,Unnamed: 2_level_1
0,122,184
1,9,2176


In [48]:
# Split both datasets on has_hireq: 0 -> *_tsere, 1 -> *_hireq.
# NOTE(review): presumably has_hireq == 0 is taken to mean a tsere vowel -- confirm.
mt_tsere = dat_mt[dat_mt.has_hireq == 0]
mt_hireq = dat_mt[dat_mt.has_hireq == 1]

# For DSS rows has_hireq was copied from the matched MT word; rows without a
# match hold '-' and therefore end up in neither subset.
dss_tsere = dat_dss[dat_dss.has_hireq == 0]
dss_hireq = dat_dss[dat_dss.has_hireq == 1]

In [49]:
# MT rows with has_hireq == 1: `vt` value (rows) against vowel letter present (columns).
pd.crosstab(mt_hireq.vt, mt_hireq.has_vowel_letter)

has_vowel_letter,0,1
vt,Unnamed: 1_level_1,Unnamed: 2_level_1
impf,93,763
impv,3,115
infc,24,328
perf,21,659
ptca,43,311


In [50]:
# DSS rows with has_hireq == 1: `vt` value (rows) against vowel letter present (columns).
pd.crosstab(dss_hireq.vt, dss_hireq.has_vowel_letter)

has_vowel_letter,0,1
vt,Unnamed: 1_level_1,Unnamed: 2_level_1
impf,31,223
impv,2,50
infc,8,48
perf,5,124
ptca,7,82


In [51]:
# MT rows with has_hireq == 0: `vt` value (rows) against vowel letter present (columns).
pd.crosstab(mt_tsere.vt, mt_tsere.has_vowel_letter)

has_vowel_letter,0,1
vt,Unnamed: 1_level_1,Unnamed: 2_level_1
impf,84,6
impv,2,0
infc,6,1
perf,4,2
ptca,26,0


In [52]:
# DSS rows with has_hireq == 0: `vt` value (rows) against vowel letter present (columns).
pd.crosstab(dss_tsere.vt, dss_tsere.has_vowel_letter)

has_vowel_letter,0,1
vt,Unnamed: 1_level_1,Unnamed: 2_level_1
impf,16,2
impv,0,1
ptca,3,0


In [53]:
# DSS node id -> matched MT node id, for the DSS rows that have a match.
dss_mt_match_dict = {dss_id:mt_id for dss_id, mt_id in zip(dat_dss.tf_id, dat_dss.mt_match) if mt_id != '-'}

In [54]:
# Invert the mapping: MT node id -> list of all DSS node ids matched to it
# (several DSS words can share the same MT word).
mt_dss_match_dict = collections.defaultdict(list)
for dss_id, mt_id in dss_mt_match_dict.items():
    mt_dss_match_dict[mt_id].append(dss_id)

In [55]:
# Number of missing values in the `pattern` column of the MT has_hireq == 1 rows.
mt_hireq.pattern.isnull().sum()

0

In [56]:
# Explore parallel hireq cases

In [60]:
# For every MT word with has_hireq == 1 that has at least one matched DSS row
# in dss_hireq, print: the scrolls and the MT location/lexeme/form, then the
# MT vowel-letter flag and `vt`, then how often each vowel-letter value occurs
# among the matched DSS rows.
# NOTE(review): the "tsere" cell further down repeats this loop with the
# *_tsere frames; a shared function would remove the duplication.
mt_nodes = list(mt_hireq.tf_id)

for mt_node in mt_nodes:
    dss_nodes = mt_dss_match_dict.get(mt_node, [])
    dss_df = dss_hireq[dss_hireq.tf_id.isin(dss_nodes)]
    if dss_df.shape[0] > 0:
        # Only the first dataset row of this MT node is reported.
        mt_row = mt_hireq[mt_hireq.tf_id == mt_node]
        mt_mater = mt_row.has_vowel_letter.iloc[0]
        mt_vt, bo, ch, ve, lex, g_cons = mt_row.vt.iloc[0], mt_row.book.iloc[0], mt_row.chapter.iloc[0], mt_row.verse.iloc[0], mt_row.lex.iloc[0], mt_row.g_cons.iloc[0]
        dss_mater_count = collections.Counter(dss_df.has_vowel_letter)
        scrolls = list(dss_df.scroll)
        # Disabled filter: enable to skip MT words written with a vowel letter.
        #if mt_mater == 1:
        #    continue
        print(scrolls, bo, ch, ve, lex, g_cons)
        print(mt_mater, mt_vt)
        print(dss_mater_count)
        print()

['4Q2'] Genesis 1 11 ZR<[ MZRJ<
1 ptca
Counter({1: 1})

['4Q2', '4Q10'] Genesis 1 14 BDL[ HBDJL
1 infc
Counter({1: 2})

['4Q1'] Genesis 39 23 YLX[ MYLJX
1 ptca
Counter({1: 1})

['4Q3'] Genesis 41 9 ZKR[ MZKJR
1 ptca
Counter({1: 1})

['4Q13'] Exodus 2 3 YPN[ HYPJNW
1 infc
Counter({1: 1})

['4Q13'] Exodus 2 9 HLK[ HJLJKJ
1 impv
Counter({0: 1})

['4Q14'] Exodus 8 5 KRT[ HKRJT
1 infc
Counter({1: 1})

['4Q11'] Exodus 16 24 B>C[ HB>JC
1 perf
Counter({1: 1})

['4Q22'] Exodus 18 9 NYL[ HYJLW
1 perf
Counter({1: 1})

['4Q22'] Exodus 18 10 NYL[ HYJL
1 perf
Counter({1: 1})

['4Q14'] Exodus 18 10 NYL[ HYJL
1 perf
Counter({1: 1})

['4Q22'] Exodus 21 6 NGC[ HGJCW
1 perf
Counter({1: 1})

['4Q22'] Exodus 21 6 NGC[ HGJCW
1 perf
Counter({1: 1})

['4Q22'] Exodus 21 27 NPL[ JPJL
1 impf
Counter({1: 1})

['4Q11'] Exodus 23 7 YDQ[ >YDJQ
1 impf
Counter({1: 1})

['4Q11'] Exodus 23 13 ZKR[ TZKJRW
1 impf
Counter({1: 1})

['4Q25'] Leviticus 1 3 QRB[ JQRJBNW
1 impf
Counter({1: 1})

['4Q24'] Leviticus 1 13 QRB[ HQRJ

['1Qisaa'] Isaiah 34 1 QCB[ HQCJBW
1 impv
Counter({1: 1})

['1Qisaa'] Isaiah 34 2 XRM[ HXRJMM
1 perf
Counter({1: 1})

['1Qisaa'] Isaiah 34 14 RG<[ HRGJ<H
1 perf
Counter({1: 1})

['1Qisaa'] Isaiah 34 17 NPL[ HPJL
1 perf
Counter({1: 1})

['1Qisaa'] Isaiah 35 10 NFG[ JFJGW
1 impf
Counter({1: 1})

['1Qisaa'] Isaiah 36 3 ZKR[ MZKJR
1 ptca
Counter({1: 1})

['1Qisaa'] Isaiah 36 10 CXT[ HCXJTH
1 infc
Counter({1: 1})

['1Qisaa'] Isaiah 36 14 NC>=[ JC>
0 impf
Counter({0: 1})

['1Qisaa'] Isaiah 36 14 NYL[ HYJL
1 infc
Counter({1: 1})

['1Qisaa'] Isaiah 36 15 NYL[ JYJLNW
1 impf
Counter({1: 1})

['1Qisaa'] Isaiah 36 18 NYL[ JYJLNW
1 impf
Counter({1: 1})

['1Qisaa'] Isaiah 36 18 NYL[ HYJLW
1 perf
Counter({1: 1})

['1Qisaa'] Isaiah 36 19 NYL[ HYJLW
1 perf
Counter({1: 1})

['1Qisaa'] Isaiah 36 20 NYL[ HYJLW
1 perf
Counter({1: 1})

['1Qisaa'] Isaiah 36 20 NYL[ JYJL
1 impf
Counter({1: 1})

['1Qisaa'] Isaiah 36 22 ZKR[ MZKJR
1 ptca
Counter({1: 1})

['1Qisaa', '1Q8'] Isaiah 37 11 XRM[ HXRJMM
1 infc
Counter

['4Q84'] Psalms 94 9 NBV[ JBJV
1 impf
Counter({1: 1})

['4Q84'] Psalms 94 13 CQV[ HCQJV
1 infc
Counter({1: 1})

['4Q84'] Psalms 102 20 CQP[ HCQJP
1 perf
Counter({1: 1})

['4Q84'] Psalms 102 27 XLP[ TXLJPM
1 impf
Counter({1: 1})

['4Q84'] Psalms 103 5 FB<[ MFBJ<
1 ptca
Counter({1: 1})

['4Q84'] Psalms 103 12 RXQ[ HRXJQ
1 perf
Counter({1: 1})

['4Q87'] Psalms 105 40 FB<[ JFBJ<M
1 impf
Counter({1: 1})

['4Q87'] Psalms 109 13 KRT[ HKRJT
1 infc
Counter({1: 1})

['4Q90', '11Q5'] Psalms 119 15 NBV[ >BJVH
1 impf
Counter({1: 2})

['4Q90', '11Q5'] Psalms 119 18 NBV[ >BJVH
1 impf
Counter({1: 2})

['4Q87'] Psalms 125 5 HLK[ JWLJKM
1 impf
Counter({1: 1})

['4Q87', '11Q5'] Psalms 126 2 GDL[ HGDJL
1 perf
Counter({1: 2})

['4Q87'] Psalms 126 3 GDL[ HGDJL
1 perf
Counter({1: 1})

['11Q5'] Psalms 132 15 FB<[ >FBJ<
1 impf
Counter({1: 1})

['11Q5'] Psalms 132 17 YMX[ >YMJX
1 impf
Counter({1: 1})

['11Q5'] Psalms 132 18 LBC[ >LBJC
1 impf
Counter({1: 1})

['11Q5'] Psalms 135 17 >ZN[ J>ZJNW
1 impf
Counter({1:

In [None]:
# Explore tsere cases

In [159]:
# For every MT word with has_hireq == 0 that has at least one matched DSS row
# in dss_tsere, print: the scrolls and the MT location, then the MT
# vowel-letter flag and `vt`, then how often each vowel-letter value occurs
# among the matched DSS rows.
# NOTE(review): same loop as the hireq exploration cell, applied to the
# *_tsere frames and without lex/g_cons in the output.
mt_nodes = list(mt_tsere.tf_id)

for mt_node in mt_nodes:
    dss_nodes = mt_dss_match_dict.get(mt_node, [])
    dss_df = dss_tsere[dss_tsere.tf_id.isin(dss_nodes)]
    if dss_df.shape[0] > 0:
        # Only the first dataset row of this MT node is reported.
        mt_row = mt_tsere[mt_tsere.tf_id == mt_node]
        mt_mater = mt_row.has_vowel_letter.iloc[0]
        mt_vt, bo, ch, ve = mt_row.vt.iloc[0], mt_row.book.iloc[0], mt_row.chapter.iloc[0], mt_row.verse.iloc[0]
        dss_mater_count = collections.Counter(dss_df.has_vowel_letter)
        scrolls = list(dss_df.scroll)
        # Disabled filter: enable to skip MT words written without a vowel letter.
        #if mt_mater == 0:
        #    continue
        print(scrolls, bo, ch, ve)
        print(mt_mater, mt_vt)
        print(dss_mater_count)
        print()

['4Q2'] Genesis 1 11
0 impf
Counter({0: 1})

['4Q9'] Genesis 41 34
0 impf
Counter({0: 1})

['2Q5'] Leviticus 11 26
0 ptca
Counter({0: 1})

['1Qisaa'] Isaiah 27 5
0 impf
Counter({0: 1})

['1Qisaa'] Isaiah 27 6
0 impf
Counter({1: 1})

['1Qisaa'] Isaiah 32 9
0 impv
Counter({1: 1})

['1Qisaa'] Isaiah 36 15
0 impf
Counter({0: 1})

['1Qisaa'] Isaiah 42 6
0 impf
Counter({1: 1})

['1Qisaa', '1Q8'] Isaiah 51 10
0 ptca
Counter({0: 2})

['Mur88'] Obadiah 1 14
0 impf
Counter({0: 1})

['Mur88'] Micah 3 4
0 impf
Counter({0: 1})

['4Q85'] Psalms 27 14
0 impf
Counter({0: 1})

['4Q83'] Psalms 69 18
0 impf
Counter({0: 1})

['11Q5'] Psalms 119 19
0 impf
Counter({0: 1})

['1Q10', '4Q89', '11Q5'] Psalms 119 43
0 impf
Counter({0: 3})

['11Q5'] Psalms 119 133
0 impf
Counter({0: 1})

['11Q5'] Psalms 143 7
0 impf
Counter({0: 1})

['4Q99'] Job 37 5
0 impf
Counter({0: 1})

['4Q103'] Proverbs 15 25
0 impf
Counter({0: 1})

