In [1]:
from datetime import datetime
import getpass
# Stamp the notebook with the current OS user and the time of this run.
print('Notebook last updated by {} at {}'.format(
    getpass.getuser(),
    str(datetime.now()),
))

Notebook last updated by Cody at 2017-08-19 22:16:09.626930


# Instructions for viewing this notebook


To view this notebook as a slideshow, install the RISE extension for Jupyter notebooks (https://github.com/damianavila/RISE):

   * `pip3 install RISE`
   * `jupyter-nbextension install rise --py --sys-prefix`
   * `jupyter-nbextension enable rise --py --sys-prefix`
   * restart the notebook; a slideshow button will appear next to the 'open the command palette' button

Run the six 'Notes' cells below before viewing this slideshow

Go to the next slide by pressing space

In [None]:
from IPython.display import Image, HTML, display, clear_output
import time, collections, pickle
from tf.fabric import Fabric
from pprint import pprint

In [None]:
# Import the custom CSS styles used by the slides.
# The context manager closes the file handle as soon as the text has been
# read; a bare open(...).read() leaves the handle open until garbage collection.
with open('codes/style.html', 'r') as style_file:
    custom_styles = style_file.read()

# Insert the styles into the notebook.
display(HTML(custom_styles))

In [None]:
# Module paths handed to Fabric through its `modules` argument.
ETCBC = 'hebrew/etcbc4c'
PHONO = 'hebrew/phono'

# Build the Fabric object over both modules.
TF = Fabric(
    modules=[ETCBC, PHONO],
    silent=False,
)

In [None]:
# Load ETCBC features.
# Every feature is named exactly once (`lex` and `gloss` used to appear twice
# in this list).
api = TF.load('''
    otype
    lex g_word_utf8 trailer trailer_utf8
    freq_lex gloss qere qere_trailer language
    book chapter verse label number
    nu ps gn vs vt prs ls g_cons
    function txt domain rela code
    sp kind typ pdp mother g_cons_utf8 tab
''')

# Put the API members into the notebook namespace; the cells below use
# F, L, T, info and indent as plain globals.
api.makeAvailableIn(globals())

In [None]:
# import custom presentation modules
# must go after TF load so as to bring in TF methods
from codes.present import present, animate_text, display_slots, obj_boundaries, display_objects, display_span

In [None]:
def open_tf_file_slots(file, extension='tf_files/', start=5, stop=12):
    """Print a short excerpt from the top of a Text-Fabric file.

    Args:
        file: Name of the file to read, e.g. 'oslots.tf'.
        extension: Path prefix prepended to `file` by plain string
            concatenation, so it has to end with a path separator.
        start: Zero-based index of the first line to print.
        stop: Zero-based index just past the last line to print.

    Only the first `stop` lines are read, so a large file is never loaded
    into memory in full, and the file handle is closed on exit.
    """
    from itertools import islice  # local import keeps the cell self-contained

    # Text-Fabric data files are UTF-8 text; do not depend on the platform default.
    with open(extension + file, encoding='utf-8') as tf_file:
        excerpt = [line.rstrip('\n') for line in islice(tf_file, start, stop)]
    print('\n'.join(excerpt))
    
def node_numbers_and_their_objects(file, extension='tf_files/', start=5, stop=18):
    """Print a short excerpt from the top of a Text-Fabric file.

    Used on the slides with 'otype.tf' to show node ranges next to their
    object types.

    Args:
        file: Name of the file to read, e.g. 'otype.tf'.
        extension: Path prefix prepended to `file` by plain string
            concatenation, so it has to end with a path separator.
        start: Zero-based index of the first line to print.
        stop: Zero-based index just past the last line to print.

    Only the first `stop` lines are read, and the file handle is closed on exit.
    """
    from itertools import islice  # local import keeps the cell self-contained

    # Text-Fabric data files are UTF-8 text; do not depend on the platform default.
    with open(extension + file, encoding='utf-8') as tf_file:
        excerpt = [line.rstrip('\n') for line in islice(tf_file, start, stop)]
    print('\n'.join(excerpt))
    
# Counting objects
def show_database_object_types():
    """Log, for every object type in F.otype.all, how many nodes it has.

    The count is taken by iterating F.otype.s(<type>); each result is
    reported through info() as '<count> <type>s'.
    """
    indent(reset=True)
    info('Counting objects in database ...')
    for object_type in F.otype.all:
        indent(level=1, reset=True)
        node_total = sum(1 for _ in F.otype.s(object_type))
        info('{:>7} {}s'.format(node_total, object_type))
    indent(level=0)
    info('Done')
    
def demonstrate_features_phrase_function(file, extension='tf_files/', start=5, stop=14):
    """Print a short excerpt from the top of a Text-Fabric feature file.

    Used on the slides with 'function.tf' to show how feature values are
    stored per line.

    Args:
        file: Name of the file to read, e.g. 'function.tf'.
        extension: Path prefix prepended to `file` by plain string
            concatenation, so it has to end with a path separator.
        start: Zero-based index of the first line to print.
        stop: Zero-based index just past the last line to print.

    Only the first `stop` lines are read, and the file handle is closed on exit.
    """
    from itertools import islice  # local import keeps the cell self-contained

    # Text-Fabric data files are UTF-8 text; do not depend on the platform default.
    with open(extension + file, encoding='utf-8') as tf_file:
        excerpt = [line.rstrip('\n') for line in islice(tf_file, start, stop)]
    print('\n'.join(excerpt))
    
def english_book_names():
    """Print one '<node> = <English book name>' line for every book node."""
    listing = ''.join(
        '{} = {}\n'.format(book_node, T.bookName(book_node, lang='en'))
        for book_node in F.otype.s('book')
    )
    print(listing)
    
def show_text_formats():
    """Render every verse in every text format and print a sample of each.

    For each verse node the word nodes below it are collected with L.d and
    passed to T.text once per format in T.formats. After all verses are
    processed, the first rendered verse of each format is printed under the
    format's name.
    """
    # The set of formats does not change while rendering, so sort it once
    # instead of once per verse.
    formats = sorted(T.formats)

    text = collections.defaultdict(list)
    indent(reset=True)
    info('writing plain text of whole Bible in all formats')
    for v in F.otype.s('verse'):
        words = L.d(v, 'word')
        for fmt in formats:
            text[fmt].append(T.text(words, fmt=fmt))
    info('done {} formats'.format(len(text)))

    # Only the first verse per format is shown on the slide.
    for fmt in sorted(text):
        print('{}\n{}\n'.format(fmt, '\n'.join(text[fmt][0:1])))
        
        
def show_time_span():
    """Display the pickled time-span selection as HTML in the notebook.

    Loads 'data/l_olam_wayyiqtol.pickle', passes the loaded object to
    display_span, concatenates the HTML fragments it yields, and displays
    them inside a single right-aligned table cell.
    """
    # NOTE(review): pickle.load can execute arbitrary code. Only use it on the
    # project's own data file, never on a pickle from an untrusted source.
    with open('data/l_olam_wayyiqtol.pickle', 'rb') as infile:
        selected_span = pickle.load(infile)

    html_code = ''.join(display_span(selected_span))

    # Well-formed wrapper: <table> spelled out, and the row wraps the cell
    # (<tr><td>...</td></tr>) rather than the other way round.
    display(HTML('<table style="text-align: right"><tr><td>{}</td></tr></table>'.format(html_code)))

# Module-level tallies filled by count_partofspeech_lexemes().
partOfSpeech = collections.Counter()
freqLex = collections.Counter()


def _format_tally(tally, top=None):
    """Return '<key>: <count>x' lines, highest count first, ties ordered by key.

    `top` limits the listing to that many entries; None lists everything.
    """
    ranked = sorted(tally.items(), key=lambda item: (-item[1], item[0]))
    return '\n'.join('{:<7}: {:>6}x'.format(*item) for item in ranked[:top])


def count_partofspeech_lexemes():
    """Count all words by part of speech and by lexeme, logging both results.

    The counts are stored in the module-level `partOfSpeech` and `freqLex`
    counters. All part-of-speech categories are listed; for lexemes only the
    ten most frequent are listed. Progress is reported through info()/indent().
    """
    # Start from empty tallies so that calling this function again (e.g.
    # re-running the slide cell) does not add a second pass to the old counts.
    partOfSpeech.clear()
    freqLex.clear()

    indent(level=0, reset=True)
    info('Starting tasks')

    indent(level=1, reset=True)
    info('Counting the words by part-of-speech ...')
    partOfSpeech.update(F.sp.v(w) for w in F.otype.s('word'))
    info('Done: {} categories'.format(len(partOfSpeech)))
    indent(level=2)
    info(_format_tally(partOfSpeech), tm=False)

    indent(level=1, reset=True)
    info('Listing the top 10 frequent words ...')
    freqLex.update(F.lex.v(w) for w in F.otype.s('word'))
    info('Done: {} lexemes'.format(len(freqLex)))
    indent(level=2)
    info(_format_tally(freqLex, top=10), tm=False)

    indent(level=0)
    info('All tasks completed')
    
    
# Banner shown above the first slide. The centred <div> is closed with </div>
# (the markup previously opened a second <div> instead of closing the first).
begin = '''
<br><br><br><br><br><br>

<div style="text-align: center">
    <span style="font-size: 20px; color: #bec2c6">[begin slides]</span>
</div>

<br><br><br><br><br><br>
'''

display(HTML(begin))

<div class="title_slide">

    <h1 class="custom">ETCBC's Text-Fabric</h1>
    <h2 class="custom" style="margin-top:0px">What, How, and Why</h2>
    <br>
    <table>
        <tr>
        <td>
            <img src="images/tf-small.png" style="width:250px; height:150px;">
        </td>
        <td>
        <img src="images/etcbc.png" style="width:315px; height:150px;">
        </td>
        </tr>
    </table>
    
    <br>
    **Christiaan Erwich & Cody Kingham**
</div>

<div class="slide_with_text">

    <h1 class="slide_header_custom">Introduction
    <table>
        <tr>
        <td>
            <img src="images/tf-small.png" width="120" height="70">
        </td>
        </tr>
    </table>
    </h1>

    <hr width="50%">
           
    <!-- unordered list with no bullets, add numbers manually -->
    <ul class="blank" style="text-align: left">
    
    <br>
    
    <!-- "li" == list item -->
    <li>1.&nbsp;&nbsp;&nbsp;&nbsp;What is Text-Fabric?</li>
        
        <br>
        
        <!-- embedded unordered list for sub levels --> 
        <ul class="blank">
            <li>1.1.&nbsp;&nbsp;&nbsp;&nbsp;predecessor: LAF</li>
            <br>
            <li>1.2.&nbsp;&nbsp;&nbsp;&nbsp;Text-Fabric</li>
            <br>
            <li>1.3.&nbsp;&nbsp;&nbsp;&nbsp;the Text-Fabric data model</li>
        </ul>
    </ul>
       
</div> 

<div class="slide_with_text">
    
    <h1 class="slide_header_custom">Introduction
    <table>
        <tr>
        <td>
            <img src="images/tf-small.png" width="120" height="70">
        </td>
        </tr>
    </table>
    </h1>
    
    <hr width="50%">
    
    <!-- unordered list with no bullets -->
    <ul class="blank">
    
    <br>
    
    <!-- "li" == list item -->
    <li>2.&nbsp;&nbsp;&nbsp;&nbsp;Using Text-Fabric</li>
        
        <br>
        
        <ul class="blank">
            <li>2.1.&nbsp;&nbsp;&nbsp;&nbsp;classes and features</li>
            
            <br>
            
            <li>2.2.&nbsp;&nbsp;&nbsp;&nbsp;participant patterns (Christiaan)</li>
            
            <br>
            
            <li>2.3.&nbsp;&nbsp;&nbsp;&nbsp;time spans (Cody)</li>
       </ul>
    
    </ul>
    
</div>

<div class="slide_with_text">
    
    <h1 class="slide_header_custom">Introduction
    <table>
        <tr>
        <td>
            <img src="images/tf-small.png" width="120" height="70">
        </td>
        </tr>
    </table>
    </h1>
    
    <hr width="50%">
    
    <br>
    
    <!-- unordered list with no bullets -->
    <ul class="blank">
    
    <!-- "li" == list item -->
    <li>3.&nbsp;&nbsp;&nbsp;&nbsp;The Future of Text-Fabric </li>
        
    <br>    
    
    <!-- "li" == list item -->
    <li>4.&nbsp;&nbsp;&nbsp;&nbsp;Questions and Discussion </li>
    
    </ul>
    
</div>

NOTES FOR PRESENTING


1.2 Basic information
* Columnar data processed with:
   * Python 3.x package
   * Easily processable with other languages

1.3 Data model
* Directed graph, slots, nodes&edges, otypes
* Contrast with Other Data Models
* Demonstrate otypes, etc. in the form of general counts (e.g. Dirk's tutorial)

2 How to do Text-Fabric? 
* Classes and Features
* Participant patterns (Christiaan)
* Time spans (Cody)

3 The future of Text-Fabric

<div class="slide_with_text">

    <h1 class="custom">1.1&nbsp;&nbsp;&nbsp;&nbsp;Predecessor</h1>
    
</div>

<div class="slide_with_text">
<span style="font-size: 200%">•</span>
<img class="custom_left" src="images/laf_logo.png" height="101" width="101">
<span style="font-size: 120%; font-weight: bold">LAF-Fabric, by Dirk Roorda (DANS)</span>
</div>

<div class="slide_with_text">

    <h1 class="slide_header_custom">LAF-Fabric (xml)</h1>
    <img src="images/laf-fabric-pro-cons.png">
    
</div>

<div class="slide_with_text">
        
    <img class="custom_left" src="images/laf_logo.png" height="201" width="201">

    <h2 class="custom">LAF-Fabric</h2>
    <br>
    
    <!--Unordered List-->
    <ul class="custom">
    
        <!-- "li" == list item -->
        <li>&nbsp;&nbsp;&nbsp;&nbsp;LAF convention of XML is bloated, hard to customize (<span>&#8722;</span>)</li>
        
        <br>
        
        <li>&nbsp;&nbsp;&nbsp;&nbsp;Graph representation in nodes and edges (<span>&#43;</span>)</li>
        
        <br>
        
        
        <li>&nbsp;&nbsp;&nbsp;&nbsp;LAF-Fabric in Python 3.x (<span>&#43;</span>)</li>
        
        <br>
        
        
        <li>&nbsp;&nbsp;&nbsp;&nbsp;Separation of concerns (<span>&#43;</span>)</li>
        
        <br>
        
        
        <li>&nbsp;&nbsp;&nbsp;&nbsp;Requires a lot of memory—8gb! (<span>&#8764;</span>)</li>
        
    </ul>
</div>


<div class="slide_with_text">

    <h1 class="custom">1.2&nbsp;&nbsp;&nbsp;&nbsp;Text-Fabric</h1>
    
</div>

<div class="slide_with_text">
<span style="font-size: 200%">•</span>
<img class="custom_left" src="images/tf-small.png" height="101" width="101">
<span style="font-size: 120%; font-weight: bold">by Dirk Roorda (DANS)</span>
</div>

<div class="slide_with_text">

<img class="custom_left" src="images/tf-small.png" height="221" width="221">

<h2 class="custom">Text-Fabric</h2>

<br>

    <!--Unordered List-->
    <ul class="custom">
    
        <!-- "li" == list item -->
        <li>&nbsp;&nbsp;&nbsp;&nbsp;compact (<span>&#43;</span>)</li>
        <br>
        <li>&nbsp;&nbsp;&nbsp;&nbsp;transparent (<span>&#8722;</span>)</li>
        <br>
        <li>&nbsp;&nbsp;&nbsp;&nbsp;plain-text columns (<span>&#43;</span>)</li>
        <br>
        <li>&nbsp;&nbsp;&nbsp;&nbsp;separation of concerns (<span>&#43;</span>)</li>
        <br>
        <li>&nbsp;&nbsp;&nbsp;&nbsp;easy to add features to nodes (<span>&#43;</span>)</li>
    </ul>
</div>

<div class="slide_with_text">

    <h2 class="custom">1.3&nbsp;&nbsp;&nbsp;&nbsp;Text-Fabric Data Model</h2>
    
    <br>
    
    <ul class="custom">
        
        <li>&nbsp;&nbsp;&nbsp;&nbsp;Makeup of a Text</li>
        
        <ul class="custom">
            <li>&nbsp;&nbsp;&nbsp;&nbsp;words</li>
            <li>&nbsp;&nbsp;&nbsp;&nbsp;grammatical units made up of groups of words (e.g. phrases, clauses, sentences)</li><br>
        </ul>
                
        <li>&nbsp;&nbsp;&nbsp;&nbsp;Makeup of Text-Fabric</li>
        
        <ul class="custom">
            <li>&nbsp;&nbsp;&nbsp;&nbsp;words = "slots" (positions)</li>
            <li>&nbsp;&nbsp;&nbsp;&nbsp;grammatical units = "objects" (ranges of slots)</li>
        </ul>
        

    </ul>
</div>

In [None]:
# The passage is given as a tuple, presumably (book, chapter, verse).
# The result is kept because later cells pass slots[1] as `words` and
# slots[0] as `progress` to display_objects.
slots = display_slots(('Genesis',1,1))

In [None]:
# Same passage at 'phrase' level, continuing from the display_slots result
# (slots[1] as `words`, slots[0] as `progress`).
phrase_slots = display_objects(('Genesis', 1, 1), 'phrase', words=slots[1], progress=slots[0])

<h2 class="custom">similarly, clauses....</h2>

In [None]:
# Same passage at 'clause' level. The stray trailing line-continuation
# backslash has been removed; nothing follows this statement.
clause_slots = display_objects(('Genesis', 1, 1), 'clause', words=slots[1], progress=slots[0])

<h2 class="custom">or sentences...</h2>

In [None]:
# Same passage at 'sentence' level, continuing from the display_slots result
# (slots[1] as `words`, slots[0] as `progress`).
sentence_slots = display_objects(('Genesis', 1, 1), 'sentence', words=slots[1], progress=slots[0])

<div class="slide_with_text">

<h2 class="custom">TF Slot Ranges Stored in oslots.tf File</h2>

    <br>

    <!--Unordered List-->
    <ul class="custom" style="text-align: left">
    
        <!-- "li" == list item -->
        <li>&nbsp;&nbsp;&nbsp;&nbsp;"object slots"</li>
             
        <br>
        
        <li>&nbsp;&nbsp;&nbsp;&nbsp;First node number given, the rest are implied through the line number.</li>
        
        <br>
        
        <li>&nbsp;&nbsp;&nbsp;&nbsp;Each line (i.e. node) has a range of slots.</li>    
   </ul>

<br>

<strong>
For example, the top of the oslots file (Genesis 1:1f):
</strong>
    
</div>

In [None]:
# Prints lines 5-11 (zero-based) of tf_files/oslots.tf.
open_tf_file_slots('oslots.tf')

<div class="slide_with_text">

<h2 class="custom">How does TF Recognize a Node Number?</h2>

    <br>
    
    <!--Unordered List-->
    <ul class="custom" style="text-align: left">
    
        <!-- "li" == list item -->
        <li><strong>otype.tf</strong> stores node ranges with their corresponding object type.</li>

        
    </ul>
</div>

In [None]:
# Prints lines 5-17 (zero-based) of tf_files/otype.tf.
node_numbers_and_their_objects('otype.tf') 

<div class="slide_title_main">

<h2 class="custom">Node Features are Stored in Files</h2>

    <br>

    <!--Unordered List-->
    <ul class="custom" style="text-align: left">
    
        <!-- "li" == list item -->
        <li>First node number supplied, the rest are implied through the line number.</li>
        
        <br>
        
        <li>Feature string is given for each line/node.</li>
    </ul>
</div>

In [None]:
# Prints lines 5-13 (zero-based) of tf_files/function.tf.
demonstrate_features_phrase_function('function.tf')

<div class="slide_with_text">

    <h2 class="custom">&nbsp;&nbsp;&nbsp;&nbsp;Graph Structure</h2>
    
    <br>
    
    <ul class="custom">
               
        <ul class="custom">
            <li>&nbsp;&nbsp;&nbsp;&nbsp;nodes - slots and objects</li><br> 
            <li>&nbsp;&nbsp;&nbsp;&nbsp;edges - relationships between slots/objects</li>
        </ul>

    </ul>
</div>

<div class="slide_with_text">

<h1 class="custom">2.&nbsp;&nbsp;&nbsp;&nbsp;Using Text-Fabric</h1>

    <br>

    <!-- unordered list with no bullets; numbers are added manually -->
    <ul class="blank" style="text-align: left">
    
        <!-- "li" == list item -->
        <li>2.1.&nbsp;&nbsp;&nbsp;&nbsp;classes and features</li>
        
        <br>
        
        <li>2.2.&nbsp;&nbsp;&nbsp;&nbsp;participant patterns in the Psalms (Christiaan)</li>
        
        <br>
        
        <li>2.3.&nbsp;&nbsp;&nbsp;&nbsp;tense and time spans (Cody)</li>
        
    </ul>
</div>

<div class="slide_with_text">

<h2 class="custom">2.1.&nbsp;&nbsp;&nbsp;&nbsp;Classes and Features</h2>

</div>

<div>
    <br>

    <!--unordered List-->
    <ul class="custom" style="text-align: left">
    
        <!-- "li" == list item -->
        <li>&nbsp;&nbsp;&nbsp;&nbsp;accessing object features</li>
        
        <br>
        
        <li>&nbsp;&nbsp;&nbsp;&nbsp;clause type </li>
        
        <br>
        
        <li>&nbsp;&nbsp;&nbsp;&nbsp;booknames in different languages</li>
        
        <br>
        
        <li>&nbsp;&nbsp;&nbsp;&nbsp;text formats</li>
        
        <br>
        
         <li>&nbsp;&nbsp;&nbsp;&nbsp;more complex stuff: counting lexemes and parts of speech </li>
    </ul>
</div>

<div class="slide_with_text">

<h2 class="custom">Accessing Object Features</h2>

<br>

<span style="font-size: 120%"><code>F.feature.v(node_number)</code></span>

</div>

In [None]:
# Feature lookup with a word node number:
# the value of the `g_cons_utf8` feature for node 313893.

F.g_cons_utf8.v(313893)

In [None]:
# Feature lookup with a phrase node number:
# the value of the `function` feature for node 791236.

F.function.v(791236) 

In [None]:
# Feature lookup with a clause node number:
# the value of the `typ` feature for node 579892.

F.typ.v(579892) 

<div class="slide_with_text">

<span style="font-size: 120%"><code>T.bookName(node_number, lang='en')</code></span>

</div>

In [None]:
# Prints one '<node> = <English book name>' line per book node.
english_book_names()

In [None]:
# Renders every verse in every text format, then prints the first verse per format.
show_text_formats()

In [None]:
# Logs word counts per part of speech, then the ten most frequent lexemes.
count_partofspeech_lexemes()

<h1 class="custom">2.2 Patterns of Participant Shifts</h1>



<img src="images/psalm75-colors.png">

<img src="images/asaf.png">

<div class="slide_with_text">

<h2 class="custom">Let's zoom in</h2>

<br>

</div>

<img src="images/asaf-zoom.png">

<div class="slide_with_text">

    <h1 class="slide_header_custom">Promising parallel patterns in BH poetry in <strong>whole verses</strong></h1>
    <img src="images/sbl-table-patterns-2.png">
    
</div>

<div class="slide_with_text">

<h2 class="custom">2.3.&nbsp;&nbsp;&nbsp;&nbsp;Tense and Time Spans</h2>

    <br>

    <!--unordered List-->
    <ul class="custom" style="text-align: left">
    
        <!-- "li" == list item -->
        <li>&nbsp;&nbsp;&nbsp;&nbsp;question of tense/aspect in Biblical Hebrew</li>
        
        <ul class="custom">
                <li>&nbsp;&nbsp;&nbsp;&nbsp;computational, textlinguistic approach with ETCBC and TF</li>
                <li>&nbsp;&nbsp;&nbsp;&nbsp;a question of semantics</li>
                <li>&nbsp;&nbsp;&nbsp;&nbsp;a question of discourse structure</li>
        </ul>
        
        <br>
        
        <li>&nbsp;&nbsp;&nbsp;&nbsp;verb tense is approached as a question of <strong>semantic co-occurrence</strong></li>
            <ul class="custom">
                <li>&nbsp;&nbsp;&nbsp;&nbsp;e.g. "father" might tend to occur alongside "son"</li>
                <li>&nbsp;&nbsp;&nbsp;&nbsp;also time markers alongside certain verbs?</li><br> 
            </ul>
            
            
        <li>&nbsp;&nbsp;&nbsp;&nbsp;verb tense is approached as a question of <strong>discourse context</strong></li>

            <ul class="custom">
                <li>&nbsp;&nbsp;&nbsp;&nbsp;Do mother clauses pass on the semantic content of a verb tense to their daughters?</li><br> 
            </ul>

    </ul>
</div>



<img src="images/yiqtol_vs_wayyiqtol.png">

<img src="images/until_forever.png">

In [None]:
# Displays the HTML that display_span produces for the span stored in
# data/l_olam_wayyiqtol.pickle.
show_time_span()

<div class="slide_with_text">

<h1 class="custom">3.&nbsp;&nbsp;&nbsp;&nbsp;The Future of Text-Fabric</h1>

    <br>

    <!--unordered List-->
    <ul class="custom" style="text-align: left">
    
        <!-- "li" == list item -->
        
        <li><strong>community of users</strong></li>
    </ul>
</div>

<div class="slide_with_text">
 
     <!--unordered List-->
    <ul class="custom" style="text-align: left">
    
        <span style="font-size: 200%">•</span>
        <img class="custom_left" src="images/qBible.png" height="101" width="101">
        <span style="font-weight: bold">qBible: James Cuénod</span>

    </ul>
</div>

<div class="slide_with_text">
 
     <!--unordered List-->
    <ul class="custom" style="text-align: left">
    
        <span style="font-size: 200%">•</span>
        <img class="custom_left" src="images/etcbc_remix.png" height="101" width="101">
        <span style="font-weight: bold">ETCBC-remix: Eliran Wong</span>

    </ul>
</div>

<div class="slide_with_text">
 
     <!--unordered List-->
    <ul class="custom" style="text-align: left">
    
        <span style="font-size: 200%">•</span>
        <img class="custom_left" src="images/bible_ol.png" height="101" width="101">
        <span style="font-weight: bold">Bible Online Learner: Nicolai Winther-Nielsen</span>

    </ul>
</div>

<div class="slide_with_text">

<h1 class="custom">3.&nbsp;&nbsp;&nbsp;&nbsp;The Future of Text-Fabric</h1>

    <br>

    <!--unordered List-->
    <ul class="custom" style="text-align: left">
    
        <!-- "li" == list item -->
        
        <li><strong>future research</strong></li>

            <ul class="custom">
                    <li>&nbsp;&nbsp;&nbsp;&nbsp;using structure to interpret semantics</li>
                    <li>&nbsp;&nbsp;&nbsp;&nbsp;artificial intelligence</li>
                    <li>&nbsp;&nbsp;&nbsp;&nbsp;data analytics (e.g. Markov chains)</li>
            </ul>
    </ul>
</div>

It is important to the ETCBC that the community of TF users grows beyond those who are affiliated with the ETCBC. We show that TF provides a platform for researchers to produce the fruits of their work as data. It is our hope that the TF data becomes diverse, contributed to by many, and evolves over time through user-created modules. 

<div class="slide_with_text">

<h1 class="custom">&nbsp;&nbsp;&nbsp;&nbsp;The future of Text-Fabric</h1>

    <br>

    <!--unordered List-->
    <ul class="custom" style="text-align: left">
    
        <!-- "li" == list item -->
        
        <li>&nbsp;&nbsp;&nbsp;&nbsp;You?</li>
    </ul>
</div>

<div class="slide_with_text">

<h1 class="custom">4.&nbsp;&nbsp;&nbsp;&nbsp;Questions and Discussion</h1>

    <br>

</div>