In [None]:
import operator

from lxml import etree
import textblob

In [None]:
# Name of the book to analyse; it is matched against the `n` attribute of a <b> element.
BOOK_OF_BIBLE = "Genesis"
# Path of the XML translation file that gets parsed below.
BIBLE_TRANSLATION = "./data/NASB.xmm"

# Text of the selected book; starts empty and is filled in by a later cell.
book_string = ""

In [None]:
# Parse the translation file configured in BIBLE_TRANSLATION into an lxml document tree.
root = etree.parse(BIBLE_TRANSLATION)

In [None]:
# Select every <b> element whose `n` attribute equals the configured book name.
# The name is passed as an XPath variable rather than formatted into the
# expression, so a book name containing a quote character cannot break the query.
book_xml = root.xpath("//b[@n=$book_name]", book_name=BOOK_OF_BIBLE)

In [None]:
# Fail with a clear message instead of an opaque IndexError when the lookup found nothing.
if not book_xml:
    raise ValueError("No book named {!r} found in {}".format(BOOK_OF_BIBLE, BIBLE_TRANSLATION))

# Collect the verse texts locally and rebuild book_string from scratch, so that
# re-running this cell does not append the same book a second time.
verse_texts = []

for element in book_xml[0].iter():
    if element.tag == 'b':  # print the book title
        print("{}\n".format(element.values()[0]))
    elif element.tag == 'c':  # print each chapter
        print("Chapter {}".format(element.values()[0]))
    elif element.text is not None:  # record each verse; skip elements with no text
        # Without the None check, the literal string "None" would end up in the corpus.
        verse_texts.append(element.text)

# Same layout as before: every verse is preceded by a single space.
book_string = "".join(" {}".format(text) for text in verse_texts)

In [None]:
def _get_noun_phrases(noun_phrases, n=10):
    """Get the most common, n noun phrases."""
    noun_phrase_count = dict()

    for noun_phrase in noun_phrases:
        if noun_phrase_count.get(noun_phrase):
            noun_phrase_count[noun_phrase] += 1
        else:
            noun_phrase_count[noun_phrase] = 1

    # sort the noun_phrases by occurence (in descending order)
    sorted_noun_phrase_count = sorted(noun_phrase_count.items(), key=operator.itemgetter(1), reverse=True)

    return sorted_noun_phrase_count[:n]


def full_report(bible_blob):
    """Print an NLP report (sentiment and top noun phrases) for the given text blob.

    The headings use the module-level ``BOOK_OF_BIBLE`` and ``BIBLE_TRANSLATION``
    values; the translation name shown is the file name of ``BIBLE_TRANSLATION``
    up to its first dot.

    Args:
        bible_blob: object exposing ``sentiment`` (with ``polarity`` and
            ``subjectivity`` attributes) and ``noun_phrases``; in this notebook
            it is a ``textblob.TextBlob``.

    Returns:
        None. The report is written to standard output.
    """
    # Read the sentiment from the blob that was passed in, not from a global variable,
    # so the report always describes the argument.
    sentiment = bible_blob.sentiment

    sorted_noun_phrases = _get_noun_phrases(bible_blob.noun_phrases)

    print("# NLP Analysis for {} (using the {} translation)\n".format(BOOK_OF_BIBLE.capitalize(), BIBLE_TRANSLATION.split("/")[-1].split(".")[0]))

    print("## Sentiment\n")
    print("Polarity†: {}\n".format(round(sentiment.polarity, 5)))
    print("Subjectivity‡: {}".format(round(sentiment.subjectivity, 5)))

    print("\n")

    print("## Most Common Noun Phrases in {}:\n".format(BOOK_OF_BIBLE.capitalize()))
    for noun_phrase in sorted_noun_phrases:
        # Each entry is a (phrase, count) pair; the count is printed first.
        print(" * {}\t-  {}".format(noun_phrase[1], noun_phrase[0]))

    print("\n")

    print("† Polarity is measured on a scale of [-1.0, 1.0] and measures whether that language used by the author is negative, nuetral, or positive.\n")
    print("‡ Subjectivity is measured on a scale of [0.0, 1.0] and measures how subjective the text is (0.0 being very objective; 1.0 being very subjective).")

In [None]:
# Wrap the collected book text in a TextBlob and print the report for it.
t = textblob.TextBlob(book_string)
full_report(t)