In [31]:
import os
from collections import OrderedDict

import google.cloud.automl_v1beta1 as automl_v1beta1
import ipywidgets as widgets
import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from IPython.display import display
from ipywidgets import HBox, VBox
from nltk.tokenize import sent_tokenize

import textlib
from textlib import load_raw
from textlib import normalize_sent

pd.options.display.html.table_schema = True
pd.options.display.max_rows = None
pd.options.display.max_columns = 10
pd.options.display.max_colwidth = 60
pd.options.display.width = 180
pd.options.display.expand_frame_repr = True


ModuleNotFoundError: No module named 'textlib'

<h1>Index of Apps<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#The-H&amp;K-Lawyer-Notebook" data-toc-modified-id="The-H&amp;K-Lawyer-Notebook-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>The H&amp;K Lawyer Notebook</a></span><ul class="toc-item"><li><span><a href="#Classification-Tool" data-toc-modified-id="Classification-Tool-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Classification Tool</a></span></li><li><span><a href="#Bulk-Compare" data-toc-modified-id="Bulk-Compare-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Bulk Compare</a></span></li><li><span><a href="#Clustering-and-Text-Similarity-Models" data-toc-modified-id="Clustering-and-Text-Similarity-Models-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Clustering and Text Similarity Models</a></span></li><li><span><a href="#QuickCAN" data-toc-modified-id="QuickCAN-1.4"><span class="toc-item-num">1.4&nbsp;&nbsp;</span>QuickCAN</a></span></li><li><span><a href="#QuickCAR" data-toc-modified-id="QuickCAR-1.5"><span class="toc-item-num">1.5&nbsp;&nbsp;</span>QuickCAR</a></span></li><li><span><a href="#Signature-Page-Generator" data-toc-modified-id="Signature-Page-Generator-1.6"><span class="toc-item-num">1.6&nbsp;&nbsp;</span>Signature Page Generator</a></span></li><li><span><a href="#Closing-Binder-Generator" data-toc-modified-id="Closing-Binder-Generator-1.7"><span class="toc-item-num">1.7&nbsp;&nbsp;</span>Closing Binder Generator</a></span></li></ul></li><li><span><a href="#Documentation-and-Help" data-toc-modified-id="Documentation-and-Help-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Documentation and Help</a></span></li></ul></div>

![H&K logo](https://s3.amazonaws.com/blaze4/HK+logo.png)
# The H&K Lawyer Notebook

A collection of simple, yet powerful tools for lawyers to do their jobs better and more efficiently. By leveraging data analytics, natural language processing, machine learning, network graphing and other open source solutions, users are able to produce better quality work faster and more efficiently. Many of these tools are still under active development, so the H&K Lawyer Notebook is reserved for our more intrepid lawyers.

***

## Classification Tool

Classify text using one of several trained machine learning models.

In [29]:

# Model resource path handed to automl_predict() by the classification handler.
name = 'projects/ccapp-141701/locations/us-central1/models/TCN1260328743632064609'

# Service-account key file for the prediction client. The GOOGLE_APPLICATION_CREDENTIALS
# environment variable takes precedence so the notebook can run on machines other than
# the original author's; the previously hard-coded path is kept as the fallback.
prediction_client = automl_v1beta1.PredictionServiceClient.from_service_account_json(
    os.environ.get(
        'GOOGLE_APPLICATION_CREDENTIALS',
        '/Users/josiasdewey/jupyter/notebooks/ccapp-141701-bf3fd9e2a4f7.json'))
def automl_predict(model_full_id, snippet):
    """Send a plain-text snippet to the prediction service and return its response.

    Args:
        model_full_id: Model identifier forwarded as the first argument of
            ``prediction_client.predict``.
        snippet: Text placed in the request's ``text_snippet.content`` field.

    Returns:
        Whatever ``prediction_client.predict`` returns for the request.
    """
    text_snippet = dict(content=snippet, mime_type="text/plain")
    request_payload = dict(text_snippet=text_snippet)
    return prediction_client.predict(model_full_id, request_payload)
 
# --- Classification Tool inputs -------------------------------------------------

# Radio group for choosing a practice-area model.
radio_buttons = widgets.RadioButtons(
    options=[
        'finance',
        'leasing',
        'private equity',
        'corporate M&A',
        'bond financing',
    ],
    value='finance',
    description='Select model:',
    disabled=False,
)
display(radio_buttons)

# Multi-select list of clause types; 'Change in Control' is pre-selected.
clause_selection = widgets.SelectMultiple(
    options=[
        'Termination',
        'Commitment/Unused Fees',
        'Change in Control',
        'Voluntary Prepayment',
        'Material Adverse Condition',
        'Events of Default',
        'Eurodollar Rate',
    ],
    value=['Change in Control'],
    description='Select:',
    disabled=False,
)
display(clause_selection)

# Text box holding the name of the document to classify.
file_path_text = widgets.Text(
    value='credit.txt',
    placeholder='Type something',
    description='Filename:',
    disabled=False,
)
display(file_path_text)

# Submit button; its click handler is attached after the handler is defined.
button = widgets.Button(description="Submit")
button.style.button_color = 'lightblue'
display(button)

def on_button_clicked(b):
    """Classify the sentences of the selected file and display them with labels.

    Reads the file named in ``file_path_text``, splits it into sentences,
    normalizes each one, asks ``automl_predict`` for a label, and displays a
    DataFrame with the columns ``label``, ``raw`` and ``clean``.

    The previous implementation assigned to the ``row`` yielded by
    ``DataFrame.iterrows()``. That row is a copy, so the displayed frame never
    received any labels. Labels are now collected in a list and placed in the
    frame directly.

    Args:
        b: The clicked button (supplied by ipywidgets; unused).
    """
    raw_sentences = list(sent_tokenize(load_raw(file_path_text.value)))
    clean_sentences = [normalize_sent(sent) for sent in raw_sentences]

    # Sentences that are never sent for prediction keep a label of None.
    labels = [None] * len(clean_sentences)

    # Only the first 26 sentences are classified (the original check was
    # `index <= 25`) -- presumably to cap the number of API calls; TODO confirm.
    for position, clean_sent in enumerate(clean_sentences[:26]):
        # Very long inputs are skipped -- presumably a service size limit; TODO confirm.
        if len(clean_sent) >= 25000:
            continue
        response = automl_predict(name, clean_sent)
        # Guard against an empty prediction list instead of raising IndexError
        # inside the widget callback.
        if response.payload:
            labels[position] = response.payload[0].display_name

    df_current = pd.DataFrame(
        {'label': labels, 'raw': raw_sentences, 'clean': clean_sentences},
        columns=['label', 'raw', 'clean'])
    display(df_current)
    
# Run the classification handler each time the Submit button is clicked.
button.on_click(on_button_clicked)

RadioButtons(description='Select model:', options=('finance', 'leasing', 'private equity', 'corporate M&A', 'b…

SelectMultiple(description='Select:', index=(2,), options=('Termination', 'Commitment/Unused Fees', 'Change in…

Text(value='/Users/josiasdewey/jupyter/notebooks/credit.txt', description='Filename:', placeholder='Type somet…

Button(description='Submit', style=ButtonStyle(button_color='lightblue'))

***

## Bulk Compare
Use One2Many to compare a large number of documents against a common base document. For example, compare lease agreements against a lease form to identify deviations from the approved form. Use Many2Many to compare a large number of items (the 'Query') against another large group of items (the 'Search') and determine matches. For example, a client requests that we confirm the destruction of files relating to several hundred matters, where the matter description for each must be cross-checked against several thousand entries in our file storage list. This tool leverages fuzzy matching to identify matches that pure Boolean searches would miss (e.g., "FIRST INTERNATIONAL" is captured even if the search query is "INTL FIRS").

In [1]:
button_compare = widgets.Button(description="Submit")
button_compare.style.button_color = 'lightblue'


def _load_sentence_frame(path):
    """Split the file at ``path`` into sentences; return a frame with 'raw' and 'clean' columns."""
    raw_sentences = list(sent_tokenize(load_raw(path)))
    return pd.DataFrame(
        {'raw': raw_sentences,
         'clean': [normalize_sent(sent) for sent in raw_sentences]},
        columns=['raw', 'clean'])


@button_compare.on_click
def on_click(b):
    """Fuzzy-match every sentence of the Query file against the Search file.

    For each normalized query sentence, the best-scoring sentence of the
    search document (``fuzz.token_sort_ratio``) is looked up. When the score
    reaches the ``compare_slider`` value, the matched sentence and its score
    are reported; otherwise the row shows ``'None'`` / ``'N/A'``.

    Fixes relative to the earlier version:
      * results are written to the declared ``match`` column (they used to go
        into an extra ``found`` column, leaving ``match`` empty);
      * the threshold-adjusted match is reported (the best candidate used to
        be shown even when its strength was reported as ``'N/A'``);
      * the leftover debug ``print`` of every match tuple is removed.

    Args:
        b: The clicked button (supplied by ipywidgets; unused).
    """
    query = _load_sentence_frame(file_path_text_1.value)['clean']
    choices = _load_sentence_frame(file_path_text_2.value)['clean']
    threshold = compare_slider.value

    result_rows = []
    for q in query:
        # With a Series as `choices` the result is (choice, score, index label).
        match = process.extractOne(q, choices=choices, scorer=fuzz.token_sort_ratio, score_cutoff=0)
        if match is None:
            # No candidate at all; as before, such a query produces no row.
            continue
        found, strength = match[0], match[1]
        if strength < threshold:
            found, strength = 'None', 'N/A'
        result_rows.append((q, found, strength))

    df_results = pd.DataFrame(result_rows, columns=['query', 'match', 'strength'])
    display(df_results)

# --- Many2Many inputs: the two documents read by the compare handler -----------
file_path_text_1 = widgets.Text(
    value='Stock Purchase Agreement_original.txt',
    placeholder='Type something',
    description='Query:',
    disabled=False
)

file_path_text_2 = widgets.Text(
    value='Stock Purchase Agreement_new_deal.txt',
    placeholder='Type something',
    description='Search:',
    disabled=False
)

# --- One2Many inputs (shown on the second tab) ---------------------------------
s = widgets.Text(
    value='filename and path',
    placeholder='Type something',
    description='Base:',
    disabled=False
)

t = widgets.Text(
    value='filename and path',
    placeholder='Type something',
    description='Compare:',
    disabled=False
)

# Minimum match score (0-100) a candidate must reach to be reported.
# `base=100` was removed: it is a FloatLogSlider option, not a FloatSlider trait,
# and the accompanying "exponent" comments did not describe this linear slider.
compare_slider = widgets.FloatSlider(
    value=0,
    min=0,
    max=100,
    step=0.2,
    description='Similarity:'
)


tab1 = VBox(children=[HBox(children=[file_path_text_1, file_path_text_2, compare_slider])])
tab2 = VBox(children=[HBox(children=[s, t])])
tab = widgets.Tab(children=[tab1, tab2])
tab.set_title(0, 'Many2Many')
tab.set_title(1, 'One2Many')
# Last expression of the cell: renders the tabs above the Submit button.
VBox(children=[tab, button_compare])

NameError: name 'widgets' is not defined

***

## Clustering and Text Similarity Models
*Cluster text into similar topics.  Find similar text within several documents.*

***

## QuickCAN
Computer generated revisions to contracts based on precedent clause database.  Machine learning algorithms and fuzzy matching are used to incorporate revisions made to similar text in precedent clauses.

In [21]:
# --- QuickCAN inputs ------------------------------------------------------------

# Practice-area selector.
x = widgets.RadioButtons(
    options=[
        'finance',
        'leasing',
        'private equity',
        'corporate M&A',
        'bond financing',
    ],
    value='finance',
    description='Type:',
    disabled=False,
)
display(x)

# Despite its name this is a SelectionSlider over four named intensity levels.
IntSlider = widgets.SelectionSlider(
    options=['Aggressive', 'Favorable', 'Neutral', 'Must Haves'],
    value='Aggressive',
    description='Set Intensity:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
)
display(IntSlider)

# Free-text box for the document to process.
y = widgets.Text(
    value='filename and path',
    placeholder='Type something',
    description='Filename:',
    disabled=False,
)
display(y)

# Submit button for this tool (rebinds the notebook-level name `button`).
button = widgets.Button(description="Submit")
button.style.button_color = 'lightblue'
display(button)

def on_button_clicked(df_current, df_precedent):
    """Placeholder QuickCAN handler; currently it only prints an empty line.

    Args:
        df_current: Value describing the current document. The registration
            below passes the text of the Filename box.
        df_precedent: Value describing the precedent source. The registration
            below passes the same Filename text.
    """
    print('')
    # TODO: call run_match(df_current, df_precedent) once it is available.


# on_click() needs a callable that accepts the clicked button. The earlier code
# called on_button_clicked(...) right away and registered its None return value,
# so the handler ran once at cell execution and never on a click. The lambda
# defers the call and reads the Filename box at click time.
button.on_click(lambda b: on_button_clicked(y.value, y.value))

RadioButtons(description='Type:', options=('finance', 'leasing', 'private equity', 'corporate M&A', 'bond fina…

SelectionSlider(continuous_update=False, description='Set Intensity:', options=('Aggressive', 'Favorable', 'Ne…

Text(value='filename and path', description='Filename:', placeholder='Type something')

Button(description='Submit', style=ButtonStyle(button_color='lightblue'))




## QuickCAR

Computer-generated responses to requests for production and interrogatories. Machine learning algorithms and fuzzy matching are used to incorporate historical responses to similar requests. The tool automatically generates a first draft of a response.

## Signature Page Generator
***
Generate signature page packets with the press of a button.

## Closing Binder Generator
***
Automate the preparation of closing binders.

In [23]:
@button.on_click
def on_click(b):
    """No-op click handler attached to ``button``; binder generation is not wired up here.

    NOTE(review): ``button`` is whatever the notebook-level name refers to when
    this cell runs -- presumably the Submit button created in an earlier cell.
    """

# --- Closing Binder inputs ------------------------------------------------------

# Lender choice; 'Ocean Bank' is pre-selected.
lender_widget = widgets.RadioButtons(
    options=[
        'Wells Fargo',
        'Bank of America (Private Wealth)',
        'Bank of America',
        'JPMorgan Chase',
        'Ocean Bank',
        'Other',
    ],
    value='Ocean Bank',
    description='Lender:',
    disabled=False,
)

# Loan type choice; 'Bank Loan' is pre-selected.
toggle = widgets.RadioButtons(
    options=[
        'Middle Market',
        'Construction Loan',
        'Bank Loan',
        'Loan Modification',
        'ABL Loan',
        'Art Loan',
    ],
    value='Bank Loan',
    description='Loan Type:',
    disabled=False,
)

# Title field shown on the Contacts tab.
title_textbox = widgets.Text(value='RM', description='Title:')

# Two-tab layout: loan details first, contacts second.
tab1 = VBox(children=[HBox(children=[lender_widget, toggle])])
tab2 = VBox(children=[HBox(children=[title_textbox])])
tab = widgets.Tab(children=[tab1, tab2])
tab.set_title(0, 'Loan Details')
tab.set_title(1, 'Contacts')
# Last expression of the cell: renders the tabs above the Submit button.
VBox(children=[tab, button])

VBox(children=(Tab(children=(VBox(children=(HBox(children=(RadioButtons(description='Lender:', index=4, option…