In [39]:
from docx import Document
import os
import openai

openai.api_key = os.getenv("OPENAI_API_KEY")


In [40]:
doc = Document("test_documents/93A_demand_letter_sample.docx")

for pnum, para in enumerate(doc.paragraphs):
    for rnum, run in enumerate(para.runs):
        if "date" in run.text.lower():
            run.text = run.text.replace("date", "DATE")
        print((pnum, rnum), run.text, run.style.name)

(0, 0) Your name Default Paragraph Font
(1, 0) Your address Default Paragraph Font
(2, 0) Your telephone  Default Paragraph Font
(2, 1) number Default Paragraph Font
(3, 0) Date Default Paragraph Font
(4, 0) Name of Merchant Default Paragraph Font
(5, 0) Merchant's address Default Paragraph Font
(7, 0) Dear Merchant: Default Paragraph Font
(9, 0) Under the provisions of Massachusetts General Laws, Chapter 93A, Section 9, I hereby make written Default Paragraph Font
(10, 0) demand for relief as outlined in that statute. Default Paragraph Font
(12, 0) On or about  Default Paragraph Font
(12, 1) {DATE} Default Paragraph Font
(12, 2) , the following unfair or deceptive act occurred: Default Paragraph Font
(12, 3)   Default Paragraph Font
(13, 0) {EXPLAIN WHAT HAPPENED} Default Paragraph Font
(15, 0) This unfair or deceptive act or practice is, in my opinion, declared unlawful by Section 2 of Chapter 93A, Default Paragraph Font
(16, 0) (you may want to give regulation number, if applicable)

In [41]:
role_description = """
You are the author of a Docassemble guided interview to complete a form. You will use the following rules
to label a DOCX template for use with the Assembly Line framework for Docassemble. You will use the Jinja2
system to add variable names, like {{ variable_name }}. You can also add conditional text with {%p if variable_name %}
CONDITIONAL PARAGRAPH TEXT
{%p endif %}.

Where possible, you will use an existing variable name from the list. If not appropriate, invent a new name
that follows the same pattern.

The input will be a JSON formatted list which looks like this:
[
    [0, 0, "This is the first run in the first paragraph"]
]

Your reply will only contain a JSON formatted list which will contain a tuple of:
    1. The paragraph number
    2. The run number
    3. The new text with Jinja markup
    4. The start and ending position of the text within the run that should be replaced with the new text
    5. A flag indicating that the new text should be inserted as a new paragraph
"""

rules = """
Rules:

    General Naming Convention:
        These will be valid Python variable names. People are always represented as a list of
        DAIndividuals, even if there is only one person.
        Use lowercase letters for variable names and separate words with an _.
        Variable names should be descriptive yet short, without using abbreviations.

    Special Characters:
        Use . to separate an object from its attribute.
        Use [] to reference a list of items, with indexing starting at 0.

    Special Endings:
        Suffix _date for date fields (optional).
        Suffix _amount or _value for currency fields (optional).

Variable Names, Prefixes, and Suffixes:

    People:
        users (for the person benefiting from the form)
        other_parties (for the opposing party)
        plaintiffs
        defendants
        petitioners
        respondents
        children
        spouses
        parents
        caregivers
        attorneys
        translators
        debt_collectors
        creditors
        witnesses
        guardians_ad_litem
        guardians
        decedents
        interested_parties

    Name Forms:
        users[0].name.full() (Full name)
        users[0].name.first (First name only)
        users[0].name.middle (Middle name only)
        users[0].name.middle_initial() (First letter of middle name)
        users[0].name.last (Last name only)
        users[0].name.suffix (Suffix of user's name only)

    Demographic Data:
        users[0].birthdate (Birthdate)
        users[0].age_in_years() (Calculated age based on birthdate)
        users[0].gender (Gender)
        users[0].gender_female (User is female, for checkbox field)
        users[0].gender_male (User is male, for checkbox field)
        users[0].gender_other (User is not male or female, for checkbox field)
        users[0].gender_nonbinary (User identifies as nonbinary, for checkbox field)
        users[0].gender_undisclosed (User chose not to disclose gender, for checkbox field)
        users[0].gender_self_described (User chose to self-describe gender, for checkbox field)
        user_needs_interpreter (User needs an interpreter, for checkbox field)
        user_preferred_language (User's preferred language)

    Addresses:
        users[0].address.block() (Full address, on multiple lines)
        users[0].address.on_one_line() (Full address on one line)
        users[0].address.line_one() (Line one of the address, including unit or apartment number)
        users[0].address.line_two() (Line two of the address, usually city, state, and Zip/postal code)
        users[0].address.address (Street address)
        users[0].address.unit (Apartment, unit, or suite)
        users[0].address.city (City or town)
        users[0].address.state (State, province, or sub-locality)
        users[0].address.zip (Zip or postal code)
        users[0].address.county (County or parish)
        users[0].address.country (Country)

    Other Contact Information:
        users[0].phone_number (Phone number)
        users[0].mobile_number (A phone number explicitly labeled as the "mobile" number)
        users[0].phone_numbers() (A list of both mobile and other phone numbers)
        users[0].email (Email)

    Signatures:
        users[0].signature (Signature)
        signature_date (Date the form is completed)

    Information about Court and Court Processes:
        trial_court (Court's full name)
        trial_court.address.county (County where court is located)
        trial_court.division (Division of court)
        trial_court.department (Department of court)
        docket_number (Case or docket number)
        docket_numbers (A comma-separated list of docket numbers)
"""

In [42]:
doc = Document("test_documents/93A_demand_letter_sample.docx")

items = []
for pnum, para in enumerate(doc.paragraphs):
    for rnum, run in enumerate(para.runs):
        items.append([pnum, rnum, run.text])

In [47]:
import tiktoken
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
token_count = len(encoding.encode(role_description + rules + repr(items)))

response = openai.ChatCompletion.create(
  model="gpt-3.5-turbo",
  messages=[
    {
        "role": "system",
        "content": role_description + rules
    },
    {
      "role": "user",
      "content": repr(items)
    }
  ],
  temperature=1,
  max_tokens=3900 - token_count,
  top_p=1,
  frequency_penalty=0,
  presence_penalty=0
)

print(response)

{
  "id": "chatcmpl-89LoYaXoO2SbvHVVsTIFCXJaXoRVS",
  "object": "chat.completion",
  "created": 1697239498,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "[[0, 0, 'users[0].name.full()'], \n[1, 0, 'users[0].address.block()'], \n[2, 0, 'users[0].phone_number'], \n[2, 1, '\" number\"'], \n[3, 0, 'signature_date'], \n[4, 0, '\"Name of Merchant\"'], \n[5, 0, '\"Merchant\\'s address\"'], \n[7, 0, '\"Dear Merchant:\"'], \n[9, 0, '\"Under the provisions of Massachusetts General Laws, Chapter 93A, Section 9, I hereby make written\"'], \n[10, 0, '\"demand for relief as outlined in that statute.\"'], \n[12, 0, '\"On or about \"'], \n[12, 1, '{date}'], \n[12, 2, '\", the following unfair or deceptive act occurred:\"'], \n[12, 3, '\" \"'], \n[13, 0, '\"{EXPLAIN WHAT HAPPENED}\"'], \n[15, 0, '\"This unfair or deceptive act or practice is, in my opinion, declared unlawful by Section 2 of Chapter 93A,\"'], \n

In [48]:
import json
json.loads(response.choices[0]["message"]["content"])

JSONDecodeError: Expecting value: line 1 column 9 (char 8)