In [1]:
# Load environment variables from the .env file.
# override=True asks python-dotenv to let values from .env replace variables
# that are already set in the process environment.
from dotenv import load_dotenv
load_dotenv(override=True)

True

# Objects

In [2]:
from backend.database.embedding import MockEmbedding, OpenAIEmbedding
from backend.database.vector_db import VectorDB, EmbedData, Field, EmbedField
from legal_db import get_dialogs, get_legal_text, get_meeting_summary, to_timestamp, Dialog, Meeting, LawArticle, CaseRecord
import json

## Dialog DB creation (for use case)

In [3]:
# Build one dialog collection per recorded meeting of the case.
case_id = 'JUSTICIUS-MARCO'
meeting_timestamps = ['2023-11-13', '2023-11-27']

for meeting_timestamp in meeting_timestamps:
    collection_name = f"Dialog_{case_id}_{meeting_timestamp}"
    dialogs_db = VectorDB(collection_name, Dialog)
    dialogs = get_dialogs(case_id, meeting_timestamp)
    # Each dialog is stored under its position in the transcript (0, 1, 2, ...).
    dialog_ids = list(range(len(dialogs)))
    dialogs_db.reset()  # reset if exists
    dialogs_db.add(dialogs, dialog_ids)


# Querying after DB creation

In [None]:
def format_query_result(query_result):
    """Render the lawyer's lines from a query result as one text block.

    Only entries whose ``speaker`` is exactly "Justicius" are kept. Each kept
    entry becomes "<speaker>: <content>", and entries are separated by a line
    containing "...". Returns an empty string when nothing matches.
    """
    lawyer_lines = []
    for hit in query_result:
        if hit.speaker != "Justicius":
            continue
        lawyer_lines.append(f"{hit.speaker}: {hit.content}")
    return "\n...\n".join(lawyer_lines)
    

## Meeting Summary Generation

In [4]:
# Case and meeting whose transcript is summarized in the following cells.
# These rebind the names already set by the dialog DB creation cell.
case_id = 'JUSTICIUS-MARCO'
meeting_timestamp = '2023-11-27'

def format_dialogs(dialogs: list[Dialog]):
    """Render dialog turns as a plain-text transcript.

    Each turn becomes one "<speaker>: <content>" line; lines are joined with
    newlines. An empty list yields an empty string.
    """
    transcript_lines = map(lambda turn: f"{turn.speaker}: {turn.content}", dialogs)
    return "\n".join(transcript_lines)

# Fetch the selected meeting's dialogs, then flatten them into one transcript string.
selected_meeting_dialogs = get_dialogs(case_id, meeting_timestamp)
formatted_dialog = format_dialogs(selected_meeting_dialogs)

In [16]:
from gpt_wrapper.assistants import ChatGPT
from gpt_wrapper.messages import SimpleHistory, msg

# Step 1: Summarize
summarizer = ChatGPT(messages=SimpleHistory(), model="gpt-4-1106-preview")
summary = await summarizer(f"""
The following is a full trasncript of a legal meeting between the lawyer Justicius and his client Marco.
Please summarize for the client Marco, the 3 most important points of the meeting to be reminded about.
You MUST address Marco directly in your summary! Start the summary with "In your last meeting..." and end by 1 sentence asking if he has any questions about the meeting.
Use markdown formatting and bullet points.

[Trascript]:
{formatted_dialog}""".strip(), temperature=0.5)

# Step 2: Generate a title
titlizer = ChatGPT(messages=SimpleHistory(), model="gpt-4-1106-preview")
title = await titlizer(f"Please generate a short title (<10 words) for the given meeting summary:\n\n{summary}", temperature=0.5)

[User]: The following is a full trasncript of a legal meeting between the lawyer Justicius and his client Marco.
Please summarize for the client Marco, the 3 most important points of the meeting to be reminded about.
You MUST address Marco directly in your summary! Start the summary with "In your last meeting..." and end by 1 sentence asking if he has any questions about the meeting.
Use markdown formatting and bullet points.

[Trascript]:
Justicius: Good day, Marco! I trust everything has been going smoothly since our last discussion.
Marco: Hello, Justicius! Yes, everything's been fine. I've had some time to review the documents we discussed, and I'm ready for our next steps.
Justicius: Excellent. I'm glad to hear that. Let's delve into the details. I've thoroughly reviewed the documents you provided, including your marriage certificate and financial statements. It's essential to have a comprehensive understanding of your situation.
Marco: I appreciate your thoroughness, Justicius. I

# Meeting DB creation

In [5]:
# Rebuild the per-case meeting collection from the stored meeting summaries.
case_id = 'JUSTICIUS-MARCO'
meeting_timestamps = ['2023-11-13', '2023-11-27']

meetings_collection = f"Meetings_{case_id}"
meeting_db = VectorDB(meetings_collection, Meeting)
meeting_db.reset()  # reset if exists

# Summaries are fetched and inserted one meeting at a time, in list order.
for meeting_timestamp in meeting_timestamps:
    meeting = get_meeting_summary(case_id, meeting_timestamp)
    meeting_db.add(meeting)

# Legal DB creation

In [8]:
# Create one collection per law book, named after the book itself.
books = ['BGB', 'ZPO', 'FamFG']
for book in books:
    law_articles = get_legal_text(book)
    law_db = VectorDB(book, LawArticle)
    law_db.reset()  # reset if exists

    if book != 'BGB':
        # Every book other than BGB is inserted with a single call.
        law_db.add(law_articles)
        continue

    # BGB is inserted in slices of 200 articles instead of one call.
    batch_size = 200
    for start in range(0, len(law_articles), batch_size):
        stop = start + batch_size
        law_db.add(law_articles[start:stop])

BGB: 2477 articles


# AI Assistant

In [1]:
from gpt_wrapper.tools import Tools, Toolkit, ToolList, function_tool

In [None]:
class CaseDBToolkit(Toolkit):
    """Tools for storing and searching the meetings of one legal case.

    NOTE(review): this cell has never been executed (`In [None]`) and still
    contains placeholders; see the TODO/NOTE markers below.
    """

    def __init__(self, case_id: str):
        """Open the vector databases used by the tools.

        Args:
            case_id: Identifier of the case, e.g. 'JUSTICIUS-MARCO'.
        """
        self.case_id = case_id
        # TODO: replace the `...` placeholder with the real meeting ids of the case.
        self.all_meetings = [...]
        # Case-wide meeting store that `add_meeting` writes to. It uses the same
        # collection name as the "Meeting DB creation" cell of this notebook.
        self.meeting_db = VectorDB(f"Meetings_{case_id}", Meeting)
        # Keyed by meeting id so a tool call can look up a DB by its string id
        # (a list cannot be indexed with a str).
        self.meeting_dbs = {
            meeting_id: VectorDB(f"meeting_db:{meeting_id}", Meeting)
            for meeting_id in self.all_meetings
        }

    @function_tool
    def add_meeting(self, timestamp: int, transcript: str):
        """Summarize a meeting transcript and store it in the case's meeting database.

        Args:
            timestamp: Meeting date; stored as the `date` field of the record.
            transcript: Full transcript; stored as `full_content`.
        """
        # use GPT endpoint to summarize the transcript and generate a short title
        # NOTE(review): `gpt` is not defined or imported in the visible cells;
        # it has to be provided before this tool can run.
        content = gpt(f"Write a 1-paragraph summary for the following meeting transcript:\n{transcript}")
        title = gpt(f"Write a short title for the following meeting transcript:\n{transcript}")

        # add to database
        self.meeting_db.add(Meeting(
            date=timestamp,
            title=title,
            summary=content,
            full_content=transcript
        ))

    @function_tool
    def get_relevant_meeting_snippets(self, meeting_id: str, query: str):
        """Search one meeting's database for content matching the query.

        Args:
            meeting_id: Id of the meeting to search; must be one of `all_meetings`.
            query: Free-text search query.

        Returns:
            Whatever the meeting database's `search` returns for the query.

        Raises:
            KeyError: If `meeting_id` is not a known meeting.
        """
        if meeting_id not in self.meeting_dbs:
            raise KeyError(
                f"Unknown meeting_id {meeting_id!r}; known meetings: {list(self.meeting_dbs)}"
            )
        return self.meeting_dbs[meeting_id].search(query)

# Dialog generation 

In [None]:
from gpt_wrapper.assistants import ChatGPT
from gpt_wrapper.messages import SimpleHistory, msg

# Step 1: Get the data
name, meeting_timestamp, case, lawyer = summary_submitted()

# Step 2: Summarize
summarizer = ChatGPT(messages=SimpleHistory(), model="gpt-4-1106-preview")

dialog = await summarizer(f"""
Generate a dialog (around 20 conversations) with the ID of {meeting_timestamp} between the {lawyer} and the {name} based on the information from {case}. 
It should be the first interaction of the lawyer and client. Usually, talk about general things about the case, the lawyer's strategy, and the fee. 
The conversation always starts with the lawyer speaking first.
""")

# Step 3: Generate an object for the database
case_id = f"{lawyer}-{name}" 
meeting_timestamp = meeting_timestamp
dialogs_db.reset()
dialogs_db.add(dialog)  

