# Contract Generation

In [1]:
import pandas as pd
from io import StringIO
from pathlib import Path
from pydantic import BaseModel, Field
from sklearn.metrics import accuracy_score
from language_models.models.llm import OpenAILanguageModel
from language_models.models.embedding import SentenceTransformerEmbeddingModel
from language_models.agent import (
    Agent,
    Workflow,
    WorkflowLLMStep,
    OutputType,
    PromptingStrategy,
)
from language_models.vector_stores import FAISSVectorStore, DistanceMetric
from language_models.proxy_client import ProxyClient
from language_models.settings import settings
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader

In [2]:
# Client handed to every language model created below. All credentials and
# endpoints are read from `settings` rather than being written into the notebook.
proxy_client = ProxyClient(
    api_base=settings.API_BASE,
    auth_url=settings.AUTH_URL,
    client_id=settings.CLIENT_ID,
    client_secret=settings.CLIENT_SECRET,
)

In [3]:
# Model settings for the drafting stage.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model="gpt-4",
    temperature=0.2,
    max_tokens=2000,
)

# Persona and task framing sent as the system message.
system_prompt = """You are an official at the U.S. Department of Defense, specifically responsible for overseeing the Defense Federal Acquisition Regulation (DFAR).

Your primary task is to draft sections of defense contracts, ensuring they align with federal regulations and the strategic goals of the Department of Defense.

Given specific context or requirements, you are expected to craft clear, precise, and legally sound contract language."""

# Per-call template; the {goal} and {context} placeholders are the prompt variables
# declared on the agent below.
prompt = """Overall goal: {goal}

Consider this context:
{context}"""

# First stage of the workflow: produces a free-text draft of one contract section.
draft_contract_section = Agent.create(
    llm=llm,
    system_prompt=system_prompt,
    prompt=prompt,
    prompt_variables=["goal", "context"],
    prompting_strategy=PromptingStrategy.SINGLE_COMPLETION,
    output_type=OutputType.STRING,
    verbose=True,
)

In [4]:
# Load the DFARS PDF one Document per page, then chunk it exactly once.
# `loader.load()` is used rather than `load_and_split()` because the latter already
# runs a default text splitter; feeding its result into the splitter below would
# split the text twice with two different configurations.
loader = PyPDFLoader("./docs/DFARS.pdf")
pages = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_size=2000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(pages)

[Document(metadata={'source': './docs/DFARS.pdf', 'page': 0}, page_content='VOLUME  III-P ARTS 201 TO 253\nDEFENSE\nFEDERAL\nACQUISITION\nREGULATION\nIssued Fiscal Year 2020 by the:'), Document(metadata={'source': './docs/DFARS.pdf', 'page': 2}, page_content='TITLE 48 - DEFENSE FEDERAL ACQUISITION REGULATIONS SYSTEM\nChapter1\nDEFENSE  FEDERAL  ACQUISITION  REGULATION\nVolume I'), Document(metadata={'source': './docs/DFARS.pdf', 'page': 4}, page_content='DEFENSE FEDERAL ACQUISITION REGULATION\nSUBCHAPTER  A  - GENERAL'), Document(metadata={'source': './docs/DFARS.pdf', 'page': 6}, page_content='DEFENSE FEDERAL ACQUISITION REGULATION\nGeneral Structure and Subparts\nSUBCHAPTER  A  - GENERAL\nPART 201  - FEDERAL ACQUISITION REGULATIONS SYSTEM\n201.1 PURPOSE, AUTHORITY, ISSUANCE\n201.2 ADMINISTRATION\n201.3 AGENCY ACQUISITION REGULATIONS\n201.4 DEVIATIONS FROM THE FAR\n201.6 CAREER DEVELOPMENT, CONTRACTING AUTHORITY, AND RESPONSIBILITIES\nPART 202  - DEFINITIONS OF WORDS AND TERMS\n202.1 

In [None]:
embedding_model = SentenceTransformerEmbeddingModel(model="all-mpnet-base-v2")

# The persisted index must be loaded from the same place it is saved to; otherwise
# the cache is never found and every run re-embeds the whole document set.
# "./docs" is used because it already holds the source PDF, so it exists whenever
# `documents` could be built.
# NOTE(review): the index name "job_embeddings" is kept so an existing cache stays
# valid, but it does not describe DFARS content — consider renaming.
INDEX_DIR = "./docs"
INDEX_NAME = "job_embeddings"

try:
    vector_store = FAISSVectorStore.load_local(INDEX_DIR, INDEX_NAME)
except Exception:
    # No usable cached index: embed the chunks and persist the result for next time.
    # `Exception` (not a bare `except`) so Ctrl-C / kernel interrupts still propagate.
    vector_store = FAISSVectorStore.from_documents(
        documents=documents,
        embedding_model=embedding_model,
        distance_metric=DistanceMetric.COSINE_SIMILARITY,
    )
    vector_store.save_local(INDEX_DIR, INDEX_NAME)

In [None]:
def format_documents(documents: list[Document]) -> str:
    """Render documents as plain-text blocks for inclusion in an LLM prompt.

    Every document becomes a block made of a "Document" heading, a line with its
    metadata and its page content; consecutive blocks are separated by one blank line.

    Args:
        documents: Objects exposing ``metadata`` and ``page_content`` attributes.

    Returns:
        The joined blocks, or an empty string when ``documents`` is empty.
    """
    template = "Document\n--------\nMetadata: {metadata}\nContent:\n{page_content}"
    blocks = []
    for doc in documents:
        blocks.append(
            template.format(metadata=doc.metadata, page_content=doc.page_content)
        )
    return "\n\n".join(blocks)

def get_relevant_documents(self, query: str, fetch_k: int = 5) -> str:
    """Search the vector store for ``query`` and return the hits as prompt text.

    Args:
        self: Object exposing a ``vector_store`` attribute.
            NOTE(review): this is defined at module level, not inside a class —
            confirm how it is meant to be bound or called.
        query: Text passed to the similarity search.
        fetch_k: Forwarded as the second positional argument of
            ``similarity_search``; presumably the number of hits — confirm.

    Returns:
        The matching documents rendered by ``format_documents``.
    """
    hits = self.vector_store.similarity_search(query, fetch_k)
    # Each hit unpacks into a pair; only its first element (the document) is used.
    return format_documents([matched for matched, _ in hits])

In [8]:
# Model settings for the regulation-lookup stage.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model="gpt-4",
    temperature=0.2,
    max_tokens=2000,
)

# System message describing the lookup task.
system_prompt = """Your primary task is to search through the DFARS to identify and retrieve relevant regulations, rules, and guidelines that must be followed for a draft of a contract section."""

# Per-call template; {draft} is the only placeholder.
prompt = """Draft:
{draft}"""

# Schema of one item in the list produced by the `find_regulations` agent, which is
# created with `output_schema=Regulations`. Documented with `#` comments rather than
# a docstring so the model definition itself stays unchanged.
class Regulations(BaseModel):
    # Page on which the regulation was found.
    page_number: int = Field(description="The page where you found the information")
    # Excerpt of the regulation text that is relevant to the draft.
    regulation: str = Field(description="The relevant bits of the regulation")

# Second stage of the workflow: returns a list of `Regulations` records for a draft.
# NOTE(review): `tools=None` gives the agent no retrieval tool even though the system
# prompt asks it to search the DFARS; presumably `get_relevant_documents` was meant
# to be supplied here — confirm.
find_regulations = Agent.create(
    llm=llm,
    prompt=prompt,
    system_prompt=system_prompt,
    prompt_variables=["draft"],
    prompting_strategy=PromptingStrategy.CHAIN_OF_THOUGHT,
    output_schema=Regulations,
    output_type=OutputType.ARRAY_STRUCT,
    tools=None,
    verbose=True,
)

In [None]:
# Model settings for the refinement stage.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model="gpt-4",
    temperature=0.2,
    max_tokens=2000,
)

# System message describing how regulations are to be merged into a draft.
system_prompt = """You are tasked with refining contract sections based on relevant regulations from the Defense Federal Acquisition Regulation Supplement (DFARS).

You will receive a draft of a contract section along with specific DFARS regulations.

Your job is to incorporate these regulations into the draft, ensuring accuracy, clarity, and compliance.

Where regulations are applied, include the corresponding DFARS page number in brackets to provide clear references."""

# Per-call template with two placeholders: {draft} and {regulations}.
prompt = """Draft:
{draft}

Relevant regulations:
{regulations}"""

# Title/content pair describing one refined contract section.
# NOTE(review): this model is not passed to the `Agent.create` call that follows it
# in this cell — confirm whether it was meant to be that agent's `output_schema`.
class ContractSection(BaseModel):
    # Heading of the section.
    title: str = Field(description="The title for the contract section")
    # Body text of the section after refinement.
    content: str = Field(description="The refined content for the contract section")

# Third stage of the workflow: merges a draft with the regulations found for it.
# `prompt_variables` has to name the placeholders that actually occur in `prompt`
# ({draft} and {regulations}); the template contains no {context} placeholder.
# NOTE(review): `ContractSection` is defined in this cell but not passed here, while
# `generate_contract` reads `.title` / `.content` from the final answer — confirm
# whether an `output_schema` and a struct output type are required.
refine_contract_section = Agent.create(
    llm=llm,
    system_prompt=system_prompt,
    prompt=prompt,
    prompt_variables=["draft", "regulations"],
    output_type=OutputType.OBJECT,
    prompting_strategy=PromptingStrategy.SINGLE_COMPLETION,
    verbose=True,
)

In [None]:
# Input schema handed to `Workflow(inputs=Inputs)`. The field names match the
# {goal} and {context} placeholders of the drafting agent's prompt.
class Inputs(BaseModel):
    # Overall contract the user wants to produce.
    goal: str = Field(description="The contract the user wants to draft")
    # Bullet-point context for the single section being generated.
    context: str = Field(description="The context provided for a section as bullet points")

# Chains the three agents in order (draft, regulation lookup, refinement) and exposes
# the step named "refine_contract_section" as the workflow output.
workflow = Workflow(
    name="Generates a contract section",
    description="Uses LLMs to generate a contract section given some context",
    inputs=Inputs,
    steps=[
        WorkflowLLMStep(name=step_name, agent=step_agent)
        for step_name, step_agent in (
            ("draft_contract_section", draft_contract_section),
            ("find_regulations", find_regulations),
            ("refine_contract_section", refine_contract_section),
        )
    ],
    output="refine_contract_section",
)

In [None]:
def generate_contract(contract_sections: list[str], goal: str = "") -> str:
    """Run the workflow once per section and join the results into one contract text.

    Args:
        contract_sections: One context string per section to generate. Each is passed
            to the workflow as the ``context`` input.
        goal: Overall contract goal, passed to the workflow as the ``goal`` input.
            Defaults to an empty string so existing one-argument calls keep working.

    Returns:
        The generated sections, each rendered as title, blank line, content, and
        separated from the next by one blank line. Empty string for an empty input.
    """
    sections = []
    for contract_section in contract_sections:
        # The workflow's declared inputs are `goal` and `context` (see `Inputs`).
        output = workflow.invoke({"goal": goal, "context": contract_section})
        answer = output.final_answer
        sections.append(str(answer.title) + "\n\n" + str(answer.content))
        # Clear conversation state so the next section starts from a clean history.
        # NOTE(review): assumes every agent exposes `.chat.reset()`, as the earlier
        # single-agent call did — confirm.
        for agent in (draft_contract_section, find_regulations, refine_contract_section):
            agent.chat.reset()
    return "\n\n".join(sections)

In [None]:
# One bullet-point context string per contract section to generate.
# TODO: fill in the sections for the contract being drafted. With an empty list the
# call below returns an empty string without invoking the workflow.
contract_sections: list[str] = []
contract = generate_contract(contract_sections)

In [None]:
def render_reasoning(output):
    """Print a plain-text trace of every step recorded on ``output``.

    Each element of ``output.steps`` is introduced by a "Use LLM" banner. Every entry
    in that step's own ``steps`` is then printed as an underlined heading followed by
    its content. ``tool_use`` and ``final_answer`` entries are expanded field by field;
    any other entry prints its content as-is. A blank line follows each entry except
    the final answer of the last step.

    Args:
        output: Object with a ``steps`` sequence; each step has a ``steps`` iterable of
            entries exposing ``name`` and ``content``.

    Raises:
        KeyError: If an entry name has no heading in the lookup table.
    """
    headings = {
        "system_prompt": "System Prompt",
        "prompting_strategy": "Prompting Strategy",
        "prompt": "Prompt",
        "raw_output": "Raw Output",
        "observation": "Observation",
        "final_answer": "Final Answer",
        "tool_use": "Tool Use",
        "tool_output": "Tool Output",
    }
    banner = "Use LLM"

    for step_index, step in enumerate(output.steps):
        print(banner)
        print("=" * len(banner))
        print()

        for entry in step.steps:
            kind = entry.name
            print(headings[kind])
            # The underline is sized from the raw entry name.
            print("-" * len(kind))

            if kind == "tool_use":
                # A trailing "\n" in the text yields the blank separator line.
                print(f"Thought: {entry.content.thought}\n")
                print(f"Used: {entry.content.used}\n")
                print(f"Arguments: {entry.content.arguments}")
            elif kind == "final_answer":
                if entry.content.thought is not None:
                    print(f"Thought: {entry.content.thought}\n")
                print(f"Output: {entry.content.output}")
            else:
                print(entry.content)

            # Skip the trailing blank line only after the last step's final answer.
            if not (kind == "final_answer" and step_index == len(output.steps) - 1):
                print()