In [None]:
!pip install openai


In [12]:
def generate_json_prompt(doc_text: str) -> str:
    """Build the clause-extraction prompt for one contract document.

    The prompt lists 25 clause definitions, embeds ``doc_text`` verbatim under
    "Document Text:", and ends with a sample output shape, three worked
    examples and an instruction to report missing clauses as NONE.

    Args:
        doc_text: Full text of the contract document to analyse.

    Returns:
        The complete prompt as a single string.
    """
    # Literal braces in the sample/example sections are doubled ({{ }}) because
    # this is an f-string; only {doc_text} is interpolated.
    prompt = f"""
        You are an expert in clause extraction in context of contract related documents.
        Generate a JSON representation of the document based on the following defined clauses:
        1.   Supplier Name: The name of the company or entity providing the goods or services outlined in the contract.
        2.   Contract Number: A unique identifier assigned to the contract, often used for tracking and reference purposes.
        3.   Contract Type: The type of contract, such as a purchase agreement, service level agreement, or master services agreement.
        4.   Contract Term and Value: The duration and total value of the contract, including any milestones, payment schedules, and performance metrics.
        5.   Payment Terms: The schedule and method of payment for goods or services provided under the contract.
        6.   Key obligations (BT): The key responsibilities and obligations of the buyer (buyer's organization) under the contract.
        7.   Key obligations (Supplier): The key responsibilities and obligations of the supplier under the contract.
        8.   SLAs and KPIs and their page numbers: Service Level Agreements (SLAs) and Key Performance Indicators (KPIs) that outline the standards for service delivery and performance metrics, along with the page numbers where they can be found in the contract.
        9.   Service Credits: The penalties or incentives applied to the supplier for failing to meet service level agreements or performance targets.
        10.  Milestone Clauses: Specific deadlines or milestones that must be met during the contract term, often tied to payment schedules or performance metrics.
        11.  Liquidated Damages: A predetermined amount of damages that the supplier must pay to the buyer in the event of a breach of contract.
        12.  Other Remedies: Additional remedies or consequences that may be applied in the event of a breach of contract, such as termination or specific performance.
        13. Rebate Mechanism: A system for providing rebates or discounts to the buyer based on specific criteria, such as volume or spend.
        14. Pricing Schedule: price validity period and price increase mechanism: The schedule outlining the pricing structure, including the duration of the pricing agreement and the process for adjusting prices.
        15. Contract Commitments (Volume, Spend, Resource, etc.): The specific commitments or obligations of the supplier, such as volume commitments, spend targets, or resource allocation.
        16. Forex & Commodity fluctuations: Clauses that address the impact of foreign exchange rate fluctuations or commodity price changes on the contract.
        17. Contract Governance Model: The framework for managing and governing the contract, including roles, responsibilities, and decision-making processes.
        18. Renewal Terms: The terms and conditions for renewing the contract, including notice periods, pricing, and performance metrics.
        19. Exclusivity Clause(s): Clauses that restrict the supplier from providing similar goods or services to other buyers or entities.
        20. Contract Liability Clauses: Clauses that address the liability of the supplier or buyer in the event of a breach of contract.
        21. Termination Clauses (rollover, terminate for convenience, notice period): Clauses that outline the process for terminating the contract, including notice periods, rollover options, and termination fees.
        22. Training Clauses (BT mandatory training, Supplier provided training): Clauses that outline the training requirements for the buyer or supplier, including mandatory training and supplier-provided training.
        23. TUPE Clauses: Clauses that address the Transfer of Undertakings (Protection of Employment) regulations, which protect employee rights in the event of a change in ownership or control.
        24. Intellectual Property Rights: Clauses that address the ownership and use of intellectual property, including patents, trademarks, and copyrights.
        25. Amendment Number and Amendment Date: The unique identifier and date of any amendments or changes made to the original contract.
         Document Text:
        {doc_text}
        Sample JSON Output:
        {{
            <Clause Name>: {{
                Original Text: <Pass the original text for the Clause from input paragraph as it is without any changes.>,
                Summary: <Summarize the the Original Text>,
                Details: <Exact details for any specific numerical term like amount, date, etc or any particular name of the party involved like company name>
            }}
        }}
        Example 1:
        Contract Term and Value: {{
        Original Text: Contract Term: 2 years, Contract Value: $1,000,000,
        Summary: The contract is for 2 years with a total value of $1,000,000.,
        Details: Duration: 2 years, Value: $1,000,000
    }}
    Example 2:
    "Contract Number": {{
        "Original Text": "Contract Number: 12345",
        "Summary": "The contract is identified by the number 12345.",
        "Details": "12345"
    }}
    Example 3:
     "Supplier Name": {{
        "Original Text": "Supplier Name: BT Company",
        "Summary": "Supplier Name is BT Company.",
        "Details": "BT Company"
    }}

        "### If Clause is not found in the document then consider it as NONE ###"
    """
    return prompt


In [30]:
from openai import OpenAI

# Credentials are kept out of the notebook: with no `api_key` argument the
# client reads the key from the OPENAI_API_KEY environment variable and raises
# at construction time if it is not set. Passing api_key="" would override that
# lookup and send a blank key.
client = OpenAI()

def chat_gpt(prompt, model="gpt-3.5-turbo", temperature=0.9, presence_penalty=0.3):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Prompt text, sent as the only message of the conversation.
        model: Chat model name passed to the API.
        temperature: Sampling temperature passed to the API.
        presence_penalty: Presence penalty passed to the API.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the reply carries no text content.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        presence_penalty=presence_penalty,
    )
    # The API declares message.content as nullable; guard so a reply without
    # text does not raise AttributeError on .strip().
    content = response.choices[0].message.content
    return content.strip() if content else ""

In [25]:
import os

# Input documents to process. The ".pdf.txt" suffix suggests these are text
# extracts of PDF contracts (TODO confirm how they were produced).
INPUT_DIR = '/content/sample_data/Input'
file_paths = [
    f"{INPUT_DIR}/{name}"
    for name in (
        'A01 Appendix 1 - P-Router - Description of Supplies Final 4_4_14.pdf.txt',
        'BT Amendment 14.pdf.txt',
        'CA 099 Management of Bulk Purchase Orders - signed.pdf.txt',
        'CA013 bill and hold Q4 v2 - signed.pdf.txt',
        'CA60 Additional capex saving 17-18 final version - signed.pdf.txt',
    )
]


# Directory that receives one output file per processed input document.
base_path = "/content/sample_data/json_output/"


# Create the output directory up front; exist_ok=True makes re-running this
# cell harmless when the directory is already there.
os.makedirs(base_path, exist_ok=True)


def read_file_content(file_path, encoding="utf-8"):
    """Return the full text content of ``file_path``.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Set explicitly so the
            result does not depend on the platform's default encoding.

    Returns:
        The file's content as a single string.
    """
    with open(file_path, "r", encoding=encoding) as file:
        return file.read()


def process_document(text, output_file):
    """Run clause extraction on ``text`` and save the model's reply.

    Builds the prompt with ``generate_json_prompt``, sends it through
    ``chat_gpt`` and writes the returned string to ``output_file`` unchanged.
    The reply is not parsed or validated here.

    Args:
        text: Document text to analyse.
        output_file: Path of the file to create or overwrite with the reply.
    """
    prompt = generate_json_prompt(text)
    response = chat_gpt(prompt)
    # Write as UTF-8 explicitly: the reply can contain non-ASCII characters,
    # and the platform default encoding is not guaranteed to handle them.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(response)


# Process every input in turn. The output name is the input's base name with
# only its last extension replaced by ".json", so "X.pdf.txt" becomes
# "X.pdf.json" inside base_path.
for file_path in file_paths:
    base_name = os.path.basename(file_path)
    stem = os.path.splitext(base_name)[0]
    output_file = os.path.join(base_path, stem + ".json")
    text = read_file_content(file_path)
    process_document(text, output_file)
    print("JSON stored in: " + output_file)


JSON stored in: /content/sample_data/json_output/A01 Appendix 1 - P-Router - Description of Supplies Final 4_4_14.pdf.json
JSON stored in: /content/sample_data/json_output/BT Amendment 14.pdf.json
JSON stored in: /content/sample_data/json_output/CA 099 Management of Bulk Purchase Orders - signed.pdf.json
JSON stored in: /content/sample_data/json_output/CA013 bill and hold Q4 v2 - signed.pdf.json
JSON stored in: /content/sample_data/json_output/CA60 Additional capex saving 17-18 final version - signed.pdf.json
