<a href="https://colab.research.google.com/github/Dhanush834/AusgutAI/blob/main/Full_Legal_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
pip install openai portkey-ai transformers torch sentence-transformers scikit-learn numpy

Collecting openai
  Downloading openai-1.44.0-py3-none-any.whl.metadata (22 kB)
Collecting portkey-ai
  Downloading portkey_ai-1.8.7-py3-none-any.whl.metadata (7.3 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting mypy<2.0,>=0.991 (from portkey-ai)
  Downloading mypy-1.11.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (1.9 kB)
Collecting cached-property (from portkey-ai)
  Downloading cached_property-1.5.2-py2.py3-none-any.whl.metadata (11 kB)
Collecting types-requests (from portkey-ai)
  Downloading types_requests-2.32.0.20240907-py3-none-any.whl.metadata (1.9 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Do

In [1]:
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from sentence_transformers import SentenceTransformer
from openai import OpenAI
from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL

class IndianLawRAGAgent:
    def __init__(self, portkey_api_key, portkey_virtual_key, bert_model_name, knowledge_base_path):
        self.client = OpenAI(
            api_key="dummy",
            base_url=PORTKEY_GATEWAY_URL,
            default_headers=createHeaders(
                provider="openai",
                api_key=portkey_api_key,
                virtual_key=portkey_virtual_key
            )
        )

        # Load BERT model and tokenizer from Hugging Face
        self.bert_model = AutoModelForQuestionAnswering.from_pretrained(bert_model_name)
        self.bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
        self.sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

        # Load knowledge base
        with open(knowledge_base_path, 'r') as f:
            data = json.load(f)
            self.knowledge_base = data["knowledge_base"]

        # Encode documents
        self.document_embeddings = self.sentence_model.encode([doc['content'] for doc in self.knowledge_base])

    def retrieve_relevant_documents(self, query, top_k=3):
        query_embedding = self.sentence_model.encode([query])
        similarities = cosine_similarity(query_embedding, self.document_embeddings)[0]
        top_indices = np.argsort(similarities)[-top_k:][::-1]
        return [self.knowledge_base[i] for i in top_indices]

    def answer_legal_question(self, question, context):
        inputs = self.bert_tokenizer(question, context, return_tensors="pt")
        outputs = self.bert_model(**inputs)

        answer_start = outputs.start_logits.argmax()
        answer_end = outputs.end_logits.argmax() + 1
        answer = self.bert_tokenizer.convert_tokens_to_string(
            self.bert_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end])
        )
        return answer

    def generate_legal_advice(self, startup_type, situation):
        relevant_docs = self.retrieve_relevant_documents(f"{startup_type} {situation}")
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""As a legal expert, provide advice for an Indian {startup_type} startup in the following situation: {situation}

        Relevant legal information:
        {context}

        Advice:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal assistant specializing in Indian startup law."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

    def summarize_law(self, law_name):
        relevant_docs = self.retrieve_relevant_documents(law_name)
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""Summarize the key points of the Indian law: {law_name}, especially as it pertains to startups.

        Relevant legal information:
        {context}

        Summary:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal assistant specializing in Indian law summaries."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

    def check_compliance(self, startup_description):
        relevant_docs = self.retrieve_relevant_documents(startup_description)
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""Given the following Indian startup description, list the key compliance requirements and potential legal issues to be aware of: {startup_description}

        Relevant legal information:
        {context}

        Compliance requirements and potential legal issues:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal assistant specializing in compliance for Indian startups."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

def main():
    agent = IndianLawRAGAgent(
        portkey_api_key="1+YM2sBEgaZWe45bMOq2huic1uyF",
        portkey_virtual_key="015-openai-40bada",
        bert_model_name="law-ai/InLegalBERT",
        knowledge_base_path="test.json"
    )

    print("Choose an option:")
    print("1. Answer a legal question")
    print("2. Generate legal advice")
    print("3. Summarize a law")
    print("4. Check compliance requirements")

    choice = int(input("Enter the number of your choice: "))

    if choice == 1:
        legal_question = input("Enter your legal question: ")
        legal_context = input("Enter the legal context: ")
        answer = agent.answer_legal_question(legal_question, legal_context)
        print(f"Answer: {answer}")

    elif choice == 2:
        startup_type = input("Enter the startup type: ")
        situation = input("Describe the situation: ")
        advice = agent.generate_legal_advice(startup_type, situation)
        print(f"Legal Advice: {advice}")

    elif choice == 3:
        law_name = input("Enter the law name: ")
        summary = agent.summarize_law(law_name)
        print(f"Law Summary: {summary}")

    elif choice == 4:
        startup_description = input("Enter the startup description: ")
        compliance = agent.check_compliance(startup_description)
        print(f"Compliance Check: {compliance}")

    else:
        print("Invalid choice. Please try again.")

if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'sentence_transformers'

In [8]:
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from sentence_transformers import SentenceTransformer
from openai import OpenAI
from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL

class IndianLawRAGAgent:
    def __init__(self, portkey_api_key, portkey_virtual_key, bert_model_name, knowledge_base_path):
        self.client = OpenAI(
            api_key="dummy",
            base_url=PORTKEY_GATEWAY_URL,
            default_headers=createHeaders(
                provider="openai",
                api_key=portkey_api_key,
                virtual_key=portkey_virtual_key
            )
        )

        # Load BERT model and tokenizer from Hugging Face
        self.bert_model = AutoModelForQuestionAnswering.from_pretrained(bert_model_name)
        self.bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
        self.sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

        # Load knowledge base
        with open(knowledge_base_path, 'r') as f:
            data = json.load(f)
            self.knowledge_base = data["knowledge_base"]

        # Encode documents
        self.document_embeddings = self.sentence_model.encode([doc['content'] for doc in self.knowledge_base])

    def retrieve_relevant_documents(self, query, top_k=3):
        query_embedding = self.sentence_model.encode([query])
        similarities = cosine_similarity(query_embedding, self.document_embeddings)[0]
        top_indices = np.argsort(similarities)[-top_k:][::-1]
        return [self.knowledge_base[i] for i in top_indices]

    def answer_legal_question(self, question, context):
        inputs = self.bert_tokenizer(question, context, return_tensors="pt")
        outputs = self.bert_model(inputs)

        answer_start = outputs.start_logits.argmax()
        answer_end = outputs.end_logits.argmax() + 1
        answer = self.bert_tokenizer.convert_tokens_to_string(
            self.bert_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end])
        )
        return answer

    def generate_legal_advice(self, startup_type, situation):
        relevant_docs = self.retrieve_relevant_documents(f"{startup_type} {situation}")
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""As a legal expert, provide advice for an Indian {startup_type} startup in the following situation: {situation}

        Relevant legal information:
        {context}

        Advice:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal assistant specializing in Indian startup law."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

    def summarize_law(self, law_name):
        relevant_docs = self.retrieve_relevant_documents(law_name)
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""Summarize the key points of the Indian law: {law_name}, especially as it pertains to startups.

        Relevant legal information:
        {context}

        Summary:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal assistant specializing in Indian law summaries."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

    def check_compliance(self, startup_description):
        relevant_docs = self.retrieve_relevant_documents(startup_description)
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""Given the following Indian startup description, list the key compliance requirements and potential legal issues to be aware of: {startup_description}

        Relevant legal information:
        {context}

        Compliance requirements and potential legal issues:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal assistant specializing in compliance for Indian startups."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

    def handle_user_input(self, user_input):
        # Step 1: Summarize the user input
        summarization_prompt = f"""Summarize the following legal input to extract key points:

        Input: {user_input}

        Summary:"""

        summarization_response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI summarization assistant."},
                {"role": "user", "content": summarization_prompt}
            ]
        )
        summarized_input = summarization_response.choices[0].message.content.strip()
        print(f"Summarized Input: {summarized_input}")

        # Step 2: Classify the user input into a type (e.g., 'legal question', 'generate advice', etc.)
        classification_prompt = f"""Classify the following summarized legal input into one of these categories:
        'legal question', 'generate advice', 'summarize law', 'check compliance'.

        Summary: {summarized_input}

        Classification:"""

        classification_response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI classification assistant."},
                {"role": "user", "content": classification_prompt}
            ]
        )
        classification = classification_response.choices[0].message.content.strip().lower()
        print(f"Classification: {classification}")

        # Step 3: Based on classification, automatically generate the legal context or invoke appropriate function
        if "legal question" in classification:
            # Use OpenAI to generate the legal context automatically
            legal_context_prompt = f"""Generate the legal context for the following legal question:

            Question: {summarized_input}

            Legal Context:"""

            legal_context_response = self.client.chat.completions.create(
                model="gpt-4-turbo",
                messages=[
                    {"role": "system", "content": "You are an AI legal context assistant."},
                    {"role": "user", "content": legal_context_prompt}
                ]
            )
            legal_context = legal_context_response.choices[0].message.content.strip()
            print(f"Legal Context: {legal_context}")
            return f"Function called: answer_legal_question\nResponse: {self.answer_legal_question(summarized_input, legal_context)}"

        elif "generate advice" in classification:
            # Auto-generate startup type based on the input (if applicable)
            startup_type_prompt = f"""Identify the startup type based on the following input:

            Input: {summarized_input}

            Startup Type:"""

            startup_type_response = self.client.chat.completions.create(
                model="gpt-4-turbo",
                messages=[
                    {"role": "system", "content": "You are an AI startup classification assistant."},
                    {"role": "user", "content": startup_type_prompt}
                ]
            )
            startup_type = startup_type_response.choices[0].message.content.strip()
            print(f"Startup Type: {startup_type}")
            return f"Function called: generate_legal_advice\nResponse: {self.generate_legal_advice(startup_type, summarized_input)}"

        elif "summarize law" in classification:
            return f"Function called: summarize_law\nResponse: {self.summarize_law(summarized_input)}"

        elif "check compliance" in classification:
            return f"Function called: check_compliance\nResponse: {self.check_compliance(summarized_input)}"

        else:
            return "Sorry, I couldn't classify your input. Please try again."


def main():
    agent = IndianLawRAGAgent(
        portkey_api_key="1+YM2sBEgaZWe45bMOq2huic1uyF",
        portkey_virtual_key="015-openai-40bada",
        bert_model_name="law-ai/InLegalBERT",
        knowledge_base_path="test.json"
    )

    user_input = input("Enter your request: ")
    response = agent.handle_user_input(user_input)
    print(f"Response: {response}")

if __name__ == "__main__":
    main()


Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at law-ai/InLegalBERT and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Enter your request: startup at health
Summarized Input: The input "startup at health" is quite brief and lacks specific details, making it difficult to provide a detailed summary. It generally suggests a focus on a startup company operating within the healthcare sector. Further information about the company's activities, objectives, or context would be required to offer a more precise summary.
Classification: summarize law
Response: Function called: summarize_law
Response: The Indian legal framework provides a structured process for setting up a business, including health sector startups. Here are the main points concerning the legal formalities for starting a company in India, especially relevant for startups like those in healthcare:

1. **Company Registration**: All new businesses must register with the Registrar of Companies (RoC). Essential steps include selecting a suitable company name, acquiring a Digital Signature Certificate (DSC), and filing the necessary incorporation docum

In [10]:
pip install python-docx



In [7]:
import json
import numpy as np
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI
from docx import Document
from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL

class ContractGenerator:
    def __init__(self, portkey_api_key, portkey_virtual_key, bert_model_name, knowledge_base_path):
        self.client = OpenAI(
            api_key="dummy",
            base_url=PORTKEY_GATEWAY_URL,
            default_headers=createHeaders(
                provider="openai",
                api_key=portkey_api_key,
                virtual_key=portkey_virtual_key
            )
        )
        # Load BERT model and tokenizer from Hugging Face
        self.bert_model = AutoModelForQuestionAnswering.from_pretrained(bert_model_name)
        self.bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
        self.sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

        # Load knowledge base
        with open(knowledge_base_path, 'r') as f:
            data = json.load(f)
            self.knowledge_base = data["knowledge_base"]

        # Encode documents
        self.document_embeddings = self.sentence_model.encode([doc['content'] for doc in self.knowledge_base])

    def retrieve_relevant_documents(self, query, top_k=3):
        query_embedding = self.sentence_model.encode([query])
        similarities = cosine_similarity(query_embedding, self.document_embeddings)[0]
        top_indices = np.argsort(similarities)[-top_k:][::-1]
        return [self.knowledge_base[i] for i in top_indices]

    def extract_information_with_bert(self, text):
        inputs = self.bert_tokenizer(text, return_tensors="pt")
        outputs = self.bert_model(**inputs)

        answer_start = outputs.start_logits.argmax()
        answer_end = outputs.end_logits.argmax() + 1
        answer = self.bert_tokenizer.convert_tokens_to_string(
            self.bert_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end])
        )
        return answer

    def generate_contract(self, details):
        # Use BERT to extract and refine contract details if necessary
        refined_details = self.extract_information_with_bert(details)

        prompt = f"""
        Create a legal contract based on the following details:

        Contract Details:
        {refined_details}

        The contract should include:
        - Names of the parties
        - Effective date
        - Terms and conditions
        - Signatures

        Contract:
        """

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are a legal expert specializing in drafting contracts."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

    def save_contract_to_file(self, contract_text, filename="contract.docx"):
        doc = Document()
        doc.add_paragraph(contract_text)
        doc.add_paragraph("[It is advisable that both parties have this document reviewed by their respective legal advisors before signing.]")
        doc.save(filename)

    def handle_user_request(self, contract_details):
        # Generate contract text
        contract_text = self.generate_contract(contract_details)
        print(f"Generated Contract: {contract_text}")

        # Save to file
        filename = "generated_contract.docx"
        self.save_contract_to_file(contract_text, filename)

        return filename

def main():
    agent = ContractGenerator(
        portkey_api_key="1+YM2sBEgaZWe45bMOq2huic1uyF",
        portkey_virtual_key="015-openai-40bada",
        bert_model_name="law-ai/InLegalBERT",
        knowledge_base_path="test.json"
    )

    # Collect contract details from the user
    print("Please provide the following details for the contract:")
    party1 = input("Party 1: ")
    party2 = input("Party 2: ")
    effective_date = input("Effective Date (YYYY-MM-DD): ")
    terms = input("Terms and Conditions: ")

    contract_details = f"""
    Party 1: {party1}
    Party 2: {party2}
    Effective Date: {effective_date}
    Terms and Conditions: {terms}
    """

    filename = agent.handle_user_request(contract_details)
    print(f"Contract has been generated and saved as {filename}.")

if __name__ == "__main__":
    main()


Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at law-ai/InLegalBERT and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Please provide the following details for the contract:
Party 1: Demo 1
Party 2: Demo 2
Effective Date (YYYY-MM-DD): 24-08-09
Terms and Conditions: tets
Generated Contract: Certainly! Below, you'll find a template for a simple legal contract based on the information provided. Please note that specific details such as the nature of the agreement, considerations, obligations, and detailed terms should be tailored to the specific agreement between the parties. This template provides a generic framework that should be adapted to the specific context of the contract.

---

**CONTRACT AGREEMENT**

This Contract Agreement (the "Agreement") is entered into as of [Date], ("Effective Date"), by and between [Party A Name], with a principal place of business located at [Party A Address] ("Party A"), and [Party B Name], with a principal place of business located at [Party B Address] ("Party B").

**WHEREAS**, Party A and Party B wish to establish the terms and conditions under which they will conduc

In [None]:
pip install googletrans



In [9]:
import json
import numpy as np
from google.colab import files
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from docx import Document
import io
from openai import OpenAI

class NaturalLanguageComplianceChecker:
    def __init__(self, portkey_api_key, portkey_virtual_key, bert_model_name, knowledge_base_path):
        self.client = OpenAI(
            api_key="dummy",
            base_url=PORTKEY_GATEWAY_URL,  # Replace with actual URL including http:// or https://
            default_headers=createHeaders(
                provider="openai",
                api_key=portkey_api_key,
                virtual_key=portkey_virtual_key
            )
        )

        # Load BERT model and tokenizer from Hugging Face
        self.bert_model = AutoModelForQuestionAnswering.from_pretrained(bert_model_name)
        self.bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
        self.sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

        # Load knowledge base
        with open(knowledge_base_path, 'r', errors="ignore") as f:
            data = json.load(f)
            self.knowledge_base = data["knowledge_base"]

        # Encode documents for legal validation
        self.document_embeddings = self.sentence_model.encode([doc['content'] for doc in self.knowledge_base])

    def retrieve_relevant_documents(self, query, top_k=3):
        query_embedding = self.sentence_model.encode([query])
        similarities = cosine_similarity(query_embedding, self.document_embeddings)[0]
        top_indices = np.argsort(similarities)[-top_k:][::-1]
        return [self.knowledge_base[i] for i in top_indices]

    def validate_contract(self, contract_text):
        relevant_docs = self.retrieve_relevant_documents("contract clause compliance")
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""Validate the following contract content for legal compliance:

        Contract Text: {contract_text}

        Relevant legal information:
        {context}

        Compliance validation and suggestions:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal compliance assistant specializing in Indian law."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

def upload_contract_file():
    uploaded = files.upload()
    filename = next(iter(uploaded))

    # Read the uploaded .docx file
    with io.BytesIO(uploaded[filename]) as file:
        doc = Document(file)
        contract_text = '\n'.join([para.text for para in doc.paragraphs])

    return contract_text

def main():
    # Initialize the compliance checker agent
    agent = NaturalLanguageComplianceChecker(
        portkey_api_key="1+YM2sBEgaZWe45bMOq2huic1uyF",
        portkey_virtual_key="015-openai-40bada",
        bert_model_name="law-ai/InLegalBERT",
        knowledge_base_path="test.json"
    )

    # Step 1: Upload a file from Colab
    print("Please upload your contract file:")
    contract_text = upload_contract_file()

    # Step 2: Validate the contract content and provide suggestions
    validation_result = agent.validate_contract(contract_text)
    print(f"\nCompliance Validation and Suggestions:\n{validation_result}")

if __name__ == "__main__":
    main()


Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at law-ai/InLegalBERT and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Please upload your contract file:


Saving generated_contract(1).docx to generated_contract(1).docx

Compliance Validation and Suggestions:
### Compliance Validation and Suggestions:
The contract content provided is generally comprehensive and outlines the key aspects of a service agreement contract. However, there are a few areas where it can be improved to ensure better legal compliance and clarity under Indian law:

1. **Jurisdiction and Governing Law**:
   - The clause about governing law mentions "the laws of the state of **[State]**." It should explicitly specify that the agreement is governed by Indian laws to avoid any ambiguity considering the multi-jurisdictional nature of Indian states.
   - Example amendment: "This Agreement shall be governed by and construed in accordance with the laws of India."

2. **Scope of Services (Article 1)**:
   - The services need to be described precisely in the agreement. Vague descriptions could lead to disputes regarding the scope of work. It is beneficial to detail the service

In [19]:
import json
import random
import numpy as np
from datetime import datetime, timedelta
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI
from docx import Document
from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL

class ContractGenerator:
    def __init__(self, portkey_api_key, portkey_virtual_key, bert_model_name, knowledge_base_path):
        self.client = OpenAI(
            api_key="dummy",
            base_url=PORTKEY_GATEWAY_URL,
            default_headers=createHeaders(
                provider="openai",
                api_key=portkey_api_key,
                virtual_key=portkey_virtual_key
            )
        )
        # Load BERT model and tokenizer from Hugging Face
        self.bert_model = AutoModelForQuestionAnswering.from_pretrained(bert_model_name)
        self.bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
        self.sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

        # Load knowledge base
        with open(knowledge_base_path, 'r') as f:
            data = json.load(f)
            self.company_name = data["company_name"]
            self.knowledge_base = data["knowledge_base"]

        # Encode documents
        self.document_embeddings = self.sentence_model.encode([doc['content'] for doc in self.knowledge_base])

    def ask_contract_type(self):
        print("Please select the type of contract you want to generate:")
        print("1. Employment Contract")
        print("2. Non-Disclosure Agreement (NDA)")
        print("3. Partnership Agreement")
        contract_type = input("Enter the number corresponding to the contract type: ")
        return contract_type

    def ask_general_questions(self):
        employee_name = input("Enter the employee's full name: ")
        start_date = input("Enter the contract start date (YYYY-MM-DD): ")
        return employee_name, start_date

    def ask_employment_contract_questions(self):
        job_title = input("Enter the job title: ")
        salary_info = input("Enter the salary information: ")
        responsibilities = input("Enter key responsibilities: ")
        return job_title, salary_info, responsibilities

    def ask_nda_questions(self):
        parties = input("Enter the names of the parties involved: ")
        confidential_info = input("Enter a brief description of the confidential information: ")
        return parties, confidential_info

    def ask_partnership_agreement_questions(self):
        partners = input("Enter the names of the partners: ")
        partnership_terms = input("Enter the key terms of the partnership: ")
        return partners, partnership_terms

    def generate_contract_prompt(self, contract_type, details):
        if contract_type == '1':  # Employment Contract
            job_title, salary_info, responsibilities = details
            prompt = f"""
            Create a legal employment contract based on the following information:

            Company: {self.company_name}
            Job Title: {job_title}
            Employee: {self.employee_name}
            Start Date: {self.start_date}
            Salary Information: {salary_info}
            Key Responsibilities: {responsibilities}

            The contract should include:
            - Names of the parties
            - Effective date
            - Job title and department
            - Salary and benefits
            - Key responsibilities
            - Standard work terms
            - Signatures

            Contract:
            """
        elif contract_type == '2':  # NDA
            parties, confidential_info = details
            prompt = f"""
            Create a Non-Disclosure Agreement (NDA) based on the following information:

            Company: {self.company_name}
            Parties: {parties}
            Confidential Information: {confidential_info}

            The NDA should include:
            - Definition of confidential information
            - Obligations of the parties
            - Term and duration of the NDA
            - Non-use and non-disclosure clauses
            - Termination conditions
            - Signatures

            NDA:
            """
        elif contract_type == '3':  # Partnership Agreement
            partners, partnership_terms = details
            prompt = f"""
            Create a Partnership Agreement based on the following information:

            Company: {self.company_name}
            Partners: {partners}
            Key Terms: {partnership_terms}

            The agreement should include:
            - Names of the parties
            - Partnership terms
            - Responsibilities of the partners
            - Profit and loss sharing terms
            - Dispute resolution
            - Termination conditions
            - Signatures

            Partnership Agreement:
            """
        return prompt

    def generate_contract(self, contract_type):
        if contract_type == '1':  # Employment Contract
            details = self.ask_employment_contract_questions()
        elif contract_type == '2':  # NDA
            details = self.ask_nda_questions()
        elif contract_type == '3':  # Partnership Agreement
            details = self.ask_partnership_agreement_questions()
        else:
            print("Invalid contract type selected.")
            return None

        # Combine general questions (employee name, start date, etc.)
        self.employee_name, self.start_date = self.ask_general_questions()
        prompt = self.generate_contract_prompt(contract_type, details)

        # Use OpenAI to generate the contract text
        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are a legal expert specializing in drafting contracts."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

    def save_contract_to_file(self, contract_text, filename="contract.docx"):
        doc = Document()
        doc.add_paragraph(contract_text)
        doc.add_paragraph("[It is advisable that both parties have this document reviewed by their respective legal advisors before signing.]")
        doc.save(filename)

    def generate_and_save_contract(self):
        contract_type = self.ask_contract_type()
        contract_text = self.generate_contract(contract_type)
        if contract_text:
            print(f"\nGenerated Contract:\n\n{contract_text}\n")

            # Save to file
            filename = f"generated_{self.company_name.replace(' ', '').lower()}_contract_{contract_type}.docx"
            self.save_contract_to_file(contract_text, filename)

            print(f"Contract has been generated and saved as {filename}.")

def main():
    agent = ContractGenerator(
        portkey_api_key="1+YM2sBEgaZWe45bMOq2huic1uyF",
        portkey_virtual_key="015-openai-40bada",
        bert_model_name="law-ai/InLegalBERT",
        knowledge_base_path="test.json"
    )
    agent.generate_and_save_contract()

if __name__ == "__main__":
    main()


Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at law-ai/InLegalBERT and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Please select the type of contract you want to generate:
1. Employment Contract
2. Non-Disclosure Agreement (NDA)
3. Partnership Agreement
Enter the number corresponding to the contract type: 2
Enter the names of the parties involved: 2
Enter a brief description of the confidential information: test
Enter the employee's full name: bhbhb
Enter the contract start date (YYYY-MM-DD): h87879y87g

Generated Contract:

**NON-DISCLOSURE AGREEMENT**

This Non-Disclosure Agreement ("Agreement") is entered into as of [Date] by and between TechNova Solutions Pvt. Ltd., a private limited company incorporated and existing under the laws of [Jurisdiction of Incorporation] with its principal office at [Address] ("Disclosing Party"), and [Other Party Name], a [Describe Entity Type - e.g., corporation, individual, etc.] organized and existing under the laws of [Jurisdiction of Incorporation] with its principal office at [Address] ("Receiving Party") (collectively referred to as the "Parties").

**1. Def