<a href="https://colab.research.google.com/github/Dhanush834/AusgutAI/blob/main/Full_Legal_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
pip install openai portkey-ai transformers torch sentence-transformers scikit-learn numpy



In [8]:
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from sentence_transformers import SentenceTransformer
from openai import OpenAI
from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL

class IndianLawRAGAgent:
    def __init__(self, portkey_api_key, portkey_virtual_key, bert_model_name, knowledge_base_path):
        self.client = OpenAI(
            api_key="dummy",
            base_url=PORTKEY_GATEWAY_URL,
            default_headers=createHeaders(
                provider="openai",
                api_key=portkey_api_key,
                virtual_key=portkey_virtual_key
            )
        )

        # Load BERT model and tokenizer from Hugging Face
        self.bert_model = AutoModelForQuestionAnswering.from_pretrained(bert_model_name)
        self.bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
        self.sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

        # Load knowledge base
        with open(knowledge_base_path, 'r') as f:
            data = json.load(f)
            self.knowledge_base = data["knowledge_base"]

        # Encode documents
        self.document_embeddings = self.sentence_model.encode([doc['content'] for doc in self.knowledge_base])

    def retrieve_relevant_documents(self, query, top_k=3):
        query_embedding = self.sentence_model.encode([query])
        similarities = cosine_similarity(query_embedding, self.document_embeddings)[0]
        top_indices = np.argsort(similarities)[-top_k:][::-1]
        return [self.knowledge_base[i] for i in top_indices]

    def answer_legal_question(self, question, context):
        inputs = self.bert_tokenizer(question, context, return_tensors="pt")
        outputs = self.bert_model(**inputs)

        answer_start = outputs.start_logits.argmax()
        answer_end = outputs.end_logits.argmax() + 1
        answer = self.bert_tokenizer.convert_tokens_to_string(
            self.bert_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end])
        )
        return answer

    def generate_legal_advice(self, startup_type, situation):
        relevant_docs = self.retrieve_relevant_documents(f"{startup_type} {situation}")
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""As a legal expert, provide advice for an Indian {startup_type} startup in the following situation: {situation}

        Relevant legal information:
        {context}

        Advice:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal assistant specializing in Indian startup law."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

    def summarize_law(self, law_name):
        relevant_docs = self.retrieve_relevant_documents(law_name)
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""Summarize the key points of the Indian law: {law_name}, especially as it pertains to startups.

        Relevant legal information:
        {context}

        Summary:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal assistant specializing in Indian law summaries."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

    def check_compliance(self, startup_description):
        relevant_docs = self.retrieve_relevant_documents(startup_description)
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""Given the following Indian startup description, list the key compliance requirements and potential legal issues to be aware of: {startup_description}

        Relevant legal information:
        {context}

        Compliance requirements and potential legal issues:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal assistant specializing in compliance for Indian startups."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

def main():
    agent = IndianLawRAGAgent(
        portkey_api_key="1+YM2sBEgaZWe45bMOq2huic1uyF",
        portkey_virtual_key="015-openai-40bada",
        bert_model_name="law-ai/InLegalBERT",
        knowledge_base_path="test.json"
    )

    print("Choose an option:")
    print("1. Answer a legal question")
    print("2. Generate legal advice")
    print("3. Summarize a law")
    print("4. Check compliance requirements")

    choice = int(input("Enter the number of your choice: "))

    if choice == 1:
        legal_question = input("Enter your legal question: ")
        legal_context = input("Enter the legal context: ")
        answer = agent.answer_legal_question(legal_question, legal_context)
        print(f"Answer: {answer}")

    elif choice == 2:
        startup_type = input("Enter the startup type: ")
        situation = input("Describe the situation: ")
        advice = agent.generate_legal_advice(startup_type, situation)
        print(f"Legal Advice: {advice}")

    elif choice == 3:
        law_name = input("Enter the law name: ")
        summary = agent.summarize_law(law_name)
        print(f"Law Summary: {summary}")

    elif choice == 4:
        startup_description = input("Enter the startup description: ")
        compliance = agent.check_compliance(startup_description)
        print(f"Compliance Check: {compliance}")

    else:
        print("Invalid choice. Please try again.")

if __name__ == "__main__":
    main()


Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at law-ai/InLegalBERT and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Choose an option:
1. Answer a legal question
2. Generate legal advice
3. Summarize a law
4. Check compliance requirements


KeyboardInterrupt: Interrupted by user

In [10]:
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from sentence_transformers import SentenceTransformer
from openai import OpenAI
from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL

class IndianLawRAGAgent:
    def __init__(self, portkey_api_key, portkey_virtual_key, bert_model_name, knowledge_base_path):
        self.client = OpenAI(
            api_key="dummy",
            base_url=PORTKEY_GATEWAY_URL,
            default_headers=createHeaders(
                provider="openai",
                api_key=portkey_api_key,
                virtual_key=portkey_virtual_key
            )
        )

        # Load BERT model and tokenizer from Hugging Face
        self.bert_model = AutoModelForQuestionAnswering.from_pretrained(bert_model_name)
        self.bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
        self.sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

        # Load knowledge base
        with open(knowledge_base_path, 'r') as f:
            data = json.load(f)
            self.knowledge_base = data["knowledge_base"]

        # Encode documents
        self.document_embeddings = self.sentence_model.encode([doc['content'] for doc in self.knowledge_base])

    def retrieve_relevant_documents(self, query, top_k=3):
        query_embedding = self.sentence_model.encode([query])
        similarities = cosine_similarity(query_embedding, self.document_embeddings)[0]
        top_indices = np.argsort(similarities)[-top_k:][::-1]
        return [self.knowledge_base[i] for i in top_indices]

    def answer_legal_question(self, question, context):
        inputs = self.bert_tokenizer(question, context, return_tensors="pt")
        outputs = self.bert_model(inputs)

        answer_start = outputs.start_logits.argmax()
        answer_end = outputs.end_logits.argmax() + 1
        answer = self.bert_tokenizer.convert_tokens_to_string(
            self.bert_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end])
        )
        return answer

    def generate_legal_advice(self, startup_type, situation):
        relevant_docs = self.retrieve_relevant_documents(f"{startup_type} {situation}")
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""As a legal expert, provide advice for an Indian {startup_type} startup in the following situation: {situation}

        Relevant legal information:
        {context}

        Advice:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal assistant specializing in Indian startup law."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

    def summarize_law(self, law_name):
        relevant_docs = self.retrieve_relevant_documents(law_name)
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""Summarize the key points of the Indian law: {law_name}, especially as it pertains to startups.

        Relevant legal information:
        {context}

        Summary:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal assistant specializing in Indian law summaries."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

    def check_compliance(self, startup_description):
        relevant_docs = self.retrieve_relevant_documents(startup_description)
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""Given the following Indian startup description, list the key compliance requirements and potential legal issues to be aware of: {startup_description}

        Relevant legal information:
        {context}

        Compliance requirements and potential legal issues:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal assistant specializing in compliance for Indian startups."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

    def handle_user_input(self, user_input):
        # Step 1: Summarize the user input
        summarization_prompt = f"""Summarize the following legal input to extract key points:

        Input: {user_input}

        Summary:"""

        summarization_response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI summarization assistant."},
                {"role": "user", "content": summarization_prompt}
            ]
        )
        summarized_input = summarization_response.choices[0].message.content.strip()
        print(f"Summarized Input: {summarized_input}")

        # Step 2: Classify the user input into a type (e.g., 'legal question', 'generate advice', etc.)
        classification_prompt = f"""Classify the following summarized legal input into one of these categories:
        'legal question', 'generate advice', 'summarize law', 'check compliance'.

        Summary: {summarized_input}

        Classification:"""

        classification_response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI classification assistant."},
                {"role": "user", "content": classification_prompt}
            ]
        )
        classification = classification_response.choices[0].message.content.strip().lower()
        print(f"Classification: {classification}")

        # Step 3: Based on classification, automatically generate the legal context or invoke appropriate function
        if "legal question" in classification:
            # Use OpenAI to generate the legal context automatically
            legal_context_prompt = f"""Generate the legal context for the following legal question:

            Question: {summarized_input}

            Legal Context:"""

            legal_context_response = self.client.chat.completions.create(
                model="gpt-4-turbo",
                messages=[
                    {"role": "system", "content": "You are an AI legal context assistant."},
                    {"role": "user", "content": legal_context_prompt}
                ]
            )
            legal_context = legal_context_response.choices[0].message.content.strip()
            print(f"Legal Context: {legal_context}")
            return f"Function called: answer_legal_question\nResponse: {self.answer_legal_question(summarized_input, legal_context)}"

        elif "generate advice" in classification:
            # Auto-generate startup type based on the input (if applicable)
            startup_type_prompt = f"""Identify the startup type based on the following input:

            Input: {summarized_input}

            Startup Type:"""

            startup_type_response = self.client.chat.completions.create(
                model="gpt-4-turbo",
                messages=[
                    {"role": "system", "content": "You are an AI startup classification assistant."},
                    {"role": "user", "content": startup_type_prompt}
                ]
            )
            startup_type = startup_type_response.choices[0].message.content.strip()
            print(f"Startup Type: {startup_type}")
            return f"Function called: generate_legal_advice\nResponse: {self.generate_legal_advice(startup_type, summarized_input)}"

        elif "summarize law" in classification:
            return f"Function called: summarize_law\nResponse: {self.summarize_law(summarized_input)}"

        elif "check compliance" in classification:
            return f"Function called: check_compliance\nResponse: {self.check_compliance(summarized_input)}"

        else:
            return "Sorry, I couldn't classify your input. Please try again."


def main():
    agent = IndianLawRAGAgent(
        portkey_api_key="1+YM2sBEgaZWe45bMOq2huic1uyF",
        portkey_virtual_key="015-openai-40bada",
        bert_model_name="law-ai/InLegalBERT",
        knowledge_base_path="test.json"
    )

    user_input = input("Enter your request: ")
    response = agent.handle_user_input(user_input)
    print(f"Response: {response}")

if __name__ == "__main__":
    main()


Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at law-ai/InLegalBERT and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Enter your request: startup in cyber tech
Summarized Input: Summary: A startup in cyber tech refers to a new business venture that focuses on developing technology related to cybersecurity. This involves creating innovative solutions to protect digital systems, networks, data, and programs from attacks, damages, or unauthorized access.
Classification: the provided summary does not present a legal question, seek specific advice, or check compliance. rather, it describes what a cyber tech startup is. this aligns best with 'summarize law' as it is summarizing the focus and activities of businesses in a specific sector, albeit without explicitly mentioning legal frameworks. 

classification: 'summarize law'


Token indices sequence length is longer than the specified maximum sequence length for this model (776 > 512). Running this sequence through the model will result in indexing errors


Legal Context: In considering the legal context for a startup in the cyber tech industry, particularly one that focuses on cybersecurity, several areas of law come into play. This legal framework is crucial to ensure that the startup not only protects itself from legal pitfalls but also operates in compliance with various statutes and regulations that govern data protection, intellectual property, contractual relationships, and liability issues. Here are the key areas of law relevant to a cybersecurity startup:

1. **Cybersecurity Laws and Regulations**: A cybersecurity startup must adhere to legal standards and frameworks that govern the storage, processing, and transmission of data. For example, in the United States, regulations such as the General Data Protection Regulation (GDPR) in Europe and the California Consumer Privacy Act (CCPA) in California impose strict requirements on the handling of personal data. These laws also define the duties of cybersecurity providers, including b

TypeError: list indices must be integers or slices, not tuple

In [12]:
pip install python-docx

Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/244.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━[0m [32m225.3/244.3 kB[0m [31m6.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.1.2


In [13]:
import json
import numpy as np
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI
from docx import Document

class ContractGenerator:
    def __init__(self, portkey_api_key, portkey_virtual_key, bert_model_name, knowledge_base_path):
        self.client = OpenAI(
            api_key="dummy",
            base_url=PORTKEY_GATEWAY_URL,
            default_headers=createHeaders(
                provider="openai",
                api_key=portkey_api_key,
                virtual_key=portkey_virtual_key
            )
        )
        # Load BERT model and tokenizer from Hugging Face
        self.bert_model = AutoModelForQuestionAnswering.from_pretrained(bert_model_name)
        self.bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
        self.sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

        # Load knowledge base
        with open(knowledge_base_path, 'r') as f:
            data = json.load(f)
            self.knowledge_base = data["knowledge_base"]

        # Encode documents
        self.document_embeddings = self.sentence_model.encode([doc['content'] for doc in self.knowledge_base])

    def retrieve_relevant_documents(self, query, top_k=3):
        query_embedding = self.sentence_model.encode([query])
        similarities = cosine_similarity(query_embedding, self.document_embeddings)[0]
        top_indices = np.argsort(similarities)[-top_k:][::-1]
        return [self.knowledge_base[i] for i in top_indices]

    def extract_information_with_bert(self, text):
        inputs = self.bert_tokenizer(text, return_tensors="pt")
        outputs = self.bert_model(**inputs)

        answer_start = outputs.start_logits.argmax()
        answer_end = outputs.end_logits.argmax() + 1
        answer = self.bert_tokenizer.convert_tokens_to_string(
            self.bert_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end])
        )
        return answer

    def generate_contract(self, details):
        # Use BERT to extract and refine contract details if necessary
        refined_details = self.extract_information_with_bert(details)

        prompt = f"""
        Create a legal contract based on the following details:

        Contract Details:
        {refined_details}

        The contract should include:
        - Names of the parties
        - Effective date
        - Terms and conditions
        - Signatures

        Contract:
        """

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are a legal expert specializing in drafting contracts."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

    def save_contract_to_file(self, contract_text, filename="contract.docx"):
        doc = Document()
        doc.add_paragraph(contract_text)
        doc.add_paragraph("[It is advisable that both parties have this document reviewed by their respective legal advisors before signing.]")
        doc.save(filename)

    def handle_user_request(self, contract_details):
        # Generate contract text
        contract_text = self.generate_contract(contract_details)
        print(f"Generated Contract: {contract_text}")

        # Save to file
        filename = "generated_contract.docx"
        self.save_contract_to_file(contract_text, filename)

        return filename

def main():
    agent = ContractGenerator(
        portkey_api_key="1+YM2sBEgaZWe45bMOq2huic1uyF",
        portkey_virtual_key="015-openai-40bada",
        bert_model_name="law-ai/InLegalBERT",
        knowledge_base_path="test.json"
    )

    # Collect contract details from the user
    print("Please provide the following details for the contract:")
    party1 = input("Party 1: ")
    party2 = input("Party 2: ")
    effective_date = input("Effective Date (YYYY-MM-DD): ")
    terms = input("Terms and Conditions: ")

    contract_details = f"""
    Party 1: {party1}
    Party 2: {party2}
    Effective Date: {effective_date}
    Terms and Conditions: {terms}
    """

    filename = agent.handle_user_request(contract_details)
    print(f"Contract has been generated and saved as {filename}.")

if __name__ == "__main__":
    main()


Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at law-ai/InLegalBERT and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Please provide the following details for the contract:
Party 1: frere
Party 2: efefewf
Effective Date (YYYY-MM-DD): wefewfewf
Terms and Conditions: weeffwewef
Generated Contract: Below is a simplified contract template based on the general details you provided. Without specific details about the nature of the contract, I will present a generic agreement. You can modify it as necessary to suit the particular agreement being established.

---

**CONTRACT AGREEMENT**

This Contract Agreement ("Agreement") is made on [Effective Date], by and between:

**[Party A Name]**, with a principal place of business located at [Party A Address] ("Party A"),

and

**[Party B Name]**, with a principal place of business located at [Party B Address] ("Party B").

**WHEREAS**, Party A and Party B (together, the "Parties") wish to outline the terms and conditions under which they will engage in [Brief Description of the Business Relationship or Transaction],

**NOW, THEREFORE**, in consideration of the mut

In [1]:
pip install googletrans



In [2]:
import json
import numpy as np
from google.colab import files
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from docx import Document
import io
from openai import OpenAI

class NaturalLanguageComplianceChecker:
    def __init__(self, portkey_api_key, portkey_virtual_key, bert_model_name, knowledge_base_path):
        self.client = OpenAI(
            api_key="dummy",
            base_url=PORTKEY_GATEWAY_URL,  # Replace with actual URL including http:// or https://
            default_headers=createHeaders(
                provider="openai",
                api_key=portkey_api_key,
                virtual_key=portkey_virtual_key
            )
        )

        # Load BERT model and tokenizer from Hugging Face
        self.bert_model = AutoModelForQuestionAnswering.from_pretrained(bert_model_name)
        self.bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
        self.sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

        # Load knowledge base
        with open(knowledge_base_path, 'r', errors="ignore") as f:
            data = json.load(f)
            self.knowledge_base = data["knowledge_base"]

        # Encode documents for legal validation
        self.document_embeddings = self.sentence_model.encode([doc['content'] for doc in self.knowledge_base])

    def retrieve_relevant_documents(self, query, top_k=3):
        query_embedding = self.sentence_model.encode([query])
        similarities = cosine_similarity(query_embedding, self.document_embeddings)[0]
        top_indices = np.argsort(similarities)[-top_k:][::-1]
        return [self.knowledge_base[i] for i in top_indices]

    def validate_contract(self, contract_text):
        relevant_docs = self.retrieve_relevant_documents("contract clause compliance")
        context = "\n".join([doc['content'] for doc in relevant_docs])

        prompt = f"""Validate the following contract content for legal compliance:

        Contract Text: {contract_text}

        Relevant legal information:
        {context}

        Compliance validation and suggestions:"""

        response = self.client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an AI legal compliance assistant specializing in Indian law."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content.strip()

def upload_contract_file():
    uploaded = files.upload()
    filename = next(iter(uploaded))

    # Read the uploaded .docx file
    with io.BytesIO(uploaded[filename]) as file:
        doc = Document(file)
        contract_text = '\n'.join([para.text for para in doc.paragraphs])

    return contract_text

def main():
    # Initialize the compliance checker agent
    agent = NaturalLanguageComplianceChecker(
        portkey_api_key="1+YM2sBEgaZWe45bMOq2huic1uyF",
        portkey_virtual_key="015-openai-40bada",
        bert_model_name="law-ai/InLegalBERT",
        knowledge_base_path="test.json"
    )

    # Step 1: Upload a file from Colab
    print("Please upload your contract file:")
    contract_text = upload_contract_file()

    # Step 2: Validate the contract content and provide suggestions
    validation_result = agent.validate_contract(contract_text)
    print(f"\nCompliance Validation and Suggestions:\n{validation_result}")

if __name__ == "__main__":
    main()


ImportError: cannot import name 'BaseTransport' from 'httpx' (/usr/local/lib/python3.10/dist-packages/httpx/__init__.py)