In [None]:
# Installing dependencies
!pip install chromadb openai

In [None]:
# Importing dependencies
import json
import os

import chromadb
import openai
import pandas as pd

In [None]:
# Setting up the api key
# The key is read from the OPENAI_API_KEY environment variable so that it never has
# to be written into the notebook. When the variable is unset this stays an empty
# string, exactly as before.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

In [None]:
# Defining the reasoning class
class LLMReasoning:
  """Turns a free-text legal query into a structured query string for the vector DB.

  The user's text is sent to an OpenAI chat model together with a few-shot system
  prompt that asks for a JSON object with a "Category" list and an "Extras" object.
  `run` parses that JSON and renders the "Extras" fields as a single text query.
  """

  # Keys of the "Extras" object that `run` interpolates into the query text.
  _EXTRA_FIELDS = ("language", "location", "charge", "time", "practicesat",
                   "clientdemographics", "Typesoflaws", "ClientFeedback")

  def __init__(self):
    # Defining the system
    # NOTE(review): the {AVAILABLE_LABELS} and {task} placeholders in this prompt are
    # never substituted anywhere in this class, so the model receives them literally.
    self.SYSTEM_DEFINITION = """DETECT TYPE OF LAW ASSOCIATED:
                              Given the input, identify the types of laws that can come under that situation,
                              and other entitities that provided in the examples, be precise and
                              don't over complicate the situation and give too many offences.
                              USE LOGICAL REASONING TO THE BEST OF YOUR ABILITY AS IF YOU HAVE TO Decide the CASE ACTUALLY.
                              ---------------------------------------------------------------------

                              STRICTLY CHOOSE FROM THIS ONLY IF NOT PRESENT IN THIS DON'T INCLUDE:
                              {AVAILABLE_LABELS}

                              STRICTLY ANSWER IN THE FORMAT GIVEN, IF NOT POSSIBLE STRICTLY RETURN [] ALWAYS.
                              Here are examples:

                              {
                                  "Category": ["family law", "criminal law"],
                                  "Extras": {
                                      "language": "The language appears to be informal or possibly a non-native form of English.",
                                      "location": "", <strictly one of the indian states if available else "">
                                      "charge": "", <strictly an integer if filling the value other than empty string, if not available fill with 200>
                                      "time": "", <strictly in number of days form>
                                      "practicesat": "",
                                      "clientdemographics": "",
                                      "Typesoflaws": ["family law", "criminal law"],
                                      "ClientFeedback": ""
                                  }
                              }

                              ________________________________________________________________
                              Sentence: 'Two brothers were tenant of a landlord in a commercial property.One brother had one son and a
                              daughter (both minor) when he got divorced with his wife.The children's went into mother's custody at the
                              time of divorce and after some years the husband (co tenant) also died. Now can the children of the
                              deceased brother(co tenant) claim the right'
                              Sol: {"Category": [\n    "family law",\n    "criminal law"\n  ],\n  "Extras": {\n    "language": "",\n    "location": "",\n    "charge": "",\n    "practicesat": "",\n    "clientdemographics": "Individuals",\n    "Typesoflaws": [\n      "family law",\n      "criminal law"\n    ],\n    "ClientFeedback": 3\n  }\n}
                              ________________________________________________________________
                              Sentence: 'hi dear sir @ madam i am work enterprises shop casher my owner my big bother my owner all
                              cosmer money take and leave in city all cosmer tarcher and attack me and my family what i do sir'
                              Sol: {"Category": [\n    "family law",\n    "criminal law"\n  ],\n  "Extras": {\n    "language": "The language appears to be informal or possibly a non-native form of English.",\n    "location": "",\n    "charge": "",\n    "time": null,\n    "practicesat": "",\n    "clientdemographics": "",\n    "Typesoflaws": [\n      "family law",\n      "criminal law"\n    ],\n    "ClientFeedback": ""\n  }\n}
                              ________________________________________________________________

                              Sentence: {task}
                              Operations:

                              <STRICTLY COMPLETE THE DICTIONARY LOGICALLY AND AS PER THE Sol provided>
                              """

  # Apply LLM reasoning to the input query to produce text for the VectorDB lookup
  def llmReasoning(self, text, max_tokens=512):
    """Send `text` to the chat model and return the raw reply text.

    Args:
      text: The user's free-text description of their legal situation.
      max_tokens: Upper bound on the reply length. The JSON objects shown in the
        prompt's own examples are longer than the previous hard-coded limit of
        100 tokens, which truncated replies and made them unparseable.

    Returns:
      The content of the first choice's message, unparsed.
    """
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": self.SYSTEM_DEFINITION},
            {"role": "user", "content": text}
        ],
        max_tokens=max_tokens
    )
    output_text = response.choices[0].message.content

    return output_text

  # Run the model
  def run(self, text):
    """Build the vector-DB query text for `text`.

    Args:
      text: The user's free-text description of their legal situation.

    Returns:
      A multi-line string listing every field in `_EXTRA_FIELDS` as `name = value`.
      Fields the model left out are rendered as empty strings.

    Raises:
      ValueError: If the model reply is not valid JSON, or is valid JSON without an
        "Extras" object (for example the `[]` the prompt asks for when it cannot
        answer).
    """
    outputText = self.llmReasoning(text)
    try:
      InputForVectorDB = json.loads(outputText)
    except json.JSONDecodeError as exc:
      raise ValueError(f"Model reply is not valid JSON: {outputText!r}") from exc

    extras = InputForVectorDB.get('Extras') if isinstance(InputForVectorDB, dict) else None
    if not isinstance(extras, dict):
      raise ValueError(f"Model reply has no 'Extras' object: {outputText!r}")

    # The model does not always emit every key (one of the prompt's own examples
    # has no "time"), so missing keys default to "" instead of raising KeyError.
    com = {field: extras.get(field, "") for field in self._EXTRA_FIELDS}
    genQuery = f"""The language = {com['language']},
                location = {com['location']}, charge = {com['charge']},
                time = {com['time']}, practicesat = {com['practicesat']},
                clientdemographics = {com['clientdemographics']},
                Typesoflaws = {com['Typesoflaws']},
                ClientFeedback = {com['ClientFeedback']}"""

    return genQuery

In [None]:
# Vector database pipeline to update the embeddings in the db
class VectorDBPipeline:
  """Loads lawyer records from a CSV into a Chroma collection and queries them.

  Each dataframe row is stored as one JSON document containing the columns listed
  in `_DOCUMENT_COLUMNS`; document ids are the 1-based row positions as strings.
  """

  # CSV columns serialised into each stored document, in output order.
  _DOCUMENT_COLUMNS = ("language", "location", "charge", "time", "practicesat",
                       "clientdemographics", "Typesoflaws", "ClientFeedbackof")

  def __init__(self, path='/content/updated_merged_data.csv'):
    """Create the Chroma client/collection and read the CSV at `path`."""
    self.chroma_client = chromadb.Client()
    # get_or_create: re-running the notebook cell must not fail because the
    # collection already exists in the running client.
    self.collection = self.chroma_client.get_or_create_collection(name="LawyerCollection")
    self.df = pd.read_csv(path)
    # NOTE(review): read_csv already consumes the header line, so this discards the
    # first data record - confirm that is intended.
    self.df = self.df.drop(self.df.index[0])
    # self.df = self.df.drop(columns=['Unnamed: 0'])

  @staticmethod
  def _to_native(value):
    """Convert numpy scalars to plain Python values so json.dumps accepts them."""
    return value.item() if hasattr(value, "item") else value

  # Adding the vectors
  def add_vectortoDB(self, batch_size=256):
    """Store every dataframe row in the collection as a JSON document.

    Args:
      batch_size: Number of documents sent to the collection per call.

    Raises:
      ValueError: If `batch_size` is smaller than 1.
      KeyError: If the dataframe lacks one of `_DOCUMENT_COLUMNS`.
    """
    if batch_size < 1:
      raise ValueError("batch_size must be at least 1")

    records = self.df[list(self._DOCUMENT_COLUMNS)].to_dict("records")
    # Convert the dictionaries to JSON-formatted strings
    documents = [
        json.dumps({column: self._to_native(value) for column, value in record.items()})
        for record in records
    ]
    ids = [f"{position + 1}" for position in range(len(documents))]

    for start in range(0, len(documents), batch_size):
      stop = start + batch_size
      chunk = documents[start:stop]
      # upsert keeps the call idempotent: re-running overwrites the same ids
      # instead of colliding with the documents stored by the previous run.
      self.collection.upsert(
        documents=chunk,
        metadatas=[{"source": "LawData"} for _ in chunk],
        ids=ids[start:stop]
      )

  # Get query from the DB
  def run_query(self, query, n_results=5):
    """Return the stored documents most similar to `query`.

    Args:
      query: Query text to embed and search with.
      n_results: Maximum number of documents to return.

    Returns:
      A list of the matching document strings for this single query.
    """
    results = self.collection.query(query_texts=[query],
                               n_results=n_results)
    return list(results['documents'][0])

In [None]:
# Setting up the vector db pipeline
# Builds the pipeline on the cleaned CSV (overriding the constructor's default path),
# then stores one document per dataframe row in the collection.
vector_db_pipeline = VectorDBPipeline('/content/cleaned_data.csv')
vector_db_pipeline.add_vectortoDB()

In [None]:
# Test the system
# End-to-end check: sample query -> LLM-structured query text -> vector DB lookup.
# (The commented-out line below is an alternative sample query.)
# query = "im a marathi person i got laid off by a company i need a senior lawyer who is based in my state to fight my case, he shoukd also have a good custmer feedback, i need the case solved as soon as possible"
query = "I'm a woman facing dowry harassment in Delhi. I need a lawyer who can help me file a dowry harassment case against my in-laws."
reasoningModel = LLMReasoning()
# run() calls the chat model and parses its reply as JSON before building the query text.
InputForVectorDB = reasoningModel.run(query)
result = vector_db_pipeline.run_query(InputForVectorDB)
result

In [None]:
# Show the raw model reply for the sample query, before any JSON parsing.
reasoningModel.llmReasoning(query)

In [None]:
# Plain-text view of the documents returned by the vector DB query above.
print(result)

In [None]:
# Each returned document is a JSON string; decode it back into a dict for display.
for res in result:
  print(json.loads(res))

In [None]:
!pip install chromadb langchain openai tiktoken

In [None]:
# Load the master lawyer table and discard its first column.
# NOTE(review): the name `next` shadows Python's built-in next() for the rest of the
# notebook. Later cells look rows up through this name, so renaming it needs a
# coordinated change across those cells.
next = pd.read_csv('/content/master.csv')
next = next.drop(next.columns[0], axis=1)
next.head()

In [None]:
import os
import openai
import numpy as np
import pandas as pd
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders.csv_loader import CSVLoader

# Load the master CSV as LangChain documents.
loader = CSVLoader(file_path="/content/master.csv")
data= loader.load()
# Split the documents with a 1000-character chunk size and no overlap.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(data)
# NOTE(review): the API key is passed as an empty string in both calls below -
# presumably a placeholder; confirm how the key is actually supplied.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key="")
llm = OpenAI(openai_api_key="")
# Embed the chunks and index them in a Chroma vector store.
docsearch = Chroma.from_documents(texts, embeddings)

# Retrieval QA chain ('stuff' chain type) that answers from the Chroma retriever.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=docsearch.as_retriever())

In [None]:
# Ask the model for the structured attributes of the sample query, keep only the
# "Extras" object and re-serialise it as JSON text for the similarity search.
categories = json.dumps(
    json.loads(reasoningModel.llmReasoning(query))['Extras']
)
categories

In [None]:
# Retrieve the 10 documents most similar to the structured query text.
# The previous `distance_metric="cos"` argument was dropped: it is not a parameter
# of the LangChain Chroma wrapper's similarity_search, so depending on the installed
# version it was either silently ignored or forwarded to chromadb and rejected.
results = docsearch.similarity_search(categories, k=10)
results

In [None]:
# Page text of every retrieved document, still in similarity order.
top_5_page_content = [result.page_content for result in results]

# Split the text into separate entries (one "column: value" line per CSV column)
entries = [content.split('\n') for content in top_5_page_content]

# Create a list of tuples to represent the data: the value of the last line is taken
# as the lawyer name and the value of the first line as the index.
# split(": ", 1) keeps values that themselves contain ": " intact.
data = [("Lawyer Names", entry[-1].split(": ", 1)[1], "index", entry[0].split(": ", 1)[1]) for entry in entries]

# Remove duplicates while KEEPING the similarity ranking. A set has no defined
# order, so the "first 5" rows taken below would otherwise be arbitrary.
unique_data = list(dict.fromkeys(data))

# Convert the de-duplicated tuples into a list of dictionaries
unique_data_list = [{"Lawyer Names": item[1], "Index": item[3]} for item in unique_data]

df = pd.DataFrame(unique_data_list)

df.head(5)

In [None]:
# Collect, for every retrieved lawyer, the matching row(s) of the "next" DataFrame.
corresponding_rows = []

# Iterate over the "Index" values in the "df" DataFrame
# (df.get avoids a KeyError when no lawyers were retrieved and df has no columns).
for index_value in df.get('Index', []):
    # The values in df were parsed out of text and are strings, so they are cast to
    # int before comparing with next['index'], exactly as the display cell does.
    # Without the cast the comparison never matches.
    matching_row = next[next['index'] == int(index_value)]

    # Append the matching row to the list
    corresponding_rows.append(matching_row)

# Convert the list of rows to a new DataFrame. pd.concat raises on an empty list,
# so fall back to an empty frame with the same columns.
if corresponding_rows:
    corresponding_df = pd.concat(corresponding_rows, ignore_index=True)
else:
    corresponding_df = next.iloc[0:0]

# Display the resulting DataFrame
print(corresponding_df)


In [None]:
# Human-readable details of the top lawyers, accumulated one entry per lawyer.
GEN_DISPLAY_QUERY = ""

# Iterate over the first 5 rows in the 'df' DataFrame (fewer if df is shorter)
for i in range(min(5, len(df))):
    name = df.iloc[i]['Lawyer Names']
    index = df.iloc[i]['Index']

    # Find the row in 'next' DataFrame that matches the 'index'
    try:
      row = next[next['index'] == int(index)].iloc[0].to_dict()
    except (ValueError, IndexError):
      # Non-numeric index text or no matching row: skip this lawyer only.
      # Other errors (for example a missing column) are no longer swallowed.
      continue

    # Formatted details for this lawyer
    out = f"""
      Name: {name},
      Languages: {row['language']},
      Location: {row['location']},
      Average Charge: {row['charge']},
      Average Time: {row['time']},
      Practices at: {row['practicesat']},
      Client Demographics: {row['clientdemographics']},
      Laws associated: {row['Typesoflaws']},
      Client Feedback Rating: {row['ClientFeedbackof']}
      """
    GEN_DISPLAY_QUERY += out


In [None]:
# Show the accumulated lawyer details text.
print(GEN_DISPLAY_QUERY)

In [None]:
# Spot check: select the rows whose 'index' column equals 1285 and show them as a dict.
a = next[next["index"] == 1285]
a.to_dict()

In [None]:
!pip install gradio

In [None]:
# Second index, built from the lawyer summaries CSV.
loader2 = CSVLoader(file_path="/content/LawYantra Dataset - lawyer_summaries.csv")
data2 = loader2.load()
text_splitter2 = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# Use the splitter created for this dataset (the original created text_splitter2
# but then called the first dataset's `text_splitter`).
texts2 = text_splitter2.split_documents(data2)
# An explicit collection name keeps this index separate from the one built for
# master.csv. Without it both stores use the wrapper's default collection name,
# and with an in-memory Chroma client that can mean the same underlying collection
# (depending on the installed chromadb version).
docsearch2 = Chroma.from_documents(texts2, embeddings, collection_name="lawyer_summaries")

# Retrieval QA chain ('stuff' chain type) that answers from the summaries retriever.
qa2 = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=docsearch2.as_retriever())

In [None]:
# Prompt template for the explanation step; {text} and {query} are filled in by
# recommendation() before the prompt is sent.
DEFINITION = """
You are an Explainable AI bot that gets details of lawyers
which are {text}, and a query: {query}, result
in a way such that it explains why the lawyers are a perfect match for the query.
STRICTLY REFER THE DATA PROVIDED TO YOU ONLY AND ANSWER THROUGH IT

"""
def recommendation(text, query, max_tokens=100):
    """Ask the chat model to explain why the given lawyers match the query.

    Args:
        text: Formatted details of the candidate lawyers.
        query: The user's original request.
        max_tokens: Upper bound on the length of the explanation.

    Returns:
        The content of the first choice's message.
    """
    # The system prompt is the *filled-in* template (the raw template, with literal
    # "{text}" and "{query}", used to be sent), and the user message carries the
    # data summary that was previously built but never used.
    found = f"Lawyers: {text}, Query: {query}"
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": DEFINITION.format(text=text, query=query)},
            {"role": "user", "content": found}
        ],
        max_tokens=max_tokens
    )
    output_text = response.choices[0].message.content

    return output_text

In [None]:
import gradio as gr

def _lawyer_refs(results):
    """Return de-duplicated (name, index) pairs from retrieved documents, in rank order."""
    refs = []
    for document in results:
        lines = document.page_content.split('\n')
        try:
            # The value of the last line is taken as the lawyer name and the value of
            # the first line as the index; split(": ", 1) keeps values that contain ": ".
            pair = (lines[-1].split(": ", 1)[1], lines[0].split(": ", 1)[1])
        except IndexError:
            # Line without a "key: value" shape: skip this document only.
            continue
        if pair not in refs:
            refs.append(pair)
    return refs


def _lawyer_card(name, row):
    """Format one lawyer's details (a row of the `next` table as a dict) for display."""
    return f"""
            Name: {name},
            Languages: {row['language']},
            Location: {row['location']},
            Average Charge: {row['charge']},
            Average Time: {row['time']},
            Practices at: {row['practicesat']},
            Client Demographics: {row['clientdemographics']},
            Laws associated: {row['Typesoflaws']},
            Client Feedback Rating: {row['ClientFeedbackof']}
            """


def echo(query, history):
    """Gradio chat handler.

    Messages containing "/start" trigger the lawyer recommendation flow: the query is
    structured by the LLM, matched against the summaries index, and the top lawyers'
    details are returned followed by an LLM explanation. Any other message is
    answered by the law-assistant chat prompt.

    Args:
        query: The user's message.
        history: Chat history supplied by Gradio (not used).

    Returns:
        The reply text shown in the chat.
    """
    if "/start" in query:
        try:
            extras = json.loads(reasoningModel.llmReasoning(query))['Extras']
        except (ValueError, KeyError, TypeError):
            # The model reply was not JSON, or had no "Extras" object (it is told to
            # answer [] when it cannot comply). Answer politely instead of crashing.
            return "Sorry, I could not work out the details of your request. Please rephrase it and try again."
        categories = json.dumps(extras)

        # `distance_metric` is not a parameter of the Chroma wrapper's search, so it
        # is no longer passed.
        results = docsearch2.similarity_search(categories, k=5)

        cards = []
        # Rank order is preserved; the original de-duplicated through a set, which
        # shuffled the results before the first five were taken.
        for name, index in _lawyer_refs(results)[:5]:
            try:
                # Find the row in 'next' DataFrame that matches the 'index'
                row = next[next['index'] == int(index)].iloc[0].to_dict()
            except (ValueError, IndexError):
                # Non-numeric index text or no matching row: skip this lawyer only.
                continue
            cards.append(_lawyer_card(name, row))

        GEN_DISPLAY_QUERY = "".join(cards)
        if not GEN_DISPLAY_QUERY:
            return "Sorry, I could not find any matching lawyers for your request."

        # The explanation is requested only AFTER the lawyer details are built.
        # Previously it was requested while the details string was still empty.
        ai_recommendation = recommendation(GEN_DISPLAY_QUERY, query)

        return GEN_DISPLAY_QUERY + ai_recommendation
    else:
      SYSTEM_DEF = """
      You are a virtual assistant named LawYantra, that responds only to the Law related queries.
      based in India. Give accurate on the point suggestions for normal people.
      You are a law assistant chatbot, if you know the answer to the query still
      politely decline the user from answering any other query except law related.
      """
      response = openai.ChatCompletion.create(
          model="gpt-3.5-turbo",
          messages=[
              {"role": "system", "content": SYSTEM_DEF},
              {"role": "user", "content": query}
          ],
          max_tokens=100
      )
      output_text = response.choices[0].message.content
      return output_text


# Wrap `echo` in a Gradio chat interface titled "Law Yantra" and launch it.
# NOTE(review): the example prompts are generic greetings rather than legal queries.
demo = gr.ChatInterface(fn=echo, examples=["hello", "hola", "merhaba"], title="Law Yantra")
demo.launch(debug=True)

In [None]:
# Script version of the "/start" flow: structure the sample query, retrieve the
# closest lawyer summaries, format their details and ask the LLM to explain the match.
categories = json.loads(reasoningModel.llmReasoning(query))
# ai_recommendation = qa.run(", ".join(categories['Category']))

categories = categories['Extras']
categories = json.dumps(categories)

# `distance_metric` is not a parameter of the Chroma wrapper's similarity_search,
# so it is no longer passed.
results = docsearch2.similarity_search(categories, k=5)

top_5_page_content = [result.page_content for result in results]

# Split the text into separate entries
entries = [content.split('\n') for content in top_5_page_content]

# Create a list of tuples to represent the data; split(": ", 1) keeps values that
# themselves contain ": " intact.
data = [("Lawyer Names", entry[-1].split(": ", 1)[1], "index", entry[0].split(": ", 1)[1]) for entry in entries]

# Remove duplicates while keeping the similarity ranking (a set has no defined order).
unique_data = list(dict.fromkeys(data))

# Convert the de-duplicated tuples into a list of dictionaries
unique_data_list = [{"Lawyer Names": item[1], "Index": item[3]} for item in unique_data]

df = pd.DataFrame(unique_data_list)

GEN_DISPLAY_QUERY = ""

# Iterate over the first 5 rows in the 'df' DataFrame (fewer if df is shorter)
for i in range(min(5, len(df))):
    name = df.iloc[i]['Lawyer Names']
    index = df.iloc[i]['Index']

    # Find the row in 'next' DataFrame that matches the 'index'
    try:
      row = next[next['index'] == int(index)].iloc[0].to_dict()
    except (ValueError, IndexError):
      # Non-numeric index text or no matching row: skip this lawyer only.
      continue

    # Formatted details for this lawyer
    out = f"""
      Name: {name},
      Languages: {row['language']},
      Location: {row['location']},
      Average Charge: {row['charge']},
      Average Time: {row['time']},
      Practices at: {row['practicesat']},
      Client Demographics: {row['clientdemographics']},
      Laws associated: {row['Typesoflaws']},
      Client Feedback Rating: {row['ClientFeedbackof']}
      """
    GEN_DISPLAY_QUERY += out

# Ask for the explanation only now that the lawyer details exist. Previously the
# call was made while GEN_DISPLAY_QUERY was still empty, so the model never saw them.
ai_recommendation = recommendation(GEN_DISPLAY_QUERY, query)

fin = GEN_DISPLAY_QUERY + ai_recommendation

print(fin)