In [47]:
# Install necessary libraries
!pip install transformers torch ipywidgets PyPDF2 tqdm

import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
from PyPDF2 import PdfReader
import ipywidgets as widgets
from IPython.display import display
from tqdm import tqdm



# **Loading Transformer Model and Tokenizer**

In [48]:
# Load tokenizer and model (BERT for token classification)
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


# **KnowledgeBase Text Extraction**

In [49]:
# Function to read the IPC PDF and text extraction
def load_ipc_pdf(file_path):
    reader = PdfReader(file_path)
    num_pages = len(reader.pages)
    text = ""

    # Initialize tqdm progress bar
    with tqdm(total=num_pages, desc="Loading PDF", unit="page") as pbar:
        for page in reader.pages:
            text += page.extract_text() + "\n"
            pbar.update(1)  # Update progress bar

    return text

# **Keyword Extraction Function**

In [50]:
#Installation
!pip install keybert
from keybert import KeyBERT



In [51]:
# Function to extract keywords from the query
def extract_keywords(query):
    # Initialize the KeyBERT model
    model = KeyBERT('distilbert-base-nli-mean-tokens')

    # Extract keywords
    keywords = model.extract_keywords(query)

    return keywords

# **Define UI elements**

In [52]:
# Define UI elements
query_input = widgets.Textarea(
    placeholder='Enter your query...',
    layout=widgets.Layout(width='50%', height='100px')
)
output_text = widgets.HTML(
    value='',
    layout=widgets.Layout(width='50%')
)

# **fINAL OUTPUT printing**

In [53]:
# Function to process the query and display results
def process_query(query):
    if query:
        keywords = extract_keywords(query)  # Extract keywords and attention scores
        roberta_output = keywords  # Save keywords and scores in global variable

    else:
        robert_output = []

    return roberta_output

Confirming Output

In [54]:
print(roberta_output)

[('66a', 0.9257), ('section', 0.7126)]


# **LawGPT - Model**

In [55]:
# Step 1: Install Dependencies
!pip install transformers PyPDF2

# Step 2: Import Libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from PyPDF2 import PdfReader



In [5]:
!pip install faiss-gpu

Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2


In [6]:
!pip install torch transformers PyPDF2 tqdm numpy



In [7]:
!pip install sentence-transformers



In [56]:
def load_chunks(file_path):
    try:
        with open(file_path, 'rb') as f:
            data = pickle.load(f)
        print(f"Loaded {len(data)} chunks from {file_path}")
        return data
    except Exception as e:
        print(f"Error loading chunks from {file_path}: {e}")
        return []


load_chunks('/content/ipc_embeddings_st.pkl')

Loaded 291 chunks from /content/ipc_embeddings_st.pkl


array([[-0.01257933, -0.01098874,  0.00801154, ...,  0.05990362,
        -0.00219519, -0.03905078],
       [-0.05231097, -0.01420248, -0.01898878, ...,  0.015189  ,
         0.00301157,  0.0073801 ],
       [-0.03341011,  0.02017757,  0.03487004, ...,  0.01939204,
         0.0151604 ,  0.00769284],
       ...,
       [-0.02963411,  0.03586548, -0.03346367, ...,  0.03860403,
         0.05357299,  0.05411004],
       [-0.04078694,  0.00177848,  0.00433853, ...,  0.04338269,
        -0.05798173, -0.05487958],
       [-0.04271344,  0.02900897, -0.03100069, ..., -0.00871516,
        -0.00218897, -0.0339858 ]], dtype=float32)

In [59]:
import pickle
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Load pre-trained SentenceTransformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

def load_chunks(file_path):
    try:
        with open(file_path, 'rb') as f:
            data = pickle.load(f)
        print(f"Loaded {len(data)} chunks from {file_path}")
        return data
    except Exception as e:
        print(f"Error loading chunks from {file_path}: {e}")
        return []

# Function to calculate weighted query embedding
def calculate_weighted_query_embedding(roberta_output):
    embeddings = []
    weights = []

    if not roberta_output:
        print("Error: roberta_output is empty.")
        return np.zeros(384)  # Return a zero vector of dimension 384 as a placeholder

    for keyword, weight in roberta_output:
        print(f"Keyword: {keyword}, Weight: {weight}")  # Print for debugging
        embedding = model.encode(keyword, convert_to_tensor=True).cpu().numpy()
        embeddings.append(embedding)
        weights.append(weight)

    embeddings = np.array(embeddings)  # Convert list of embeddings to numpy array
    weights = np.array(weights).reshape(-1, 1)  # Convert weights to numpy array

    if len(embeddings) == 0:
        print("Error: No embeddings calculated.")
        return np.zeros(384)  # Return a zero vector of dimension 384 as a placeholder

    # Calculate weighted average embedding
    weighted_embedding = np.sum(embeddings * weights, axis=0) / np.sum(weights)

    return weighted_embedding

# Main function to process the IPC document
def process_ipc(roberta_output):
    try:
        # Load preprocessed chunks and embeddings
        ipc_chunks = load_chunks('/content/ipc_chunks_Updated.pkl')
        ipc_embeddings = load_chunks('/content/ipc_embeddings.pkl')
        ipc_embeddings = np.vstack(ipc_embeddings)  # Ensure embeddings are a NumPy array
    except (FileNotFoundError, EOFError):
        print("Preprocessed chunks or embeddings not found. Please ensure the files exist.")
        return

    # Initialize FAISS index
    dimension = ipc_embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(ipc_embeddings.astype('float32'))  # Ensure ipc_embeddings are float32

    # Calculate query embedding
    query_embedding = calculate_weighted_query_embedding(roberta_output)

    # Ensure the query_embedding has the same dimension as the FAISS index
    assert query_embedding.shape[0] == dimension, f"Dimension mismatch: query ({query_embedding.shape[0]}) vs index ({dimension})"

    # Search in FAISS index
    k = 5  # Number of top results to retrieve
    query_embedding = query_embedding.reshape(1, -1).astype('float32')  # Ensure query_embedding is float32
    distances, indices = index.search(query_embedding, k)

    # Prepare and return top 3 results
    top_results = []
    for i, idx in enumerate(indices[0][:3]):  # Take only top 3 results
        result = {
            # "distance": distances[0][i],
            "text": ipc_chunks[idx]
        }
        top_results.append(result)

    return top_results

# # Example usage
# roberta_output = [('67', 0.8539), ('ipc', 0.7163), ('section', 0.6221)]
# roberta_output = process_query("What is Section 66A?")
# top_results = process_ipc(roberta_output)

# # Print or use top_results for further processing (e.g., passing to GPT-3 API)
# for i, result in enumerate(top_results):
#     print(f"Top Result {i+1}:")
#     print(f"Distance: {result['distance']:.4f}")
#     print(f"Text: {result['text']}\n")


In [60]:
query_keywords=process_query("Could you please inform me about the specific section of the Indian Penal Code (IPC) under which an individual could be charged if they were involved in the forgery of documents? I'm interested in understanding the legal implications and consequences related to the forgery of official paperwork.")
top_results = process_ipc(query_keywords)
print()
print()
print(top_results)
print()
print()
top_results_string = list_of_dicts_to_string(top_results)
print(top_results_string)

Loaded 291 chunks from /content/ipc_chunks_Updated.pkl
Error loading chunks from /content/ipc_embeddings.pkl: [Errno 2] No such file or directory: '/content/ipc_embeddings.pkl'


ValueError: need at least one array to concatenate

In [21]:
for i, result in enumerate(top_results):
    print(f"Top Result {i+1}:")
    # print(f"Distance: {result['distance']:.4f}")
    print(f"{result['text']}\n")

NameError: name 'top_results' is not defined

In [22]:
def list_of_dicts_to_string(top_results):
    # Convert list of dictionaries to a string format
    result_string = '\n\n'.join('\n'.join(f"{key}: {value}" for key, value in result.items()) for result in top_results)
    return result_string

# Convert list of dictionaries to string
top_results_string = list_of_dicts_to_string(top_results)

    # Output the string representation
print("String representation of top_results:")
print(top_results_string)


NameError: name 'top_results' is not defined

# Printing the First responses

In [None]:
print("Please note that this is not professional legal advice, and you should always consult with a qualified attorney for specific legal matters.")
result_text = f"{result['text']}\n"
print(result_text)

Please note that this is not professional legal advice, and you should always consult with a qualified attorney for specific legal matters.
### Section 29  
 
*Definition of Section 29*   
Section 29 defines "Document." It includes any matter expressed or described upon any substance by 
means of letters, figures, or marks, or by more than one of those means, intended to be used, or which 
may be u sed, as evidence of that matter.  
 
*Cases of Section 29*   
- *Shivnarayan v.



# **Adding Paraphrasing Model - Groq**

In [30]:
# Groq installation
!pip install groq

Collecting groq
  Downloading groq-0.9.0-py3-none-any.whl (103 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.5/103.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from groq)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->groq)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->groq)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, groq
Successfully installed groq-0.9.0 h11-0.14.0 httpcore-1.0.5 http

In [31]:
from groq import Groq
# Initializing client
client = Groq(
  api_key = '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',
)

In [32]:
import time

def get_response(query_input):
    start = time.time()
    query_keywords=process_query(query_input)
    top_results = process_ipc(query_keywords)
    top_results_string = list_of_dicts_to_string(top_results)

    #print(top_results_string+"\n")

    chat_completion = client.chat.completions.create(
    messages=[
        {
            "role":"user",
            "content": f"You are a Legal Chatbot - LawGPT, paraphrase the input and give a response : {top_results_string}"
        },
        {
            "role": "user",
            "content": f"{query_input}",
        }
    ],
    model="llama3-70b-8192",
    )
    end = time.time()
    print("Time taken:",end-start,"secs")
    groq_reponse = chat_completion.choices[0].message.content
    return groq_reponse

In [38]:
groq_response = get_response("What is the Section under which you can classify a hit-and run?")
print(groq_response)

Loaded 291 chunks from /content/ipc_chunks_Updated.pkl
Error loading chunks from /content/ipc_embeddings.pkl: [Errno 2] No such file or directory: '/content/ipc_embeddings.pkl'


ValueError: need at least one array to concatenate

In [34]:
!pip install pyngrok



In [35]:
!pip install flask_ngrok

Collecting flask_ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask_ngrok
Successfully installed flask_ngrok-0.0.25


In [36]:
from flask import Flask, request, render_template_string
import pickle
import faiss
import numpy as np
from pyngrok import ngrok
from pathlib import Path
from sentence_transformers import SentenceTransformer
from flask_ngrok import run_with_ngrok

# Initialize Flask application
app = Flask(__name__)

# HTML template
html_template = '''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LawGPT Version Alpha</title>
    <style>
        body { margin: 0; padding: 0; font-family: 'Arial', sans-serif; background-color: #0A0A23; color: #ffffff; }
        .container { display: flex; height: 100vh; }
        .sidebar { width: 20%; background-color: #141432; padding: 20px; box-shadow: 2px 0 5px rgba(0, 0, 0, 0.5); display: flex; flex-direction: column; justify-content: space-between; }
        .logo { margin-bottom: 30px; }
        .menu button { display: block; background: none; color: #ffffff; padding: 15px 10px; border: none; text-align: left; width: 100%; cursor: pointer; font-size: 16px; }
        .menu button:hover { background-color: #1E1E2D; }
        .footer button { background: none; color: #ffffff; border: none; cursor: pointer; margin: 5px 0; font-size: 14px; }
        .main { width: 80%; padding: 20px; display: flex; flex-direction: column; justify-content: space-between; }
        .header { display: flex; justify-content: space-between; align-items: center; }
        .chat-window { flex-grow: 1; background-color: #0F0F1F; padding: 20px; margin: 20px 0; border-radius: 10px; overflow-y: auto; position: relative; }
        .chat-message { margin-bottom: 20px; }
        .user-query { font-weight: bold; }
        .chat-response p { margin: 5px 0; line-height: 1.5; }
        .link { color: #E94560; text-decoration: none; }
        .input-area { display: flex; align-items: center; }
        .input-area input { width: 90%; padding: 10px; border-radius: 5px; border: 1px solid #3E3E5A; background-color: #0F0F1F; color: #ffffff; margin-right: 10px; }
        .input-area button { background-color: #E94560; color: #ffffff; padding: 10px 20px; border: none; cursor: pointer; border-radius: 5px; }
        .output-status { color: #E94560; font-weight: bold; position: absolute; bottom: 10px; right: 20px; }
    </style>
</head>
<body>
    <div class="container">
        <div class="sidebar">
            <div>
                <div class="logo">
                    <img src="/content/Dark-Theme.png" alt="Logo" width="100">
                </div>
                <div class="menu">
                    <button>Dashboard</button>
                    <button>Download Chat</button>
                    <button>All Chats</button>
                    <button>Legal Documents</button>
                    <button>Legal Consultancy</button>
                    <button>Notification</button>
                    <button>Clear conversations</button>
                    <button>Light mode</button>
                    <button>My account</button>
                    <button>Updates & FAQ</button>
                    <button>Log out</button>
                </div>
            </div>
            <footer>
                <button>Home</button>
                <button>Services</button>
                <button>About</button>
            </footer>
        </div>
        <div class="main">
            <div class="header">
                <span class="model">Model: All-MiniLM LawGPT</span>
            </div>
            <div class="chat-window">
                {% if query %}
                <div class="chat-message user-query">
                    <p>{{ query }}</p>
                </div>
                {% endif %}
                {% if result %}
                <div class="chat-message chat-response">
                    <p>{{ result }}</p>
                </div>
                {% endif %}
                <div class="output-status">{{ status }}</div>
            </div>
<div class="input-area">
    <form method="post">
        <input type="text" id="query" name="query" placeholder="Enter your query here" style="width: 1075px; height: 30px;">
        <button type="submit">Send</button>
    </form>
</div>
        </div>
    </div>
</body>
</html>'''



# Route to process IPC document
@app.route('/', methods=['GET', 'POST'])
def backend_getter():
    result = None
    query_input = None
    status = ""

    if request.method == 'POST':
        query_input = request.form['query']
        print(query_input)
        if query_input:
            groq_response = get_response(query_input)
            print(groq_response)
            result = groq_response
        else:
            status = "Please enter a query."

    return render_template_string(html_template, result=result, query=query_input, status=status)

# Authenticate ngrok with your authtoken
ngrok.set_auth_token('2immPOueqdKYdRjriLqFNoGN735_7URZZFDPjeHXbhNzAKuBX')

if __name__ == '__main__':
    # Create a public URL using ngrok, connecting to Flask's port 5000
    public_url = ngrok.connect(5000)
    print(" * ngrok tunnel: ", public_url)
    # Run the Flask application
    app.run(port=5000)


 * ngrok tunnel:  NgrokTunnel: "https://5b24-35-231-177-218.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


In [None]:
from flask import Flask
from pyngrok import ngrok

# Authenticate ngrok with your authtoken
ngrok.set_auth_token('2immPOueqdKYdRjriLqFNoGN735_7URZZFDPjeHXbhNzAKuBX')

# Create the Flask application
app = Flask(__name__)

# Define a route for the default URL
@app.route('/')
def hello():
    return "Hello, World!"

if __name__ == '__main__':
    # Create a public URL using ngrok
    public_url = ngrok.connect(5000)
    print(" * ngrok tunnel: ", public_url)

    # Run the Flask application
    app.run()


 * ngrok tunnel:  NgrokTunnel: "https://3c6b-34-148-16-62.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [04/Jul/2024 15:09:28] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Jul/2024 15:09:29] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -


In [None]:
from flask import Flask
from flask_ngrok import run_with_ngrok

app = Flask(__name__)
run_with_ngrok(app)  # Integrate ngrok with the Flask app

# Define a simple route
@app.route('/')
def index():
    return 'Hello from Flask!'

if __name__ == '__main__':
    app.run()  # No need to include debug=True here


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
Exception in thread Thread-51:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/urllib3/connection.py", line 203, in _new_conn
    sock = connection.create_connection(
  File "/usr/local/lib/python3.10/dist-packages/urllib3/util/connection.py", line 85, in create_connection
    raise err
  File "/usr/local/lib/python3.10/dist-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py", line 791, in urlopen
    response = self._make_request(
  File "/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py", line 497, in _make_request
    conn.request(
  File "/usr/local/lib/python3.10/dist-packages/urllib3