# **Assumed input from UI**

In [12]:
Query = "What are some good tips for staying productive while working from home?"

# **AES Encryption**

This code securely encrypts and decrypts data using AES encryption in GCM mode, which includes an authentication tag to validate the integrity and authenticity of the encrypted data. It also derives a strong cryptographic key from a password and salt, ensuring secure key management and protection against unauthorized access in the chatbox application.

key generation

In [13]:
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import padding
import os
import base64

# Function to generate a key from a password
def generate_key(password, salt):
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
        backend=default_backend()
    )
    key = kdf.derive(password.encode())
    return key

# Encryption function
def encrypt_data(plaintext, key):
    iv = os.urandom(16)
    cipher = Cipher(algorithms.AES(key), modes.GCM(iv), backend=default_backend())
    encryptor = cipher.encryptor()
    padder = padding.PKCS7(128).padder()
    padded_data = padder.update(plaintext.encode()) + padder.finalize()
    ciphertext = encryptor.update(padded_data) + encryptor.finalize()
    tag = encryptor.tag  # Get the authentication tag
    return base64.b64encode(iv + tag + ciphertext).decode()

# Decryption function
def decrypt_data(ciphertext, key):
    data = base64.b64decode(ciphertext)
    iv = data[:16]
    tag = data[16:32]
    cipher = Cipher(algorithms.AES(key), modes.GCM(iv, tag), backend=default_backend())
    decryptor = cipher.decryptor()
    plaintext_padded = decryptor.update(data[32:]) + decryptor.finalize()  # Start after the tag
    unpadder = padding.PKCS7(128).unpadder()
    plaintext = unpadder.update(plaintext_padded) + unpadder.finalize()
    return plaintext.decode()

#Key- generation
password = "SecureAIChatbot"
salt = os.urandom(16)
key = generate_key(password, salt)
print('key :',key)

key : b'ij\x19\x199[\xfc@`c]\\v\xe0\x93\\y\x10X\xe5gb\xca\xf8\xa9V\x16\x15\xae?U\xf9'


Encryption dry-run

In [14]:
# Encrypt the query
encrypted_query = encrypt_data(Query, key)
print(encrypted_query)

zOTflcdlRLJkUO76TfGIuOnhD88fzhZ9mmPZIuPPRd+bQI3VaXTHMkXyZ91r5nSYy/cI0z7mez9fGLYCMLi8XMk3j2mFMbmalcp5x9LU29ta1fI/jZ7fX0fLPoUYCGahgpwV2rRb9eJHBEsLs/tILA==


In [15]:
# Decrypt the query
decrypted_query = decrypt_data(encrypted_query, key)
print(decrypted_query)

What are some good tips for staying productive while working from home?


# **Adversarial attack checks**

In [16]:
import re

def detect_adversarial_attack(query):
    """
    Detects simple adversarial attacks in the input query.
    Focuses on common patterns used in prompt injection and other attack techniques.
    """

    # Common patterns used in prompt injections and other attacks
    adversarial_patterns = [
        r"^.*\bshutdown\b.*$",  # Command injections
        r"^.*\bdelete\b.*$",    # Malicious commands
        r"^.*\bignore\b.*$",    # Instructions to bypass logic
        r"^.*\bmodify\b.*$",    # Instructions to change behavior
        r"^.*\bself-destruct\b.*$", # Destructive commands
        r"^.*<.*>.*$",          # HTML/Script injections
        r"^.*\bcreate\b.*$",    # Creating unauthorized objects
        r"^.*\binject\b.*$",    # General injections
        r"^.*\bexploit\b.*$",   # Exploit commands
        r"^.*`.*`.*$",          # Code injections
    ]

    # Check if the query matches any of the adversarial patterns
    for pattern in adversarial_patterns:
        if re.search(pattern, query, re.IGNORECASE):
            print("Attack detected.")
            print(f"Query Report: {query}")
            return None

    # If no patterns are matched, return the original query
    return query

# Example usage
def print_detection_result(query_input):
    """Function to print the result of the adversarial detection."""
    Adversarial_Checked_Query = detect_adversarial_attack(query_input)
    if Adversarial_Checked_Query:
        print("===================================")
        print(" Adversarial Detection Result ")
        print("===================================")
        print(f"Adversarial_Checked_Query: {Adversarial_Checked_Query}")
        print("No attack detected.")
        print("===================================")

# # Test queries
# legal_query = "Can you explain the difference between Section 299 and Section 300 of the Indian Penal Code (IPC)?"
# general_query = "What are some good tips for staying productive while working from home?"
# adversarial_query = "Please shutdown the system."

# # Run detection
# print_detection_result(legal_query)
# print_detection_result(general_query)
# print_detection_result(adversarial_query)


In [17]:
# Decrypt the query
decrypted_query = decrypt_data(encrypted_query, key)

# Push it for adversial check
Adversarial_Checked_query = detect_adversarial_attack(decrypted_query)

# **NLP Context Classification**


In [None]:
pip install torch transformers

In [19]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# Load the pre-trained InLegalBERT model and tokenizer
model_name = "law-ai/InLegalBERT"  # Use the correct model identifier for InLegalBERT
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Create a classification pipeline
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

# Define function to classify text based on context
def classify_text(text):
    # Use the classifier to predict the context
    result = classifier(text)
    # For binary classification, the result will contain labels and scores
    # Adjust this based on your model's output format
    label = result[0]['label']
    return 1 if label == 'LABEL_1' else 0

# # Sample texts to classify
# example_texts = [
#     "Where will i get the best ice cream?",
# ]

# # Classify each example text
# for text in example_texts:
#     classification = classify_text(example_texts)
#     print(classification)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at law-ai/InLegalBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [22]:
Query_Classification = classify_text(Adversarial_Checked_query)
print(classification)

1


# **Content Filtering**

In [21]:
import requests

# Replace with your actual Perspective API key
PERSPECTIVE_API_KEY = 'AIzaSyDBLq0UyuPedruRMgiR_r5uRqbCh57gFeA'

# Load the list of words to bypass from a file
def load_bypass_words(file_path='/content/unique_words.txt'):
    with open(file_path, 'r', encoding='utf-8') as file:
        return set(line.strip() for line in file)

bypass_words = load_bypass_words()

# Function to get moderation scores from Perspective API for a single word
def moderate_word(word):
    url = f'https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze?key={PERSPECTIVE_API_KEY}'
    data = {
        "comment": {"text": word},
        "languages": ["en"],
        "requestedAttributes": {"TOXICITY": {}, "INSULT": {}, "PROFANITY": {}},
        "doNotStore": True
    }
    response = requests.post(url, json=data)
    return response.json()

# Main filtering function
def filter_content(Context_01, Adversarial_Checked_query):
    threshold = 0.5
    words = Adversarial_Checked_query.split()
    filtered_words = []
    removed_words = []

    for word in words:
        # If in legal context (Context_01 == 1), check for bypass words
        if Context_01 == 1 and word in bypass_words:
            filtered_words.append(word)
            continue

        # Get moderation scores
        moderation_scores = moderate_word(word)

        # Check if 'attributeScores' is in the response
        if 'attributeScores' in moderation_scores:
            if not any(
                moderation_scores['attributeScores'][attr]['summaryScore']['value'] > threshold
                for attr in ['TOXICITY', 'INSULT', 'PROFANITY']
            ):
                filtered_words.append(word)
            else:
                removed_words.append(word)
        else:
            # Handle cases where the response does not contain 'attributeScores'
            filtered_words.append(word)

    # Join the filtered words to form the final filtered text
    filtered_text = ' '.join(filtered_words)
    return filtered_text

# # Example usage
# Context_01 = 0  # General context
# Adversarial_Checked_query = "You are so stupid and worthless, no one cares about anything you say."

# filtered_text, removed_words = filter_content(Context_01, Adversarial_Checked_query)

# print("Filtered Text:", filtered_text)
# print("Removed Words:", removed_words)

# **Function**

In [23]:
Content_filtered_Query = filter_content(Query_Classification, Adversarial_Checked_query)

In [24]:
print(Content_filtered_Query)

What are some good tips for staying productive while working from home?


# **LLM Guard**

To be implemented in a pseudo environment for successful implementation

In [None]:
pip install llm-guard

Collecting llm-guard
  Downloading llm_guard-0.3.15-py3-none-any.whl.metadata (10 kB)
Collecting bc-detect-secrets==1.5.15 (from llm-guard)
  Downloading bc_detect_secrets-1.5.15-py3-none-any.whl.metadata (23 kB)
Collecting faker<28,>=26.0.0 (from llm-guard)
  Downloading Faker-27.4.0-py3-none-any.whl.metadata (15 kB)
Collecting fuzzysearch<0.9,>=0.7 (from llm-guard)
  Downloading fuzzysearch-0.7.3.tar.gz (112 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.7/112.7 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting json-repair<0.29,>=0.25.2 (from llm-guard)
  Downloading json_repair-0.28.4-py3-none-any.whl.metadata (9.0 kB)
Collecting nltk<4,>=3.9.1 (from llm-guard)
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting presidio-analyzer==2.2.354 (from llm-guard)
  Downloading presidio_analyzer-2.2.354-py3-none-any.whl.metadata (2.6 kB)
Collecting presidio-anonymizer==2.2.354 (from 

In [None]:
from llm_guard import LLMGuard

# Initialize the LLM Guard model (use an appropriate configuration)
llm_guard = LLMGuard(model_name="llm-guard/safe-guard", threshold=0.5)

def check_query_safety(query):
    """
    Function to check the safety of the query using LLM Guard.
    Returns the query if deemed safe, otherwise raises an exception.

    Args:
    query (str): The input query to check.

    Returns:
    str: The original query if it passes all safety checks.

    Raises:
    Exception: If the query is found to be unsafe.
    """
    # Check the query using LLM Guard
    safety_score, is_safe = llm_guard.check(query)

    # If the query is deemed safe, return it
    if is_safe:
        return query

    # If the query is not safe, raise an exception
    raise Exception("Attack detected: Query flagged as unsafe by LLM Guard.")



In [None]:
# Example usage:
try:
    safe_query = check_query_safety(Query)
    print("Query passed all checks:", safe_query)
except Exception as e:
    print(e)

# **NLP Processing**
This function takes the query as input and processes it ad gives the keywords with respective weights and overall sentiment affiliated with it. They keywords are stored in "NLP_Keywords" tuple, and sentiment is stored in a Global variable "global_sentiment_score"

In [27]:
!pip install torch
!pip install transformers
!pip install keybert

Collecting keybert
  Downloading keybert-0.8.5-py3-none-any.whl.metadata (15 kB)
Collecting sentence-transformers>=0.3.8 (from keybert)
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Downloading keybert-0.8.5-py3-none-any.whl (37 kB)
Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence-transformers, keybert
Successfully installed keybert-0.8.5 sentence-transformers-3.0.1


In [31]:
import torch
from transformers import pipeline
from keybert import KeyBERT

# Global variable to store sentiment score
global_sentiment_score = None

def extract_keywords(query):
    # Initialize the KeyBERT model
    model = KeyBERT('distilbert-base-nli-mean-tokens')

    # Extract keywords
    keywords = model.extract_keywords(query)

    return keywords

def get_sentiment(text, sentiment_pipeline):
    """
    Analyzes sentiment of the text using a sentiment analysis pipeline.
    Returns the sentiment label and score.
    """
    result = sentiment_pipeline(text)[0]
    label = result['label']
    score = result['score']

    if label == 'POSITIVE':
        return "Positive", score
    else:
        return "Negative", 1 - score

def process_query(Adversarial_Checked_Query):
    """
    Processes the input query to extract keywords and store sentiment score globally.
    """
    global global_sentiment_score

    # Sentiment analysis using DistilBERT fine-tuned on SST-2
    sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=0 if torch.cuda.is_available() else -1)

    # Extract keywords
    keywords = extract_keywords(Adversarial_Checked_Query)

    # Analyze sentiment
    sentiment, sentiment_score = get_sentiment(Adversarial_Checked_Query, sentiment_pipeline)

    # Store sentiment score globally
    global_sentiment_score = sentiment_score
    # print(keywords)
    return keywords


In [None]:
# Adversarial_Checked_Query = "Could you please inform me about the specific section of the (IPC) under which an individual could be charged for murder, or attempt to murder"

In [32]:
# Process the query and store the results
NLP_Keywords = process_query(Content_filtered_Query)
print(NLP_Keywords)

[('home', 0.4764), ('working', 0.4574), ('productive', 0.3846), ('tips', 0.3822), ('staying', 0.3181)]


# **Query Processing**

## **LawGPT**

In [None]:
#Dummy Input
# Adversarial_Checked_query = "Could you please inform me about the specific section of the (IPC) under which an individual could be charged for murder, or attempt to murder"

In [33]:
# Install necessary libraries
!pip install transformers torch ipywidgets PyPDF2 tqdm numpy

#Installation
!pip install keybert

!pip install faiss-gpu
!pip install sentence-transformers

# Groq installation
!pip install groq

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Using cached jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
Installing collected packages: PyPDF2, jedi
Successfully installed PyPDF2-3.0.1 jedi-0.19.1
Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2
Collecting groq
  Downloading groq-0.10.0-py

In [35]:
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
from PyPDF2 import PdfReader
import ipywidgets as widgets
from IPython.display import display
from tqdm import tqdm
from keybert import KeyBERT
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from PyPDF2 import PdfReader
import pickle
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import time

from groq import Groq
# Initializing client
client = Groq(
  api_key = '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',
)

import pickle
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Load pre-trained SentenceTransformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

def list_of_dicts_to_string(top_results):
    # Convert list of dictionaries to a string format
    result_string = '\n\n'.join('\n'.join(f"{key}: {value}" for key, value in result.items()) for result in top_results)
    return result_string

def load_chunks(file_path):
    try:
        with open(file_path, 'rb') as f:
            data = pickle.load(f)
        print(f"Loaded {len(data)} chunks from {file_path}")
        return data
    except Exception as e:
        print(f"Error loading chunks from {file_path}: {e}")
        return []

# Function to calculate weighted query embedding
def calculate_weighted_query_embedding(roberta_output):
    embeddings = []
    weights = []

    if not roberta_output:
        print("Error: roberta_output is empty.")
        return np.zeros(384)  # Return a zero vector of dimension 384 as a placeholder

    for keyword, weight in roberta_output:
        print(f"Keyword: {keyword}, Weight: {weight}")  # Print for debugging
        embedding = model.encode(keyword, convert_to_tensor=True).cpu().numpy()
        embeddings.append(embedding)
        weights.append(weight)

    embeddings = np.array(embeddings)  # Convert list of embeddings to numpy array
    weights = np.array(weights).reshape(-1, 1)  # Convert weights to numpy array

    if len(embeddings) == 0:
        print("Error: No embeddings calculated.")
        return np.zeros(384)  # Return a zero vector of dimension 384 as a placeholder

    # Calculate weighted average embedding
    weighted_embedding = np.sum(embeddings * weights, axis=0) / np.sum(weights)

    return weighted_embedding

# Main function to process the IPC document
def process_ipc(roberta_output):
    try:
        # Load preprocessed chunks and embeddings
        ipc_chunks = load_chunks('/content/ipc_chunks_all_828.pkl')
        ipc_embeddings = load_chunks('/content/chunks_embeddings_828.pkl')
        ipc_embeddings = np.vstack(ipc_embeddings)  # Ensure embeddings are a NumPy array
    except (FileNotFoundError, EOFError):
        print("Preprocessed chunks or embeddings not found. Please ensure the files exist.")
        return

    # Initialize FAISS index
    dimension = ipc_embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(ipc_embeddings.astype('float32'))  # Ensure ipc_embeddings are float32

    # Calculate query embedding
    query_embedding = calculate_weighted_query_embedding(roberta_output)

    # Ensure the query_embedding has the same dimension as the FAISS index
    assert query_embedding.shape[0] == dimension, f"Dimension mismatch: query ({query_embedding.shape[0]}) vs index ({dimension})"

    # Search in FAISS index
    k = 5  # Number of top results to retrieve
    query_embedding = query_embedding.reshape(1, -1).astype('float32')  # Ensure query_embedding is float32
    distances, indices = index.search(query_embedding, k)

    # Prepare and return top 3 results
    top_results = []
    for i, idx in enumerate(indices[0][:3]):  # Take only top 3 results
        result = {
            # "distance": distances[0][i],
            "text": ipc_chunks[idx]
        }
        top_results.append(result)

    return top_results



def get_response_Law(query_keywords):
    start = time.time()

    top_results = process_ipc(query_keywords)
    top_results_string = list_of_dicts_to_string(top_results)
    chat_completion = client.chat.completions.create(
    messages=[
        {
            "role":"user",
            "content": f"You are a Legal Chatbot - LawGPT, use the input as reference, paraphrase the input if correct and give a response adding other useful information missing in the input. Also do not mention any reference of the input, just give the answer : {top_results_string}"
        },
        {
            "role": "user",
            "content": f"{Adversarial_Checked_query}",
        }
    ],
    model="llama3-70b-8192",
    )
    end = time.time()
    print("Time taken:",end-start,"secs")
    groq_reponse = chat_completion.choices[0].message.content
    return groq_reponse


In [36]:
# Query_output = get_response(NLP_Keywords)
# # print(Query_output)


Output = get_response_Law(NLP_Keywords)
print(Output)

Loaded 828 chunks from /content/ipc_chunks_all_828.pkl
Loaded 828 chunks from /content/chunks_embeddings_828.pkl
Keyword: home, Weight: 0.4764
Keyword: working, Weight: 0.4574
Keyword: productive, Weight: 0.3846
Keyword: tips, Weight: 0.3822
Keyword: staying, Weight: 0.3181
Time taken: 1.2135639190673828 secs
Not related to the original text!

To answer your new question: 

Staying productive while working from home can be challenging, but here are some valuable tips to help you stay on track:

1. **Create a dedicated workspace**: Designate a specific area of your home as your workspace and keep it organized and clutter-free.
2. **Establish a routine**: Set a schedule for yourself and stick to it, just as you would in an office environment.
3. **Minimize distractions**: Eliminate or minimize distractions such as TV, social media, and personal phone use during work hours.
4. **Take breaks**: Take regular breaks to recharge and avoid burnout. Use this time to refresh your mind and body.


# **General Context LLM**
Currently using Groq

In [None]:
import time
from groq import Groq

# Initialize the Groq client with your API key
client = Groq(
    api_key='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',
)

# Function to get the response for a general query
def get_response_General(query_input):
    start = time.time()

    # Send the input query directly to the Groq API
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": f"You are a friendly and helpful chatbot. Your task is to answer all questions asked of you in a straightforward and Respectfull manner. If the sentiment of the user's input is low (as determined by a sentiment score between 0 and 1), respond with extra consideration and empathy. Provide support and understanding in your replies, aiming to uplift the user and address their concerns with care. For all other inputs, continue to answer questions in a straightforward and informative manner : {global_sentiment_score}"
            },
            {
                "role": "user",
                "content": f"{Adversarial_Checked_query}",
            }
        ],
        model="llama3-70b-8192",
    )

    end = time.time()
    print("Time taken:", end - start, "secs")

    # Retrieve and return the response from Groq
    groq_response = chat_completion.choices[0].message.content
    return groq_response

# # Test the function with a sample input
# General_Output = get_response_General("What are some good productivity tips?")
# print(General_Output)


In [None]:
General_Output = get_response_General(NLP_Keywords)
print(General_Output)

Time taken: 1.8009624481201172 secs
Staying productive while working from home can be a challenge, but with the right strategies, you can stay focused and achieve your goals. Here are some tips to help you stay productive while working from home:

1. **Create a dedicated workspace**: Designate a specific area of your home as your workspace and keep it organized and clutter-free. This will help you establish a clear boundary between work and personal life.

2. **Establish a routine**: Set a schedule for your workday and stick to it, just as you would if you were working in an office. This will help you stay on track and avoid procrastination.

3. **Minimize distractions**: Eliminate or minimize distractions such as TV, social media, and personal phone use during work hours. Use tools like website blockers or apps that help you stay focused.

4. **Take breaks**: Working long hours without taking breaks can lead to burnout. Take short breaks every hour to refresh your mind and recharge yo

# **Encryption Part 2**

In [None]:
# Encrypt the query
encrypted_query = encrypt_data(Query, key)
print(encrypted_query)

In [None]:
# Decrypt the query
decrypted_query = decrypt_data(encrypted_query, key)
print(decrypted_query)