In [None]:
!pip install faiss-cpu PyPDF2 sentence-transformers

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m64.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sentence_transformers-3.1.1-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2, faiss-cpu, sentence-transformers
Succes

# **Creating Chunks and Embeddings**
Text Cleanup: Ensures that extracted text does not have excessive spaces or newlines, making sentence detection more reliable.
Chunk Merging: Dynamically merges sentences into chunks that are contextually coherent, targeting 50-100 words.
Sentence Splitting with SpaCy: Uses NLP to accurately split text into sentences, which are then grouped to maintain context

In [None]:
import os
import re
import faiss
import pickle
import spacy
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

nlp = spacy.load('en_core_web_sm')

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path`` as one string.

    Pages whose extracted text is empty are skipped. For every other page,
    each run of newlines is replaced by a single space, the page text is
    stripped, and one trailing space is appended before pages are joined.
    """
    pieces = []
    for page in PdfReader(pdf_path).pages:
        raw = page.extract_text()
        if not raw:
            continue
        # Collapse line breaks so sentence detection is not split mid-sentence.
        pieces.append(re.sub(r'\n+', ' ', raw).strip())
        pieces.append(" ")
    return "".join(pieces)

def context_aware_chunking(text, min_length=50, max_length=100):
    """Group the sentences of ``text`` into chunks of roughly min_length..max_length words.

    Sentences are produced by the module-level spaCy pipeline ``nlp``; the
    length of a sentence is its whitespace-separated word count. Sentences are
    never split and never discarded:

    * a chunk is closed once adding the next sentence would exceed
      ``max_length`` and the chunk already holds at least ``min_length`` words;
    * if the chunk is still shorter than ``min_length`` the sentence is added
      anyway, so that chunk may exceed ``max_length``;
    * a trailing chunk shorter than ``min_length`` is merged into the previous
      chunk, or returned on its own when it is the only chunk.

    Returns a list of chunk strings (sentences joined with single spaces);
    the list is empty only when ``text`` contains no words.
    """
    sentences = [sent.text for sent in nlp(text).sents]

    chunks = []
    current_chunk = []
    current_length = 0

    for sentence in sentences:
        sentence_length = len(sentence.split())
        would_overflow = current_length + sentence_length > max_length
        # Close the chunk only when it is already large enough; otherwise keep
        # growing it so that no text is lost from the index.
        if current_chunk and would_overflow and current_length >= min_length:
            chunks.append(' '.join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(sentence)
        current_length += sentence_length

    if current_length > 0:
        tail = ' '.join(current_chunk)
        if current_length >= min_length or not chunks:
            chunks.append(tail)
        else:
            # Short remainder: attach it to the previous chunk instead of dropping it.
            chunks[-1] = f"{chunks[-1]} {tail}"

    return chunks

def save_chunks_and_embeddings(pdf_path, index_file='faiss_index.index'):
    """Chunk the PDF at ``pdf_path``, embed the chunks, and persist both.

    Side effects:
      * writes ``chunks.txt`` (one chunk per line) in the current working directory;
      * writes a FAISS ``IndexFlatL2`` holding one vector per chunk to ``index_file``.

    Line *i* of ``chunks.txt`` corresponds to vector id *i* in the index.

    Raises:
        ValueError: if no chunk could be produced from the PDF.
    """
    text = extract_text_from_pdf(pdf_path)
    # Each chunk must occupy exactly one line of chunks.txt, otherwise line
    # numbers stop matching FAISS ids; text-mode reads treat both "\n" and a
    # lone "\r" as line breaks, so both are replaced.
    chunks = [re.sub(r'[\r\n]+', ' ', chunk) for chunk in context_aware_chunking(text)]
    if not chunks:
        raise ValueError(f"No text chunks could be extracted from {pdf_path!r}; nothing to index.")

    # Load the model
    model = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = model.encode(chunks, convert_to_tensor=True).cpu().numpy()

    # Save chunks to a txt file
    with open('chunks.txt', 'w') as f:
        f.writelines(f"{chunk}\n" for chunk in chunks)

    # Save embeddings to FAISS
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    faiss.write_index(index, index_file)

  from tqdm.autonotebook import tqdm, trange


In [None]:
# Example usage: index the sample PDF. This writes chunks.txt and
# faiss_index.index relative to the current working directory.
pdf_path = '/content/Chemistry pdf.pdf'
save_chunks_and_embeddings(pdf_path)

Chunks and embeddings saved. Number of chunks: 1404


Printing the chunks for verification

In [None]:
def print_chunks(chunks_file='chunks.txt'):
    """Print the number of chunks in ``chunks_file`` followed by each chunk.

    Every line of the file is treated as one chunk. If the file does not
    exist, a notice is printed instead and nothing is raised.
    """
    try:
        handle = open(chunks_file, 'r')
    except FileNotFoundError:
        print(f"File {chunks_file} not found. Please ensure the file path is correct.")
        return

    with handle:
        lines = handle.readlines()

    divider = '-' * 40
    print(f"Total Chunks: {len(lines)}\n")
    for number, line in enumerate(lines, start=1):
        print(f"Chunk {number}:\n{line.strip()}\n{divider}\n")

# Example usage: print every chunk from the default chunks.txt for a visual check.
print_chunks()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Chunk 155:
a. 45.26 ft        b. 0.109 in    c. 0.00025 kg        d. 2.3659 × 10-8 cm       e. 52.0 cm3       f. 0.00020 kg   g. 8.50 × 104 mm     h. 300.0 cg B. Round off each of the following  quantities to two significant figures :  a. 25.55 mL     b. 0.00254 m    c. 1.491 × 105 mg   d. 199 g C. Round off each of the following  quantities to three significant figures :   a. 1.43 cm3   b. 458 × 102 cm     c. 643 cm2 d. 0.039 m
----------------------------------------

Chunk 156:
e. 6.398 × 10-3 km  f. 0.0179 g   g. 79,000 m      h. 42,150   i. 649.85;     j. 23,642,000 mm  k. 0.0041962 kg D. Express the following sum to appropriate  number of significant figures :  a. 2.3 × 103 mL + 4.22 × 104 mL + 9.04 ×       103 mL + 8.71 × 105 mL;  b. 319.5 g - 20460 g - 0.0639 g - 45.642  g       - 4.173 g 4. Solve the following problems  A. Express the following quantities in  exponential terms.   a. 0.0003498  b. 235.4678    c. 7

# **Semantic search-based chunk retrieval**

In [None]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the pre-trained all-MiniLM-L6-v2 model used to embed queries
model = SentenceTransformer('all-MiniLM-L6-v2')

# Path of the FAISS index file. Only the path is set here; the index itself is
# read from disk inside the retrieval functions.
index_file = '/content/faiss_index.index'


def retrieve_semantic_chunks(query, top_k=5):
    """Return up to ``top_k`` chunk texts closest to ``query`` in embedding space.

    The FAISS index is read from the module-level ``index_file`` and the query
    is embedded with the module-level ``model``. Results are ordered by
    ``late_interaction_refinement``. Returns an empty list when the index
    holds no vectors.
    """
    index = faiss.read_index(index_file)
    if index.ntotal == 0:
        return []
    # FAISS pads the result with id -1 when fewer than k vectors exist, so
    # never ask for more neighbours than the index contains.
    top_k = min(top_k, index.ntotal)

    # Step 1: embed the query. FAISS needs a host NumPy array, so the tensor is
    # moved to CPU and converted, as is done when the chunks are indexed.
    query_embedding = (
        model.encode(query, convert_to_tensor=True).cpu().numpy().reshape(1, -1)
    )  # shape (1, d)

    # Step 2: nearest-neighbour search for the top K chunks
    distances, indices = index.search(query_embedding, top_k)

    # Step 3: order the hits and load their text
    return late_interaction_refinement(indices[0], distances[0])

def late_interaction_refinement(indices, distances):
    """Return the chunk texts for ``indices`` ordered from smallest to largest distance.

    Args:
        indices: chunk ids from a FAISS search; negative ids (FAISS's
            "no result" padding) are skipped.
        distances: the distance for each id, in the same order.

    Sorting by ascending distance gives the same ranking as sorting by the
    normalised score ``1 - distance / max_distance`` in descending order, but
    without a division that produces NaN when every distance is 0. The sort is
    stable, so equal distances keep their incoming order.
    """
    distances = np.asarray(distances, dtype=float)
    indices = np.asarray(indices)

    order = np.argsort(distances, kind='stable')

    # Load the surviving chunks, most relevant first
    return [
        load_chunk_from_index(int(indices[position]))
        for position in order
        if indices[position] >= 0
    ]

def load_chunk_from_index(index, chunks_file='/content/chunks.txt'):
    """Return the chunk stored on line ``index`` (0-based) of ``chunks_file``.

    Raises:
        IndexError: if ``index`` is negative or past the last line. Negative
            values are rejected explicitly because Python would otherwise
            count from the end of the file and return an unrelated chunk.
    """
    with open(chunks_file, 'r') as f:
        chunks = f.readlines()
    if not 0 <= index < len(chunks):
        raise IndexError(
            f"chunk index {index} is out of range for {chunks_file} ({len(chunks)} chunks)"
        )
    return chunks[index].strip()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Example usage: retrieve the chunks closest to the query embedding and list them.
query = "What is the chemical Equilibrium? Give me some chemical reactions"
relevant_chunks = retrieve_semantic_chunks(query)

print("Top relevant chunks:")
for chunk in relevant_chunks:
    print(f"- {chunk}")

Top relevant chunks:
- The reactant is KClO3 and its 2 molecules  appear in the balanced equation.∴ Rate   α [KClO 3]2 ∴ Rate = k [KClO3]2 At equilibrium the ratio of product  multiplicative term denoting the ratio of  concentraton of products to that of the reactants is unchanged and equals K C. The value of KC  depends upon the temperature. It is interesting to note that though the concentration ratio remains unchanged, both the forward as well as reverse reactions do proceed at equilibrium, but at the same rate. Therefore the chemical equilibrium is a dynamic equilibrium.
- [H2O(g)]2 KC = 1 KC  =  1 2.4 x 1047 KC = 4.1 x 10-48 = 0.41 x 10-47 at 500 K 1. Value of KC is very high (KC > 103). 1. Value of KC is very low (KC < 10-3). 2. At equilibrium there is a high proportion of  products compared to reactants.2. At equilibrium, only a small fraction of the reactants are converted into products. 3. Forward reaction is favoured. 3. Reverse reaction is favoured. 4. Reaction is in favour 

# **Keywords + sentiment analysis**

In [None]:
!pip install KeyBERT

Collecting KeyBERT
  Downloading keybert-0.8.5-py3-none-any.whl.metadata (15 kB)
Downloading keybert-0.8.5-py3-none-any.whl (37 kB)
Installing collected packages: KeyBERT
Successfully installed KeyBERT-0.8.5


In [None]:
import torch
from transformers import pipeline
from keybert import KeyBERT

# Module-level slot for the sentiment score of the most recent query;
# process_query() overwrites it on every call.
global_sentiment_score = None

# Lazily created KeyBERT model, kept between calls so the weights are loaded once.
_keybert_model = None


def extract_keywords(query):
    """Return the keywords KeyBERT extracts from ``query``.

    The KeyBERT model ('distilbert-base-nli-mean-tokens') is built on the
    first call and reused afterwards instead of being reloaded per query.
    The value returned by ``KeyBERT.extract_keywords`` is passed through
    unchanged.
    """
    global _keybert_model
    if _keybert_model is None:
        _keybert_model = KeyBERT('distilbert-base-nli-mean-tokens')

    return _keybert_model.extract_keywords(query)

def get_sentiment(text, sentiment_pipeline):
    """Classify ``text`` and return ``(sentiment_name, positivity_score)``.

    ``sentiment_pipeline`` is called with ``text``; its first result must be
    a mapping with ``'label'`` and ``'score'``. A ``'POSITIVE'`` label yields
    ``("Positive", score)``; any other label yields ``("Negative", 1 - score)``.
    """
    top = sentiment_pipeline(text)[0]
    is_positive = top['label'] == 'POSITIVE'
    confidence = top['score']
    return ("Positive", confidence) if is_positive else ("Negative", 1 - confidence)

# Lazily created sentiment pipeline, kept between calls so the model is loaded once.
_sentiment_pipeline = None


def process_query(Adversarial_Checked_Query):
    """Extract keywords from the query and record its sentiment score.

    Side effect: stores the score returned by ``get_sentiment`` in the
    module-level ``global_sentiment_score``.

    Returns:
        The keywords produced by ``extract_keywords`` for the query.
    """
    global global_sentiment_score, _sentiment_pipeline

    # Sentiment analysis using DistilBERT fine-tuned on SST-2; the pipeline is
    # built on first use rather than on every query.
    if _sentiment_pipeline is None:
        _sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=0 if torch.cuda.is_available() else -1)

    # Extract keywords
    keywords = extract_keywords(Adversarial_Checked_Query)

    # Analyze sentiment; only the numeric score is kept
    _, sentiment_score = get_sentiment(Adversarial_Checked_Query, _sentiment_pipeline)

    # Store sentiment score globally
    global_sentiment_score = sentiment_score
    return keywords


In [None]:
# Process the query: NLP_Keywords receives the extracted keywords, and
# process_query also updates global_sentiment_score as a side effect.
query = "What is the chemical Equilibrium? Give me some chemical reactions"
NLP_Keywords = process_query(query)
print(NLP_Keywords)

[('chemical', 0.7168), ('reactions', 0.5757), ('equilibrium', 0.5178)]


# **Keyword-based chunk retrieval**

In [None]:
import re
from collections import defaultdict

def keyword_based_search(keywords, top_k=5, chunks_file='/content/chunks.txt'):
    """Rank chunks by weighted keyword occurrences and return the best ``top_k``.

    Args:
        keywords: iterable of ``(keyword, relevance)`` pairs.
        top_k: maximum number of chunks to return.
        chunks_file: text file holding one chunk per line.

    A chunk's score is the sum over keywords of
    ``occurrences * relevance``, where occurrences are case-insensitive
    whole-word matches. Chunks whose score is not positive are left out, so
    the result can hold fewer than ``top_k`` items (or none) rather than
    padding it with chunks that match nothing. Ties keep file order.
    """
    with open(chunks_file, 'r') as f:
        chunks = [line.strip() for line in f]

    # Compile each keyword once instead of once per chunk.
    patterns = [
        (re.compile(r'\b' + re.escape(keyword) + r'\b', re.IGNORECASE), relevance)
        for keyword, relevance in keywords
    ]

    scored = []
    for position, chunk_text in enumerate(chunks):
        score = sum(
            len(pattern.findall(chunk_text)) * relevance
            for pattern, relevance in patterns
        )
        if score > 0:
            scored.append((position, score))

    # Stable sort: equal scores stay in document order.
    scored.sort(key=lambda item: item[1], reverse=True)

    return [chunks[position] for position, _ in scored[:top_k]]

In [None]:
# Example usage: rank chunks with the keywords extracted by process_query above.

relevant_chunks = keyword_based_search(NLP_Keywords, 5)

print("Top relevant chunks based on keywords:")
for chunk in relevant_chunks:
    print(f"- {chunk}")


Top relevant chunks based on keywords:
- The nuclear  transmutation is transformation of a stable nucleus into another nucleus be it stable or unstable. The nuclear transmutation where the product nucleus is radioactive is called artificial radioactivity. Table 13.3 : Comparison of chemical reactions  and nuclear reactions Chemical Reactions Nuclear Reactions 1. Rearrangement of  atoms by breaking and forming of chemical bonds.1. Elements or isotopes of one elements are converted into another element in a nuclear reaction. 2. Different isotopes of an element have same behaviour.2. Isotopes of an element behave differently. 198 3. Only outer shell  electrons take part in the chemical reaction.3.
- Contd on next page 18 2.5 Chemical reactions and stoichiometric calculations  Calculation based on  a balanced chemical equations are known as stoichiometric calculations. Balanced chemical equation is symbolic representation of a chemical reaction. It supplies the  following information which

# **Dynamic Context Size Adjustment**
Dynamically adjusts the amount of retrieved context fed into the generative model based on query complexity. For more complex queries, we pull additional context to improve the quality of the generated response.

In [None]:
import re
def combine_and_prioritize_chunks(semantic_chunks, keyword_chunks):
    """Concatenate both chunk lists and drop repeated chunks.

    Semantic chunks come first, followed by keyword chunks; only the first
    occurrence of each chunk is kept, in its original position.
    """
    # dict keys keep insertion order, which gives order-preserving de-duplication.
    return list(dict.fromkeys(semantic_chunks + keyword_chunks))

def assess_query_complexity(query):
    """Return a heuristic complexity score for ``query``.

    The score is the number of whitespace-separated words multiplied by the
    number of keywords returned by ``process_query``. The score is also
    printed before being returned.
    """
    word_total = len(query.split())
    keyword_total = len(process_query(query))

    complexity_score = word_total * keyword_total
    print(complexity_score)
    return complexity_score

def dynamic_context_adjustment(query, semantic_top_k=5, keyword_top_k=5):
    """Build the context string for ``query``, retrieving more chunks for complex queries.

    The complexity score from ``assess_query_complexity`` selects a minimum
    number of chunks per retriever: 10 when the score is above 20, 7 when it
    is above 10, otherwise 5. ``semantic_top_k`` and ``keyword_top_k`` are
    raised to that minimum when smaller and honoured when larger, so the
    defaults give 5 / 7 / 10.

    Returns:
        The de-duplicated semantic and keyword chunks joined by single spaces.
    """
    complexity_score = assess_query_complexity(query)

    # Map complexity to the minimum number of chunks to fetch from each retriever
    if complexity_score > 20:
        minimum_k = 10   # high complexity
    elif complexity_score > 10:
        minimum_k = 7    # medium complexity
    else:
        minimum_k = 5    # low complexity
    semantic_top_k = max(semantic_top_k, minimum_k)
    keyword_top_k = max(keyword_top_k, minimum_k)

    # Both retrievers receive their adjusted k
    semantic_chunks = retrieve_semantic_chunks(query, top_k=semantic_top_k)
    keywords = process_query(query)
    keyword_chunks = keyword_based_search(keywords, top_k=keyword_top_k)

    # Combine, prioritize, and remove duplicates
    context_chunks = combine_and_prioritize_chunks(semantic_chunks, keyword_chunks)

    return ' '.join(context_chunks)


def clean_text(text):
    """Remove single whitespace characters between word characters, then normalise spacing.

    Step 1 deletes any single whitespace character that has a word character
    directly on both sides. Step 2 collapses each remaining run of whitespace
    to one space and strips the ends.

    NOTE(review): step 1 also joins ordinary words separated by exactly one
    space ("T he cat" becomes "Thecat"); confirm this is intended before
    using the function on normal prose.
    """
    glued = re.compile(r'(?<=\w)\s(?=\w)').sub('', text)
    return re.compile(r'\s+').sub(' ', glued).strip()



In [None]:
# Example usage: build the combined context for a query. The result is stored
# in the notebook-level name context_chunks, which later cells read.
query = "Detection of diabetic retinopathy in retinal fundus images"
context_chunks = dynamic_context_adjustment(query)

print(context_chunks)

40


IndexError: list index out of range

# **Generating Responses**

In [None]:
# Groq installation
!pip install groq

Collecting groq
  Downloading groq-0.11.0-py3-none-any.whl.metadata (13 kB)
Collecting httpx<1,>=0.23.0 (from groq)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->groq)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->groq)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading groq-0.11.0-py3-none-any.whl (106 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.5/106.5 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading h11-0.14.0-py3-none-any.whl (58 kB

# **General Response code**

In [None]:
import time
from groq import Groq

# Initialize the Groq client. The API key is read from the GROQ_API_KEY
# environment variable so that no credential is stored in the notebook.
# NOTE: a key that has ever been committed in a notebook should be revoked.
import os

client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

# Function to get the response for a general query
def get_response_General1(context_chunks,global_sentiment_score,query):
    """Ask the Groq chat model to answer ``query`` using only ``context_chunks``.

    Args:
        context_chunks: retrieved document text, interpolated into the
            instruction message.
        global_sentiment_score: accepted for call compatibility; not used.
        query: the user's question, sent as a second user message.

    Returns:
        The text content of the first completion choice.
    """
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": f"You are a general Chatbot, Answer all queries asked to you with taking ONLY the provided chunks as reference. If the required data is not present in the chunks, respond with 'Data not found in the Document'. Format it properly for the frontend (only give string with spaces etc. dont add '*' for bold). Chunks : {context_chunks}"
            },
            {
                "role": "user",
                "content": f"{query}",
            }
        ],
        model="llama3-70b-8192",
    )

    # Retrieve and return the response from Groq
    return chat_completion.choices[0].message.content

# # Test the function with a sample input
# General_Output = get_response_General("What are some good productivity tips?")
# print(General_Output)


In [None]:
# Example usage:
query = "What is the chemical Equilibrium? Give me some chemical reactions"

# Build the context for this query first, so the cell does not depend on a
# context_chunks value left over from a different query, then answer with
# get_response_General1 (argument order: context, sentiment score, query).
context_chunks = dynamic_context_adjustment(query)
General_Output = get_response_General1(context_chunks, global_sentiment_score, query)
print(General_Output)

Chemical equilibrium is a dynamic state in which the concentration of reactants and products in a chemical reaction remain constant over time, but not necessarily equal. This means that the forward reaction rate equals the reverse reaction rate, and the system is in a stable state.

Here are some examples of chemical reactions:

1. 2SO2(g) + O2(g) → 2SO3(g)
2. N2O4(g) → 2NO2(g)
3. H2(g) + I2(g) → 2HI(g)
4. N2(g) + 3H2(g) → 2NH3(g) (Haber process)
5. BaSO4(s) → Ba2+(aq) + SO42-(aq)
6. AgCl(s) → Ag+(aq) + Cl-(aq)

Note: (g) denotes gas phase, (s) denotes solid phase, and (aq) denotes aqueous solution.


# **Conditioned response (Groq)**

In [None]:
import time
from groq import Groq

# Initialize the Groq client. The API key comes from the GROQ_API_KEY
# environment variable; credentials must not be hard-coded in the notebook.
import os

client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

# Function to get the response for a general query based on document context
def get_response_General(global_sentiment_score, context_chunks, query):
    """Answer ``query`` from ``context_chunks`` with the Groq chat model.

    Args:
        global_sentiment_score: accepted for call compatibility; not used
            when building the prompt.
        context_chunks: retrieved document text placed in the prompt.
        query: the user's question. It appears inside the prompt and is also
            sent as a separate user message.

    Returns:
        The text content of the first completion choice.
    """
    # Prepare the prompt to answer based on document content and handle missing data.
    # Every sentence ends with a space or newline so adjacent literals do not run together.
    prompt = (
        "You are a helpful assistant. Your job is to answer the following query based on the given document content. "
        "If the required information is not found in the document, respond with 'Requested Data not found in the document' for that part of the question. "
        "If the information is found, use only the provided context for answering the query. If adding extra information, explicitly mention so. "
        "Please ensure that all aspects of the query are addressed.\n\n"
        f"Context (from document): {context_chunks}\n\n"
        f"User Query: {query}\n\n"
    )

    # Send the input query with context to the Groq API
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are an AI assistant Chatbot : Docufy. respond with proper answer formatting for frontend without '*',etc."
            },
            {
                "role": "user",
                "content": prompt
            },
            {
                "role": "user",
                "content": f"{query}",
            }
        ],
        model="llama3-70b-8192",
    )

    # Retrieve and return the response from Groq
    return chat_completion.choices[0].message.content


In [None]:
# Example usage. Note: context_chunks and global_sentiment_score are not set in
# this cell; they must already exist in the kernel from earlier cells.
query = " Detection of diabetic retinopathy in retinal fundus images "

output = get_response_General(global_sentiment_score, context_chunks, query)
print(output)

Requested Data not found in the document.


In [None]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Downloading pyngrok-7.2.0-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.0


In [None]:
from flask import Flask, request, render_template_string, redirect, url_for
from pyngrok import ngrok
import os

app = Flask(__name__)

# Set the ngrok authtoken from the NGROK_AUTHTOKEN environment variable so the
# credential is never stored in the notebook. A missing variable fails here
# with a KeyError instead of later when the tunnel is opened.
ngrok.set_auth_token(os.environ["NGROK_AUTHTOKEN"])

# HTML templates
# Upload page: a multipart form that POSTs the selected file (field name "pdf")
# to /upload.
html_upload_pdf = '''
<!DOCTYPE html>
<html>
<head>
    <title>AI Chatbot</title>
</head>
<body style="background-color: #2C2F33; color: white;">
    <h1>Upload PDF Document</h1>
    <form action="/upload" method="POST" enctype="multipart/form-data">
        <input type="file" name="pdf" accept="application/pdf" required>
        <input type="submit" value="Upload">
    </form>
</body>
</html>
'''

# Chat page: the query form posts to /chatbot, the route that handles POSTed
# queries, and the answer is rendered through the `result` template variable.
html_chatbot = '''
<!DOCTYPE html>
<html>
<head>
    <title>AI Chatbot</title>
</head>
<body style="background-color: #2C2F33; color: white;">
    <h1>AI Chatbot</h1>
    <form action="/chatbot" method="POST">
        <label for="query">Enter your query:</label><br>
        <input type="text" id="query" name="query" required><br><br>
        <input type="submit" value="Submit">
    </form>
    <form action="/" method="GET">
        <input type="submit" value="Change PDF">
    </form>
    <h2>Response:</h2>
    <p>{{ result }}</p>
</body>
</html>
'''

# Flag recording whether a PDF has been uploaded and indexed
pdf_uploaded = False

@app.route('/', methods=['GET'])
def upload_pdf():
    """Serve the PDF upload form."""
    page = render_template_string(html_upload_pdf)
    return page

@app.route('/upload', methods=['POST'])
def handle_upload():
    """Store the uploaded PDF, index it, and redirect to the chatbot page.

    The file is saved as ``user_document.pdf`` in the current working
    directory and that same path is passed to ``save_chunks_and_embeddings``.
    A request without a usable ``pdf`` file gets a 400 response.
    """
    global pdf_uploaded
    file = request.files.get('pdf')
    if file:
        # Save the PDF to the environment
        file_path = os.path.join(os.getcwd(), "user_document.pdf")
        file.save(file_path)

        # Index exactly the file that was just written, not a hard-coded
        # location that only matches when the working directory is /content.
        save_chunks_and_embeddings(file_path)
        pdf_uploaded = True

        # Redirect to the chatbot page
        return redirect(url_for('chatbot'))
    return "PDF upload failed", 400

@app.route('/chatbot', methods=['GET', 'POST'])
def chatbot():
    """Render the chat page; on POST, answer the submitted query.

    The retrieved context is also stored in the module-level
    ``context_chunks``. A GET request, or a POST with an empty query,
    renders the page with an empty result.
    """
    global context_chunks
    if request.method == 'POST':
        query_input = request.form.get('query', '').strip()
        if query_input:
            context_chunks = dynamic_context_adjustment(query_input)
            # Answer the query that was submitted with this request, not the
            # notebook-level `query` left over from an earlier cell.
            result = get_response_General(global_sentiment_score, context_chunks, query_input)
            return render_template_string(html_chatbot, result=result)
    return render_template_string(html_chatbot, result="")

if __name__ == '__main__':
    # Open the ngrok tunnel first so its public URL is printed before the
    # blocking Flask server starts on the same port.
    port = 5000
    public_url = ngrok.connect(port)
    print(" * ngrok tunnel: ", public_url)

    app.run(port=port)

 * ngrok tunnel:  NgrokTunnel: "https://9872-34-91-64-92.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [09/Sep/2024 15:31:23] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [09/Sep/2024 15:31:24] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [09/Sep/2024 15:32:21] "[32mPOST /upload HTTP/1.1[0m" 302 -
INFO:werkzeug:127.0.0.1 - - [09/Sep/2024 15:32:22] "GET /chatbot HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [09/Sep/2024 15:33:12] "[33mPOST /query HTTP/1.1[0m" 404 -


In [None]:
# Re-index the uploaded document; this overwrites chunks.txt and the default
# FAISS index file written by save_chunks_and_embeddings.
pdf_path1 = "/content/user_document.pdf"
save_chunks_and_embeddings(pdf_path1)

Chunks and embeddings saved. Number of chunks: 284


In [None]:
def final_function(query, pdf_path="/content/AI in health and healthcare.pdf"):
    """Index ``pdf_path`` and answer ``query`` from it, end to end.

    Args:
        query: the user's question.
        pdf_path: PDF to index; defaults to the sample document.

    The PDF is re-chunked and re-embedded on every call. The retrieved
    context and the model's answer are both printed.

    Returns:
        The answer text from ``get_response_General``.
    """
    # Create chunks and embeddings for the document
    save_chunks_and_embeddings(pdf_path)

    # Retrieve context for the query, then generate the answer from it
    context_chunks = dynamic_context_adjustment(query)
    print(context_chunks)
    result = get_response_General(global_sentiment_score, context_chunks, query)
    print(result)
    return result



In [None]:
# Example usage: run the full pipeline for one query. final_function prints
# the context and the answer itself; the answer is printed once more here.
uploaded_query = " Detection of diabetic retinopathy in retinal fundus images "
result = final_function(uploaded_query)
print(result)

40
[19 ].   Early screening for diabetic retinopathy is important as early treatment can  prevent vision loss and blindness in the rapidly growing population of patients with diabetes.   Such screening also provides the opportunity to identify other eye diseases, as well as providing indicators of cardiovascular disease.      T he increasing need for such screening, and the demands for expert ana lysis that it creates,  motivates the goal of low cost, quantitative retinal image analysis.   Routine imaging for screening uses the specially designed optics of a ‘fundus camera,’ with several images taken at differen t orientations (fields, see Figure  2) 2010:   Retinal Imaging and Image Analysis, Michael D. Abràmoff,  Mona K. Garvin,  Milan Sonka,  IEEE Trans Med Imaging. 2010 January 1; 3: 169–208. doi:10.1109/RBME.2010.2084567.    20.   E YEPACS LLC PHOTOGRAPHER MANUAL  Downloaded June 2017:   https://www.eyepacs.org/photographer/protocol.jsp#image_right      21.   2016:   Retinal Imagi

In [None]:
from flask import Flask, request, render_template_string, redirect, url_for
from pyngrok import ngrok
import os

app = Flask(__name__)

# Read the ngrok authtoken from the NGROK_AUTHTOKEN environment variable;
# credentials must not be hard-coded in the notebook.
ngrok.set_auth_token(os.environ["NGROK_AUTHTOKEN"])

# HTML templates
# Upload page: a multipart form that POSTs the selected file (field name "pdf")
# to /upload.
html_upload_pdf = '''
<!DOCTYPE html>
<html>
<head>
    <title>Docufy AI Document based Chatbot</title>
</head>
<body style="background-color: #2C2F33; color: white;">
    <h1>Upload PDF Document</h1>
    <form action="/upload" method="POST" enctype="multipart/form-data">
        <input type="file" name="pdf" accept="application/pdf" required>
        <input type="submit" value="Upload">
    </form>
</body>
</html>
'''

# Chat page template with the restyled UI (side buttons removed).
# Template variables: `query` and `result`; each block is rendered only when
# its variable is non-empty. The form has no action attribute, so it posts
# back to the URL the page was served from.
html_chatbot = '''
<!DOCTYPE html>
<html>
<head>
    <title>LawGPT Version Alpha</title>
    <style>
        body { margin: 0; padding: 0; font-family: 'Arial', sans-serif; background-color: #0A0A23; color: #ffffff; }
        .main { width: 100%; padding: 20px; display: flex; flex-direction: column; justify-content: space-between; }
        .header { display: flex; justify-content: space-between; align-items: center; }
        .chat-window { background-color: #0F0F1F; padding: 20px; margin: 20px 0; border-radius: 10px; overflow-y: auto; position: relative; }
        .chat-message { margin-bottom: 20px; }
        .user-query { font-weight: bold; }
        .chat-response p { margin: 5px 0; line-height: 1.5; }
        .input-area { display: flex; align-items: center; }
        .input-area input { width: 90%; padding: 10px; border-radius: 5px; border: 1px solid #3E3E5A; background-color: #0F0F1F; color: #ffffff; margin-right: 10px; }
        .input-area button { background-color: #E94560; color: #ffffff; padding: 10px 20px; border: none; cursor: pointer; border-radius: 5px; }
    </style>
</head>
<body>
    <div class="main">
        <div class="header">
            <span class="model">Model: All-MiniLM LawGPT</span>
        </div>
        <div class="chat-window">
            {% if query %}
            <div class="chat-message user-query">
                <p>{{ query }}</p>
            </div>
            {% endif %}
            {% if result %}
            <div class="chat-message chat-response">
                <p>{{ result }}</p>
            </div>
            {% endif %}
        </div>
        <div class="input-area">
            <form method="post">
                <input type="text" id="query" name="query" placeholder="Enter your query here">
                <button type="submit">Send</button>
            </form>
        </div>
    </div>
</body>
</html>
'''

@app.route('/', methods=['GET'])
def upload_pdf():
    """Return the rendered PDF upload page."""
    upload_page = render_template_string(html_upload_pdf)
    return upload_page

@app.route('/upload', methods=['POST'])
def handle_upload():
    """Store the uploaded PDF, index it, and redirect to the chatbot page.

    The file is saved as ``user_document.pdf`` in the current working
    directory and its path is printed. A request without a usable ``pdf``
    file gets a 400 response rather than a 200 carrying a failure message.
    """
    file = request.files.get('pdf')
    if file:
        # Save the PDF to the environment
        file_path = os.path.join(os.getcwd(), "user_document.pdf")
        file.save(file_path)
        print(file_path)
        # Generate chunks and embeddings (done only once when PDF is uploaded)
        save_chunks_and_embeddings(file_path)

        # Redirect to the chatbot page
        return redirect(url_for('chatbot'))
    return "PDF upload failed", 400

@app.route('/chatbot', methods=['GET', 'POST'])
def chatbot():
    """Render the chat page; on POST with a non-empty query, answer it.

    The retrieved context is stored in the module-level ``context_chunks``.
    A GET request or an empty query renders the page with empty ``result``
    and ``query`` values.
    """
    global context_chunks
    user_query = request.form['query'] if request.method == 'POST' else None
    if not user_query:
        return render_template_string(html_chatbot, result="", query="")

    # Process the query using freshly retrieved context
    context_chunks = dynamic_context_adjustment(user_query)
    answer = get_response_General(global_sentiment_score, context_chunks, user_query)
    return render_template_string(html_chatbot, result=answer, query=user_query)

if __name__ == '__main__':
    # The tunnel and the Flask server must use the same local port.
    flask_port = 5000

    # Setup Ngrok tunnel and show its public URL
    public_url = ngrok.connect(flask_port)
    print(" * ngrok tunnel: ", public_url)

    # Run the Flask app (blocks until the server stops)
    app.run(port=flask_port)


 * ngrok tunnel:  NgrokTunnel: "https://e1f9-34-91-64-92.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [09/Sep/2024 15:33:45] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [09/Sep/2024 15:33:46] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -


/content/user_document.pdf


INFO:werkzeug:127.0.0.1 - - [09/Sep/2024 15:34:32] "[32mPOST /upload HTTP/1.1[0m" 302 -
INFO:werkzeug:127.0.0.1 - - [09/Sep/2024 15:34:32] "GET /chatbot HTTP/1.1" 200 -
ERROR:__main__:Exception on /chatbot [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 2529, in wsgi_app
    response = self.full_dispatch_request()
  File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 1825, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 1823, in full_dispatch_request
    rv = self.dispatch_request()
  File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 1799, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
  File "<ipython-input-38-f77fa5f1e432>", line 101, in chatbot
    context_chunks = dynamic_context_adjustment(query)
  File "<ipython-input-35-c15da1a66905>", line 46, in dynamic

40
