In [2]:
import json
import faiss
import numpy as np
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer

# Function to extract text from a PDF using pypdf
def extract_text_from_pdf(pdf_path):
    """Return the non-empty paragraphs of a PDF as a flat list of strings.

    Each page is read with ``PdfReader`` and its extracted text is split on
    blank lines (``"\\n\\n"``). Every piece is stripped of surrounding
    whitespace, and pieces that end up empty are dropped. Pages for which
    ``extract_text()`` returns nothing are skipped.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        list[str]: Paragraph chunks in page order.
    """
    # Lazily pull the raw text of each page; falsy results are filtered below.
    page_texts = (page.extract_text() for page in PdfReader(pdf_path).pages)

    return [
        piece
        for raw in page_texts
        if raw
        for piece in map(str.strip, raw.split("\n\n"))
        if piece
    ]

# Load sentence embedding model (the chatbot cell reuses this same `model`
# to embed queries, so it must stay a module-level name)
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Path to the PDF file
pdf_path = "The_GALE_ENCYCLOPEDIA_of_MEDICINE_SECOND.pdf"

# Extract text chunks from the PDF
chunks = extract_text_from_pdf(pdf_path)

# Fail early with a readable message: with nothing to embed, the
# `embeddings.shape[1]` lookup below is not guaranteed to have a second axis
# (NOTE(review): assumed from sentence-transformers behaviour on empty input).
if not chunks:
    raise ValueError(f"No extractable text found in {pdf_path!r}; nothing to index.")

# Generate embeddings for each chunk (one row per chunk, same order as `chunks`)
embeddings = model.encode(chunks, convert_to_numpy=True)

# Create FAISS index sized to the embedding width and add every row
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Store text chunks with embeddings. Entries are built in the same order the
# rows were added to the index, and the chatbot cell looks results up as
# `data[<index position>]`, so this ordering must not change.
data = [{"text": chunk, "embedding": embedding.tolist()} for chunk, embedding in zip(chunks, embeddings)]

# Save FAISS index to a file
faiss.write_index(index, "medical_faiss_index.bin")

# Save extracted text and embeddings to a JSONL file (one JSON object per line).
# The encoding is pinned so the file does not depend on the platform locale.
with open("medical_data.jsonl", "w", encoding="utf-8") as f:
    for entry in data:
        f.write(json.dumps(entry) + "\n")

print(f"Processed {len(chunks)} text chunks. Embeddings stored successfully!")


  from .autonotebook import tqdm as notebook_tqdm


Processed 759 text chunks. Embeddings stored successfully!


In [1]:
# Chatbot function
def chatbot(query, max_distance=None):
    """Return the stored text chunk closest to ``query``.

    Relies on three names defined by the indexing cell, which therefore has to
    be executed first in a fresh kernel: ``model`` (embeds the query),
    ``index`` (nearest-neighbour search) and ``data`` (list of dicts whose
    ``"text"`` entry is returned).

    Args:
        query: The user's question as a string.
        max_distance: Optional cutoff on the distance reported by
            ``index.search`` for the best hit. When given, a hit farther away
            than this is treated as "no match". The default ``None`` applies
            no cutoff, in which case any non-empty index always yields an
            answer, however unrelated. For ``IndexFlatL2`` the reported value
            is presumably the squared L2 distance; confirm against the FAISS
            docs before picking a threshold.

    Returns:
        str: The matching chunk's text, or a fixed apology message when the
        search reports no result (position ``-1``) or the hit exceeds
        ``max_distance``.
    """
    query_embedding = model.encode([query])
    distances, positions = index.search(query_embedding, k=1)  # Get top-1 match

    # Convert to plain Python scalars so comparison and list indexing do not
    # depend on numpy scalar types.
    best_position = int(positions[0][0])
    best_distance = float(distances[0][0])

    too_far = max_distance is not None and best_distance > max_distance
    if best_position == -1 or too_far:
        return "I'm sorry, I don't have relevant information on that."

    return data[best_position]["text"]

# Test chatbot interactively; type "exit" or "quit" to stop.
# Requires the indexing cell to have been run first, because `chatbot` reads
# the `model`, `index` and `data` names that cell defines.
while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Stdin closed or the cell was interrupted while waiting for input:
        # end the session quietly instead of surfacing a traceback.
        break
    # Ignore surrounding whitespace so that e.g. "exit " still ends the session
    if user_input.strip().lower() in ["exit", "quit"]:
        break
    response = chatbot(user_input)
    print("Bot:", response)


NameError: name 'model' is not defined