In [1]:
from pydantic import BaseModel  # Import Pydantic for data validation
import pymongo  # Import PyMongo for MongoDB connection
import traceback  # Import traceback for error handling
import os, sys  # Import OS and SYS for system operations

### Setting up MongoDB to store and manage chat history for the document-based AI application

In [2]:
import pymongo
import os
from dotenv import load_dotenv

# Load environment variables from a local .env file (if present).
load_dotenv()

# Connect to MongoDB.
# MongoClient() connects lazily: constructing it does not contact the server,
# so an explicit "ping" is issued before success is reported.
try:
    mongo_url = os.getenv("MONGO_URL")
    if not mongo_url:
        # Without this check MongoClient(None) falls back to its default host
        # instead of the intended deployment.
        raise ValueError("MONGO_URL environment variable is not set")

    # NOTE(review): tlsAllowInvalidCertificates=True disables certificate
    # validation. It is kept so the current setup keeps working, but it should
    # be dropped once the deployment presents a trusted certificate.
    client = pymongo.MongoClient(mongo_url, tls=True, tlsAllowInvalidCertificates=True)
    client.admin.command("ping")  # raises if the server cannot be reached
    db = client["chat_with_doc"]
    print("✅ Successfully connected to MongoDB!")
except Exception as e:
    print("❌ MongoDB Connection Failed:", e)

✅ Successfully connected to MongoDB!


In [6]:
import openai
import os
from dotenv import load_dotenv

# Pull OPENAI_API_KEY (and any other settings) in from the .env file.
load_dotenv()

# NOTE(review): this rebinds `client`, which the MongoDB cell above bound to
# the MongoClient; from here on `client` refers to the OpenAI client.
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Smoke test: send a single user message and print the model's reply.
# Any failure (request or response handling) is reported instead of raised.
test_messages = [{"role": "user", "content": "Hello, how are you?"}]
try:
    response = client.chat.completions.create(
        messages=test_messages,
        model="gpt-3.5-turbo",
    )
    reply_text = response.choices[0].message.content
    print("OpenAI API is working! Response:", reply_text)
except Exception as e:
    print("❌ OpenAI API Error:", e)

OpenAI API is working! Response: Hello! I'm just a virtual assistant, so I don't have feelings, but thank you for asking. How can I assist you today?


In [None]:
pip install langchain langchain-openai langchain-community faiss-cpu boto3 awswrangler pymupdf

In [None]:
%pip install fastapi uvicorn nest_asyncio pyngrok boto3 fitz


In [4]:
import boto3
import fitz  # PyMuPDF
import io

s3 = boto3.client("s3")  

def extract_text_from_s3(bucket_name="ai-document-storage", s3_key="ai_document.pdf"):
    """Read a PDF object from S3 and return its text, one string per page.

    Uses the module-level ``s3`` client.

    Args:
        bucket_name: Name of the S3 bucket holding the PDF.
        s3_key: Object key of the PDF inside the bucket.

    Returns:
        A list with the plain text of every page, in document order, or
        ``None`` if downloading or parsing failed (the error is printed).
    """
    try:
        obj = s3.get_object(Bucket=bucket_name, Key=s3_key)
        pdf_bytes = obj["Body"].read()

        # The context manager closes the document once the text has been
        # extracted, so repeated calls do not accumulate open documents.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            return [page.get_text("text") for page in doc]

    except Exception as e:
        print(f"❌ Failed to process PDF from S3: {e}")
        return None

# Fetch the per-page text of the default S3 document
pdf_text = extract_text_from_s3()

# Preview at most the first three pages; nothing is printed when extraction
# failed (None) or produced no pages.
if pdf_text:
    preview_pages = pdf_text[:3]
    for i, page in enumerate(preview_pages):
        print(f"Page {i+1}:\n{page}\n")

Page 1:
See discussions, stats, and author profiles for this publication at: https://www.researchgate.net/publication/383212769
AI in Healthcare: Revolutionizing Diagnosis and Therapy
Article  in  International Journal of Multidisciplinary Sciences and Arts · July 2024
DOI: 10.47709/ijmdsa.v3i3.4546
CITATIONS
31
READS
777
4 authors, including:
Nasrullah Abbasi
Independent Researcher
15 PUBLICATIONS   132 CITATIONS   
SEE PROFILE
All content following this page was uploaded by Nasrullah Abbasi on 18 August 2024.
The user has requested enhancement of the downloaded file.


Page 2:
 
 
International Journal of 
Multidisciplinary Sciences and Arts 
E-ISSN : 2962-1658 
Volume 3, Number 3, July 2024 
https://doi.org/10.47709/ijmdsa.v3i3.4546  
 
  
 
This is an Creative Commons License This work is licensed under a Creative 
Commons Attribution-NonCommercial 4.0 International License. 
118 
 
AI in Healthcare: Revolutionizing Diagnosis and Therapy 
 
Shah Zeb1, Nizamullah FNU2, Nasrullah Abb

#### Testing the FAISS search functionality: confirm that FAISS search works correctly, i.e., that a user query returns relevant results from the stored data.

In [None]:
import requests
url = "http://127.0.0.1:8000/process-pdf"

# Send a POST request to process the PDF stored in S3.
# requests waits indefinitely unless a timeout is given; (connect, read) in
# seconds, with a generous read timeout because processing may take a while.
response = requests.post(url, timeout=(5, 300))

print(response.status_code)
# An error response is not guaranteed to carry JSON, so fall back to the raw body.
try:
    print(response.json())
except ValueError:
    print(response.text)

200
{'message': '✅ PDF processed and stored in FAISS successfully!'}


In [None]:
import requests
import json

# URL of the chat endpoint on the local FastAPI server
url = "http://127.0.0.1:8000/chat"

# JSON payload for the POST request
payload = {
    "session_id": "test_session_1",  # This can be any session ID
    "user_input": "What is AI in healthcare?",
    "data_source": "PDF"
}

# Send the POST request.
# requests waits indefinitely unless a timeout is given; (connect, read) in seconds.
response = requests.post(url, json=payload, timeout=(5, 120))

print(response.status_code)
# An error response is not guaranteed to carry JSON, so fall back to the raw body.
try:
    print(response.json())
except ValueError:
    print(response.text)

200
{'response': "AI in healthcare refers to the utilization of machine learning algorithms and cognitive technologies to aid in the diagnosis, treatment, and overall management of patient care. It involves advanced image recognition and pattern analysis to help clinicians identify diseases earlier, leading to more effective treatments and improved patient outcomes. The integration of AI technology in healthcare is revolutionizing various aspects of the industry, including drug discovery, surgical robotics, diagnostics, personalized treatment, natural language processing, AI-enhanced telemedicine, wearable health technologies, and ethical AI governance. As AI technology continues to advance, its impact on healthcare is expected to become increasingly significant, driving progress in patient care, operational efficiency, and medical research with collaborative efforts among technologists, clinicians, researchers, and policymakers playing a crucial role in maximizing AI's potential.", 's

In [24]:
import requests

url = "http://127.0.0.1:8000/chat"

# NOTE(review): the recorded run of this cell returned HTTP 500. Unlike the
# request that succeeded, this payload sends an empty "session_id" and
# "ai_document.pdf" (rather than "PDF") as "data_source" -- possibly the
# cause; confirm against the server's request handling.
data = {
    "session_id": "",
    "user_input": "why is ai important in health care.",
    "data_source": "ai_document.pdf"
}

# requests waits indefinitely unless a timeout is given; (connect, read) in seconds.
response = requests.post(url, json=data, timeout=(5, 120))

# Print the raw body rather than parsed JSON so error responses stay visible.
print("Raw Response Status:", response.status_code)
print("Raw Response Text:", response.text)

Raw Response Status: 500
Raw Response Text: {"error":"Failed to generate AI response"}


### Read the PDF from S3

In [None]:
import boto3
import fitz  # PyMuPDF
import io

# S3 client and the location of the source document
s3 = boto3.client("s3")
bucket_name, s3_key = "ai-document-storage", "ai_document.pdf"

# Download the object and copy its bytes out through an in-memory buffer;
# the buffer is closed as soon as the bytes have been read.
response = s3.get_object(Key=s3_key, Bucket=bucket_name)
with io.BytesIO(response["Body"].read()) as pdf_file:
    pdf_bytes = pdf_file.read()

# Parse the bytes as a PDF. `doc` is deliberately left open here.
doc = fitz.open(filetype="pdf", stream=pdf_bytes)

# Sanity check: show the text of the first page
first_page_text = doc.load_page(0).get_text("text")
print(first_page_text)

See discussions, stats, and author profiles for this publication at: https://www.researchgate.net/publication/383212769
AI in Healthcare: Revolutionizing Diagnosis and Therapy
Article  in  International Journal of Multidisciplinary Sciences and Arts · July 2024
DOI: 10.47709/ijmdsa.v3i3.4546
CITATIONS
31
READS
777
4 authors, including:
Nasrullah Abbasi
Independent Researcher
15 PUBLICATIONS   132 CITATIONS   
SEE PROFILE
All content following this page was uploaded by Nasrullah Abbasi on 18 August 2024.
The user has requested enhancement of the downloaded file.



### Modify the document content

In [None]:
# Text to add to the PDF. The '#' / '##' heading markers and the leading and
# trailing newlines are part of the string itself.
new_content = """
# E-Health at Psymeon
The e-health sector of Psymeon includes the POCKET-Therapist series of applications. These digital health tools are designed to help manage mental health conditions such as depression, anxiety, PTSD, and substance use disorders. They can be used preventively or as a complement to ongoing therapy.

## ALVIE - A Digital Health Application
ALVIE is a digital health application developed by Psymeon to support individuals with depression. It offers tools for managing depressive symptoms and can be used alongside therapy. ALVIE is part of the POCKET-Therapist series, which aims to bridge gaps in care for mental health patients.

# Consulting Services
Psymeon offers professional consulting services to help leaders overcome stress, uncertainty, and challenges in both personal and professional areas. These services aim to provide individuals with the support they need during difficult times.

# Research and AI in Medicine
Psymeon is at the forefront of applying artificial intelligence in medicine, particularly in the areas of mental health and neuroscience. We are actively developing AI-driven solutions to improve diagnostics, treatment plans, and patient outcomes. Our research focuses on understanding brain function and mental health disorders, and creating innovative treatment methods.
"""

# Append the new content on a fresh page at the end of the document.
# Writing it onto the existing last page at a fixed point would print over
# that page's content, and Page.insert_text() does not wrap long lines, so
# each paragraph would run past the right edge of the page.
last_page = doc.load_page(doc.page_count - 1)
page = doc.new_page(width=last_page.rect.width, height=last_page.rect.height)
page_num = page.number  # index of the appended page (now the last page)

# Lay the text out inside a box with a 50pt margin on every side.
# insert_textbox() wraps lines at the box width and returns a negative value
# (writing nothing) when the text does not fit into the box.
margin = 50
text_box = (margin, margin, page.rect.width - margin, page.rect.height - margin)
unused_height = page.insert_textbox(text_box, new_content, fontsize=12)
if unused_height < 0:
    raise ValueError("new_content does not fit on a single page at fontsize=12")

# Save the modified PDF to a new local file
modified_pdf_path = "/tmp/modified_ai_document.pdf"
doc.save(modified_pdf_path)

print(f"New content added to the PDF. Saved at {modified_pdf_path}")

In [20]:
import os

# Confirm that the modified PDF is present on the local filesystem.
modified_pdf_path = "/tmp/modified_ai_document.pdf"
if not os.path.exists(modified_pdf_path):
    print("File not found.")
else:
    print(f"File exists at: {modified_pdf_path}")

File exists at: /tmp/modified_ai_document.pdf


### Upload the modified file to the S3 bucket

In [None]:
import boto3

# S3 client plus the source file and destination of the upload
s3 = boto3.client("s3")
file_path = "/tmp/modified_ai_document.pdf"
bucket_name = "ai-document-storage"
# NOTE(review): this is the same key the original document was read from in
# this notebook -- presumably the upload replaces it; confirm that is intended.
s3_key = "ai_document.pdf"

# Push the local file to the bucket under `s3_key`
s3.upload_file(Filename=file_path, Bucket=bucket_name, Key=s3_key)

print(f"Successfully uploaded {file_path} to S3 with key {s3_key}")

Successfully uploaded /tmp/modified_ai_document.pdf to S3 with key ai_document.pdf


In [None]:
import boto3
import io
import fitz  # PyMuPDF for reading PDF files

# S3 configuration
bucket_name = "ai-document-storage"
s3_key = "ai_document.pdf"  

# AWS S3 client
s3 = boto3.client('s3')

# Function to read PDF from S3 and extract text
def extract_pdf_text_from_s3(bucket_name, s3_key):
    """Download a PDF from S3 and return the text of all pages as one string.

    Uses the module-level ``s3`` client.

    Args:
        bucket_name: Name of the S3 bucket holding the PDF.
        s3_key: Object key of the PDF inside the bucket.

    Returns:
        The concatenated plain text of every page, in document order, or
        ``None`` if downloading or parsing failed (the error is printed).
    """
    try:
        # Get the PDF file from S3
        obj = s3.get_object(Bucket=bucket_name, Key=s3_key)
        pdf_bytes = obj["Body"].read()

        # An in-memory stream carries no file extension, so the type is stated
        # explicitly. The context manager closes the document when done.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            # Join once instead of growing a string page by page.
            return "".join(page.get_text("text") for page in doc)
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return None

# Report whether a keyword occurs in the extracted text
def search_for_added_content(extracted_text, keyword):
    """Print whether ``keyword`` occurs in ``extracted_text``, ignoring case.

    Args:
        extracted_text: Text to search in.
        keyword: Phrase to look for; it is echoed in the message as given.

    Returns:
        None. The result is reported with ``print`` only.
    """
    needle = keyword.lower()
    haystack = extracted_text.lower()
    if needle not in haystack:
        print(f"Keyword '{keyword}' not found in the extracted text.")
        return
    print(f"Found the keyword: '{keyword}' in the extracted text.")

# Pull the full text of the uploaded PDF back out of S3
extracted_text = extract_pdf_text_from_s3(bucket_name, s3_key)

# Nothing to check when extraction failed (None) or produced no text
if not extracted_text:
    print("Failed to extract text from the PDF.")
else:
    print("Checking if new content is added...")

    # Look for each heading of the added content in turn
    for expected_heading in (
        "E-Health at Psymeon",
        "ALVIE - A Digital Health Application",
        "Consulting Services",
        "Research and AI in Medicine",
    ):
        search_for_added_content(extracted_text, expected_heading)

Checking if new content is added...
Found the keyword: 'E-Health at Psymeon' in the extracted text.
Found the keyword: 'ALVIE - A Digital Health Application' in the extracted text.
Found the keyword: 'Consulting Services' in the extracted text.
Found the keyword: 'Research and AI in Medicine' in the extracted text.
