In [141]:
import os
import io
import fitz  # PyMuPDF (For extracting text from non-scanned PDFs)
import pytesseract  # OCR for scanned PDFs
import cv2  # OpenCV for image processing
import pdf2image  # Convert PDF to images
import re  # Regular expressions for cleaning text
import numpy as np
import pandas as pd  # For handling CSV
import requests  # For Google Maps API (Finding Nearby Specialists)
from tabulate import tabulate  # For structured output formatting
import ipywidgets as widgets
from IPython.display import display
from tabulate import tabulate

In [198]:
# Step 1: Upload PDF (Jupyter Compatible)
# File-upload widget restricted to a single .pdf selection; get_uploaded_pdf()
# reads the chosen file back from `upload_button.value`.
upload_button = widgets.FileUpload(accept='.pdf', multiple=False)
# Render the widget in the cell output so a file can be picked before the
# processing cell is run.
display(upload_button)

def get_uploaded_pdf(uploader=None):
    """Return the uploaded PDF as an in-memory binary stream, or None.

    Args:
        uploader: Object exposing a ``value`` attribute shaped like an
            ``ipywidgets.FileUpload`` value. Defaults to the notebook-level
            ``upload_button`` widget.

    Returns:
        io.BytesIO positioned at offset 0 holding the first uploaded file's
        bytes, or None when nothing has been uploaded yet.
    """
    if uploader is None:
        uploader = upload_button

    uploaded = uploader.value
    if not uploaded:
        return None

    # ipywidgets 7 exposes {filename: info_dict}; ipywidgets 8 exposes a tuple
    # of info dicts. Normalise both shapes to a list of info dicts.
    if isinstance(uploaded, dict):
        entries = list(uploaded.values())
    else:
        entries = list(uploaded)

    # 'content' may be bytes (v7) or a memoryview (v8); bytes() accepts both.
    return io.BytesIO(bytes(entries[0]['content']))

FileUpload(value={}, accept='.pdf', description='Upload')

In [174]:
# Step 2: Convert PDF to Images (if scanned)
def pdf_to_images(pdf_file, dpi=300):
    """Rasterise every page of a PDF into images (used for the OCR fallback).

    Args:
        pdf_file: Binary file-like object containing the PDF.
        dpi: Rendering resolution passed to ``pdf2image.convert_from_bytes``.

    Returns:
        The list of page images produced by ``pdf2image.convert_from_bytes``.
    """
    # Rewind before reading: the text-extraction step reads the same stream to
    # the end, so without this the OCR fallback would receive empty bytes.
    if hasattr(pdf_file, "seek"):
        pdf_file.seek(0)
    return pdf2image.convert_from_bytes(pdf_file.read(), dpi=dpi)

In [175]:
# Step 3: Extract Text using OCR (for scanned PDFs)
def ocr_extract_text(images):
    """Run Tesseract OCR over page images and return the concatenated text.

    Each image is converted to a NumPy array, reduced to grayscale, binarised
    with a fixed threshold of 150, and passed to Tesseract with the English
    language model. Every page's text is followed by a newline.
    """
    page_texts = []
    for page_image in images:
        grayscale = cv2.cvtColor(np.array(page_image), cv2.COLOR_RGB2GRAY)
        # cv2.threshold returns (retval, image); only the image is needed.
        binarised = cv2.threshold(grayscale, 150, 255, cv2.THRESH_BINARY)[1]
        page_texts.append(pytesseract.image_to_string(binarised, lang="eng") + "\n")
    return "".join(page_texts)

In [176]:

# Step 4: Extract Text from Selectable PDFs
def extract_text_from_pdf(pdf_file):
    """Return the selectable text of a PDF, one newline appended per page.

    The stream is rewound to offset 0 and read in full before being handed to
    PyMuPDF, so the caller's current position does not matter.
    """
    pdf_file.seek(0)
    pdf_bytes = pdf_file.read()
    with fitz.open(stream=pdf_bytes, filetype="pdf") as document:
        page_texts = [page.get_text("text") + "\n" for page in document]
    return "".join(page_texts)

In [177]:
# Step 5: Clean Extracted Text
def clean_text(text):
    """Normalise extracted report text to a single clean line.

    Every character outside ``a-z A-Z 0-9 . , % ( ) : / -`` (this includes
    newlines and tabs) is replaced by a space, then runs of whitespace are
    collapsed to one space and the ends are trimmed.

    Args:
        text: Raw text from PDF extraction or OCR; None or "" is accepted.

    Returns:
        The cleaned string ("" for empty or None input).
    """
    if not text:
        return ""
    # Replace disallowed characters first and collapse whitespace afterwards;
    # the reverse order leaves runs of spaces where characters were removed.
    text = re.sub(r'[^a-zA-Z0-9.,%():/-]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

In [178]:
# Step 6: Read Threshold Values from CSV
def load_thresholds(csv_path="threshold3.csv"):
    """Load the reference-range table from a CSV file.

    Returns the DataFrame produced by ``pd.read_csv`` when ``csv_path`` exists;
    otherwise prints a warning and returns None.
    """
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path)

    print("⚠️ Warning: Threshold CSV file not found!")
    return None

# Loaded once when this cell runs, from the default path "threshold3.csv".
# The value is None when that file is missing, in which case
# extract_lab_values() returns an empty list.
threshold_df = load_thresholds()

In [186]:
# Step 7: Diet Recommendations
# Dietary advice keyed by test name, then by direction ("low" / "high").
# Lookups elsewhere in this notebook use the test name exactly as written in
# the threshold CSV, so key spelling and casing matter.
# Not every test has both directions (Lymphocytes and Globulins define only
# "high"), so consumers should read entries with .get() rather than indexing.
recommendations = {
    "Glucose": {
        "high": "Reduce sugar intake, increase fiber, exercise regularly, and monitor blood sugar levels.",
        "low": "Eat frequent small meals, consume complex carbohydrates, and avoid excessive fasting."
    },
    "RBC": {
        "low": "Increase iron intake (red meat, leafy greens), take iron supplements, and check for anemia.",
        "high": "Stay hydrated and monitor for dehydration or lung disease."
    },
    "Creatinine": {
        "low": "Increase protein intake with lean meats, fish, eggs, dairy, and legumes.",
        "high": "Increase water intake, reduce protein consumption, and consult a nephrologist."
    },
    "Sodium": {
        "low": "Increase salt intake moderately, stay hydrated, and monitor for adrenal issues.",
        "high": "Reduce sodium intake, drink more water, and check for kidney function."
    },
    "Potassium": {
        "low": "Eat bananas, potatoes, and oranges to boost potassium levels.",
        "high": "Reduce potassium-rich foods and monitor kidney function."
    },
    "Lymphocytes": {
        "high": "Check for infections or immune conditions, and consider consulting a hematologist."
    },
    "Chloride": {
        "low": "Increase salt intake slightly and check for kidney or adrenal issues.",
        "high": "Stay hydrated and check for possible kidney dysfunction."
    },
    "Transferrin": {
        "low": "Consume more iron-rich foods, and consider iron supplements if needed.",
        "high": "Monitor for iron overload and consult a specialist if needed."
    },
    # NOTE(review): this key and "Transferrin saturation" further down differ
    # only in letter case; both are kept so either CSV spelling resolves.
    "Transferrin Saturation": {
        "low": "Increase iron and vitamin C intake to improve absorption.",
        "high": "Check for iron overload conditions like hemochromatosis."
    },
    "Copper": {
        "low": "Eat nuts, seeds, whole grains, and dark chocolate to boost copper levels.",
        "high": "Limit copper supplements and check liver function."
    },
    "Total protein": {
        "low": "Increase protein intake with lean meats, dairy, and legumes.",
        "high": "Check for dehydration or possible liver/kidney issues."
    },
    "Albumin": {
        "low": "Increase protein intake, monitor liver function, and stay hydrated.",
        "high": "Check for dehydration and kidney health."
    },
    "Globulins": {
        "high": "May indicate infection or immune disorder—monitor and consult if symptoms persist."
    },
    "Triglycerides": {
        "low": " Consume healthy fats (avocados, nuts, olive oil) and complex carbs (whole grains, fruits).",
        "high": "Reduce sugar, refined carbs, and saturated fats; increase fiber, healthy fats, and exercise."
    },
    "Uric acid": {
        "low": " Increase protein intake moderately and maintain hydration.",
        "high": " Limit purine-rich foods (red meat, seafood, alcohol); drink more water and eat cherries, citrus fruits."
    },
    "Transferrin saturation": {
        "low": " Consume more iron-rich foods like red meat, spinach, legumes, fortified cereals, and vitamin C-rich foods to enhance iron absorption",
        # Previously an empty string, which rendered as a blank advice cell.
        # Reuses the "high" text of the "Transferrin Saturation" entry above.
        "high": "Check for iron overload conditions like hemochromatosis."
    },
    "HDL cholesterol": {
        "low": "Increase healthy fats (olive oil, nuts, fatty fish), exercise regularly, and avoid trans fats.",
        "high": " Increase intake of fiber-rich foods (fruits, vegetables, whole grains) and reduce saturated fat intake."
    },
    "Monocytes": {
        "low": "Consume immune-boosting foods like citrus fruits, garlic, turmeric, and lean proteins.",
        "high": " Eat anti-inflammatory foods such as berries, leafy greens, and fatty fish while reducing processed foods."
    },
    "Total bilirubin": {
        "low": "Focus on a balanced diet with healthy fats, whole foods, and adequate hydration.",
        "high": "Stay hydrated, eat antioxidant-rich foods (leafy greens, fruits), and avoid alcohol."
    },
    "Urea": {
        "low": "Increase protein intake with lean meats, dairy, eggs, and legumes.",
        "high": "Reduce protein intake, especially red meat, and drink plenty of water to support kidney function."
    },
    "Mean corpuscular volume (MCV)": {
        "low": "Increase iron-rich foods like red meat, spinach, legumes, and fortified cereals.",
        "high": "Increase vitamin B12 and folate intake (eggs, dairy, leafy greens, citrus fruits)."
    },
    "Random glucose": {
        "low": " Eat small, frequent meals with complex carbs (whole grains, fruits, nuts) and avoid long fasting periods.",
        "high": "Reduce sugar, refined carbs, and processed foods; focus on fiber-rich and low-glycemic foods."
    },
    "Packed Cell Volume (PCV)": {
        "low": "Increase iron, vitamin B12, and folate intake (lean meats, leafy greens, beans).",
        "high": " Stay hydrated and avoid excess iron and dehydrating factors like caffeine and alcohol."
    },
    "Hemoglobin (Hb)": {
        "low": "Increase iron-rich foods (red meat, spinach, legumes, fortified cereals) and vitamin C for better absorption.",
        "high": "Stay hydrated, reduce iron-rich foods if excessive, and monitor for underlying conditions."
    },
}

In [194]:
# Step 8: Specialist Recommendations (Based on Abnormal Values)
# Referral table keyed by test name. Each entry holds:
#   "condition"   - text shown after "This may indicate:" in the report output
#   "specialists" - list of doctor / hospital names printed one per line
# process_medical_report() looks a test up here only when its status is
# "Too High" or "Too Low", using the test name exactly as it appears in the
# threshold table.
# NOTE(review): the condition texts are strong medical statements shown directly
# to the user for any out-of-range value — confirm wording with a clinician.
specialist_recommendations = {
    "Calcium": {
        "condition": "Calcium deficiency, which may lead to bone problems.",
        "specialists": [
            "Dr Balamurugan J - Kauvery Hospitals",
            "Dr Kanniraj - Magna Ortho Clinic",
            "Chennai Ortho Clinic",
            "Shri Bone & Joints"
        ]
    },
    "Sodium": {
        "condition": "Sodium imbalance, which may affect heart health.",
        "specialists": [
            "Dr. D Vaidhynathan - Apollo",
            "Dr A.B Gopalamurugan - Royapettah Chennai",
            "Dr.Dhamodaran K - Sidharam Heart Clinic Adyar"
        ]
    },
    "Monocytes": {
        "condition": "Liver or tumor-related issues.",
        "specialists": [
            "Dr. Aswin Krishna - Apollo (Liver Specialist)",
            "Dr. S. Arulprakash - MGM Healthcare",
            "Dr. Anisha Ashok - Laser and Laparoscopic Hospital"
        ]
    },
    "Lymphocytes": {
        "condition": "Possible signs of cancer.",
        "specialists": [
            "Adyar Cancer Institute",
            "Dr Vimalathithan - C Dot Hospital",
            "MGM Cancer Institute"
        ]
    },
    "Potassium": {
        "condition": "Potassium imbalance, which may indicate diabetes.",
        "specialists": [
            "Dr. Mohan's Diabetes Specialities Centre - Gopalapuram",
            "Dr Shanmugasundar - Magna Clinic",
            "Dr. Kavitha G - Nannalam Clinic"
        ]
    },
    # NOTE(review): this entry only fires if the threshold table has a test
    # named exactly "Iron" — verify against the CSV.
    "Iron": {
        "condition": "Excess iron or low transferrin, which may indicate Hemochromatosis.",
        "specialists": [
            "Dr. B Benjamin - MGM Healthcare Malar Hospitals, Adyar",
            "Dr Akila Mani - Apollo Speciality Hospitals, Vanagaram",
            "Dr. Mohamed Sajjid - MS Child Care Clinic, Royapettah"
        ]
    },
    "Uric acid": {
        "condition": "Excessive uric acid, which may lead to gout.",
        "specialists": [
            "Dr. Waseem Ahmed N - Billroth Hospital, Raja Annamalai Puram",
            "Dr. Sheethal Suresh - MGM Healthcare Malar Hospitals, Adyar",
            "Dr. Krishnamurthy - Apollo Cancer Centers, Teynampet Chennai"
        ]
    },
    # NOTE(review): keyed by a condition rather than a test name, and the text
    # describes a combined lymphocyte + monocyte rule. The lookup code visible in
    # this notebook matches one test name at a time, so this entry is presumably
    # unreachable unless the CSV has a "Tuberculosis" row — TODO confirm.
    "Tuberculosis": {
        "condition": "High lymphocytes and monocytes, which may indicate tuberculosis.",
        "specialists": [
            "Dr. Raghavan K - Billroth Hospital, Raja Annamalai Puram",
            "Dr. Roshan Kumar - SIMS Hospital, Vadapalani",
            "Dr. Pravin K Aggarwal - Apollo Spectra Hospital, Alwarpet"
        ]
    },
    # NOTE(review): the diet table spells this test "Random glucose" (lower-case
    # g); with an exact-match lookup only one of the two spellings can match
    # the name used in the CSV.
    "Random Glucose": {
        "condition": "Abnormal levels may cause Diabetic Ketoacidosis.",
        "specialists": [
            "Dr. Sundararaman P G - Billroth Hospital, Raja Annamalai Puram",
            # NOTE(review): "fot" below looks like a typo for "for" — confirm the
            # clinic's actual name before changing this user-facing string.
            "Dr. Bharat R - Arka Center fot Hormonal Health, Anna Nagar",
            "Dr. Geethalakshmi - Dr. Kamakshi Memorial Hospital, Pallikaranai"
        ]
    },
    "Albumin": {
        # The embedded "\n" splits this condition text across two output lines.
        "condition": "Low albumin levels (hypoalbuminemia) \n can cause fluid retention and swelling (Edema)",
        "specialists": [
            "Dr. Jeysel Suraj - Suraj Hospital, Medavakkam",
            "Dr. Radhiga G - Billroth Hospital, Raja Annamalai Puram",
            "Dr. V. Jina Das - MGM Healthcare Malar Hospitals, Adyar"
        ]
    }
}

In [195]:
# Step 9: Extract Lab Values (compare report values against the threshold table)
def _parse_threshold_bound(raw):
    """Convert one MIN/MAX cell to float, or None when the bound is absent.

    A bound counts as absent when the cell is None, NaN, empty, or "-" (with
    or without surrounding whitespace). Raises ValueError/TypeError for cells
    that are present but not numeric.
    """
    if raw is None:
        return None
    if isinstance(raw, str):
        raw = raw.strip()
        if raw in ("", "-"):
            return None
        return float(raw)
    if pd.isna(raw):
        return None
    return float(raw)


def _diet_advice(test_name, direction):
    """Look up dietary advice for a test; exact key first, then ignoring case."""
    entry = recommendations.get(test_name)
    if entry is None:
        wanted = test_name.casefold()
        entry = next(
            (advice for name, advice in recommendations.items() if name.casefold() == wanted),
            {},
        )
    # A missing or empty entry falls back to the generic message.
    return entry.get(direction) or "No specific dietary advice."


def extract_lab_values(text, threshold_df):
    """Find each known test in the report text and classify its value.

    Args:
        text: Cleaned report text (a single string).
        threshold_df: DataFrame with "TEST", "MIN VALUE" and "MAX VALUE"
            columns, or None when no threshold table is available.

    Returns:
        A list of ``[test_name, value, status, diet]`` rows, one per test whose
        name was found in ``text`` followed somewhere by a number. ``value``
        is a float, ``status`` is one of "🔻 Too Low", "🔺 Too High" or
        "✅ Normal". Rows with a missing test name or a non-numeric bound are
        skipped.

    Raises:
        KeyError: If ``threshold_df`` lacks one of the expected columns.
    """
    if threshold_df is None or not text:
        return []

    results = []

    for _, row in threshold_df.iterrows():
        raw_name = row["TEST"]
        # A blank TEST cell is read by pandas as NaN/None; skip such rows.
        if raw_name is None or (not isinstance(raw_name, str) and pd.isna(raw_name)):
            continue
        test_name = str(raw_name).strip()
        if not test_name:
            continue

        try:
            min_val = _parse_threshold_bound(row["MIN VALUE"])
            max_val = _parse_threshold_bound(row["MAX VALUE"])
        except (TypeError, ValueError):
            continue  # Skip invalid row

        # The leading look-behind stops the name matching inside a longer word
        # (e.g. "Albumin" within "Microalbumin"). No trailing boundary is
        # applied, so plural forms in the report still match.
        pattern = rf"(?<![A-Za-z]){re.escape(test_name)}.*?(\d+(?:\.\d+)?)"
        match = re.search(pattern, text, re.IGNORECASE)
        if not match:
            continue

        value = float(match.group(1))
        if min_val is not None and value < min_val:
            status = "🔻 Too Low"
            diet = _diet_advice(test_name, "low")
        elif max_val is not None and value > max_val:
            status = "🔺 Too High"
            diet = _diet_advice(test_name, "high")
        else:
            status = "✅ Normal"
            diet = "No dietary changes needed."

        results.append([test_name, value, status, diet])

    return results

In [202]:
import pandas as pd

# Step 10: Process Medical Report (end-to-end pipeline)
def _format_result_value(value):
    """Render a numeric result as text, dropping a redundant trailing '.0'."""
    rendered = str(value)
    if "." in rendered:
        rendered = rendered.rstrip("0").rstrip(".")
    return rendered


def _build_results_table(extracted_values):
    """Turn extracted rows into display rows: test, result, status, diet advice."""
    table_data = []
    for item in extracted_values:
        if len(item) < 3:
            continue  # Skip if the data format is incorrect

        test, value, status = item[:3]
        is_abnormal = "Too High" in status or "Too Low" in status
        # Reuse the advice computed by extract_lab_values instead of indexing
        # the diet table again: some tests define only one direction, and
        # direct indexing raised KeyError for the missing one. Normal results
        # keep an empty advice cell.
        advice = item[3] if is_abnormal and len(item) > 3 else ""
        table_data.append([test, _format_result_value(value), status, advice])
    return table_data


def _specialist_messages(extracted_values):
    """Build one referral message per abnormal test that has a specialist entry."""
    # Index by case-folded name so "Random glucose" and "Random Glucose" agree.
    by_name = {
        name.casefold(): entry for name, entry in specialist_recommendations.items()
    }
    messages = []
    for item in extracted_values:
        if len(item) < 3:
            continue
        test, status = item[0], item[2]
        if "Too High" not in status and "Too Low" not in status:
            continue
        entry = by_name.get(str(test).casefold())
        if entry is None:
            continue

        parts = [
            f"\n⚠️ You have abnormal levels of {test}.",
            f"\n🔹 This may indicate: {entry['condition']}",
            "\n Recommended Doctors & Hospitals:",
        ]
        parts.extend(f"\n🔹 {specialist}" for specialist in entry["specialists"])
        messages.append("".join(parts))
    return messages


def process_medical_report():
    """Run the full pipeline on the uploaded PDF and display the findings.

    Steps: read the uploaded file, extract selectable text (falling back to
    OCR when none is found), clean it, compare values against the threshold
    table, show a styled results table, and print specialist suggestions for
    abnormal values. Returns None; all output is printed or displayed.
    """
    print("\n📂 Upload a medical report PDF...")
    pdf_file = get_uploaded_pdf()

    if not pdf_file:
        print("❌ No file uploaded. Exiting...")
        return

    print("\n📄 Processing Report...\n")

    # Try extracting text directly
    text = extract_text_from_pdf(pdf_file)

    if not text.strip():  # If no text found, use OCR
        print("⚠️ No selectable text found. Using OCR...\n")
        # The direct extraction above read the stream to its end; rewind so
        # the rasteriser sees the whole document.
        pdf_file.seek(0)
        images = pdf_to_images(pdf_file)
        text = ocr_extract_text(images)

    text = clean_text(text)  # Clean extracted text

    # Extract lab values
    extracted_values = extract_lab_values(text, threshold_df)

    # Display Lab Test Results in a structured table using pandas
    table_data = _build_results_table(extracted_values)
    if table_data:
        df = pd.DataFrame(table_data, columns=["Test", "Result", "Status", "Diet Recommendation"])

        # Set index to start from 1
        df.index = range(1, len(df) + 1)

        print("\n📊 Lab Test Results 📊\n")
        display(df.style.set_properties(**{"text-align": "center"}).set_table_styles([
            {'selector': 'th', 'props': [('background-color', '#4CAF50'), ('color', 'white'), ('font-weight', 'bold'), ('text-align', 'center')]},
            {'selector': 'td', 'props': [('border', '1px solid black'), ('padding', '5px'), ('text-align', 'center')]},
            {'selector': 'td:nth-child(3)', 'props': [('min-width', '100px'), ('max-width', '250px')]},  # "Status" column width
            {'selector': 'td:nth-child(4)', 'props': [('min-width', '250px'), ('max-width', '400px')]}  # "Diet Recommendation" column width
        ]))
    else:
        print("⚠️ No lab test values detected in the report.")

    # Check for abnormalities and suggest specialists
    doctor_recommendations = _specialist_messages(extracted_values)

    # Print all recommendations at once
    if doctor_recommendations:
        print("\n".join(doctor_recommendations))
    else:
        print("\n✅ No specialist recommendations needed.")



In [203]:
# Run the script after uploading a PDF
# (pick a file in the upload widget first; when no file has been uploaded the
# function prints a notice and returns without processing).
process_medical_report()


📂 Upload a medical report PDF...

📄 Processing Report...


📊 Lab Test Results 📊



Unnamed: 0,Test,Result,Status,Diet Recommendation
1,Lymphocytes,30,🔺 Too High,"Check for infections or immune conditions, and consider consulting a hematologist."
2,Monocytes,5,🔺 Too High,"Eat anti-inflammatory foods such as berries, leafy greens, and fatty fish while reducing processed foods."



⚠️ You have abnormal levels of Lymphocytes.
🔹 This may indicate: Possible signs of cancer.
 Recommended Doctors & Hospitals:
🔹 Adyar Cancer Institute
🔹 Dr Vimalathithan - C Dot Hospital
🔹 MGM Cancer Institute

⚠️ You have abnormal levels of Monocytes.
🔹 This may indicate: Liver or tumor-related issues.
 Recommended Doctors & Hospitals:
🔹 Dr. Aswin Krishna - Apollo (Liver Specialist)
🔹 Dr. S. Arulprakash - MGM Healthcare
🔹 Dr. Anisha Ashok - Laser and Laparoscopic Hospital
