In [1]:
!pip install pdfplumber


Defaulting to user installation because normal site-packages is not writeable


In [2]:
import pdfplumber
import re


In [3]:
def extract_text_from_pdf(pdf_path):
    """
    Read a PDF with pdfplumber and return the text of its pages as one string.

    Pages are visited in document order. Every page whose ``extract_text()``
    result is non-empty contributes that text followed by a newline; pages
    that yield nothing (``None`` or an empty string) are skipped. When no
    page yields text the result is an empty string.
    """
    with pdfplumber.open(pdf_path) as document:
        # Pull the text while the file is still open; joining can happen afterwards.
        page_texts = [page.extract_text() for page in document.pages]
    return "".join(f"{chunk}\n" for chunk in page_texts if chunk)


In [4]:
# Location of the report to analyse.
# NOTE(review): this is an absolute, machine-specific Windows path, so the cell
# only runs on a machine where that exact file exists.
pdf_path = "C:\\Users\\Priyanka Sajnani\\Downloads\\sample_report.pdf"  # ensure file exists

# Read every page of the PDF into a single string; later cells search this text.
text = extract_text_from_pdf(pdf_path)

# Print the character count and the raw text so the extraction can be checked by eye.
print("Text length:", len(text))
print("\nExtracted Text:\n")
print(text)


Text length: 117

Extracted Text:

Blood Test Report
Hemoglobin: 10.8
RBC: 3.8
WBC: 9000
Platelets: 200000
Glucose: 135
Bilirubin: 0.9
Cholesterol: 180



In [5]:
# Regular expressions that locate each blood parameter in the report text.
# Every pattern has two capture groups: group 1 is the label that matched and
# group 2 is the numeric value as a string.

# Optional ":" or "-" between label and value, with optional whitespace around it.
_LABEL_VALUE_SEP = r"\s*[:\-]?\s*"

# A well-formed decimal number ("135", "10.8", ".5"). A looser class such as
# [\d.]+ would also capture "10.8.", "..." or "1.2.3", none of which float()
# can parse.
_NUMERIC_VALUE = r"(\d+(?:\.\d+)?|\.\d+)"

# The leading \b stops a label from matching inside a longer word
# (e.g. "RBC" inside "NRBC", "Hemoglobin" inside "Glycohemoglobin").
patterns = {
    "hemoglobin": r"\b(Hemoglobin|Hb|HGB)" + _LABEL_VALUE_SEP + _NUMERIC_VALUE,
    "rbc": r"\b(RBC)" + _LABEL_VALUE_SEP + _NUMERIC_VALUE,
    "wbc": r"\b(WBC)" + _LABEL_VALUE_SEP + _NUMERIC_VALUE,
    "platelets": r"\b(Platelets|PLT)" + _LABEL_VALUE_SEP + _NUMERIC_VALUE,
    "glucose": r"\b(Glucose)" + _LABEL_VALUE_SEP + _NUMERIC_VALUE,
    "bilirubin": r"\b(Bilirubin)" + _LABEL_VALUE_SEP + _NUMERIC_VALUE,
    "cholesterol": r"\b(Cholesterol)" + _LABEL_VALUE_SEP + _NUMERIC_VALUE,
}


In [6]:
def extract_parameters(text, param_patterns=None):
    """
    Extract blood parameters from report text using regex patterns.

    Args:
        text: Report text to search. ``None`` or an empty string gives ``{}``.
        param_patterns: Optional mapping of parameter name to a regex whose
            second capture group holds the numeric value. When omitted, the
            module-level ``patterns`` dictionary is used.

    Returns:
        A dict mapping each parameter name to the float parsed from the first
        case-insensitive match of its pattern. Parameters with no match, or
        whose captured text is not a valid number, are left out.
    """
    if not text:
        return {}
    if param_patterns is None:
        param_patterns = patterns

    results = {}
    for param, pattern in param_patterns.items():
        match = re.search(pattern, text, re.IGNORECASE)
        if not match:
            continue
        # A permissive capture can swallow sentence punctuation ("10.8."), so
        # drop trailing dots before converting.
        raw_value = match.group(2).rstrip(".")
        try:
            results[param] = float(raw_value)
        except ValueError:
            # Not a number after all (e.g. "..." or "1.2.3"): skip this
            # parameter rather than abort the whole extraction.
            continue
    return results


In [7]:
# Run the regex extraction over the text pulled from the PDF.
params = extract_parameters(text)

# The heading emoji had been saved as mojibake (UTF-8 bytes decoded as cp1252);
# the intended microscope character is restored here.
print("🔬 Extracted Parameters:")
print(params)


🔬 Extracted Parameters:
{'hemoglobin': 10.8, 'rbc': 3.8, 'wbc': 9000.0, 'platelets': 200000.0, 'glucose': 135.0, 'bilirubin': 0.9, 'cholesterol': 180.0}


In [8]:
def validate_parameters(params):
    """
    Keep only the parameters that carry a usable reading.

    A reading is kept when it is not ``None`` and is strictly greater than
    zero; every other entry is dropped. The input mapping is not modified and
    the kept entries stay in their original order.
    """
    return {
        name: reading
        for name, reading in params.items()
        if reading is not None and reading > 0
    }


# Drop missing or non-positive readings before interpretation.
validated_params = validate_parameters(params)

# The heading emoji had been saved as mojibake (UTF-8 bytes decoded as cp1252);
# the intended check-mark character is restored here.
print("✅ Validated Parameters:")
print(validated_params)


✅ Validated Parameters:
{'hemoglobin': 10.8, 'rbc': 3.8, 'wbc': 9000.0, 'platelets': 200000.0, 'glucose': 135.0, 'bilirubin': 0.9, 'cholesterol': 180.0}


In [9]:
# Reference interval for each parameter, stored as a (low, high) pair and keyed
# by the same names used in the extraction patterns.
# NOTE(review): units are not stated anywhere in this notebook; the bounds are
# compared directly against the numbers read from the report. Confirm they
# match the units the lab uses.
reference_ranges = dict(
    hemoglobin=(12, 16),
    rbc=(4.2, 5.9),
    wbc=(4000, 11000),
    platelets=(150000, 450000),
    glucose=(70, 110),
    bilirubin=(0.1, 1.2),
    cholesterol=(125, 200),
)


In [10]:
def interpret_parameter(param, value):
    """
    Label a reading as "Low", "Normal" or "High".

    The bounds come from ``reference_ranges[param]``. A value below the lower
    bound is "Low", a value above the upper bound is "High", and anything
    else, including a value equal to either bound, is "Normal". A ``param``
    missing from ``reference_ranges`` raises ``KeyError``.
    """
    lower_bound, upper_bound = reference_ranges[param]
    if value < lower_bound:
        return "Low"
    if value > upper_bound:
        return "High"
    return "Normal"


In [11]:
# The heading emoji and the arrow had been saved as mojibake (UTF-8 bytes
# decoded as cp1252); the intended characters are restored here.
print("📊 Parameter Interpretation Results:\n")

# One line per validated parameter: name padded to 15 columns, reading, status.
for param, value in validated_params.items():
    status = interpret_parameter(param, value)
    print(f"{param.capitalize():15} : {value} → {status}")


📊 Parameter Interpretation Results:

Hemoglobin      : 10.8 → Low
Rbc             : 3.8 → Low
Wbc             : 9000.0 → Normal
Platelets       : 200000.0 → Normal
Glucose         : 135.0 → High
Bilirubin       : 0.9 → Normal
Cholesterol     : 180.0 → Normal
