In [19]:
pip install pdfplumber pytesseract pdf2image Pillow pandas

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [20]:
import pdfplumber
import re
import json
import pandas as pd
import os
import pytesseract
from PIL import Image

# Location of the Tesseract OCR executable handed to pytesseract. When the
# TESSERACT_CMD environment variable is set it takes precedence; otherwise the
# original machine-specific path is used, so existing behaviour is unchanged.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD",
    r"C:\Users\Rahul\AppData\Local\Programs\Tesseract-OCR\tesseract.exe",
)
print("Libraries imported")

Libraries imported


In [21]:
# Hot-reload the local blood_report_parser module so that changes made to it
# are picked up without restarting the kernel.
import importlib

import blood_report_parser
# reload() re-executes the module's code inside the already-imported module object.
importlib.reload(blood_report_parser)
# Import the names only after the reload so they refer to the reloaded definitions.
from blood_report_parser import extract_text, extract_parameters, validate
print("Module loaded")

Module loaded


In [39]:
# Path of the PDF report to analyse.
report_path = "C:\\Users\\Rahul\\Desktop\\Project\\Health_Diagnosis\\datasets\\sterling-accuris-pathology-sample-report-unlocked.pdf"

# Fail fast when the report is missing. Merely printing a message would leave
# `raw_text` undefined (or, worse, holding the text of a previously analysed
# report from an earlier run), and the following cells would then fail
# obscurely or silently process the wrong data.
if not os.path.exists(report_path):
    raise FileNotFoundError(f"File not found: {report_path}")

# Extract the report text and show a short preview.
raw_text = extract_text(report_path)
print("Extracted text (first 500 chars):")
print(raw_text[:500])

Extracted text (first 500 chars):
MC-2202
Scan QR code to check
report authenticity
Passport No : LABORATORY TEST REPORT
Patient Information Sample Information Client/Location Information
Name : Lyubochka Svetka Lab Id : 02232160XXXX Client Name : Sterling Accuris Buddy
Registration on : 20-Feb-2023 09:10
Location :
Sex/Age : Male / 41 Y 01-Feb-1982 Collected at : non SAWPL
Approved on : 20-Feb-2023 11:09 Status : Final
Ref. Id : Collected on : 20-Feb-2023 08:53
Printed On : 28-Feb-2023 10:26
Ref. By : Sample Type : EDTA Blood
P


In [23]:
# Pull the blood parameters out of the extracted report text and show them
# underneath a heading line.
params = extract_parameters(raw_text)
print("Extracted Parameters:", params, sep="\n")

Extracted Parameters:
{'hemoglobin': 12.0, 'wbc': 8347.0, 'platelets': 39.0, 'glucose': 110.0, 'cholesterol': 176.0}


In [24]:
# Run the extracted parameters through validation and list every
# parameter/value pair of the result, one per line.
validated_params = validate(params)
print("Validated Parameters:")
for name, reading in validated_params.items():
    print(name, reading, sep=": ")

Validated Parameters:
hemoglobin: 12.0
wbc: 8347.0
platelets: None
glucose: 110.0
cholesterol: 176.0


In [25]:
# Load the reference ranges from disk. The encoding is given explicitly:
# without it open() falls back to the platform's default encoding, which is
# not guaranteed to be UTF-8 (e.g. on Windows), while JSON files are UTF-8.
with open("parameter_ranges.json", 'r', encoding="utf-8") as f:
    ranges = json.load(f)
print("Ranges loaded:")
print(ranges)

Ranges loaded:
{'hemoglobin': {'low': 12.0, 'high': 15.0, 'unit': 'g/dL'}, 'wbc': {'low': 4000.0, 'high': 10500.0, 'unit': '/cumm'}, 'rbc': {'low': 3.8, 'high': 4.8, 'unit': 'million/cumm'}, 'platelets': {'low': 1.5, 'high': 4.1, 'unit': 'lakh/cumm'}, 'hematocrit': {'low': 36.0, 'high': 46.0, 'unit': '%'}, 'mcv': {'low': 82.0, 'high': 92.0, 'unit': 'fL'}, 'mch': {'low': 27.0, 'high': 32.0, 'unit': 'pg'}, 'mchc': {'low': 31.5, 'high': 34.5, 'unit': 'g/dL'}, 'rdw': {'low': 11.6, 'high': 14.0, 'unit': '%'}, 'mpv': {'low': 8.0, 'high': 14.0, 'unit': 'fL'}, 'hdw': {'low': 2.2, 'high': 3.2, 'unit': 'g/dL'}, 'neutrophils': {'low': 40.0, 'high': 80.0, 'unit': '%'}, 'lymphocytes': {'low': 20.0, 'high': 40.0, 'unit': '%'}, 'eosinophils': {'low': 0.0, 'high': 6.0, 'unit': '%'}, 'monocytes': {'low': 2.0, 'high': 10.0, 'unit': '%'}, 'basophils': {'low': 0.0, 'high': 2.0, 'unit': '%'}, 'luc': {'low': 0.0, 'high': 3.0, 'unit': '%'}, 'glucose': {'low': 70.0, 'high': 140.0, 'unit': 'mg/dL'}, 'triglycer

In [26]:
from model1_parameter_interpreter import classify

# Classify each validated parameter against the loaded ranges. Entries whose
# value is None, or that have no entry in `ranges`, are skipped.
classification_results = {}
for name, reading in validated_params.items():
    if reading is None or name not in ranges:
        continue
    status = classify(name, reading, ranges)
    classification_results[name] = dict(value=reading, status=status)
    print(f"{name}: {reading} - {status}")

hemoglobin: 12.0 - Normal
wbc: 8347.0 - Normal
glucose: 110.0 - Normal
cholesterol: 176.0 - Normal


In [27]:
# Flatten the classification results into one record per parameter.
df_data = [
    {'Parameter': param, 'Value': info['value'], 'Status': info['status']}
    for param, info in classification_results.items()
]

# The columns are named explicitly so that the table (and the CSV written from
# it) keeps its header even when no parameter was classified; an empty record
# list would otherwise produce a column-less frame.
df = pd.DataFrame(df_data, columns=['Parameter', 'Value', 'Status'])
df.to_csv("blood_parameters.csv", index=False)
print(df)
print("Saved to blood_parameters.csv")

     Parameter   Value  Status
0   hemoglobin    12.0  Normal
1          wbc  8347.0  Normal
2      glucose   110.0  Normal
3  cholesterol   176.0  Normal
Saved to blood_parameters.csv


In [None]:
import sys

# Make modules located in the current working directory importable. The
# membership check keeps the cell idempotent: re-running it no longer prepends
# another duplicate "." entry to sys.path each time.
if "." not in sys.path:
    sys.path.insert(0, ".")

import importlib
import model2_pattern_engine

# Reload before importing the entry point so that changes to the module are
# picked up without restarting the kernel.
importlib.reload(model2_pattern_engine)
from model2_pattern_engine import assess_risks_from_model1

# Patient demographics passed to the risk model. They are hard-coded here, so
# they have to be updated by hand to match the report being analysed.
patient_age = 41
patient_gender = "Male"

risk_assessment = assess_risks_from_model1(classification_results, age=patient_age, gender=patient_gender)

# Report: demographics, overall risk level/score and the identified risks.
print("=" * 80)
print("MODEL 2: RISK ASSESSMENT WITH DEMOGRAPHIC CONTEXT")
print("=" * 80)
print("\nPATIENT DEMOGRAPHICS:")
print(f"  Age: {patient_age} years")
print(f"  Gender: {patient_gender}")
print(f"  Age Group: {risk_assessment['demographic_context']['age_group'].replace('_', ' ').title()}")

print(f"\nRISK LEVEL: {risk_assessment['risk_level']}")
print(f"TOTAL RISK SCORE: {risk_assessment['risk_score']}")
print(f"Risk Multiplier (Age): {risk_assessment['demographic_context']['risk_multiplier']}x")
print("\nIDENTIFIED RISKS (Age & Gender-Adjusted):")
for i, risk in enumerate(risk_assessment['identified_risks'], 1):
    print(f"  {i}. {risk}")
print("=" * 80)


MODEL 2: RISK ASSESSMENT WITH DEMOGRAPHIC CONTEXT

PATIENT DEMOGRAPHICS:
  Age: 71 years
  Gender: Female
  Age Group: Senior

RISK LEVEL: LOW
TOTAL RISK SCORE: 0
Risk Multiplier (Age): 1.5x

IDENTIFIED RISKS (Age & Gender-Adjusted):
  1. No significant risks identified


In [29]:
# Shell escape: asks the Ollama CLI to pull the "tinyllama" model.
# NOTE(review): presumably the model used by ollama_health_analysis in the
# following cell — confirm against that module.
!ollama pull tinyllama


[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠴ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠧ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠇ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠏ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 2af3b81862c6: 100% ▕██████████████████▏ 637 MB                         [K
pulling af0ddbdaaa26: 100% ▕██████████████████▏   70 B                         [K
pulling c8472cd9daed: 100% ▕██████████████████▏   31 B                         [K
pulling fa

In [38]:
# Ollama-backed clinical summary and recommendations. Each step falls back to
# the older rule-based module when the Ollama call raises.

# Import the helper module, reload it so the current on-disk version is used,
# and only then bind the two entry points.
import ollama_health_analysis
importlib.reload(ollama_health_analysis)
from ollama_health_analysis import (
    ollama_synthesize_findings,
    ollama_generate_recommendations,
)

# --- Step 1: clinical summary -------------------------------------------------
try:
    clinical_summary = ollama_synthesize_findings(
        classification_results,
        risk_assessment,
        user_context=dict(age=patient_age, gender=patient_gender),
    )
except Exception as synthesis_error:
    print(f"❌ Synthesis failed: {synthesis_error}")
    # Fallback path: the previous synthesis implementation.
    from synthesis_finding import synthesize_findings

    synthesized = synthesize_findings(
        classification_results,
        risk_assessment,
        user_context=dict(age=patient_age, gender=patient_gender),
    )
    clinical_summary = synthesized["summary"]
    print("✅ Using fallback synthesis")

# --- Step 2: recommendations --------------------------------------------------
try:
    # "<parameter> <status>" for every parameter whose status is not 'Normal'.
    abnormal_params = [
        f"{name} {details['status']}"
        for name, details in classification_results.items()
        if details.get('status') != 'Normal'
    ]

    recommendations = ollama_generate_recommendations(
        clinical_summary,
        abnormal_params,
        risk_assessment["risk_level"],
        user_context=dict(age=patient_age, gender=patient_gender, activity_level="low"),
    )
except Exception as recommendation_error:
    print(f"❌ Recommendations failed: {recommendation_error}")
    # Fallback path: the previous recommendation generator.
    # NOTE(review): the module name is spelled "recoomendation_generator";
    # confirm it matches the actual file name on disk.
    from recoomendation_generator import generate_recommendations

    synthesized = {
        "summary": clinical_summary,
        "findings": [],
        "risk_level": risk_assessment["risk_level"],
    }
    rec_result = generate_recommendations(synthesized, user_context={"activity_level": "low"})
    recommendations = rec_result["recommendations"]
    print("✅ Using fallback recommendations")
    print(f"Generated {len(recommendations)} recommendations")

# --- Final report: summary plus at most the first five recommendations --------
print("\n\n\n=== FINAL RESULTS ===")
print(f"Clinical Summary:\n{clinical_summary}")
print("\n Top Recommendations:")
for i, rec in enumerate(recommendations[:5], 1):
    category_tag = rec['category'].upper()
    print(f"{i}. [{category_tag}] {rec['text']}")




=== FINAL RESULTS ===
Clinical Summary:
Patient: Age: 71, Gender: Female
Risk Level: LOW (score 0)
Abnormal Parameter:
- No significant risks identified
Key Risks:
- No risk identified for the patient's clinical profile.

 Top Recommendations:
1. [FOLLOW_UP] Arrange urgent clinical review due to elevated creatinine, bilirubin, and AST levels
2. [PRECAUTIONS] Have emergency contact information readily available
3. [DIET] Maintain gentle daily walks and light stretching exercises appropriate for seniors
4. [LIFESTYLE] Maintain gentle daily walks (15-20 minutes) avoiding overexertion
5. [PRECAUTIONS] Monitor for symptoms: swelling, dark urine, yellowing skin/eyes, fatigue
