In [5]:
import os
from pathlib import Path

import pandas as pd

# Directory holding the MIMIC-IV-Note `note/` CSV files.
# Set the MIMIC_NOTE_DIR environment variable to run this notebook on another
# machine; the default is the path this notebook was originally run with.
MIMIC_NOTE_DIR = Path(os.environ.get(
    "MIMIC_NOTE_DIR",
    "/Users/amulyaveldandi/Downloads/mimic-iv-note-deidentified-free-text-clinical-notes-2.2/note",
))
RADIOLOGY_CSV = MIMIC_NOTE_DIR / "radiology.csv"

# Read CSV file (raises FileNotFoundError if the path above is wrong)
df = pd.read_csv(RADIOLOGY_CSV)

# Display the first 5 rows
df.head()

Unnamed: 0,note_id,subject_id,hadm_id,note_type,note_seq,charttime,storetime,text
0,10000032-RR-14,10000032,22595853.0,RR,14,2180-05-06 21:19:00,2180-05-06 23:32:00,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...
1,10000032-RR-15,10000032,22595853.0,RR,15,2180-05-06 23:00:00,2180-05-06 23:26:00,EXAMINATION: LIVER OR GALLBLADDER US (SINGLE ...
2,10000032-RR-16,10000032,22595853.0,RR,16,2180-05-07 09:55:00,2180-05-07 11:15:00,"INDICATION: ___ HCV cirrhosis c/b ascites, hi..."
3,10000032-RR-18,10000032,,RR,18,2180-06-03 12:46:00,2180-06-03 14:01:00,EXAMINATION: Ultrasound-guided paracentesis.\...
4,10000032-RR-20,10000032,,RR,20,2180-07-08 13:18:00,2180-07-08 14:15:00,EXAMINATION: Paracentesis\n\nINDICATION: ___...


In [6]:
# Check how many rows have "Chest (PA AND LAT)" in the text column.
# The pattern is a regex, so the parentheses are escaped. It must be a raw
# string: "\(" in a normal string literal is an invalid escape sequence and
# triggers a warning on recent Python versions.
chest_pa_lat_pattern = r'Chest \(PA AND LAT\)'
chest_count = df['text'].str.contains(chest_pa_lat_pattern, case=False, na=False).sum()
print(f"Number of rows with 'Chest (PA AND LAT)': {chest_count}")

Number of rows with 'Chest (PA AND LAT)': 70082


In [7]:
# Check how many of the "Chest (PA AND LAT)" rows also contain "follow-up".
# Raw string for the regex: "\(" is an invalid escape in a normal string literal.
is_chest_pa_lat = df['text'].str.contains(r'Chest \(PA AND LAT\)', case=False, na=False)
# .loc selects rows and column in one step instead of chained indexing.
chest_with_followup = (
    df.loc[is_chest_pa_lat, 'text']
    .str.contains('follow-up', case=False, na=False)
    .sum()
)
print(f"Number of 'Chest (PA AND LAT)' rows with 'follow-up': {chest_with_followup}")

Number of 'Chest (PA AND LAT)' rows with 'follow-up': 539


In [14]:
# Filter for CT Chest reports first (case-insensitive regex match anywhere in the text)
ct_chest = df[df['text'].str.contains('CT CHEST|CT OF THE CHEST|CHEST CT', case=False, na=False)]

# Then filter CT Chest reports for nodule mentions.
# NOTE: this is a plain keyword match, so negated statements such as
# "No focal lung nodules are seen" are counted as well.
# The 'pulmonary nodule' alternative is already covered by 'nodule'; the pattern
# is left as-is so the reported counts do not change.
nodule_terms = 'nodule|nodular|pulmonary nodule'
ct_chest_nodules = ct_chest[ct_chest['text'].str.contains(nodule_terms, case=False, na=False)]
print(f"Total CT Chest reports: {len(ct_chest)}")
print(f"CT Chest reports with pulmonary nodules: {len(ct_chest_nodules)}")
# Guard the ratio: with zero CT Chest matches the division would raise ZeroDivisionError.
if len(ct_chest) > 0:
    print(f"Percentage: {len(ct_chest_nodules)/len(ct_chest)*100:.2f}%")
else:
    print("Percentage: N/A (no CT Chest reports found)")


Total CT Chest reports: 180449
CT Chest reports with pulmonary nodules: 94776
Percentage: 52.52%


In [17]:
# Example: preview the first nodule-matching report, truncated to its first 3000 characters
if len(ct_chest_nodules):
    print(
        "Sample CT Chest report with nodule:",
        "=" * 80,
        ct_chest_nodules['text'].iloc[0][:3000],
        sep="\n",
    )

Sample CT Chest report with nodule:
CTA CHEST

CLINICAL INDICATION:  ___ woman with alcoholic hepatitis, now with
persistent tachycardia.  Rule out PE.

COMPARISON EXAM:  ___.

TECHNIQUE:  Multidetector CT images were acquired through the thorax following
the administration of intravenous contrast as per PE protocol.

CT CHEST WITH IV CONTRAST:  

Satisfactory opacification of the pulmonary vasculature.  No filling defects
in the pulmonary vascular to suggest pulmonary embolism.  Pulmonary
vasculature is within normal limits in caliber.  Aorta shows no abnormalities.
No pericardial effusion.  The heart shows no gross abnormalities.
Tracheobronchial tree is patent.  The esophagus is unremarkable.  No
significant hilar, mediastinal, or axillary lymphadenopathy.

There is stable atelectasis at the right lung base and moderate right and
small left pleural effusions, unchanged.  Tiny hiatal hernia.  No focal lung
nodules are seen.  No pneumothorax.  No suspicious osseous lesions are
identif

In [19]:
# Check for Fleischner-related terms (size measurements and recommendations).
# Case-insensitive regex alternation; any single alternative matching anywhere
# in the report text is enough to keep the row.
fleischner_keywords = 'fleischner|follow-up recommended|followup recommended|follow up recommended|recommend follow-up|recommend followup|recommend follow up|mm nodule|millimeter nodule'
ct_nodules_fleischner = ct_chest_nodules[ct_chest_nodules['text'].str.contains(fleischner_keywords, case=False, na=False)]
print(f"CT Chest nodule reports with Fleischner-related terms: {len(ct_nodules_fleischner)}")
# Guard the ratio: an empty nodule subset would otherwise raise ZeroDivisionError.
if len(ct_chest_nodules) > 0:
    print(f"Percentage of nodule reports: {len(ct_nodules_fleischner)/len(ct_chest_nodules)*100:.2f}%")
else:
    print("Percentage of nodule reports: N/A (no nodule reports found)")

CT Chest nodule reports with Fleischner-related terms: 12882
Percentage of nodule reports: 13.59%


In [21]:
# Example: preview the first report from the Fleischner-keyword subset,
# truncated to its first 3000 characters (the printed heading is unchanged)
if len(ct_nodules_fleischner):
    print(
        "Sample CT Chest report with nodule:",
        "=" * 80,
        ct_nodules_fleischner['text'].iloc[0][:3000],
        sep="\n",
    )

Sample CT Chest report with nodule:
HISTORY:  ___ female with fevers, nausea, vomiting, abdominal pain
status post appendectomy and cholecystectomy.  Question colitis.

COMPARISON:  None.

TECHNIQUE:  Helical CT images were acquired of the abdomen and pelvis
following the administration of intravenous contrast and reformatted into
coronal and sagittal planes.

FINDINGS:

LUNG BASES:  Perifissural nodule is seen on the uppermost slice on the right
major fissure measuring 3 mm.  A right lower lobe nodule measures 3mm. There
is bibasilar atelectasis, but no pleural effusion.  Note is made of coronary
arterial calcification, the heart is normal in size.

ABDOMEN:  The liver, spleen, pancreas, and adrenal glands appear normal. The
kidneys demonstrate symmetric contrast enhancement and brisk bilateral
excretion without hydronephrosis.  The stomach is collapsed, there is a small
hiatal hernia. Loops of small bowel are normal in caliber.  The small bowel
mesentery appears normal.  The abdomina

In [5]:
pip install tomli

Collecting tomli
  Downloading tomli-2.3.0-py3-none-any.whl.metadata (10 kB)
Downloading tomli-2.3.0-py3-none-any.whl (14 kB)
Installing collected packages: tomli
Successfully installed tomli-2.3.0
Note: you may need to restart the kernel to use updated packages.


# Running AuDRA-Rad with Ollama Backend

## Setup Instructions

Before running the cells below, you need to start the AuDRA-Rad API server with Ollama as the LLM backend.

### 1. Make sure Ollama is running
```bash
# Check if Ollama is running
curl http://localhost:11434/api/tags

# If not running, start it
ollama serve
```

### 2. Pull the required model (if not already available)
```bash
ollama pull llama3.2:latest
```

### 3. Start the AuDRA-Rad API server
```bash
# From the project root directory
cd /Users/amulyaveldandi/Desktop/AuDRA-Rad

# Start the API (it will use Ollama based on .env configuration)
python -m src.api.app
```

The API will start on `http://localhost:8000` and use Ollama (llama3.2:latest) as the LLM backend.

You can verify the API is running by visiting: http://localhost:8000/docs

In [4]:
# Import necessary libraries for calling the AuDRA-Rad API
import requests
import json

# API configuration
API_ROOT_URL = "http://localhost:8000"
API_BASE_URL = f"{API_ROOT_URL}/api/v1"

# Test if the API is running.
# A short timeout keeps this cell from hanging forever when the port accepts
# the connection but the server never answers.
try:
    response = requests.get(f"{API_ROOT_URL}/", timeout=5)
    print("✓ AuDRA-Rad API is running!")
    print(json.dumps(response.json(), indent=2))
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
    print(f"✗ Cannot connect to AuDRA-Rad API at {API_ROOT_URL}")
    print("Please start the API server first using:")
    print("  python -m src.api.app")
    print("or")
    print("  make run-api")
except ValueError:
    # response.json() raises a ValueError subclass when the body is not JSON;
    # the server is reachable, so report that instead of crashing the cell.
    print(f"(Root endpoint answered with HTTP {response.status_code} but did not return JSON)")

✗ Cannot connect to AuDRA-Rad API at http://localhost:8000
Please start the API server first using:
  python -m src.api.app
or
  make run-api


In [None]:
# Create a function to analyze reports using the AuDRA-Rad API
def analyze_report_via_api(report_text, report_id=None, timeout=120):
    """
    Send a radiology report to the AuDRA-Rad API for analysis.

    POSTs a JSON payload to ``{API_BASE_URL}/process-report``. Failures are
    never raised; they are returned as a dictionary with a single ``"error"``
    key so callers can check ``'error' in result``.

    Args:
        report_text: The radiology report text
        report_id: Optional report identifier
        timeout: Seconds to wait for the API before giving up (default 120)

    Returns:
        The decoded JSON response on success, otherwise ``{"error": <message>}``
    """
    url = f"{API_BASE_URL}/process-report"

    payload = {
        "report_text": report_text,
        "report_id": report_id
    }

    try:
        response = requests.post(url, json=payload, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.Timeout:
        return {"error": f"Request timed out after {timeout} seconds"}
    except requests.exceptions.RequestException as e:
        # Covers connection failures and HTTP error statuses (raise_for_status).
        return {"error": f"API request failed: {str(e)}"}
    except ValueError as e:
        # Older requests versions raise a plain ValueError subclass (not a
        # RequestException) when the response body is not valid JSON.
        return {"error": f"API returned a non-JSON response: {str(e)}"}

print("✓ Function created: analyze_report_via_api(report_text, report_id=None)")

In [None]:
# Select a few sample reports from the Fleischner dataframe
# Let's take 3 samples (start small to test); random_state makes the pick repeatable
sample_reports = ct_nodules_fleischner.sample(n=3, random_state=42)

print(f"Selected {len(sample_reports)} reports for analysis")
print("\nSample report IDs:")
# Number the reports 1..n by position. iterrows() yields the DataFrame index
# label, which after .sample() is the row's original label, not a running counter.
for position, (_, row) in enumerate(sample_reports.iterrows(), 1):
    print(f"  {position}. {row['note_id']} (Subject: {row['subject_id']})")

In [None]:
# Analyze Report 1
report_1 = sample_reports.iloc[0]

# Echo the report header and its full text before sending it to the API
print("\n".join([
    "=" * 80,
    f"ANALYZING REPORT 1: {report_1['note_id']}",
    f"Subject ID: {report_1['subject_id']}",
    "=" * 80,
    "\nReport Text:",
    "-" * 80,
    f"{report_1['text']}",
    "-" * 80,
]))

print("\nSending to AuDRA-Rad API for analysis...")
result_1 = analyze_report_via_api(report_1['text'], report_id=report_1['note_id'])

# Pretty-print whatever came back (default=str stringifies non-JSON-native values)
print("\n".join([
    "\n" + "=" * 80,
    "API RESPONSE:",
    "=" * 80,
    json.dumps(result_1, indent=2, default=str),
]))

In [None]:
# Analyze Report 2
report_2 = sample_reports.iloc[1]

# Echo the report header and its full text before sending it to the API
print("\n".join([
    "=" * 80,
    f"ANALYZING REPORT 2: {report_2['note_id']}",
    f"Subject ID: {report_2['subject_id']}",
    "=" * 80,
    "\nReport Text:",
    "-" * 80,
    f"{report_2['text']}",
    "-" * 80,
]))

print("\nSending to AuDRA-Rad API for analysis...")
result_2 = analyze_report_via_api(report_2['text'], report_id=report_2['note_id'])

# Pretty-print whatever came back (default=str stringifies non-JSON-native values)
print("\n".join([
    "\n" + "=" * 80,
    "API RESPONSE:",
    "=" * 80,
    json.dumps(result_2, indent=2, default=str),
]))

In [None]:
# Analyze Report 3
report_3 = sample_reports.iloc[2]

# Echo the report header and its full text before sending it to the API
print("\n".join([
    "=" * 80,
    f"ANALYZING REPORT 3: {report_3['note_id']}",
    f"Subject ID: {report_3['subject_id']}",
    "=" * 80,
    "\nReport Text:",
    "-" * 80,
    f"{report_3['text']}",
    "-" * 80,
]))

print("\nSending to AuDRA-Rad API for analysis...")
result_3 = analyze_report_via_api(report_3['text'], report_id=report_3['note_id'])

# Pretty-print whatever came back (default=str stringifies non-JSON-native values)
print("\n".join([
    "\n" + "=" * 80,
    "API RESPONSE:",
    "=" * 80,
    json.dumps(result_3, indent=2, default=str),
]))

In [None]:
# Summary of all analysis results
import pandas as pd

# Collect all results (same order as the rows of sample_reports)
all_results = [result_1, result_2, result_3]

print("="*80)
print("SUMMARY OF ALL ANALYSES")
print("="*80)

# Build one summary row per report. Fields that are missing OR explicitly null
# in the API response are treated as empty/zero so one incomplete response
# cannot crash the whole summary.
summary_data = []
for idx, (result, (_, report)) in enumerate(zip(all_results, sample_reports.iterrows()), 1):
    row = {
        'Report #': idx,
        'Note ID': report['note_id'],
        'Subject ID': report['subject_id'],
    }
    if 'error' not in result:
        row.update({
            'Status': result.get('status', 'N/A'),
            'Findings Count': len(result.get('findings') or []),
            'Recommendations': len(result.get('recommendations') or []),
            'Tasks Created': len(result.get('tasks') or []),
            'Requires Review': result.get('requires_human_review', False),
            'Processing Time (ms)': f"{(result.get('processing_time_ms') or 0):.2f}"
        })
    else:
        # Failed calls are flagged for review and carry the error message.
        row.update({
            'Status': 'ERROR',
            'Findings Count': 0,
            'Recommendations': 0,
            'Tasks Created': 0,
            'Requires Review': True,
            'Processing Time (ms)': 'N/A',
            'Error': result['error']
        })
    summary_data.append(row)

summary_df = pd.DataFrame(summary_data)
print(summary_df.to_string(index=False))
print("\n" + "="*80)

In [None]:
# Detailed view of findings and recommendations for each report.
# Response fields that are missing or explicitly null are treated as
# empty/zero so a partially filled response still prints.
print("="*80)
print("DETAILED FINDINGS AND RECOMMENDATIONS")
print("="*80)

for idx, (result, (_, report)) in enumerate(zip(all_results, sample_reports.iterrows()), 1):
    print(f"\n{'='*80}")
    print(f"REPORT {idx}: {report['note_id']}")
    print(f"{'='*80}")

    # Failed API calls carry only an error message; nothing else to show.
    if 'error' in result:
        print(f"❌ ERROR: {result['error']}")
        continue

    print(f"Status: {result.get('status', 'N/A')}")
    print(f"Requires Human Review: {result.get('requires_human_review', False)}")
    print(f"Processing Time: {(result.get('processing_time_ms') or 0):.2f} ms")

    # Display findings
    findings = result.get('findings') or []
    print(f"\n📋 Findings ({len(findings)}):")
    if findings:
        for i, finding in enumerate(findings, 1):
            print(f"\n  Finding {i}:")
            print(f"    Type: {finding.get('type', 'N/A')}")
            print(f"    Location: {finding.get('location', 'N/A')}")
            print(f"    Size: {finding.get('size_mm', 'N/A')} mm")
            print(f"    Confidence: {(finding.get('confidence') or 0):.2f}")
            characteristics = finding.get('characteristics') or []
            if characteristics:
                # str() each item so non-string entries do not break the join.
                print(f"    Characteristics: {', '.join(map(str, characteristics))}")
    else:
        print("  No findings detected")

    # Display recommendations
    recommendations = result.get('recommendations') or []
    print(f"\n💡 Recommendations ({len(recommendations)}):")
    if recommendations:
        for i, rec in enumerate(recommendations, 1):
            print(f"\n  Recommendation {i}:")
            print(f"    Follow-up Type: {rec.get('follow_up_type', 'N/A')}")
            print(f"    Timeframe: {rec.get('timeframe_months', 'N/A')} months")
            print(f"    Urgency: {rec.get('urgency', 'N/A')}")
            print(f"    Reasoning: {rec.get('reasoning', 'N/A')}")
            print(f"    Citation: {rec.get('citation', 'N/A')}")
            print(f"    Confidence: {(rec.get('confidence') or 0):.2f}")
    else:
        print("  No recommendations generated")

    # Display tasks
    tasks = result.get('tasks') or []
    print(f"\n📅 Tasks Created ({len(tasks)}):")
    if tasks:
        for i, task in enumerate(tasks, 1):
            print(f"\n  Task {i}:")
            print(f"    Procedure: {task.get('procedure', 'N/A')}")
            print(f"    Scheduled Date: {task.get('scheduled_date', 'N/A')}")
            print(f"    Reason: {task.get('reason', 'N/A')}")
    else:
        print("  No tasks created")

print(f"\n{'='*80}")