In [None]:
# Severe Injury Detection - Data Preparation

This notebook walks through loading FDA MAUDE (Manufacturer and User Facility Device Experience) medical device reports (MDRs), labeling severe-injury cases with a keyword heuristic, and preparing the report text for machine learning.

## Steps
1. Load MDR data from a CSV file (the loading code uses only `pd.read_csv`, so Excel exports must be converted to CSV first)
2. Define severe injury keywords/phrases
3. Label reports as severe/non-severe
4. Clean and preprocess text
5. Save processed dataset for modeling

---

SyntaxError: invalid syntax (2895311543.py, line 3)

In [None]:
!pip install pandas

import pandas as pd
import re

# 1. Load MDR data (update path as needed)
data_path = '../data/2025-03_death_events.csv'  # Using March death event CSV file
try:
    df = pd.read_csv(data_path)
except Exception as e:
    # Print a short, readable message ahead of the traceback, then re-raise so
    # nothing further in the cell runs without a loaded frame.
    print(f'Error loading file: {e}')
    raise
else:
    # Reached only when the read succeeded.
    print(f"Loaded {len(df)} reports.")

# 2. Define severe injury keywords/phrases
# Each entry is a regular-expression fragment (e.g. 'life[- ]?threatening'),
# not a literal string; is_severe() matches them case-insensitively.
severe_keywords = [
    'death', 'life[- ]?threatening', 'permanent injury', 'amputation', 'stroke',
    'cardiac arrest', 'coma', 'paralysis', 'hospitalization', 'ICU', 'intensive care',
    'organ failure', 'brain damage', 'fatal', 'severe bleeding', 'major surgery'
]

# 3. Label reports as severe/non-severe (assume narrative column is 'Event Text' or similar)
def is_severe(text):
    """Return 1 if ``text`` mentions any severe-injury keyword, else 0.

    Parameters
    ----------
    text : object
        A report narrative. Missing values (``None``/``NaN``) are labelled 0;
        anything else is converted with ``str()`` before matching.

    Returns
    -------
    int
        1 when at least one pattern in ``severe_keywords`` matches, otherwise 0.

    Notes
    -----
    * Matching is case-insensitive. Lower-casing the text and then searching
      case-sensitively would make the upper-case keyword 'ICU' unmatchable.
    * Each keyword must start at a word boundary, so 'coma' does not fire on
      'glaucoma'/'sarcoma' and 'stroke' does not fire on 'keystroke'. The end
      of the keyword is left open so inflected forms such as 'deaths',
      'comatose' or 'fatality' still match.
    """
    if pd.isnull(text):
        return 0
    text = str(text)
    for kw in severe_keywords:
        # (?:...) keeps the leading \b attached to the whole keyword pattern,
        # even if a keyword ever contains a top-level alternation.
        if re.search(r'\b(?:' + kw + r')', text, flags=re.IGNORECASE):
            return 1
    return 0

# Locate the narrative column. Any column whose name contains 'event' or
# 'narrative' is a candidate, but metadata columns (an event type or event
# date, for instance) can also contain 'event'. Candidates that look like free
# text ('narrative' or 'text' in the name) are therefore preferred; otherwise
# the first candidate in column order is used.
# NOTE(review): the real column names are not visible from this notebook -
# confirm the column printed below against the CSV header.
candidate_cols = [
    col for col in df.columns
    # str() so non-string column labels do not break the name check.
    if 'event' in str(col).lower() or 'narrative' in str(col).lower()
]
text_like_cols = [
    col for col in candidate_cols
    if 'narrative' in str(col).lower() or 'text' in str(col).lower()
]
narr_col = next(iter(text_like_cols or candidate_cols), None)
if narr_col is None:
    raise ValueError('No narrative/event text column found!')
print(f"Using '{narr_col}' as the narrative column.")

# 1 = at least one severe-injury keyword found in the narrative, 0 = none.
df['severe_injury'] = df[narr_col].apply(is_severe)
print(df['severe_injury'].value_counts())

# 4. Clean and preprocess text (basic)
def clean_text(text):
    """Normalise a report narrative to lower-case ASCII letters, digits and spaces.

    Parameters
    ----------
    text : object
        A report narrative. Missing values (``None``/``NaN``) yield an empty
        string rather than the literal text 'none'/'nan' that ``str()`` would
        produce; anything else is converted with ``str()``.

    Returns
    -------
    str
        The lower-cased text with every character outside ``a-z``, ``0-9`` and
        whitespace replaced by a space, whitespace runs collapsed to a single
        space, and leading/trailing whitespace removed.
    """
    if pd.isnull(text):
        return ''
    text = str(text).lower()
    # Replace with a space (not '') so words joined by punctuation stay separate.
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# Store the normalised narrative next to the original text column.
df['clean_text'] = df[narr_col].apply(clean_text)

# 5. Save processed dataset
out_path = '../data/mdr_severe_labeled.csv'
df.to_csv(path_or_buf=out_path, index=False)  # index=False: omit the row index
print(f"Saved labeled data to {out_path}")

ModuleNotFoundError: No module named 'pandas'