In [10]:
import pandas as pd
import os

# === 📁 Step 1: Paths Setup ===
try:
    project_root = os.path.dirname(os.path.abspath(__file__))
except NameError:
    project_root = os.getcwd()

project_root = os.path.dirname(project_root)  # Go one level up

dataset_path = os.path.join(project_root, "data", "Dataset.csv")
patient_dir = os.path.join(project_root, "data", "patients")
os.makedirs(patient_dir, exist_ok=True)

# === 📄 Step 2: Load Dataset ===
with open(dataset_path, "r", encoding="utf-8") as f:
    lines = f.readlines()

header = lines[0].replace('"', '').strip().split(",")
data = [line.replace('"', '').strip().split(",") for line in lines[1:]]
df = pd.DataFrame(data, columns=header)
for col in df.columns:
    if col != "seqn":
        df[col] = pd.to_numeric(df[col], errors="coerce")

# === 🧬 Step 3: Extract Patient & Save ===
patient_row = df.iloc[[1]]  # Change index to simulate other patients
patient_path = os.path.join(patient_dir, "patient_1_cleaned.csv")
patient_row.to_csv(patient_path, index=False)
print("✅ Saved cleaned patient to:", patient_path)

# === ⚖️ Step 4: Normal Ranges ===
normal_ranges = {
    'lbxsal': (135, 145), 'lbdsalsi': (135, 145), 'lbxsassi': (10, 40), 'lbxsapsi': (44, 147),
    'lbxsbu': (7, 20), 'lbdsbusi': (2.5, 7.1), 'lbxsca': (8.5, 10.2), 'lbdscasi': (2.12, 2.55),
    'lbxsck': (22, 198), 'lbxsch': (0, 200), 'lbdschsi': (0, 5.17), 'lbxsc3si': (22, 29),
    'lbxscr': (0.74, 1.35), 'lbdscrsi': (65.4, 119.3), 'lbxsgtsi': (9, 48), 'lbxsgl': (70, 99),
    'lbdsglsi': (3.9, 5.5), 'lbxsir': (60, 170), 'lbdsirsi': (10.7, 30.4), 'lbxsldsi': (140, 280),
    'lbxsph': (2.5, 4.5), 'lbdsphsi': (0.81, 1.45), 'lbxstb': (0.1, 1.2), 'lbdstbsi': (1.71, 20.5),
    'lbxstp': (6.0, 8.3), 'lbdstpsi': (60, 83), 'lbxsua': (3.5, 7.2), 'lbdsuasi': (208, 428),
    'lbxsnasi': (135, 145), 'lbxsksi': (3.5, 5.1), 'lbxsclsi': (98, 107), 'lbxsossi': (275, 295),
    'lbxsgb': (2.0, 3.5), 'lbdsgbsi': (20, 35), 'lbxstr': (0, 150), 'lbdstrsi': (0, 1.7),
    'lbxsatsi': (20, 55)
}

def normalize(val, low, high):
    if val < low:
        return -1
    elif val > high:
        return 1
    return 0

# === ⚠️ Step 5: Get Abnormal Markers ===
patient = pd.read_csv(patient_path)
abnormal_markers = []

for col, (low, high) in normal_ranges.items():
    val = pd.to_numeric(patient[col].values[0], errors="coerce")
    status = normalize(val, low, high)
    if status == -1:
        abnormal_markers.append(f"{col}↓")
    elif status == 1:
        abnormal_markers.append(f"{col}↑")

print("\n🧬 Abnormal markers for Patient 1:\n")
for marker in abnormal_markers:
    print("•", marker)

# Save abnormal markers to file
with open(os.path.join(patient_dir, "abnormal_markers_patient_1.txt"), "w", encoding="utf-8") as f:
    f.write("\n".join(abnormal_markers))

# === 📌 Step 6: Load Rules & Match ===
rules_path = os.path.join(project_root, "data", "generated_rules.txt")
with open(rules_path, "r", encoding="utf-8") as f:
    rules = [r.strip() for r in f if r.strip()]

# matched = []
# for rule in rules:
#     if "If [" in rule and "] →" in rule:
#         ant = rule.split("If [")[1].split("]")[0]
#         antecedents = [x.strip() for x in ant.split(",")]
#         if all(a in abnormal_markers for a in antecedents):
#             matched.append(rule)

# . Filter rules that mention at least one abnormal marker
matching_rules = []

# Normalize markers to lower-case
patient_markers = [m.lower() for m in abnormal_markers]

for rule in rules:
    rule_lower = rule.lower().strip()
    if not rule_lower:
        continue

    if any(marker in rule_lower for marker in patient_markers):
        matching_rules.append(rule)


# === 💾 Step 7: Save Matched Rules ===
matched_path = os.path.join(patient_dir, "matched_rules_for_patient_1.txt")
with open(matched_path, "w", encoding="utf-8") as f:
    for rule in matching_rules:
        f.write(rule + "\n")

print(f"\n✅ {len(matching_rules)} matching rules saved to:", matched_path)


✅ Saved cleaned patient to: c:\Users\dell\OneDrive\Desktop\Final-Project-NABDH\ai_service\data\patients\patient_1_cleaned.csv

🧬 Abnormal markers for Patient 1:

• lbxsal↓
• lbdsalsi↓
• lbxsapsi↑
• lbxscr↓
• lbdscrsi↓
• lbxsgtsi↓
• lbxsgl↓
• lbdsglsi↓
• lbxsldsi↓
• lbxsph↑
• lbdsphsi↑
• lbxsossi↓
• lbxsatsi↓

✅ 52577 matching rules saved to: c:\Users\dell\OneDrive\Desktop\Final-Project-NABDH\ai_service\data\patients\matched_rules_for_patient_1.txt
