# IntelHealth Eval Inference (Base vs Finetuned)

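This notebook sends the evaluation cases to the diagnosis API under two model profiles (reported as "base" and "finetuned"), compares response validity, keyword coverage, and latency, and exports the resulting metrics as JSON for the Home charts.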

In [None]:
# Clone repo (skip if already cloned)
# The relative paths used later in this notebook (evaluation cases, metrics
# output) are resolved from inside the Intel_Health checkout.
import os
if not os.path.exists('Intel_Health'):
    # Notebook shell escape: fetch the project only when no local copy exists.
    !git clone https://github.com/DemonRain7/Intel_Health.git
# Notebook magic: make the repository root the working directory.
%cd Intel_Health

In [None]:
# Install the HTTP client imported in the next cell (-q keeps pip output quiet).
!pip -q install requests

In [None]:
import json, time, re
from pathlib import Path
import requests

# Base URL of the API; '/api/diagnoses' is appended to it for every request.
API_BASE_URL = 'http://127.0.0.1:8081'  # change if needed
# Placeholder: replace with a real token before running. It is sent as a
# Bearer token in the Authorization header of every request.
ACCESS_TOKEN = 'YOUR_SUPABASE_ACCESS_TOKEN'

# Values sent as 'model_profile_id'. Results for BASE_PROFILE are reported
# under the 'base' label and those for FINETUNED_PROFILE under 'finetuned'.
BASE_PROFILE = 'fast'
FINETUNED_PROFILE = 'balanced'

# JSONL file of evaluation cases; the 'input' object of each line is used.
EVAL_CASES = Path('data_clean/eval/eval_cases.jsonl')
# Destination of the exported metrics JSON (parent directories are created).
OUT_METRICS = Path('src/assets/metrics/training_metrics.json')


In [None]:
def load_cases(path: Path):
    """Read a JSONL file and return the ``input`` payload of each record.

    Lines that are empty after stripping whitespace are skipped. A record
    that has no ``input`` key contributes an empty dict.

    Args:
        path: Location of the UTF-8 encoded JSONL file.

    Returns:
        A list with one entry per non-blank line, in file order.
    """
    with path.open('r', encoding='utf-8') as handle:
        stripped = (raw.strip() for raw in handle)
        # The list is built while the file is still open, so the lazy
        # generator above is fully consumed before the handle closes.
        return [json.loads(text).get('input', {}) for text in stripped if text]

def extract_text(output_obj):
    """Flatten a diagnosis response into one space-separated string.

    The ``condition`` and ``description`` of every dict entry in
    ``output_obj['results']`` are collected in order, followed by every
    entry of ``output_obj['recommendations']``. All values are converted
    with ``str``; a missing ``condition``/``description`` contributes an
    empty string.

    Args:
        output_obj: Response-like dict. Any non-dict value yields ''.

    Returns:
        The collected pieces joined by single spaces ('' if there are none).
    """
    if not isinstance(output_obj, dict):
        return ''

    def _entries(key):
        # A field may be null or some other non-list value (e.g. JSON
        # ``null``); treat it as "no entries" instead of failing on iteration.
        value = output_obj.get(key)
        return value if isinstance(value, (list, tuple)) else []

    parts = []
    for r in _entries('results'):
        if isinstance(r, dict):
            parts.append(str(r.get('condition', '')))
            parts.append(str(r.get('description', '')))
    parts.extend(str(x) for x in _entries('recommendations'))
    return ' '.join(parts)

def keyword_coverage(case_input, output_obj):
    """Return the fraction of input keywords that occur in the response text.

    Keywords are taken from the case input: the ``body_part`` (if truthy),
    every entry of ``symptoms``, and the pieces of ``other_symptoms`` after
    splitting on ASCII commas, fullwidth commas (U+FF0C) and whitespace.
    A keyword counts as a hit when it is a non-empty, case-sensitive
    substring of the text produced by ``extract_text(output_obj)``.

    Args:
        case_input: Dict describing the evaluation case.
        output_obj: Response dict passed to ``extract_text``.

    Returns:
        ``hits / number_of_keywords`` as a float, or 0.0 when the response
        text is empty or the case provides no keywords.
    """
    text = extract_text(output_obj)
    if not text:
        return 0.0
    keywords = []
    body_part = case_input.get('body_part')
    if body_part:
        keywords.append(str(body_part))
    # ``or`` covers an explicit null: iterating None would raise, and
    # str(None) would otherwise add the bogus keyword 'None'.
    keywords.extend(str(x) for x in (case_input.get('symptoms') or []))
    other = str(case_input.get('other_symptoms') or '').strip()
    if other:
        # \uff0c is the fullwidth comma; written as an escape so the pattern
        # does not depend on how this file's bytes are decoded.
        keywords.extend(x for x in re.split(r'[,\uff0c\s]+', other) if x)
    if not keywords:
        return 0.0
    hit = sum(1 for k in keywords if k and k in text)
    return hit / len(keywords)

def is_valid_response(resp_json):
    """Tell whether a response has the expected top-level shape.

    A response is valid when it is a dict whose ``results`` is a non-empty
    list and whose ``recommendations`` is a list (possibly empty).

    Args:
        resp_json: Decoded response body.

    Returns:
        True if the shape matches, False otherwise.
    """
    if not isinstance(resp_json, dict):
        return False
    diagnoses = resp_json.get('results')
    # Reject both "not a list" and "empty list" up front.
    if not isinstance(diagnoses, list) or not diagnoses:
        return False
    return isinstance(resp_json.get('recommendations'), list)

def run_batch(cases, profile_id):
    """POST every evaluation case to the diagnosis API and aggregate metrics.

    Each case is copied, tagged with ``model_profile_id = profile_id`` and
    sent to ``{API_BASE_URL}/api/diagnoses`` with a 120 s timeout. A case
    scores 0.0 coverage and is not counted as valid when the request fails
    (network error or timeout), the status code is >= 400, or the body is
    not a JSON object. The elapsed time of every attempt, failed or not,
    is included in the latency average.

    Args:
        cases: List of request payload dicts, one per evaluation case.
        profile_id: Value sent as ``model_profile_id``.

    Returns:
        Dict with ``json_valid_rate``, ``keyword_coverage`` and
        ``latency_sec``, each averaged over ``max(1, len(cases))``.
    """
    headers = {
        'Authorization': f'Bearer {ACCESS_TOKEN}',
        'Content-Type': 'application/json',
    }
    url = f'{API_BASE_URL}/api/diagnoses'
    valid = 0
    coverage_scores = []
    latencies = []

    for item in cases:
        payload = dict(item)
        payload['model_profile_id'] = profile_id

        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        t0 = time.perf_counter()
        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=120)
        except requests.RequestException as exc:
            # One unreachable/timed-out request must not abort the batch;
            # record it as a failed case, like an HTTP error status.
            print(f'[run_batch] request failed: {exc!r}')
            resp = None
        latencies.append(time.perf_counter() - t0)

        if resp is None or resp.status_code >= 400:
            coverage_scores.append(0.0)
            continue

        try:
            data = resp.json()
        except ValueError:
            # Body is not JSON (requests' decode errors derive from ValueError).
            data = None
        if not isinstance(data, dict):
            coverage_scores.append(0.0)
            continue

        output_obj = {
            'results': data.get('results', []),
            'recommendations': data.get('recommendations', []),
        }
        if is_valid_response(output_obj):
            valid += 1
        coverage_scores.append(keyword_coverage(item, output_obj))

    # Guard against division by zero for an empty case list.
    n = max(1, len(cases))
    return {
        'json_valid_rate': valid / n,
        'keyword_coverage': sum(coverage_scores) / n,
        'latency_sec': sum(latencies) / n,
    }


In [None]:
# Run the same evaluation cases under both profiles.
cases = load_cases(EVAL_CASES)
base_metrics = run_batch(cases, BASE_PROFILE)
finetuned_metrics = run_batch(cases, FINETUNED_PROFILE)

# One [base, finetuned] pair per metric, rounded to 4 decimal places.
metrics = {
    'labels': ['base', 'finetuned'],
    **{
        name: [round(base_metrics[name], 4), round(finetuned_metrics[name], 4)]
        for name in ('json_valid_rate', 'keyword_coverage', 'latency_sec')
    },
}

# Serialise once; the same text is written to disk and echoed below.
metrics_json = json.dumps(metrics, ensure_ascii=False, indent=2)
OUT_METRICS.parent.mkdir(parents=True, exist_ok=True)
OUT_METRICS.write_text(metrics_json, encoding='utf-8')
print('Saved metrics to', OUT_METRICS)
print(metrics_json)