# In [6]:
import numpy as np
import random
import json

# Catalogue of the medical attributes the benchmark is generated from.
# Fields of each entry:
#   name        - Chinese label; removed from the entry before it is exported.
#   attribute   - English attribute name (presumably substituted for the
#                 {attribute} placeholder of the prompts downstream).
#   unit        - measurement unit.
#   gt_lb/gt_ub - not read by this script; presumably the ground-truth normal
#                 range -- TODO confirm.
#   lb/ub       - bounds used to generate ``target_range`` (lb inclusive,
#                 ub exclusive) and the surrounding ``neighborhood_range``.
#   description - label for the range ('normal' for every entry here).
data = [
    {
        'name': '体温',
        'attribute': 'body temperature',
        'unit': 'degree Celsius',
        'gt_lb': 36.0,
        'gt_ub': 37.0,
        'lb': 56.0,
        'ub': 57.2,
        'description': 'normal',
    },
    {
        'name': '舒张血压',
        'attribute': 'diastolic blood pressure',
        'unit': 'mmHg',
        'gt_lb': 60,
        'gt_ub': 79,
        'lb': 10,
        'ub': 30,
        'description': 'normal',
    },
    {
        'name': '收缩血压',
        'attribute': 'systolic blood pressure',
        'unit': 'mmHg',
        'gt_lb': 90,
        'gt_ub': 119,
        'lb': 150,
        'ub': 170,
        'description': 'normal',
    },
    {
        'name': '心率',
        'attribute': 'heart rate',
        'unit': 'bpm',
        'gt_lb': 60,
        'gt_ub': 100,
        'lb': 30,
        'ub': 50,
        'description': 'normal',
    },
    {
        'name': '呼吸频率',
        'attribute': 'respiratory rate',
        'unit': 'breaths/min',
        'gt_lb': 12,
        'gt_ub': 20,
        'lb': 51,
        'ub': 90,
        'description': 'normal',
    },
    {
        'name': '男性血红蛋白',
        'attribute': 'hemoglobin for male',
        'unit': 'g/L',
        'gt_lb': 130,
        'gt_ub': 175,
        'lb': 10,
        'ub': 70,
        'description': 'normal',
    },
    {
        'name': '女性血红蛋白',
        'attribute': 'hemoglobin for female',
        'unit': 'g/L',
        'gt_lb': 120,
        'gt_ub': 160,
        'lb': 180,
        'ub': 210,
        'description': 'normal',
    },
    {
        'name': 'BMI',
        'attribute': 'body mass index',
        'unit': 'kg/m²',
        'gt_lb': 18.5,
        'gt_ub': 24.9,
        'lb': 7.5,
        'ub': 15.8,
        'description': 'normal',
    },
    {
        'name': '脉搏',
        'attribute': 'pulse rate',
        'unit': 'bpm',
        'gt_lb': 60,
        'gt_ub': 100,
        'lb': 20,
        'ub': 50,
        'description': 'normal',
    },
    {
        'name': '血小板计数',
        'attribute': 'platelet count',
        'unit': '×10^9/L',
        'gt_lb': 100,
        'gt_ub': 300,
        'lb': 400,
        'ub': 500,
        'description': 'normal',
    },
    {
        'name': '白细胞计数',
        'attribute': 'white blood cell count',
        'unit': '×10^9/L',
        'gt_lb': 4,
        'gt_ub': 10,
        'lb': 12,
        'ub': 20,
        'description': 'normal',
    },
    {
        'name': '空腹血糖',
        'attribute': 'fasting blood glucose',
        'unit': 'mmol/L',
        'gt_lb': 3.9,
        'gt_ub': 6.1,
        'lb': 2.1,
        'ub': 3.8,
        'description': 'normal',
    },
]

# Maximum number of sampled target values per attribute; half of it is also
# the number of neighbourhood steps generated on each side of [lb, ub).
max_range_num = 20

benchmarks = []

# Prompt templates are stored with their {placeholders} unformatted.
reason_prompt = "Analyse the medical attribute. A person's {attribute} is {num} {unit}.\n\nAnalysis:"
rewrite_prompts = [
    "The {description} range for {attribute} should fall between",
    "The {description} range for the {attribute} is",
    "The {attribute} is considered {description} when it falls within the range of.",
    "For the {attribute} to be within the {description} range, it should typically range from.",
    "A normal {attribute} value would typically be between.",
    "The {description} range for the {attribute} is defined as.",
]


def _build_ranges(lb, ub, max_count):
    """Return ``(target_range, neighborhood_range)`` for the interval [lb, ub).

    Integer bounds are enumerated in steps of 1; if either bound is a float
    the interval is enumerated in steps of 0.1 (bounds are snapped to the
    nearest tenth).

    * ``target_range`` is a sorted random sample of at most ``max_count``
      values from [lb, ub).
    * ``neighborhood_range`` holds the ``max_count // 2`` steps immediately
      below ``lb`` (clipped at 0) followed by the ``max_count // 2`` steps
      starting at ``ub``; it never overlaps ``target_range``.

    Raises:
        ValueError: if ``lb`` or ``ub`` is neither int nor float.
    """
    if isinstance(lb, int) and isinstance(ub, int):
        scale = 1
    elif isinstance(lb, (int, float)) and isinstance(ub, (int, float)):
        scale = 10
    else:
        raise ValueError('lb and ub should be int or float')

    # Enumerate on an integer grid (units or tenths). Stepping floats by 0.1
    # accumulates rounding error and can make ``ub`` leak into the target
    # range, duplicating the first neighbourhood value.
    lo = round(lb * scale)
    hi = round(ub * scale)
    half = max_count // 2
    outer_lo = max(lo - half, 0)

    def to_value(step):
        # Keep ints as ints so the JSON output does not gain ".0" suffixes.
        return step if scale == 1 else step / scale

    candidates = [to_value(step) for step in range(lo, hi)]
    target = sorted(random.sample(candidates, k=min(max_count, len(candidates))))
    neighborhood = (
        [to_value(step) for step in range(outer_lo, lo)]
        + [to_value(step) for step in range(hi, hi + half)]
    )
    return target, neighborhood


for d in data:
    # The display name is not part of the exported request; note that this
    # strips it from the entry in ``data`` itself.
    d.pop('name', None)
    target_range, neighbor_range = _build_ranges(d['lb'], d['ub'], max_range_num)
    benchmarks.append({
        'request': d,
        'target_range': target_range,
        'neighborhood_range': neighbor_range,
        'reason_prompt': reason_prompt,
        'rewrite_prompts': list(rewrite_prompts),
    })

# Context manager guarantees the file is flushed and closed.
with open('numeric_attributes.json', 'w', encoding='utf-8') as f:
    json.dump(benchmarks, f, indent=4)
