In [2]:
import numpy as np
import pandas as pd
import os
def find_project_root(marker=".git"):
    """
    Walk upward from the current working directory until a directory that
    contains one of the given marker entries is found, and treat that
    directory as the project root.

    Args:
        marker: Name of the entry (file or directory) that identifies the
            project root, or an iterable of such names (e.g.
            ``['.git', '.idea']``). Defaults to ``'.git'``.
    Returns:
        str | None: Absolute path of the first ancestor (starting with the
        working directory itself) containing any marker, or ``None`` when the
        filesystem root is reached without finding one.
    """
    # A single name is wrapped so the search loop can treat both forms alike.
    if isinstance(marker, (str, os.PathLike)):
        markers = [marker]
    else:
        markers = list(marker)

    # The search starts from the process working directory (os.getcwd()).
    current_dir = os.path.abspath(os.getcwd())
    while True:
        # Does this directory contain any of the marker entries?
        if any(os.path.exists(os.path.join(current_dir, m)) for m in markers):
            return current_dir

        parent_dir = os.path.dirname(current_dir)
        # At the filesystem root dirname() returns its argument unchanged;
        # without this check the loop would spin forever.
        if parent_dir == current_dir:
            return None
        current_dir = parent_dir


In [3]:
# Locate the project root once; every workbook path is built relative to it.
project_root_path = find_project_root()

# The three input workbooks live in the "data" folder under the project root.
data_subdirectory = 'data'
file_names = ['附件1.xlsx', '附件2.xlsx', '附件3.xlsx']

# Same order as file_names: [附件1, 附件2, 附件3].
file_path = []
for name in file_names:
    file_path.append(os.path.join(project_root_path, data_subdirectory, name))

In [4]:
# Name each workbook path before loading so the file -> frame mapping is explicit.
man_path, woman_path, food_path = file_path[0], file_path[1], file_path[2]

df_man = pd.read_excel(man_path)      # 附件1.xlsx
df_woman = pd.read_excel(woman_path)  # 附件2.xlsx
df_food = pd.read_excel(food_path)    # 附件3.xlsx

In [8]:
# Inspect the column labels of the food table loaded from the third workbook.
df_food.columns

Index(['Unnamed: 0', '餐次', '食物名称', '主要成分', '食物编码', '可食部 (克/份)', '价格 (元/份)',
       '是否可半份', '能量 (kcal)', '能量 (kJ)', '蛋白质', '脂肪', '碳水化合物', '维生素A', '维生素C',
       '钙', '铁', '锌', '异亮氨酸', '亮氨酸', '赖氨酸', '含硫氨基酸(SAA)_Total',
       '含硫氨基酸(SAA)_蛋氨酸', '含硫氨基酸(SAA)_胱氨酸', '芳香族氨基酸(AAA)_Total',
       '芳香族氨基酸(AAA)_苯丙氨酸', '芳香族氨基酸(AAA)_酪氨酸', '苏氨酸', '色氨酸', '缬氨酸'],
      dtype='object')

In [5]:
# Central configuration for the diet evaluation: targets, acceptable ranges,
# reference patterns and the DataFrame column names used by later cells.
standards = {
    # Minimum number of distinct foods per day.
    "daily_variety_min": 12,
    # Daily energy target per sex (presumably kcal -- TODO confirm unit).
    "energy_target": {"男": 2400, "女": 1900},
    # Accepted deviation from the energy target, in percent.
    "energy_acceptable_range_percent": 10,
    # (low, high) share of daily energy allowed for each meal.
    "meal_ratio_range": {
        "早餐": (0.25, 0.35),
        "午餐": (0.30, 0.40),
        "晚餐": (0.30, 0.40),
    },
    # Micronutrient targets per sex (units not stated here -- TODO confirm).
    "micro_target": {
        "男": {
            "钙": 800,
            "铁": 12,
            "锌": 12.5,
            "维生素A": 800,
            "维生素B1": 1.4,
            "维生素B2": 1.4,
            "维生素C": 100,
        },
        "女": {
            "钙": 800,
            "铁": 20,
            "锌": 7.5,
            "维生素A": 700,
            "维生素B1": 1.2,
            "维生素B2": 1.2,
            "维生素C": 100,
        },
    },
    # (low, high) share of energy supplied by each macronutrient.
    "macro_ratio_range": {
        "蛋白质": (0.10, 0.15),
        "脂肪": (0.20, 0.30),
        "碳水化合物": (0.50, 0.65),
    },
    # Energy conversion factor per macronutrient.
    "energy_conversion": {"蛋白质": 4, "脂肪": 9, "碳水化合物": 4},  # No Fiber
    # Reference amino-acid pattern used for the amino-acid score.
    # NOTE(review): the keys "含硫氨基酸" / "芳香族氨基酸" differ from the
    # corresponding keys in standards["aas_cols"] ("..._Total"); map them
    # explicitly wherever the two dicts are joined.
    "aas_ref_pattern": {
        "异亮氨酸": 40,
        "亮氨酸": 70,
        "赖氨酸": 55,
        "含硫氨基酸": 35,
        "芳香族氨基酸": 60,
        "苏氨酸": 40,
        "色氨酸": 10,
        "缬氨酸": 50,
    },
    # Score bands (low, high) and their verdict labels.
    "aas_eval_criteria": {
        "不合理": (0, 60),
        "不够合理": (60, 80),
        "比较合理": (80, 90),
        "合理": (90, float("inf")),
    },
    "five_major_food_names": [
        "谷、薯类",
        "蔬菜、菌藻、水果类",
        "畜、禽、鱼、蛋类及制品",
        "奶、干豆、坚果、种子类及制品",
        "植物油类",
    ],
    # Two-character code prefix -> one of the five major food groups above.
    "code_prefix_to_major_group": {
        "01": "谷、薯类",
        "02": "谷、薯类",
        "04": "蔬菜、菌藻、水果类",
        "05": "蔬菜、菌藻、水果类",
        "06": "蔬菜、菌藻、水果类",
        "08": "畜、禽、鱼、蛋类及制品",
        "09": "畜、禽、鱼、蛋类及制品",
        "12": "畜、禽、鱼、蛋类及制品",
        "11": "畜、禽、鱼、蛋类及制品",
        "10": "奶、干豆、坚果、种子类及制品",
        "03": "奶、干豆、坚果、种子类及制品",
        "07": "奶、干豆、坚果、种子类及制品",
        "19": "植物油类",
    },
    # NOTE(review): "食物重量(克)" is not among the df_food columns displayed in
    # this notebook (the serving-weight column there is "可食部 (克/份)");
    # confirm which frame this name is meant for.
    "weight_col": "食物重量(克)",
    "meal_col": "餐次",
    # Column flagging whether a food may be served as a half portion. Later
    # cells read standards["half_portion_col"]; without this entry they raise
    # KeyError.
    "half_portion_col": "是否可半份",
    "meal_order": ["早餐", "午餐", "晚餐"],
}
# Amino-acid column names, kept identical to the df_food column labels.
standards["aas_cols"] = {
    "异亮氨酸": "异亮氨酸",
    "亮氨酸": "亮氨酸",
    "赖氨酸": "赖氨酸",
    "含硫氨基酸(SAA)_Total": "含硫氨基酸(SAA)_Total",
    "芳香族氨基酸(AAA)_Total": "芳香族氨基酸(AAA)_Total",
    "苏氨酸": "苏氨酸",
    "色氨酸": "色氨酸",
    "缬氨酸": "缬氨酸",
}
# Micronutrient column names, kept identical to the df_food column labels.
standards["micro_target_cols"] = {
    "钙": "钙", "铁": "铁", "锌": "锌",
    "维生素A": "维生素A", "维生素C": "维生素C"
}




In [6]:
# Re-display the df_food column labels to compare against the names configured in `standards`.
df_food.columns

Index(['Unnamed: 0', '餐次', '食物名称', '主要成分', '食物编码', '可食部 (克/份)', '价格 (元/份)',
       '是否可半份', '能量 (kcal)', '能量 (kJ)', '蛋白质', '脂肪', '碳水化合物', '维生素A', '维生素C',
       '钙', '铁', '锌', '异亮氨酸', '亮氨酸', '赖氨酸', '含硫氨基酸(SAA)_Total',
       '含硫氨基酸(SAA)_蛋氨酸', '含硫氨基酸(SAA)_胱氨酸', '芳香族氨基酸(AAA)_Total',
       '芳香族氨基酸(AAA)_苯丙氨酸', '芳香族氨基酸(AAA)_酪氨酸', '苏氨酸', '色氨酸', '缬氨酸'],
      dtype='object')

In [10]:
# List the micronutrient column names configured in standards["micro_target_cols"].
list(standards["micro_target_cols"].values())

['钙', '铁', '锌', '维生素A', '维生素C']

In [None]:
# Prepare df_food for the optimisation model:
#   1. convert amino-acid columns from g/100g to mg/100g,
#   2. derive per-serving nutrient columns,
#   3. build the lookup lists / dictionaries keyed by food code.
# The cell only overwrites columns it derives itself, so re-running it does
# not create duplicate column labels.

# ---- 1. Amino acids: g/100g -> mg/100g --------------------------------------
# Assumes the source amino-acid columns are expressed in g/100g -- TODO confirm.
amino_acid_cols_g_per_100g = list(standards["aas_cols"].values())

for col in amino_acid_cols_g_per_100g:
    if col in df_food.columns:
        # Store the converted values in a new column; the source is untouched.
        df_food[f'{col}_mg_per_100g'] = df_food[col] * 1000
        print(f"转换单位 {col} 从 g/100g 到 mg/100g.")
    else:
        # Missing columns are reported and skipped (no values are imputed).
        print(f"Warning: Amino acid column '{col}' not found in df_food.")

# ---- 2. Per-serving nutrient content ----------------------------------------
# Energy, macronutrients and the configured micronutrients (per-100g columns).
nutrient_cols_per_100g = ['能量 (kcal)', '蛋白质', '脂肪', '碳水化合物'] + list(standards["micro_target_cols"].values())

# Converted amino-acid columns (mg/100g) that were actually created above.
amino_acid_cols_mg_per_100g = [
    f'{col}_mg_per_100g'
    for col in amino_acid_cols_g_per_100g
    if f'{col}_mg_per_100g' in df_food.columns
]

all_nutrient_cols_per_100g = nutrient_cols_per_100g + amino_acid_cols_mg_per_100g

# Resolve the serving-weight column. The configured name is preferred; when it
# is absent from df_food, fall back to the edible-portion column that the food
# table displayed earlier in this notebook carries.
weight_col = standards["weight_col"]
if weight_col not in df_food.columns:
    weight_col = '可食部 (克/份)'
if weight_col not in df_food.columns:
    raise KeyError(
        f"No serving-weight column found in df_food: tried "
        f"{standards['weight_col']!r} and '可食部 (克/份)'."
    )

# Half-portion flag column; default to the df_food label when not configured.
half_portion_col = standards.get("half_portion_col", '是否可半份')

# Grams per serving divided by 100 turns a per-100g value into a per-serving one.
serving_factor = df_food[weight_col] / 100

for col_100g in all_nutrient_cols_per_100g:
    if col_100g not in df_food.columns:
        print(f"Warning: Nutrient column '{col_100g}' not found in df_food.")
        continue

    if col_100g.endswith('_mg_per_100g'):
        # Amino acids: '<name>_mg_per_100g' -> '<name>_mg_per_serving'.
        col_serving = col_100g[:-len('_mg_per_100g')] + '_mg_per_serving'
    else:
        # Strip a unit suffix such as ' (kcal)' before appending '_per_serving'.
        col_serving = col_100g.replace(' (kcal)', '').replace(' (g)', '').replace(' (mg)', '').replace(' (μg)', '') + '_per_serving'

    # Direct assignment (instead of create-then-rename) keeps re-runs idempotent.
    df_food[col_serving] = df_food[col_100g] * serving_factor

print("\n已计算每份食物的营养素含量 (per_serving)。")
print("新增列示例:", [col for col in df_food.columns if '_per_serving' in col][:5])

# ---- 3. Lookup structures for the model -------------------------------------
food_ids = df_food['食物编码'].tolist()
food_names = df_food['食物名称'].tolist()
food_meals = df_food[standards["meal_col"]].tolist()
food_weights_per_serving = df_food[weight_col].tolist()
food_half_portion_allowed = df_food[half_portion_col].tolist()

# Index by food code once and reuse the indexed frame below.
# NOTE(review): to_dict(orient='index') requires unique food codes, and the
# per-column to_dict() calls keep only the last row for a repeated code --
# confirm that '食物编码' is unique across meals.
food_indexed = df_food.set_index('食物编码')

# food code -> {column: value}
food_info_dict = food_indexed.to_dict(orient='index')

# nutrient name -> {food code: amount per serving}
nutrient_content_per_serving = {
    col.replace('_per_serving', ''): food_indexed[col].to_dict()
    for col in df_food.columns
    if '_per_serving' in col
}

# food code -> True when the half-portion flag equals '是'
is_half_portion_allowed = food_indexed[half_portion_col].eq('是').to_dict()

# meal -> list of food codes, restricted to the meals listed in meal_order
food_by_meal = {meal: [] for meal in standards["meal_order"]}
for meal, food_id in zip(df_food[standards["meal_col"]], df_food['食物编码']):
    if meal in food_by_meal:
        food_by_meal[meal].append(food_id)

['Unnamed: 0',
 '餐次',
 '食物名称',
 '主要成分',
 '食物编码',
 '可食部(克/份)',
 '食用份数',
 '碳水化合物 (g/100g)',
 '蛋白质 (g/100g)',
 '脂肪 (g/100g)',
 '钙 (mg/100g)',
 '铁 (mg/100g)',
 '锌 (mg/100g)',
 '维生素A (μg/100g)',
 '维生素B1 (mg/100g)',
 '维生素B2 (mg/100g)',
 '维生素C (mg/100g)',
 '异亮氨酸 (g/100g)',
 '亮氨酸 (g/100g)',
 '赖氨酸 (g/100g)',
 '含硫氨基酸 (g/100g)',
 '芳香族氨基酸 (g/100g)',
 '苏氨酸 (g/100g)',
 '色氨酸 (g/100g)',
 '缬氨酸 (g/100g)']