# OptKnock教程：使用COBRApy进行代谢工程优化

本教程演示如何使用COBRApy实现OptKnock算法，用于代谢工程中的基因敲除策略优化。OptKnock是一种双层优化框架，旨在识别能够最大化目标化学品产量的基因敲除组合，同时维持细胞生长。

## 1. OptKnock算法简介

OptKnock是一种计算方法，主要用于：
- 识别最大化化学品产量的基因敲除策略
- 通过双层优化维持细胞生长
- 寻找最优的代谢工程策略

双层优化问题：
- **外层优化**：在基因敲除约束下最大化化学品产量
- **内层优化**：在敲除约束下最大化生物质生长

In [None]:
# 导入必要的库
import cobra
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from cobra.flux_analysis import flux_variability_analysis as FVA
import seaborn as sns

# Apply the plotting style FIRST. Matplotlib style sheets reset rcParams
# (the seaborn sheets set their own `font.sans-serif`), so any font
# configuration done before `plt.style.use` would be overwritten.
# 'seaborn-v0_8' exists from matplotlib 3.6 on; older releases call the same
# sheet 'seaborn', so fall back to whichever one is installed.
for _style_name in ('seaborn-v0_8', 'seaborn'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_palette("husl")

# Fonts able to render the Chinese axis labels used throughout the notebook;
# matplotlib picks the first family in the list that is installed.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
# Draw the minus sign as ASCII '-' so it does not depend on glyph coverage.
plt.rcParams['axes.unicode_minus'] = False

print("COBRApy版本:", cobra.__version__)
print("所需库导入成功！")

## 2. 加载和准备代谢模型

我们将使用iJO1366模型，这是一个完整的大肠杆菌基因组尺度代谢模型，包含1366个基因和2251个代谢反应，非常适合进行复杂的OptKnock分析。该模型由Orth等人在2011年构建，是目前最常用的E. coli代谢模型之一。

In [None]:
# Load the iJO1366 model, degrading to smaller models when it is unavailable:
#   1. iJO1366.xml from the working directory,
#   2. the E. coli core ("textbook") model bundled with COBRApy,
#   3. a small hand-written network built in _build_simplified_model().
import os

model_path = "iJO1366.xml"


def _load_textbook_model():
    """Return the E. coli core ("textbook") model bundled with COBRApy.

    Newer COBRApy releases expose it through ``cobra.io.load_model``; older
    ones only provide ``cobra.test.create_test_model``.
    """
    if hasattr(cobra.io, "load_model"):
        return cobra.io.load_model("textbook")
    return cobra.test.create_test_model("textbook")


def _build_simplified_model():
    """Build the last-resort fallback network.

    The network contains glucose uptake, glycolysis, the TCA cycle, oxygen
    and succinate exchange, ATP synthesis/maintenance and a lumped biomass
    reaction. Every metabolite is created before any reaction refers to it.

    NOTE(review): no reaction in this network links succ_c to succ_e or
    glc__D_e to glc__D_c, so the toy model is only a structural placeholder;
    confirm feasibility before relying on its fluxes.

    Returns:
        cobra.Model: model with objective "BIOMASS_Ecoli_core".
    """
    simplified = cobra.Model("Ecoli_simplified")

    # (id, compartment, name). The leading underscore of "_13dpg_c",
    # "_3pg_c" and "_2pg_c" is kept from the original identifiers.
    metabolite_specs = [
        ("glc__D_e", "e", "D-Glucose"),
        ("glc__D_c", "c", "D-Glucose"),
        ("succ_e", "e", "Succinate"),
        ("succ_c", "c", "Succinate"),
        ("o2_e", "e", "Oxygen"),
        ("o2_c", "c", "Oxygen"),
        ("co2_e", "e", "CO2"),
        ("co2_c", "c", "CO2"),
        ("atp_c", "c", "ATP"),
        ("adp_c", "c", "ADP"),
        ("pi_c", "c", "Phosphate"),
        ("h_c", "c", "H+"),
        ("h2o_c", "c", "H2O"),
        ("biomass_c", "c", "Biomass"),
        ("nad_c", "c", "NAD+"),
        ("nadh_c", "c", "NADH"),
        ("pep_c", "c", "Phosphoenolpyruvate"),
        ("pyr_c", "c", "Pyruvate"),
        ("glc6p_c", "c", "Glucose-6-phosphate"),
        ("f6p_c", "c", "Fructose-6-phosphate"),
        ("fdp_c", "c", "Fructose-1,6-bisphosphate"),
        ("dhap_c", "c", "Dihydroxyacetone phosphate"),
        ("g3p_c", "c", "Glyceraldehyde-3-phosphate"),
        ("_13dpg_c", "c", "1,3-Bisphosphoglycerate"),
        ("_3pg_c", "c", "3-Phosphoglycerate"),
        ("_2pg_c", "c", "2-Phosphoglycerate"),
        ("oaa_c", "c", "Oxaloacetate"),
        ("cit_c", "c", "Citrate"),
        ("icit_c", "c", "Isocitrate"),
        ("akg_c", "c", "Alpha-ketoglutarate"),
        ("succoa_c", "c", "Succinyl-CoA"),
        ("fum_c", "c", "Fumarate"),
        ("mal__L_c", "c", "L-Malate"),
        ("coa_c", "c", "Coenzyme A"),
        # Used by PDH and CS but missing from the original metabolite list.
        ("accoa_c", "c", "Acetyl-CoA"),
        ("q8_c", "c", "Ubiquinone-8"),
        ("q8h2_c", "c", "Ubiquinol-8"),
    ]
    mets = {
        met_id: cobra.Metabolite(met_id, compartment=compartment, name=name)
        for met_id, compartment, name in metabolite_specs
    }
    simplified.add_metabolites(list(mets.values()))

    # (id, name, stoichiometry, lower bound or None to keep the default).
    reaction_specs = [
        # Exchange and uptake of glucose (PTS system)
        ("EX_glc__D_e", "Glucose exchange", {"glc__D_e": -1}, -10),
        ("GLCpts", "Glucose PTS transport",
         {"glc__D_e": -1, "pep_c": -1, "glc6p_c": 1, "pyr_c": 1}, None),
        # Glycolysis
        ("PGI", "Glucose-6-phosphate isomerase",
         {"glc6p_c": -1, "f6p_c": 1}, None),
        ("PFK", "Phosphofructokinase",
         {"f6p_c": -1, "atp_c": -1, "fdp_c": 1, "adp_c": 1, "h_c": 1}, None),
        ("FBA", "Fructose-bisphosphate aldolase",
         {"fdp_c": -1, "dhap_c": 1, "g3p_c": 1}, None),
        ("GAPD", "Glyceraldehyde-3-phosphate dehydrogenase",
         {"g3p_c": -1, "nad_c": -1, "pi_c": -1,
          "_13dpg_c": 1, "nadh_c": 1, "h_c": 1}, None),
        ("PGK", "Phosphoglycerate kinase",
         {"_13dpg_c": -1, "adp_c": -1, "_3pg_c": 1, "atp_c": 1}, None),
        ("PGM", "Phosphoglycerate mutase",
         {"_3pg_c": -1, "_2pg_c": 1}, None),
        ("ENO", "Enolase",
         {"_2pg_c": -1, "h2o_c": 1, "pep_c": 1}, None),
        ("PYK", "Pyruvate kinase",
         {"pep_c": -1, "adp_c": -1, "h_c": -1, "pyr_c": 1, "atp_c": 1}, None),
        # Pyruvate dehydrogenase and TCA cycle
        ("PDH", "Pyruvate dehydrogenase",
         {"pyr_c": -1, "nad_c": -1, "coa_c": -1,
          "accoa_c": 1, "co2_c": 1, "nadh_c": 1}, None),
        ("CS", "Citrate synthase",
         {"accoa_c": -1, "oaa_c": -1, "h2o_c": -1,
          "cit_c": 1, "coa_c": 1, "h_c": 1}, None),
        ("ACONT", "Aconitase", {"cit_c": -1, "icit_c": 1}, None),
        ("ICDHyr", "Isocitrate dehydrogenase",
         {"icit_c": -1, "nad_c": -1, "akg_c": 1, "co2_c": 1, "nadh_c": 1}, None),
        ("AKGDH", "Alpha-ketoglutarate dehydrogenase",
         {"akg_c": -1, "nad_c": -1, "coa_c": -1,
          "succoa_c": 1, "co2_c": 1, "nadh_c": 1}, None),
        ("SUCOAS", "Succinyl-CoA synthetase",
         {"succoa_c": -1, "adp_c": -1, "pi_c": -1,
          "succ_c": 1, "atp_c": 1, "coa_c": 1}, None),
        ("SUCDi", "Succinate dehydrogenase",
         {"succ_c": -1, "q8_c": -1, "fum_c": 1, "q8h2_c": 1}, None),
        ("FUM", "Fumarase",
         {"fum_c": -1, "h2o_c": -1, "mal__L_c": 1}, None),
        ("MDH", "Malate dehydrogenase",
         {"mal__L_c": -1, "nad_c": -1, "oaa_c": 1, "nadh_c": 1, "h_c": 1}, None),
        # Oxygen uptake and transport
        ("EX_o2_e", "Oxygen exchange", {"o2_e": -1}, -20),
        ("O2t", "Oxygen transport", {"o2_e": -1, "o2_c": 1}, None),
        # Succinate secretion: written as "succ_e -->" like the other
        # exchanges, so a positive flux removes succinate from the medium
        # side (the original +1 coefficient made it a succinate source).
        ("EX_succ_e", "Succinate exchange", {"succ_e": -1}, 0),
        # Energy metabolism
        ("ATPS", "ATP synthase",
         {"adp_c": -1, "pi_c": -1, "h_c": -1, "atp_c": 1, "h2o_c": 1}, None),
        ("ATPM", "ATP maintenance",
         {"atp_c": -1, "adp_c": 1, "pi_c": 1, "h_c": 1}, 8.39),
        # Lumped biomass reaction
        ("BIOMASS_Ecoli_core", "Biomass production",
         {"glc__D_c": -0.5, "atp_c": -0.3, "adp_c": 0.3, "pi_c": 0.3,
          "nadh_c": -0.2, "nad_c": 0.2, "biomass_c": 1}, None),
    ]
    reactions = []
    for rxn_id, rxn_name, stoichiometry, lower_bound in reaction_specs:
        rxn = cobra.Reaction(rxn_id, name=rxn_name)
        rxn.add_metabolites(
            {mets[met_id]: coeff for met_id, coeff in stoichiometry.items()}
        )
        if lower_bound is not None:
            rxn.lower_bound = lower_bound
        reactions.append(rxn)
    simplified.add_reactions(reactions)

    simplified.objective = "BIOMASS_Ecoli_core"
    return simplified


# Deliberately broad: any loading failure should fall through to the
# hand-written network instead of aborting the tutorial.
try:
    if os.path.exists(model_path):
        print(f"正在加载 {model_path}...")
        model = cobra.io.read_sbml_model(model_path)
        print("✓ 成功加载iJO1366模型")
    else:
        print(f"⚠️ 未找到 {model_path}，使用测试模型")
        model = _load_textbook_model()
        print("✓ 成功加载测试模型")
except Exception as e:
    print(f"⚠️ 加载模型时出错: {e}")
    print("使用简化的E. coli模型")
    model = _build_simplified_model()

# Report the size of the loaded model
n_reactions = len(model.reactions)
n_metabolites = len(model.metabolites)
n_genes = len(model.genes)
print(f"模型名称: {model.id}")
print(f"反应数量: {n_reactions}")
print(f"代谢物数量: {n_metabolites}")
print(f"基因数量: {n_genes}")

# List the first 15 reactions; the slice alone caps the output.
print("\n模型中的关键反应:")
for reaction in model.reactions[:15]:
    print(f"{reaction.id}: {reaction.name}")
    print(f"  方程式: {reaction.build_reaction_string()}")
    print(f"  边界: {reaction.lower_bound} - {reaction.upper_bound}")
    print()

# Define the production target.
# With iJO1366 several target products can be analysed.

def analyze_production_targets(model):
    """Group the model's secretable exchange reactions by product class.

    Exchange reactions whose id starts with "EX_" and whose upper bound is
    positive (i.e. the metabolite can be exported) are sorted into coarse
    categories by substring matching on the metabolite name. The categories
    are printed (at most 10 entries each) and returned.

    Args:
        model: cobra.Model to inspect.

    Returns:
        dict mapping category name to a list of (reaction id, metabolite
        name) tuples.
    """
    organic_acid_keys = ('acet', 'lact', 'succ', 'fum', 'mal', 'pyruv')
    amino_acid_keys = (
        'ala', 'arg', 'asn', 'asp', 'cys', 'gln', 'glu', 'gly', 'his', 'ile',
        'leu', 'lys', 'met', 'phe', 'pro', 'ser', 'thr', 'trp', 'tyr', 'val',
    )
    alcohol_keys = ('eth', 'but', 'propanol')

    print("=== 可用的产物目标 ===")

    # A product must be secretable: positive exchange flux means export, so
    # the relevant bound is the upper one (lower_bound < 0 marks uptake).
    exchange_reactions = [
        rxn for rxn in model.exchanges
        if rxn.id.startswith('EX_') and rxn.upper_bound > 0
    ]

    products = {
        '有机酸': [],
        '氨基酸': [],
        '醇类': [],
        '其他': [],
    }

    for rxn in exchange_reactions:
        # `rxn.metabolites` is a dict keyed by Metabolite, so take its first
        # key instead of indexing with [0].
        if rxn.metabolites:
            met_name = next(iter(rxn.metabolites)).name
        else:
            met_name = rxn.id.replace('EX_', '')
        lowered = met_name.lower()

        if any(keyword in lowered for keyword in organic_acid_keys):
            products['有机酸'].append((rxn.id, met_name))
        elif any(keyword in lowered for keyword in amino_acid_keys):
            products['氨基酸'].append((rxn.id, met_name))
        elif any(keyword in lowered for keyword in alcohol_keys):
            products['醇类'].append((rxn.id, met_name))
        else:
            products['其他'].append((rxn.id, met_name))

    # Print each non-empty category, capped at 10 entries.
    for category, items in products.items():
        if items:
            print(f"\n{category}:")
            for rxn_id, name in items[:10]:
                print(f"  {rxn_id}: {name}")
            if len(items) > 10:
                print(f"  ... 还有 {len(items)-10} 个")

    return products


# Survey the available products
available_products = analyze_production_targets(model)

# Candidate target reactions (pick a different key to change the product)
target_options = {
    '琥珀酸': 'EX_succ_e',
    '乙酸': 'EX_ac_e',
    '乳酸': 'EX_lac__L_e',
    '乙醇': 'EX_etoh_e',
    '甲酸': 'EX_for_e',
}

# Succinate is the default target
target_reaction = target_options['琥珀酸']

print(f"\n选择的目标反应: {target_reaction}")

# Verify that the chosen target exists in the loaded model
if target_reaction in model.reactions:
    target = model.reactions.get_by_id(target_reaction)
    print(f"目标产物: {target_reaction}")
    print(f"方程式: {target.build_reaction_string()}")
    print(f"当前边界: {target.lower_bound} - {target.upper_bound}")
else:
    print(f"目标反应 {target_reaction} 在模型中未找到")
    print("请选择其他目标产物")

    # Fall back to the first exchange reaction whose id contains "ac"
    for rxn in model.exchanges:
        if rxn.id.startswith('EX_') and "ac" in rxn.id.lower():
            target_reaction = rxn.id
            print(f"使用替代目标: {target_reaction}")
            break

In [None]:
# Wild-type baseline: maximise biomass on an untouched copy of the model.
model_wt = model.copy()

# Locate the biomass reaction. `biomass_rxn` is kept as a reaction *id*
# (string): flux Series are indexed by id, and an id can be used with any
# copy of the model, whereas a Reaction object belongs to one model only.
biomass_candidates = [rxn for rxn in model_wt.reactions if "biomass" in rxn.id.lower()]
if biomass_candidates:
    # Prefer the candidate that is part of the current objective; max()
    # returns the first candidate when all coefficients are equal.
    biomass_rxn = max(biomass_candidates, key=lambda rxn: rxn.objective_coefficient).id
    print(f"找到生物质反应: {biomass_rxn}")
else:
    # No id mentions "biomass": use the reaction with the largest objective coefficient.
    biomass_rxn = max(model_wt.reactions, key=lambda rxn: rxn.objective_coefficient).id
    print(f"使用目标系数最大的反应: {biomass_rxn}")

model_wt.objective = biomass_rxn

# FBA gives the baseline growth rate
solution_wt = model_wt.optimize()
baseline_growth = solution_wt.objective_value

# Flux through the target exchange at the growth optimum (0 if absent)
if target_reaction in solution_wt.fluxes.index:
    baseline_succinate = solution_wt.fluxes[target_reaction]
else:
    baseline_succinate = 0

print("=== 野生型基线分析 ===")
print(f"生长速率: {baseline_growth:.4f} h⁻¹")
print(f"琥珀酸生产: {baseline_succinate:.4f} mmol/gDW/h")
print(f"生物质通量: {solution_wt.fluxes[biomass_rxn]:.4f}")

# Show a few key fluxes, skipping ids the model does not contain
print("\n关键通量:")
key_fluxes = ['EX_glc__D_e', 'EX_o2_e', 'ATPM']
for flux_id in key_fluxes:
    if flux_id in solution_wt.fluxes.index:
        print(f"  {flux_id}: {solution_wt.fluxes[flux_id]:.4f}")

## 4. 基线分析（野生型）

让我们在应用OptKnock之前分析野生型菌株的能力。

In [None]:
# Wild-type baseline on a copy of the model
model_wt = model.copy()

# Use the core biomass reaction when present; otherwise take the first
# reaction whose id contains "biomass" (case-insensitive), keeping the
# default id if nothing matches.
biomass_rxn = "BIOMASS_Ecoli_core"
if biomass_rxn not in model_wt.reactions:
    biomass_rxn = next(
        (rxn.id for rxn in model_wt.reactions if "biomass" in rxn.id.lower()),
        biomass_rxn,
    )

model_wt.objective = biomass_rxn

# FBA for the baseline growth rate
solution_wt = model_wt.optimize()
baseline_growth = solution_wt.objective_value

# Target flux at the growth optimum, 0 when the reaction is not in the model
baseline_succinate = (
    solution_wt.fluxes[target_reaction]
    if target_reaction in solution_wt.fluxes.index
    else 0
)

print(f"=== 野生型基线分析 ===")
print(f"生长速率: {baseline_growth:.4f} h⁻¹")
print(f"琥珀酸生产: {baseline_succinate:.4f} mmol/gDW/h")
print(f"生物质通量: {solution_wt.fluxes[biomass_rxn]:.4f}")

In [None]:
## 9. 总结和建议

让我们总结我们的发现并提供代谢工程建议。

### iJO1366模型特殊考虑

iJO1366是一个完整的大肠杆菌基因组尺度代谢模型，具有以下特点：
- 包含1366个基因和2583个反应（其中2251个为代谢反应，其余为交换反应等）
- 涵盖全面的代谢网络
- 需要更智能的候选反应选择策略
- 计算复杂度较高，需要优化算法

# Summary of the results (iJO1366 flavour).
# Reads baseline_growth, baseline_succinate, model and results_df, which are
# assigned in other cells of this notebook.
print("=== OPTKNOCK分析总结 ===")
print()

print("野生型基线 (iJO1366):")
print(f"  生长速率: {baseline_growth:.4f} h⁻¹")
print(f"  琥珀酸生产: {baseline_succinate:.4f} mmol/gDW/h")
print(f"  模型规模: {len(model.reactions)} 个反应, {len(model.metabolites)} 个代谢物")
print()

if len(results_df) > 0:
    print("最佳单敲除策略:")
    # results_df is expected to be sorted by production rate, best first.
    best = results_df.iloc[0]
    # Denominators are floored at 1e-6 to avoid division by zero.
    print(f"  敲除: {best['knockouts'][0]} ({best['reaction_name']})")
    print(f"  生长速率: {best['growth_rate']:.4f} h⁻¹ ({best['growth_rate']/max(baseline_growth, 1e-6)-1:.1%} 变化)")
    print(f"  生产速率: {best['production_rate']:.4f} mmol/gDW/h ({best['production_rate']/max(baseline_succinate, 1e-6)-1:.1%} 变化)")
    print(f"  生产效率: {best['production_rate']/max(best['growth_rate'], 1e-6):.4f}")
    print()

    # Engineering advice specific to iJO1366
    print("=== 基于iJO1366的工程建议 ===")
    print()
    print("1. 模型特点考虑:")
    # iJO1366 is a purely stoichiometric reconstruction; the original text
    # wrongly claimed it contains regulatory and enzyme-capacity constraints.
    print("   - iJO1366是基于化学计量的基因组尺度代谢模型")
    print("   - 不包含转录调控和酶容量约束")
    print("   - 预测结果需要结合实验数据验证")
    print()

    print("2. 基因敲除策略:")
    print(f"   - 目标基因: {best['knockouts'][0]}")
    print(f"   - 预期改进: 生产提高 {best['production_rate']/max(baseline_succinate, 1e-6)-1:.1%}")
    print(f"   - 生长代价: 生长速率变化 {best['growth_rate']/max(baseline_growth, 1e-6)-1:.1%}")
    print()

    print("3. 实施建议:")
    print("   - 使用CRISPR-Cas9进行精确基因编辑")
    print("   - 验证敲除对全局代谢的影响")
    print("   - 在iJO1366中模拟下游效应")
    print("   - 结合组学数据进行验证")
    print()

    print("4. 进一步优化策略:")
    print("   - 测试组合敲除策略")
    print("   - 考虑动态调控策略")
    print("   - 整合转录组学数据")
    print("   - 使用机器学习方法优化")
    print()

    print("5. 计算考虑:")
    print("   - iJO1366规模大，计算复杂度高")
    print("   - 建议使用并行计算加速")
    print("   - 可使用云计算资源")
    print("   - 考虑使用商业OptKnock软件")

else:
    print("未识别出可行的敲除策略。")
    print("iJO1366模型建议:")
    print("1. 尝试不同的目标代谢物 (如乳酸、乙酸等)")
    # GEMSiRV is a modelling platform, not a strain-design algorithm.
    print("2. 使用更高级的菌株设计算法 (如OptGene, RobustKnock)")
    print("3. 考虑多目标优化方法")
    print("4. 结合实验数据约束模型")
    # CellNetOptimizer targets signalling networks; CellNetAnalyzer is the
    # metabolic-network tool.
    print("5. 使用专业软件如COBRA Toolbox或CellNetAnalyzer")

print("\n=== 模型验证建议 ===")
print("1. 文献验证:")
# The iJO1366 paper appeared in Molecular Systems Biology, not Nature Protocols.
print("   - Orth et al. (2011) Molecular Systems Biology")
print("   - 已发表的敲除研究对比")
print("   - 实验数据验证")
print()
print("2. 计算验证:")
print("   - 通量平衡分析一致性检查")
print("   - 热力学可行性验证")
print("   - 基因必要性分析")

In [None]:
## 10. 参考文献和进一步阅读

### 关键参考文献：
1. Burgard, A. P., Pharkya, P., & Maranas, C. D. (2003). OptKnock: a bi-level optimization framework for identifying gene knockout strategies for microbial strain optimization. *Biotechnology and Bioengineering*, 84(6), 647-657.

2. Orth, J. D., Conrad, T. M., Na, J., Lerman, J. A., Nam, H., Feist, A. M., & Palsson, B. Ø. (2011). A comprehensive genome-scale reconstruction of Escherichia coli metabolism—2011. *Molecular Systems Biology*, 7(1), 535.

3. Orth, J. D., Fleming, R. M. T., & Palsson, B. Ø. (2010). Reconstruction and use of microbial metabolic networks: the core Escherichia coli metabolic model as an educational guide. *EcoSal Plus*, 4(1).

4. Ebrahim, A., Lerman, J. A., Palsson, B. Ø., & Hyduke, D. R. (2013). COBRApy: Constraints-based reconstruction and analysis for python. *BMC Systems Biology*, 7(1), 74.

### iJO1366模型特定资源：
1. **原始文献**: Orth et al. (2011) - iJO1366模型的构建和验证
2. **模型文件**: BiGG Models数据库 (http://bigg.ucsd.edu/models/iJO1366)
3. **使用指南**: Schellenberger et al. (2011) *Nature Protocols* - COBRA Toolbox v2.0 的详细使用说明
4. **更新版本**: 后续的E. coli模型改进版本

### OptKnock算法改进及相关工具：
1. **OptForce**: Ranganathan et al. (2010) - 考虑反应通量强制
2. **GEMSiRV**: Liao et al. (2012) - 基因组尺度代谢模型的模拟、重建与可视化平台
3. **FastPros**: Ohno et al. (2014) - 快速筛选反应敲除策略
4. **ML-based**: 机器学习辅助的代谢工程策略

### 软件工具：
- **COBRA Toolbox**: MATLAB版本的完整分析工具包
- **COBRApy**: Python版本的约束基础建模工具
- **CellNetAnalyzer**: 网络分析和可视化工具
- **RAVEN Toolbox**: MATLAB重建和分析工具

### 中文资源：
- 《代谢工程》教材 - 相关章节
- 《系统生物学》中的代谢网络分析
- 生物信息学数据库和工具
- 中国生物信息学相关研究论文

### 实用网站：
- BiGG Models: http://bigg.ucsd.edu/
- MetaNetX: https://www.metanetx.org/
- KEGG: https://www.genome.jp/kegg/
- BioModels: https://www.ebi.ac.uk/biomodels/

In [None]:
# Run the simplified single-knockout screen and tabulate its results
print("=== 运行OptKnock分析 ===")
optknock_results = simple_optknock(model, target_reaction, biomass_rxn, max_knockouts=1)

# One row per knockout strategy
results_df = pd.DataFrame(optknock_results)

if len(results_df) == 0:
    print("未找到可行的敲除策略。")
else:
    # Best producers first
    results_df = results_df.sort_values('production_rate', ascending=False)

    print(f"\n前10个敲除策略:")
    for rank, (_, row) in enumerate(results_df.head(10).iterrows(), start=1):
        # Growth is floored at 1e-6 so the ratio never divides by zero.
        ratio = row['production_rate']/max(row['growth_rate'], 1e-6)
        print(f"{rank}. 敲除 {row['knockouts'][0]} ({row['reaction_name']})")
        print(f"   生产: {row['production_rate']:.4f} mmol/gDW/h")
        print(f"   生长速率: {row['growth_rate']:.4f} h⁻¹")
        print(f"   生产/生长比: {ratio:.4f}")
        print()

## 6. 分析OptKnock结果

让我们可视化结果以理解生长和产量之间的权衡。

In [None]:
# Visualise the single-knockout screen: production vs growth, the top
# producers, the best production/growth ratios and the growth distribution.
def _knockout_label(knockouts, max_chars=15):
    """Return a compact tick label for a list of knocked-out reaction ids.

    The ids are joined with '+'; an ellipsis is appended only when the label
    had to be truncated to `max_chars` characters.
    """
    label = "+".join(knockouts)
    return label if len(label) <= max_chars else f"{label[:max_chars]}..."


if len(results_df) > 0:
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

    # Panel 1: production vs growth rate, coloured by row order
    ax1.scatter(results_df['growth_rate'], results_df['production_rate'],
                alpha=0.6, s=50, c=range(len(results_df)), cmap='viridis')
    ax1.axhline(y=baseline_succinate, color='r', linestyle='--', label='野生型')
    ax1.axvline(x=baseline_growth, color='r', linestyle='--', alpha=0.7)
    ax1.set_xlabel('生长速率 (h⁻¹)')
    ax1.set_ylabel('琥珀酸生产 (mmol/gDW/h)')
    ax1.set_title('生产 vs 生长速率')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Panel 2: production rate of the first 10 rows
    top_10 = results_df.head(10)
    ax2.barh(range(len(top_10)), top_10['production_rate'])
    ax2.set_yticks(range(len(top_10)))
    # Each 'knockouts' entry is a list of ids; label with the ids themselves
    # rather than the repr of a list slice.
    ax2.set_yticklabels([_knockout_label(ko) for ko in top_10['knockouts']], fontsize=8)
    ax2.set_xlabel('琥珀酸生产 (mmol/gDW/h)')
    ax2.set_title('前10个敲除策略')
    ax2.axvline(x=baseline_succinate, color='r', linestyle='--', label='野生型')
    ax2.legend()

    # Panel 3: production/growth ratio. Growth is floored at 1e-6, matching
    # the other cells, so zero-growth strains do not yield inf/NaN ratios.
    results_df['ratio'] = results_df['production_rate'] / results_df['growth_rate'].clip(lower=1e-6)
    top_ratio = results_df.nlargest(10, 'ratio')
    ax3.barh(range(len(top_ratio)), top_ratio['ratio'])
    ax3.set_yticks(range(len(top_ratio)))
    ax3.set_yticklabels([_knockout_label(ko) for ko in top_ratio['knockouts']], fontsize=8)
    ax3.set_xlabel('生产/生长比')
    ax3.set_title('最佳生产/生长比')
    ax3.grid(True, alpha=0.3)

    # Panel 4: distribution of growth rates across all strategies
    ax4.hist(results_df['growth_rate'], bins=15, alpha=0.7, edgecolor='black')
    ax4.axvline(x=baseline_growth, color='r', linestyle='--', label='野生型')
    ax4.set_xlabel('生长速率 (h⁻¹)')
    ax4.set_ylabel('频数')
    ax4.set_title('生长速率分布')
    ax4.legend()
    ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
else:
    print("没有结果可视化。")

## 7. 最佳策略详细分析

让我们详细检查最佳敲除策略。

In [None]:
# Inspect the top-ranked single knockout in detail.
if len(results_df) > 0:
    # results_df is sorted by production rate, so row 0 is the best strategy.
    best_strategy = results_df.iloc[0]
    best_knockout = best_strategy['knockouts'][0]

    print(f"=== 最佳策略分析 ===")
    print(f"敲除反应: {best_knockout} ({best_strategy['reaction_name']})")
    print(f"生产速率: {best_strategy['production_rate']:.4f} mmol/gDW/h")
    print(f"生长速率: {best_strategy['growth_rate']:.4f} h⁻¹")
    # Baseline is floored at 1e-6 to avoid division by zero.
    print(f"相比野生型改进: {best_strategy['production_rate']/max(baseline_succinate, 1e-6):.2f}倍")

    # Build the knockout strain on a copy so `model` stays wild-type.
    model_best = model.copy()
    best_rxn = model_best.reactions.get_by_id(best_knockout)
    # Remember the bounds BEFORE the knockout; reading them afterwards would
    # always report [0, 0].
    original_lower, original_upper = best_rxn.bounds
    best_rxn.bounds = (0, 0)

    print(f"\n敲除反应详细信息:")
    print(f"ID: {best_rxn.id}")
    print(f"名称: {best_rxn.name}")
    print(f"方程式: {best_rxn.build_reaction_string()}")
    print(f"原始边界: [{original_lower}, {original_upper}]")

    # FVA on the knockout strain at >= 90 % of its optimal growth.
    print("\n=== 通量变异性分析（最佳菌株） ===")
    try:
        model_best.objective = biomass_rxn
        fva_best = FVA(model_best, fraction_of_optimum=0.9)
    except Exception as e:
        # Best effort: an FVA failure should not abort the notebook.
        print(f"FVA分析出错: {e}")
    else:
        if target_reaction in fva_best.index:
            target_fva_best = fva_best.loc[target_reaction]
            print(f"琥珀酸生产范围: {target_fva_best['minimum']:.4f} 到 {target_fva_best['maximum']:.4f} mmol/gDW/h")

    # Comparison with the wild type. It does not depend on the FVA result,
    # so it is printed even when FVA failed.
    print(f"\n=== 与野生型比较 ===")
    print(f"{'指标':<20} {'野生型':<15} {'最佳菌株':<15} {'改进':<15}")
    print(f"{'-'*60}")
    growth_change = best_strategy['growth_rate']/baseline_growth-1 if baseline_growth > 0 else 0
    production_change = best_strategy['production_rate']/max(baseline_succinate, 1e-6)-1

    print(f"{'生长速率':<20} {baseline_growth:<15.4f} {best_strategy['growth_rate']:<15.4f} {growth_change:<15.2%}")
    print(f"{'琥珀酸生产':<20} {baseline_succinate:<15.4f} {best_strategy['production_rate']:<15.4f} {production_change:<15.2%}")
else:
    print("没有最佳策略可分析。")

## 8. 高级分析：双敲除

让我们探索一些有前景的双敲除策略。

In [None]:
# Screen promising double knockouts
def test_double_knockouts(model, target_reaction, biomass_reaction, candidate_reactions,
                          num_tests=15, max_candidates=4):
    """Evaluate pairwise knockouts among the leading candidate reactions.

    For every pair drawn from the first `max_candidates` candidates, both
    reactions are blocked, growth is maximised and, if the strain can grow,
    the target reaction is maximised. Only solutions with solver status
    "optimal" are accepted.

    NOTE: production is maximised without a growth constraint, so
    'production_rate' is the strain's maximum production capacity, not its
    production at optimal growth.

    Args:
        model: cobra.Model to test; it is restored to its original state
            after every pair.
        target_reaction: id of the production (exchange) reaction.
        biomass_reaction: id of the biomass reaction.
        candidate_reactions: reaction ids ordered from most to least promising.
        num_tests: maximum number of viable strategies to collect.
        max_candidates: how many leading candidates are combined pairwise.

    Returns:
        list of dicts with keys 'knockouts' ([id1, id2]), 'production_rate'
        and 'growth_rate'; may be empty.
    """
    from itertools import combinations

    def _optimal_value(solution):
        """Return the objective value of an optimal solution, else 0.0."""
        value = solution.objective_value
        if solution.status != "optimal" or value is None:
            return 0.0
        return value

    test_candidates = list(candidate_reactions[:max_candidates])
    print(f"从 {len(test_candidates)} 个候选者中测试双敲除...")

    results = []
    for rxn1, rxn2 in combinations(test_candidates, 2):
        if len(results) >= num_tests:
            break

        # The context manager reverts the bound and objective changes on
        # exit, which avoids copying the whole model for every pair.
        with model:
            for rxn_id in (rxn1, rxn2):
                model.reactions.get_by_id(rxn_id).bounds = (0, 0)

            # Is growth still possible?
            model.objective = biomass_reaction
            growth_rate = _optimal_value(model.optimize())
            if not growth_rate > 1e-6:
                continue

            # Maximum production of the target in the double knockout
            model.objective = target_reaction
            production_rate = _optimal_value(model.optimize())

        if production_rate > 1e-6:
            results.append({
                'knockouts': [rxn1, rxn2],
                'production_rate': production_rate,
                'growth_rate': growth_rate,
            })

    return results

# Combine the four best single knockouts pairwise
if len(results_df) == 0:
    print("无法测试双敲除 - 没有单敲除结果。")
else:
    # Each 'knockouts' entry is a one-element list; keep the reaction id.
    top_candidates = [ko[0] for ko in results_df.head(4)['knockouts'].tolist()]

    print("=== 测试双敲除 ===")
    dko_results = test_double_knockouts(model, target_reaction, biomass_rxn, top_candidates)

    if not dko_results:
        print("未找到可行的双敲除策略。")
    else:
        # Rank the double knockouts by production rate, best first
        dko_df = pd.DataFrame(dko_results)
        dko_df = dko_df.sort_values('production_rate', ascending=False)

        print(f"\n前5个双敲除策略:")
        for rank, (_, row) in enumerate(dko_df.head(5).iterrows(), start=1):
            print(f"{rank}. 敲除: {row['knockouts']}")
            print(f"   生产: {row['production_rate']:.4f} mmol/gDW/h")
            print(f"   生长速率: {row['growth_rate']:.4f} h⁻¹")
            print()

## 9. 总结和建议

让我们总结我们的发现并提供代谢工程建议。

In [None]:
# Final summary and engineering recommendations
print("=== OPTKNOCK分析总结 ===")
print()

print("野生型基线:")
print(f"  生长速率: {baseline_growth:.4f} h⁻¹")
print(f"  琥珀酸生产: {baseline_succinate:.4f} mmol/gDW/h")
print()

if len(results_df) == 0:
    # Nothing viable was found: print generic next steps
    for line in (
        "未识别出可行的敲除策略。",
        "建议:",
        "1. 考虑不同的目标代谢物",
        "2. 探索更复杂的敲除策略",
        "3. 使用更复杂的OptKnock算法",
        "4. 考虑除敲除外的代谢工程（如过表达）",
    ):
        print(line)
else:
    print("最佳单敲除策略:")
    best = results_df.iloc[0]
    # Baselines are floored at 1e-6 so the relative changes never divide by zero.
    growth_floor = max(baseline_growth, 1e-6)
    production_floor = max(baseline_succinate, 1e-6)
    growth_change = best['growth_rate']/growth_floor-1
    production_change = best['production_rate']/production_floor-1
    efficiency = best['production_rate']/max(best['growth_rate'], 1e-6)

    print(f"  敲除: {best['knockouts'][0]} ({best['reaction_name']})")
    print(f"  生长速率: {best['growth_rate']:.4f} h⁻¹ ({growth_change:.1%} 变化)")
    print(f"  生产速率: {best['production_rate']:.4f} mmol/gDW/h ({production_change:.1%} 变化)")
    print(f"  生产效率: {efficiency:.4f}")
    print()

    # Engineering recommendations
    print("=== 工程建议 ===")
    print()
    print("1. 基因敲除策略:")
    print(f"   - 目标: {best['knockouts'][0]}")
    print(f"   - 预期改进: 生产提高 {production_change:.1%}")
    print(f"   - 生长代价: 生长速率降低 {growth_floor-best['growth_rate']:.4f} h⁻¹")
    print()

    # The remaining advice is static text; an empty entry prints a blank line.
    for line in (
        "2. 实施考虑:",
        "   - 使用CRISPR-Cas9进行精确基因敲除",
        "   - 通过PCR和测序验证敲除",
        "   - 在控制的生物反应器条件下测试",
        "   - 监控生长和生产动力学",
        "",
        "3. 工艺优化:",
        "   - 优化培养基组成以增强生产",
        "   - 考虑分批补料或连续发酵",
        "   - 实施pH和溶解氧的过程控制",
        "   - 从烧瓶逐步扩大到生物反应器",
        "",
        "4. 进一步优化:",
        "   - 测试已识别的双敲除策略",
        "   - 考虑适应性实验室进化",
        "   - 探索通量控制的启动子工程",
        "   - 研究辅因子平衡策略",
    ):
        print(line)

## 10. 参考文献和进一步阅读

### 关键参考文献：
1. Burgard, A. P., Pharkya, P., & Maranas, C. D. (2003). OptKnock: a bi-level optimization framework for identifying gene knockout strategies for microbial strain optimization. *Biotechnology and Bioengineering*, 84(6), 647-657.

2. Orth, J. D., Conrad, T. M., Na, J., Lerman, J. A., Nam, H., Feist, A. M., & Palsson, B. Ø. (2011). A comprehensive genome-scale reconstruction of Escherichia coli metabolism—2011. *Molecular Systems Biology*, 7(1), 535.

3. Ebrahim, A., Lerman, J. A., Palsson, B. Ø., & Hyduke, D. R. (2013). COBRApy: Constraints-based reconstruction and analysis for python. *BMC Systems Biology*, 7(1), 74.

### 进一步阅读：
- COBRA工具箱文档: https://opencobra.github.io/cobratoolbox/
- COBRApy文档: https://cobrapy.readthedocs.io/
- 代谢工程教科书和综述文章
- 基因组尺度代谢建模资源

### 中文资源：
- 《代谢工程》教材 - 相关章节
- 《系统生物学》中的代谢网络分析
- 生物信息学数据库和工具