In [2]:
#!/usr/bin/env python3
import pandas as pd
from pronto import Ontology, Definition

# --- File paths (update these as needed) ---
excel_file = "./mapping_cb.xlsx"   # Replace with your Excel file path
obo_output = "cb_trait.obo"                # Replace with your desired output file path
crop_name = "" #Replace with name of the crop


def _cell_text(value):
    """Return a spreadsheet cell as stripped text, or "" when the cell is empty.

    pandas represents empty cells as NaN, which is truthy and which ``str()``
    turns into the literal text "nan"; checking with ``pd.isna`` keeps that
    placeholder out of the generated ontology.
    """
    if pd.isna(value):
        return ""
    return str(value).strip()


# --- Read the Excel file ---
# Columns read below: "TO ID", "TO label", "Trait ID", "Trait name",
# "Trait description", "Trait synonyms".
df = pd.read_excel(excel_file)

# --- Create a new ontology ---
o = Ontology()

# --- Process each row in the Excel file ---
for index, row in df.iterrows():
    # Check if "TO ID" is empty or does not start with "TO:"
    raw_to_id = row["TO ID"]
    if pd.isna(raw_to_id):
        print(f"Skipping row {index}: TO ID is missing")
        continue
    to_id = str(raw_to_id).strip()
    if not to_id.startswith("TO:"):
        print(f"Skipping row {index}: TO ID '{to_id}' does not start with 'TO:'")
        continue

    # A row without a trait identifier cannot produce a term; without this
    # check an empty cell would create a bogus term whose id is "nan".
    trait_id = _cell_text(row["Trait ID"])
    if not trait_id:
        print(f"Skipping row {index}: Trait ID is missing")
        continue

    # Extract and clean the remaining (optional) values; empty cells become "".
    to_label = _cell_text(row["TO label"])
    trait_label = _cell_text(row["Trait name"]).lower()
    trait_desc = _cell_text(row["Trait description"])
    trait_synonyms = _cell_text(row["Trait synonyms"])

    # --- Ensure the trait class (from TO ID) exists ---
    if to_id not in o:
        trait_class_term = o.create_term(to_id)
        if to_label:
            trait_class_term.name = to_label
    else:
        # Reuse the class created by an earlier row; its name is left as is.
        trait_class_term = o[to_id]

    # --- Create the trait term ---
    if trait_id in o:
        trait_term = o[trait_id]
    else:
        trait_term = o.create_term(trait_id)
    if trait_label:
        # Name is "<crop> <trait name> trait"; the crop prefix is dropped when
        # crop_name is empty so the name does not start with a stray space.
        name_parts = (crop_name.strip(), trait_label, "trait")
        trait_term.name = " ".join(part for part in name_parts if part)
    if trait_desc:
        trait_term.definition = Definition(trait_desc)
    # Synonyms are comma-separated in a single cell; blank pieces (for example
    # from a trailing comma) are ignored rather than stored as empty synonyms.
    for syn in (piece.strip() for piece in trait_synonyms.split(",")):
        if syn:
            trait_term.add_synonym(syn, scope='EXACT')

    # --- Set the trait term as a subclass of the trait class term ---
    trait_term.superclasses().add(trait_class_term)

# --- Write the ontology to an OBO file ---
with open(obo_output, "wb") as f:
    o.dump(f, format="obo")

print("OBO file generated at:", obo_output)


Skipping row 69: TO ID is missing
Skipping row 75: TO ID is missing
Skipping row 120: TO ID is missing
OBO file generated at: cb_trait.obo
