# 🌿 Trait Extraction Knowledge Base Builder

In [41]:
# 📦 Load data
import pandas as pd

# Species descriptions and the trait table; both carry a 'taxon' column.
descriptions_df = pd.read_csv('descriptions.csv')
traits_df = pd.read_csv('traits.csv')

# Default (inner) join on 'taxon': only taxa present in both files are kept.
merged_df = descriptions_df.merge(traits_df, on='taxon')
merged_df.head()

Unnamed: 0,taxon,description,life form,habitat,habit,clonality,perennial organ,plant min. height [m],plant max. height [m],indumentum,...,seed max. width [mm],seed min. length [mm],seed max. length [mm],seed min. diameter [mm],seed max. diameter [mm],dispersal mode,ploidy level (2n),root min. depth [cm],root max. depth [cm],source
0,Achillea millefolium,A strongly scented perennial herb with far-cre...,perennial,,"scrambler, erect leafy, herb",stoloniferous,stem,,,"woolly, hairs absent",...,,,,,,,,,,ecoflora
1,Achillea ptarmica,"Infl a loose terminal corymb; Capitula few, 12...",,,,,,,,,...,,,,,,,,,,ecoflora
2,Agrostis canina,A tufted and shortly rhizomatous or stolonifer...,perennial,,"tussock, erect leafy/tussock, prostrate","rhizomatous, stoloniferous",,,,"scabrous, pubescent, glabrous",...,,,,,,ectzoochory,,,,ecoflora
3,Agrostis stolonifera,"A stoloniferous, not rhizomatous perennial 8-4...",perennial,,"prostrate, erect leafy/tussock","stoloniferous, rhizomatous",,,,"glabrous, scabrous",...,,,,,,ectzoochory,,,,ecoflora
4,Agrostis vinealis,A densely tufted rhizomatous perennial to 10-...,perennial,,"tussock, erect leafy/tussock",rhizomatous,rhizome,,,pubescent,...,,,,,,ectzoochory,,,,ecoflora


In [2]:
# 📄 Create few-shot JSONL examples
import json

# Every column of traits.csv except the join key is treated as a trait field.
trait_columns = [name for name in traits_df.columns if name != 'taxon']

jsonl_data = []
for _, record in merged_df.iterrows():
    present = {}
    for name in trait_columns:
        value = record[name]
        # Leave out missing (NaN/None) and empty-string cells.
        if pd.notna(value) and value != '':
            present[name] = value
    example = {
        'taxon': record['taxon'],
        'description': record['description'],
        'traits': present,
    }
    jsonl_data.append(example)

# Save JSONL: one JSON object per line.
with open('few_shot_training_data.jsonl', 'w') as f:
    f.writelines(json.dumps(example) + '\n' for example in jsonl_data)

print(f"Saved {len(jsonl_data)} examples to few_shot_training_data.jsonl")

Saved 49 examples to few_shot_training_data.jsonl


In [3]:
# 📘 Create trait-value dictionary
from collections import defaultdict
import json

# Collect the distinct non-blank string values seen in each trait column.
# A column that never holds a string value gets no entry at all.
values_by_trait = defaultdict(set)
for name in trait_columns:
    for raw in traits_df[name].dropna().unique():
        if not isinstance(raw, str):
            continue
        cleaned = raw.strip()
        if cleaned:
            values_by_trait[name].add(cleaned)

# Sets are not JSON-serialisable: store each trait's values as a sorted list.
trait_dict = {name: sorted(values) for name, values in values_by_trait.items()}
with open('trait_dictionary.json', 'w') as f:
    json.dump(trait_dict, f, indent=2)

print(f"Saved trait dictionary with {len(trait_dict)} traits to trait_dictionary.json")

Saved trait dictionary with 36 traits to trait_dictionary.json


### Inspect the contents of traits.parquet

In [12]:
import pandas as pd

df = pd.read_parquet("data/traits.parquet")

In [13]:
df

Unnamed: 0,term,part,character,category,group,trait,type
0,v-form,leaf,conduplicate,arrangement,angiosperm,leaf architecture,discrete
1,ciliate,leaf,ciliate,architecture,angiosperm,indumentum,discrete
2,ciliate,plant,ciliate,architecture,angiosperm,indumentum,discrete
3,crenate,leaf,crenate,shape,angiosperm,leaf margin,discrete
4,dentate,leaf,dentate,shape,angiosperm,leaf margin,discrete
...,...,...,...,...,...,...,...
8149,white-flecked,flower,white,coloration,gymnosperm,flower colour,colour
8150,white-barked,flower,white,architecture,gymnosperm,flower colour,colour
8151,substramineous,flower,yellow,coloration,gymnosperm,flower colour,colour
8152,spots,flower,spotted,coloration,gymnosperm,flower colour,colour


In [3]:
df['trait'].value_counts()

trait
indumentum                   1154
leaf shape                    793
inflorescence arrangement     630
pilosity surface              563
spinescence                   477
                             ... 
leaf orinetation                1
leaf margen                     1
pinnules base                   1
vegetative propagule type       1
reproductive structure          1
Name: count, Length: 136, dtype: int64

In [4]:
df['group'].value_counts()

group
angiosperm      2252
bryophyte       2150
gymnosperm      2089
pteridophyte    1653
Name: count, dtype: int64

In [5]:
df['type'].value_counts()

type
discrete    7836
colour       308
Name: count, dtype: int64

In [6]:
df['category'].value_counts()

category
shape                          2090
pubescence                     1495
structure                      1160
architecture                   1028
growth-form                     336
coloration                      308
arrangement                     289
reproduction                    101
orientation                      82
position                         76
habitat                          68
duration                         58
course                           35
texture                          30
fusion                           25
relief                           22
location                         18
structure-in-adjective-form      14
pilosity-surface                 10
dehiscence                       10
fixation                          8
reflectance                       8
length                            4
quantity                          4
nutrition                         4
indumentum                        4
density                           3
character          

In [7]:
df['character'].value_counts()

character
pubescent            455
spinescent           276
orbicular            143
climber/scrambler    137
tomentose            129
                    ... 
5-merous               1
tetramerous            1
tubulose               1
tubulariform           1
tubular                1
Name: count, Length: 656, dtype: int64

In [11]:
df['part'].value_counts()

part
leaf               2600
plant               736
flower              488
blade               462
inflorescence       331
spikelet            331
fruit               261
stem                206
thallus             120
lobule               83
underleaf            81
petal                80
spore                68
stamen               40
capsule              38
ovary                15
carpel               15
style                15
stigma               15
pinnae               15
gynoecium            15
lamina               14
staminodium          12
sporophyte           12
frond                10
indusium              8
seta                  7
sori                  7
mucilage cavity       6
perianth              5
rhizoid               5
scale                 5
labellum              4
stomata               4
epiphyte              4
underleaves           4
indusia               4
propagule             3
peristome             3
corolla               2
protonema             1
megaspore  

In [9]:
df['term'].value_counts()

term
spinulose                 22
semigynobasic             18
gynobasic                 18
subterminal               18
terminal                  18
                          ..
widely-spaced              1
widely spaced              1
transversely insterted     1
transverse                 1
spreading                  1
Name: count, Length: 1989, dtype: int64

### Extract the Full FLOPO ontology

In [70]:
from rdflib import Graph, Namespace, RDF, RDFS, OWL, URIRef

FLOPO = Namespace("http://purl.obolibrary.org/obo/FLOPO_")
OBO = Namespace("http://purl.obolibrary.org/obo/")
BFO = OBO["BFO_0000051"]
RO = OBO["RO_0000053"]

def extract_flopo_labels_and_logic(owl_path):
    """
    Extract every labelled FLOPO class with the entity/quality pair of its
    logical definition.

    The definitions searched for are ``owl:equivalentClass`` restrictions on
    BFO_0000051 ("has part") whose filler is an ``owl:intersectionOf`` list
    holding a named entity class and a restriction on RO_0000053 ("bearer of")
    pointing at the quality.

    Args:
        owl_path (str): Source of the FLOPO OWL file, passed to ``Graph.parse``.

    Returns:
        list[dict]: One dict per named, labelled FLOPO class with the keys
        ``flopo_id``, ``label``, ``po_id`` and ``pato_id``. ``po_id`` and
        ``pato_id`` are ``None`` when no matching axiom is found. If a class
        offers several candidates, the last one visited wins.
    """
    def to_curie(iri):
        # ".../obo/PO_0009047" -> "PO:0009047"
        return str(iri).split("/")[-1].replace("_", ":")

    g = Graph()
    g.parse(owl_path)

    results = []
    for s in g.subjects(RDF.type, OWL.Class):
        s_str = str(s)
        # Only named FLOPO classes: skip blank nodes and other ontologies' terms.
        if not isinstance(s, URIRef) or "FLOPO_" not in s_str:
            continue

        label = g.value(s, RDFS.label)
        if label is None:
            continue  # skip classes with no label

        # The local name follows '#' when the IRI has one, otherwise the last '/'.
        local_name = s_str.split("#")[-1] if "#" in s_str else s_str.split("/")[-1]
        flopo_id = local_name.replace("_", ":")

        po_part = None
        pato_part = None

        for eq in g.objects(s, OWL.equivalentClass):
            # Only restrictions on "has part" carry the logical definition.
            if (eq, OWL.onProperty, BFO) not in g:
                continue
            for svf in g.objects(eq, OWL.someValuesFrom):
                for inter in g.objects(svf, OWL.intersectionOf):
                    for item in g.items(inter):
                        # Entity operand. It must be a named IRI: anonymous class
                        # expressions are typed owl:Class too, and would otherwise
                        # leak a blank-node identifier into po_id.
                        if isinstance(item, URIRef) and (
                            (item, RDF.type, OWL.Class) in g or "PO_" in str(item)
                        ):
                            po_part = to_curie(item)
                        # Quality operand: a restriction on "bearer of".
                        if (item, OWL.onProperty, RO) in g:
                            for v in g.objects(item, OWL.someValuesFrom):
                                pato_part = to_curie(v)

        results.append({
            "flopo_id": flopo_id,
            "label": str(label) if label else "",
            "po_id": po_part,
            "pato_id": pato_part
        })

    return results


In [71]:
# Provide the path to your local FLOPO OWL file
owl_path = "flopo.owl"

# One dict per labelled FLOPO class with keys flopo_id, label, po_id, pato_id
results = extract_flopo_labels_and_logic(owl_path)

# NOTE: this rebinds `df`, which earlier in the notebook held the
# data/traits.parquet table.
df = pd.DataFrame(results)
print(df.head())

        flopo_id             label       po_id       pato_id
0  FLOPO:0000000   flora phenotype        None          None
1  FLOPO:0000001    stem phenotype        None  PATO:0000001
2  FLOPO:0000002     stem branched  PO:0009047  PATO:0000402
3  FLOPO:0000003  stem branchiness  PO:0009047  PATO:0002009
4  FLOPO:0000004    leaf phenotype        None  PATO:0000001


In [72]:
df

Unnamed: 0,flopo_id,label,po_id,pato_id
0,FLOPO:0000000,flora phenotype,,
1,FLOPO:0000001,stem phenotype,,PATO:0000001
2,FLOPO:0000002,stem branched,PO:0009047,PATO:0000402
3,FLOPO:0000003,stem branchiness,PO:0009047,PATO:0002009
4,FLOPO:0000004,leaf phenotype,,PATO:0000001
...,...,...,...,...
24199,FLOPO:0980083,shoot axis suffruticose,,
24200,FLOPO:0210200,seed weight,PO:0009010,PATO:0000128
24201,FLOPO:0210201,seed increased weight,PO:0009010,PATO:0000582
24202,FLOPO:0210202,seed decreased weight,PO:0009010,PATO:0000583


In [57]:
from rdflib.namespace import SKOS, DC

def map_ontology_terms(ontology_path, id_prefix="PO"):
    """
    Collect id, label and definition for the classes of one ontology.

    Args:
        ontology_path (str): Source of the OWL file, passed to ``Graph.parse``.
        id_prefix (str): Ontology prefix of the classes to keep; "PO" keeps
            classes whose local name starts with "PO_". An empty prefix keeps
            every named class.

    Returns:
        pd.DataFrame: Columns ``id`` (local name with "_" replaced by ":"),
        ``label`` and ``definition``. Label and definition are None when the
        class has none. The three columns are present even when no class
        matches, so callers may rename them positionally.
    """
    g = Graph()
    g.parse(ontology_path)

    prefix = id_prefix.rstrip("_:")

    results = []
    for s in g.subjects(RDF.type, OWL.Class):
        if not isinstance(s, URIRef):
            continue

        # Test the local name, not the whole IRI: a substring test for "PO"
        # would also accept ".../FLOPO_0000002".
        local_name = str(s).split("/")[-1].split("#")[-1]
        if prefix and not local_name.startswith(prefix + "_"):
            continue

        term_id = local_name.replace("_", ":")
        label = g.value(s, RDFS.label)
        # First available of: IAO definition, skos:definition, dc:description.
        definition = (
            g.value(s, URIRef("http://purl.obolibrary.org/obo/IAO_0000115")) or
            g.value(s, SKOS.definition) or
            g.value(s, DC.description)
        )

        results.append({
            "id": term_id,
            "label": str(label) if label else None,
            "definition": str(definition) if definition else None
        })

    return pd.DataFrame(results, columns=["id", "label", "definition"])


In [11]:
# Map PO terms
# map_ontology_terms returns the columns (id, label, definition) in that
# order; the positional rename below relies on it.
po_df = map_ontology_terms("po.owl", id_prefix="PO")
po_df.columns = ["po_id", "po_label", "po_definition"]

# Map PATO terms (same positional rename as for PO)
pato_df = map_ontology_terms("pato.owl", id_prefix="PATO")
pato_df.columns = ["pato_id", "pato_label", "pato_definition"]

In [26]:
# Map FLOPO terms
# Parses flopo.owl a second time, here only for id/label/definition; the
# columns are renamed positionally from (id, label, definition).
flopo_df = map_ontology_terms("flopo.owl", id_prefix="FLOPO")
flopo_df.columns = ["flopo_id", "flopo_label", "flopo_definition"]

In [73]:
# Merge into your main FLOPO mapping
# Left joins keep every FLOPO row; rows without a po_id / pato_id get NaN in
# the added label and definition columns.
# NOTE: `df` is overwritten, so this cell is not idempotent. Re-running it
# merges onto the already-merged frame and pandas then suffixes the duplicated
# label/definition columns (_x / _y). Rebuild `df` before re-running.
df = df.merge(po_df, on="po_id", how="left")
df = df.merge(pato_df, on="pato_id", how="left")
print(df.head())

        flopo_id             label       po_id       pato_id po_label  \
0  FLOPO:0000000   flora phenotype        None          None      NaN   
1  FLOPO:0000001    stem phenotype        None  PATO:0000001      NaN   
2  FLOPO:0000002     stem branched  PO:0009047  PATO:0000402     stem   
3  FLOPO:0000003  stem branchiness  PO:0009047  PATO:0002009     stem   
4  FLOPO:0000004    leaf phenotype        None  PATO:0000001      NaN   

                                       po_definition   pato_label  \
0                                                NaN          NaN   
1                                                NaN      quality   
2  A shoot axis (PO:0025029) that is the primary ...     branched   
3  A shoot axis (PO:0025029) that is the primary ...  branchiness   
4                                                NaN      quality   

                                     pato_definition  
0                                                NaN  
1  A dependent entity that inheres i

In [74]:
df.to_csv('graph/full_ontology.csv', index=False)  

In [15]:
import yaml

def yaml_to_part_synonym_table(yaml_path):
    """
    Flatten a YAML mapping of parts to synonyms into a part/synonym table.

    For each top-level key:
      * no value      -> one row ``(part, "")``
      * list value    -> one row ``(part, "")`` plus one row per list entry
      * other scalar  -> one row ``(part, str(value))``

    Args:
        yaml_path (str): Path to the YAML file.

    Returns:
        pd.DataFrame: DataFrame with 'part' and 'synonym' columns. Both
        columns are present even when the file holds no entries.
    """
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(yaml_path, 'r', encoding='utf-8') as f:
        # safe_load returns None for an empty document; treat that as "no parts".
        data = yaml.safe_load(f) or {}

    rows = []
    for part, values in data.items():
        if values is None:
            # Just the part with no synonym
            rows.append({"part": part, "synonym": ""})
        elif isinstance(values, list):
            # Include the part line itself, then one row per synonym
            rows.append({"part": part, "synonym": ""})
            rows.extend({"part": part, "synonym": synonym} for synonym in values)
        else:
            # Single value (just in case).
            # NOTE(review): unlike the list branch, no bare (part, "") row is
            # emitted here - confirm whether that asymmetry is intended.
            rows.append({"part": part, "synonym": str(values)})

    return pd.DataFrame(rows, columns=["part", "synonym"])

In [16]:
part_map = yaml_to_part_synonym_table("data/anatomical-parts.yml")

In [18]:
part_map.head()

Unnamed: 0,part,synonym
0,androecium,
1,androecium,androecia
2,anther,
3,anthophore,
4,areoles,


In [19]:
part_map.to_csv("part_synonym_mapping.csv", index=False)

In [35]:
import re
from difflib import get_close_matches

def normalise_text(text):
    """Lowercase, turn punctuation into spaces and collapse whitespace for matching.

    Non-string input (None, NaN, numbers) yields an empty string.
    """
    if not isinstance(text, str):
        return ""
    # Punctuation becomes a space so "carpel/ovary" stays two words. Splitting
    # and re-joining afterwards removes the leading, trailing and doubled
    # spaces that substitution leaves behind ("leaf." -> "leaf", not "leaf ").
    return " ".join(re.sub(r'[^\w\s]', ' ', text.lower()).split())

def map_terms_to_ontologies(mapping_df, po_df, pato_df, flopo_df):
    """
    Map part and synonym columns to ontology labels from PO, PATO, and FLOPO.

    For each row the synonym is the search term when it is a non-blank string,
    otherwise the part name is used. A label matches when the normalised term
    occurs as a substring of the normalised label. A term that normalises to
    nothing (missing part, NaN synonym read back from CSV, ...) matches no
    label at all.

    Args:
        mapping_df (pd.DataFrame): Needs the columns 'part' and 'synonym'.
        po_df (pd.DataFrame): Needs the columns 'po_id' and 'po_label'.
        pato_df (pd.DataFrame): Needs the columns 'pato_id' and 'pato_label'.
        flopo_df (pd.DataFrame): Needs the columns 'flopo_id' and 'flopo_label'.

    Returns:
        pd.DataFrame: One row per input row with the columns part, synonym,
        PO_id, PO_label, PATO_id, PATO_label, FLOPO_id, FLOPO_label. Multiple
        matches are joined with "; ", and no match gives "".
    """
    columns = [
        "part", "synonym",
        "PO_id", "PO_label",
        "PATO_id", "PATO_label",
        "FLOPO_id", "FLOPO_label",
    ]

    def prepare(df, id_col, label_col):
        # Keep the rows with a string label and normalise every label once,
        # instead of re-running the regex for each mapping row.
        keep = [i for i, value in enumerate(df[label_col]) if isinstance(value, str)]
        usable = df.iloc[keep][[id_col, label_col]]
        normalised = [normalise_text(label) for label in usable[label_col]]
        return usable, normalised

    def joined(values):
        return "; ".join(values.dropna().astype(str))

    lookups = [
        ("PO", "po_id", "po_label") + prepare(po_df, "po_id", "po_label"),
        ("PATO", "pato_id", "pato_label") + prepare(pato_df, "pato_id", "pato_label"),
        ("FLOPO", "flopo_id", "flopo_label") + prepare(flopo_df, "flopo_id", "flopo_label"),
    ]

    results = []
    for _, row in mapping_df.iterrows():
        synonym = row["synonym"]
        # NaN is truthy, so a plain truth test would pick a missing synonym
        # over the part name; require a real, non-blank string instead.
        term = synonym if isinstance(synonym, str) and synonym.strip() else row["part"]
        term_norm = normalise_text(term)

        record = {"part": row["part"], "synonym": synonym}
        for prefix, id_col, label_col, usable, normalised in lookups:
            if term_norm.strip():
                positions = [i for i, label in enumerate(normalised) if term_norm in label]
            else:
                # "" is a substring of every label; an empty term must match nothing.
                positions = []
            hits = usable.iloc[positions]
            record[f"{prefix}_id"] = joined(hits[id_col])
            record[f"{prefix}_label"] = joined(hits[label_col])
        results.append(record)

    return pd.DataFrame(results, columns=columns)

In [36]:
mapped_df = map_terms_to_ontologies(part_map, po_df, pato_df, flopo_df)

In [47]:
mapped_df.to_csv("part_ontology_mapping.csv", index=False)

In [50]:
from difflib import get_close_matches

def map_fields_to_parts(trait_fields, part_synonym_df, cutoff=0.8):
    """
    Maps each trait field to possible plant parts using part/synonym list.

    Each field name is split into lowercase word tokens and every token is
    fuzzy-matched (difflib.get_close_matches) against all parts and synonyms.

    Parameters:
    - trait_fields (list of str): The trait field names to map.
    - part_synonym_df (pd.DataFrame): A DataFrame with columns ['part', 'synonym'].
    - cutoff (float): Similarity threshold for fuzzy matching (0 to 1).

    Returns:
    - pd.DataFrame with columns ['field', 'matched_parts']. 'matched_parts' is
      a ", "-joined, alphabetically sorted list of the distinct matches, or ""
      when nothing matched.
    """
    # Vocabulary: every non-blank string part or synonym. Blank synonyms and
    # non-string cells cannot be matched meaningfully and are left out.
    candidates = sorted({
        term
        for column in ("part", "synonym")
        for term in part_synonym_df[column].dropna().unique()
        if isinstance(term, str) and term.strip()
    })

    # "/" separates alternatives ("carpel/ovary") and must split tokens.
    # Brackets, parentheses and dots are dropped.
    cleanup = str.maketrans({
        "/": " ", "[": None, "]": None, "(": None, ")": None, ".": None,
    })

    results = []
    for field in trait_fields:
        tokens = field.lower().translate(cleanup).split()
        matched = set()
        for token in tokens:
            matched.update(get_close_matches(token, candidates, cutoff=cutoff))
        results.append({
            "field": field,
            # Sorted: joining a bare set gives a different order between runs.
            "matched_parts": ", ".join(sorted(matched))
        })

    return pd.DataFrame(results, columns=["field", "matched_parts"])

In [51]:
# Trait fields 
trait_fields = [
    "life form", "habitat", "habit", "clonality", "perennial organ", "plant min. height [m]",
    "plant max. height [m]", "indumentum", "spinescence", "succulence", "leaf arrangement",
    "leaf architecture", "leaf position", "leaf shape", "leaf apex", "leaf base", "leaf margin",
    "leaf min. width [cm]", "leaf max. width [cm]", "leaf min. length [cm]", "leaf max. length [cm]",
    "bract colour", "bracteole colour", "inflorescence arrangement", "inflorescence orientation",
    "flower sex", "flower architecture", "flower merosity", "flower symmetry", "flower shape",
    "flower colour", "flower orientation", "petal fusion", "petal colour", "petal min. width [cm]",
    "petal max. width [cm]", "calyx colour", "calyx min. length [cm]", "calyx max. length [cm]",
    "corolla colour", "corolla tube min. length [cm]", "corolla tube max. length [cm]",
    "corolla tube lipped", "labellum colour", "labellum min. width [cm]", "labellum max. width [cm]",
    "spur min. length [cm]", "spur max. length [cm]", "reproduction architecture", "reproduction system",
    "stamen number", "stamen arrangement", "stamenoid number", "stamen min. length [cm]",
    "stamen max. length [cm]", "staminodium", "pollen colour", "carpel/ovary number",
    "gynoecium arrangement", "heterostyly", "pollination", "reward", "fruit type", "fruit structure",
    "fruit dehiscence", "fruit shape", "fruit colour", "seed colour", "seeds per fruit min",
    "seeds per fruit max", "seed min. width [mm]", "seed max. width [mm]", "seed min. length [mm]",
    "seed max. length [mm]", "seed min. diameter [mm]", "seed max. diameter [mm]", "dispersal mode",
    "ploidy level (2n)", "root min. depth [cm]", "root max. depth [cm]"
]

In [52]:
# Load your CSV
# part_map = pd.read_csv("part_synonym_mapping.csv")

# Run the mapping
result_df = map_fields_to_parts(trait_fields, part_map)
print(result_df)

                      field matched_parts
0                 life form              
1                   habitat              
2                     habit              
3                 clonality              
4           perennial organ              
..                      ...           ...
75  seed max. diameter [mm]          seed
76           dispersal mode    dispersule
77        ploidy level (2n)         leave
78     root min. depth [cm]          root
79     root max. depth [cm]          root

[80 rows x 2 columns]


In [53]:
result_df.to_csv("field_mapping.csv", index=False)

### Trait Graph Builder

In [54]:
import pandas as pd
# NOTE(review): the cells above write field_mapping.csv, part_synonym_mapping.csv
# and part_ontology_mapping.csv to the working directory, not to graph/.
# Confirm the files are copied there, otherwise these reads fail on a fresh run.
field_mapping_df = pd.read_csv('graph/field_mapping.csv')
part_synonym_mapping_df = pd.read_csv('graph/part_synonym_mapping.csv')
part_ontology_mapping_df = pd.read_csv('graph/part_ontology_mapping.csv')
# NOTE(review): the merged ontology table above is saved as
# graph/full_ontology.csv - confirm graph/ontology.csv is a separate,
# intentionally different file.
ontology_df = pd.read_csv('graph/ontology.csv')

In [55]:
field_mapping_df

Unnamed: 0,field,matched_parts
0,life form,
1,habitat,
2,habit,
3,clonality,
4,perennial organ,
...,...,...
75,seed max. diameter [mm],seed
76,dispersal mode,dispersule
77,ploidy level (2n),leave
78,root min. depth [cm],root


### Check ontology linkage count

In [49]:
print(descriptions_df[descriptions_df['taxon'] == "Betula pubescens"]['description'].values)

['Tree to 20(-24) m with a single stem or shrub with several stems; Bark smooth, brown or grey, rarely white, not markedly different at the base of the trunk, sometimes with deep grooves but never broken up into rectangular bosses. Branches spreading or ascending (sometimes pendulous in mountain forms). Twigs Â±  pubsecent or glabrous, becoming dark brown or blackish, not or scarcely shining, with or without brown resinous warts (?sucker shoots always pubescent). Buds viscid or not,  Lvs:  blades 1.5-5.5 cm, very variable in shape, ovate, orbicular-ovate or rhombic-ovate, subacute or acute to slightly acuminate, rounded or cuneate at base, coarsely and sometimes irregularly serrate or serrate dentate, the teeth not curved towards the apex of the lf, usually pubescent at least on the veins beneath or in their axil, of a duller green than B. pendula. Petiole 7-15(-20) mm. Male infl 3-6 cm, drooping. Female infl in fr 1-4 x 0.5-1 cm, scales with short or long cuneate base, lateral lobes r

In [46]:
print(descriptions_df[descriptions_df['taxon'] == "Achillea millefolium"]['description'].values)

['A strongly scented perennial herb with far-creeping stolons and erect, furrowed, usually simple, ï¿½ woolly,  short, barren stems and taller flowering stems 8-45 (-60) cm. Lvs 5-15 cm, lanceolate, 2-3 times pinnate, the ultimate segments linear-subulate; basal lvs long stalked; upper lvs shorter, sessile, often with 2-3 small axillary lvs. Infl of Â±  dense terminal corymbs, 4-6 cm across. Capitula >25(-50) Involucre ovoid, bracts rigid, oblong, blunt, keeled, Â±  glabrous, with a broad brown or blackish scarious margin. Ray-florets usually 5, about half as long as the involucre and as broad as long, 3-toothed at the apex, white, rarely pink or reddish; disk florets white or cream-coloured.  Achenes 1.8-2 x 0.6-0.7 mm, cuneiform, compressed,  glabrous, smooth, shining greyish, somewhat winged. Very variable in hairyness and colour of the bracts.']


In [48]:
print(traits_df[traits_df['taxon'] == "Achillea millefolium"])

                  taxon  life form habitat                         habit  \
0  Achillea millefolium  perennial     NaN  scrambler, erect leafy, herb   

       clonality perennial organ  plant min. height [m]  \
0  stoloniferous            stem                    NaN   

   plant max. height [m]            indumentum  spinescence  ...  \
0                    NaN  woolly, hairs absent          NaN  ...   

   seed max. width [mm] seed min. length [mm] seed max. length [mm]  \
0                   NaN                   NaN                   NaN   

  seed min. diameter [mm] seed max. diameter [mm] dispersal mode  \
0                     NaN                     NaN            NaN   

  ploidy level (2n) root min. depth [cm]  root max. depth [cm]    source  
0               NaN                  NaN                   NaN  ecoflora  

[1 rows x 83 columns]


In [None]:
from rdflib import Graph, Namespace, RDF, RDFS, OWL, URIRef
from rdflib.collection import Collection

# Load the FLOPO OWL file from the relative path used by the extraction cell
# rather than from a machine-specific absolute location.
flopo_graph = Graph()
flopo_graph.parse("flopo.owl")

# Define namespaces
FLOPO = Namespace("http://purl.obolibrary.org/obo/FLOPO_")
OBO = Namespace("http://purl.obolibrary.org/obo/")
BFO = OBO["BFO_0000051"]  # has part
RO = OBO["RO_0000053"]    # bearer of

# Initialize counters
multi_po_counts = 0    # classes whose definition names more than one PO term
multi_pato_counts = 0  # classes whose definition names more than one PATO term
both_multiple = 0      # classes for which both of the above hold
class_count = 0        # named FLOPO classes visited
skipped_lists = 0      # intersectionOf lists that raised while being read

# Analyze FLOPO OWL
for s in flopo_graph.subjects(RDF.type, OWL.Class):
    if not isinstance(s, URIRef) or "obo/FLOPO_" not in str(s):
        continue
    class_count += 1
    po_ids = set()
    pato_ids = set()

    for eq in flopo_graph.objects(s, OWL.equivalentClass):
        # Only restrictions on "has part" carry the logical definition.
        if (eq, OWL.onProperty, BFO) not in flopo_graph:
            continue
        for svf in flopo_graph.objects(eq, OWL.someValuesFrom):
            for inter in flopo_graph.objects(svf, OWL.intersectionOf):
                try:
                    items = list(Collection(flopo_graph, inter))
                    for item in items:
                        if "PO_" in str(item):
                            po_ids.add(str(item).split("/")[-1].replace("_", ":"))
                        # Quality operand: restriction on "bearer of".
                        if (item, OWL.onProperty, RO) in flopo_graph:
                            for v in flopo_graph.objects(item, OWL.someValuesFrom):
                                if "PATO_" in str(v):
                                    pato_ids.add(str(v).split("/")[-1].replace("_", ":"))
                except Exception:
                    # Best effort: keep counting, but record the failure so the
                    # totals below are not silently based on partial data.
                    skipped_lists += 1
                    continue

    if len(po_ids) > 1:
        multi_po_counts += 1
    if len(pato_ids) > 1:
        multi_pato_counts += 1
    if len(po_ids) > 1 and len(pato_ids) > 1:
        both_multiple += 1

if skipped_lists:
    print(f"Warning: {skipped_lists} intersectionOf lists could not be read and were skipped")

(class_count, multi_po_counts, multi_pato_counts, both_multiple)
