In [1]:
import pandas as pd

def parse_mammalian_phenotype_data(file_path):
    with open(file_path, 'r') as file:
        data = file.read()

    terms = data.strip().split("[Term]\n")

    df = pd.DataFrame(columns=["id", "name", "definition", "synonyms", "alt_ids", "is_obsolete", "is_a", "subsets", "comments", "consider"])

    for term in terms:
        if term:
            lines = term.strip().split("\n")
            term_data = {
                "id": "",
                "name": "",
                "definition": "",
                "synonyms": "",
                "alt_ids": "",
                "is_obsolete": False,
                "is_a": "",
                "subsets": "",
                "comments": "",
                "consider": ""
            }

            for line in lines:
                if line:
                    if ":" in line:
                        key, value = line.split(":", 1)
                        key = key.strip().lower()
                        value = value.strip()
                        if key == "id":
                            term_data["id"] = value if len(value) > 0 else " "
                        elif key == "name":
                            term_data["name"] = value if len(value) > 0 else " "
                        elif key == "def":
                            term_data["definition"] = value.strip('"') if len(value) > 0 else " "
                        elif key == "synonym":
                            term_data["synonyms"] = value if len(value) > 0 else " "
                        elif key == "alt_id":
                            term_data["alt_ids"] = value if len(value) > 0 else " "
                        elif key == "is_obsolete":
                            term_data["is_obsolete"] = value.lower() == "true"
                        elif key == "is_a":
                            term_data["is_a"] = value if len(value) > 0 else " "
                        elif key == "subset":
                            term_data["subsets"] = value if len(value) > 0 else " "
                        elif key == "comment":
                            term_data["comments"] = value if len(value) > 0 else " "
                        elif key == "consider":
                            term_data["consider"] = value if len(value) > 0 else " "

            df = pd.concat([df, pd.DataFrame([term_data])], ignore_index=True)
            
    return df

df = parse_mammalian_phenotype_data("./OGData/MPheno_OBO.ontology.txt")

df

Unnamed: 0,id,name,definition,synonyms,alt_ids,is_obsolete,is_a,subsets,comments,consider
0,MP:0000001,mammalian phenotype,"the observable morphological, physiological, b...",,,False,,,,
1,MP:0000002,obsolete Morphology,"OBSOLETE."" [MGI:csmith]","""Anatomy"" EXACT []",,True,,,,
2,MP:0000003,abnormal adipose tissue morphology,any structural anomaly of the connective tissu...,"""adipose tissue dysplasia"" EXACT []",MP:0000011,False,MP:0005375 ! adipose tissue phenotype,,,
3,MP:0000005,increased brown adipose tissue amount,increased amount of the thermogenic form of ad...,"""increased brown fat amount"" RELATED []",MP:0001779,False,MP:0001778 ! abnormal brown adipose tissue amount,,,
4,MP:0000008,increased white adipose tissue amount,increased quantity of fat-storing cells/tissue...,"""increased white fat amount"" RELATED []",MP:0001782,False,MP:0001781 ! abnormal white adipose tissue amount,,,
...,...,...,...,...,...,...,...,...,...,...
14290,MP:3000001,abnormal gastrula morphology,any structural anomaly of the pear shaped tril...,"""abnormal trilaminar germ disk morphology"" EXA...",,False,MP:0002085 ! abnormal embryonic tissue morphology,,,
14291,MP:3000002,obsolete abnormal skull morphology,"Any structural anomaly of a skull."" [GOC:TermG...",,,True,,,,
14292,MP:3000003,abnormal Ebner's gland morphology,any structural anomaly of the serous salivary ...,"""posterior lingual serous gland"" EXACT []",,False,MP:0008052 ! abnormal serous gland morphology,,,
14293,MP:3000004,abnormal nictitating membrane morphology,any structural anomaly of the translucent fold...,"""abnormal third eyelid morphology"" EXACT []",,False,MP:0001340 ! abnormal eyelid morphology,,,


In [9]:
bad_word = ["abnormal", "anomaly"]
neutral_word = ["obsolete"]
good_word = []
fitness_scores = []
for i in range(len(df)):
    score = 0
    for g, n, b in zip(good_word, neutral_word, bad_word):
        if n in df.iloc[i, 1].lower() or df.iloc[i, 2].lower() or df.iloc[i, 3].lower() or df.iloc[i, 4].lower():
            score += 0
        if b in df.iloc[i, 1].lower() or df.iloc[i, 2].lower() or df.iloc[i, 3].lower() or df.iloc[i, 4].lower():
            score -= 5
    
    fitness_scores.append(score)

In [10]:
min(fitness_scores), max(fitness_scores)

(-5, 0)

In [11]:
fitness_scores

[-5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
 -5,
