In [10]:
import pandas as pd

def parse_mammalian_phenotype_data(file_path):
    with open(file_path, 'r') as file:
        data = file.read()

    terms = data.strip().split("[Term]\n")

    df = pd.DataFrame(columns=["id", "name", "definition", "synonyms", "alt_ids", "is_obsolete", "is_a", "subsets", "comments", "consider"])

    for term in terms:
        if term:
            lines = term.strip().split("\n")
            term_data = {
                "id": None,
                "name": None,
                "definition": None,
                "synonyms": [],
                "alt_ids": [],
                "is_obsolete": False,
                "is_a": [],
                "subsets": [],
                "comments": [],
                "consider": []
            }

            for line in lines:
                if line:
                    if ":" in line:
                        key, value = line.split(":", 1)
                        key = key.strip().lower()
                        value = value.strip()
                        if key == "id":
                            term_data["id"] = value
                        elif key == "name":
                            term_data["name"] = value
                        elif key == "def":
                            term_data["definition"] = value.strip('"')
                        elif key == "synonym":
                            term_data["synonyms"].append(value)
                        elif key == "alt_id":
                            term_data["alt_ids"].append(value)
                        elif key == "is_obsolete":
                            term_data["is_obsolete"] = value.lower() == "true"
                        elif key == "is_a":
                            term_data["is_a"].append(value)
                        elif key == "subset":
                            term_data["subsets"].append(value)
                        elif key == "comment":
                            term_data["comments"].append(value)
                        elif key == "consider":
                            term_data["consider"].append(value)

            df = pd.concat([df, pd.DataFrame([term_data])], ignore_index=True)

    return df

df = parse_mammalian_phenotype_data("./OGData/MPheno_OBO.ontology.txt")
df



Unnamed: 0,id,name,definition,synonyms,alt_ids,is_obsolete,is_a,subsets,comments,consider
0,MP:0000001,mammalian phenotype,"the observable morphological, physiological, b...",[],[],False,[],[],[],[]
1,MP:0000002,obsolete Morphology,"OBSOLETE."" [MGI:csmith]","[""Anatomy"" EXACT []]",[],True,[],[],[],[]
2,MP:0000003,abnormal adipose tissue morphology,any structural anomaly of the connective tissu...,"[""abnormality of adipose tissue"" BROAD [], ""ad...",[MP:0000011],False,[MP:0005375 ! adipose tissue phenotype],[],[],[]
3,MP:0000005,increased brown adipose tissue amount,increased amount of the thermogenic form of ad...,"[""increased brown fat"" EXACT [], ""increased br...",[MP:0001779],False,[MP:0001778 ! abnormal brown adipose tissue am...,[],[],[]
4,MP:0000008,increased white adipose tissue amount,increased quantity of fat-storing cells/tissue...,"[""increased white fat"" EXACT [], ""increased wh...",[MP:0001782],False,[MP:0001781 ! abnormal white adipose tissue am...,[],[],[]
...,...,...,...,...,...,...,...,...,...,...
14290,MP:3000001,abnormal gastrula morphology,any structural anomaly of the pear shaped tril...,"[""abnormal trilaminar blastocyst morphology"" E...",[],False,[MP:0002085 ! abnormal embryonic tissue morpho...,[],[],[]
14291,MP:3000002,obsolete abnormal skull morphology,"Any structural anomaly of a skull."" [GOC:TermG...",[],[],True,[],[],[],[]
14292,MP:3000003,abnormal Ebner's gland morphology,any structural anomaly of the serous salivary ...,"[""abnormal gustatory gland morphology"" EXACT [...",[],False,[MP:0003791 ! abnormal minor salivary gland mo...,[],[],[]
14293,MP:3000004,abnormal nictitating membrane morphology,any structural anomaly of the translucent fold...,"[""abnormal nictitans morphology"" EXACT [], ""ab...",[],False,[MP:0001340 ! abnormal eyelid morphology],[],[],[]
