In [23]:
import os
import json
import argparse
import pandas as pd
import requests

from pygoslin.parser.Parser import LipidParser
from pygoslin.domain.LipidLevel import LipidLevel

In [24]:
lipid_parser = LipidParser()

In [41]:
def convert_lion_lipid_name(name: str) -> str | None:
    name = name.replace('FFA', 'FA')
    try:
        lipid = lipid_parser.parse(name)
        return lipid.get_lipid_string()
    except Exception as _:
        return None


def convert_lion_lipid_name_level(name: str) -> str | None:
    try:
        lipid = lipid_parser.parse(name)
        lipid_level = type(lipid.lipid).__name__
        if lipid_level == 'LipidCompleteStructure' or lipid_level == 'LipidFullStructure':
            return 'isomeric'
        elif lipid_level == 'LipidStructureDefined' or lipid_level == 'LipidSnPosition':
            return 'structural'
        elif lipid_level == 'LipidMolecularSpecies':
            return 'molecular'
        elif lipid_level == 'LipidSpecies':
            return 'sum'
        else:
            return None
    except Exception as _:
        return None


def parse_lion_database(file_path) -> dict:
    lion_association_db = pd.read_csv(file_path, delimiter='\t', names=['NAME', 'ID'])

    lion_association_db['GOSLIN_NAME'] = lion_association_db['NAME'].map(convert_lion_lipid_name)
    #lion_association_db['GOSLIN_LEVEL'] = lion_association_db['NAME'].map(convert_lion_lipid_name_level)

    return lion_association_db

In [42]:
# get association table from https://github.com/martijnmolenaar/LION-web/raw/refs/heads/master/OntologyApp/data/20190704%20LION_association.txt
lion_association_db = parse_lion_database('../data/lion/20190704 LION_association.txt')

In [43]:
lion_association_db

Unnamed: 0,NAME,ID,GOSLIN_NAME
0,6-keto-PGF1alpha,LION:0014873,"FA 20:1(13E);[8-12cy5:0;9OH,11OH];15OH;6oxo"
1,PGF2alpha,LION:0014874,"FA 20:2(5Z,13E);[8-12cy5:0;9OH,11OH];15OH"
2,PGE2,LION:0014875,"FA 20:2(5Z,13E);[8-12cy5:0;11OH;9oxo];15OH"
3,PGD2,LION:0014876,"FA 20:2(5Z,13E);[8-12cy5:0;9OH;11oxo];15OH"
4,PGA1,LION:0014877,PGA1
...,...,...,...
150159,SHexCer(d18:1/26:3),LION:0144511,SHexCer 18:1;O2/26:3
150160,SHexCer(d18:1/26:4),LION:0144512,SHexCer 18:1;O2/26:4
150161,SHexCer(d18:1/26:5),LION:0144513,SHexCer 18:1;O2/26:5
150162,SHexCer(d18:1/26:6),LION:0144514,SHexCer 18:1;O2/26:6


In [45]:
legacy_association_db = lion_association_db.copy()
legacy_association_db = legacy_association_db[['NAME', 'ID']]
legacy_association_db

Unnamed: 0,NAME,ID
0,6-keto-PGF1alpha,LION:0014873
1,PGF2alpha,LION:0014874
2,PGE2,LION:0014875
3,PGD2,LION:0014876
4,PGA1,LION:0014877
...,...,...
150159,SHexCer(d18:1/26:3),LION:0144511
150160,SHexCer(d18:1/26:4),LION:0144512
150161,SHexCer(d18:1/26:5),LION:0144513
150162,SHexCer(d18:1/26:6),LION:0144514


In [51]:
goslin_association_db = lion_association_db.copy()
goslin_association_db = goslin_association_db[['GOSLIN_NAME', 'ID']]
goslin_association_db = goslin_association_db[goslin_association_db['GOSLIN_NAME'].notna()]
goslin_association_db = goslin_association_db.rename(columns={'GOSLIN_NAME': 'NAME'})
goslin_association_db

Unnamed: 0,NAME,ID
0,"FA 20:1(13E);[8-12cy5:0;9OH,11OH];15OH;6oxo",LION:0014873
1,"FA 20:2(5Z,13E);[8-12cy5:0;9OH,11OH];15OH",LION:0014874
2,"FA 20:2(5Z,13E);[8-12cy5:0;11OH;9oxo];15OH",LION:0014875
3,"FA 20:2(5Z,13E);[8-12cy5:0;9OH;11oxo];15OH",LION:0014876
4,PGA1,LION:0014877
...,...,...
150159,SHexCer 18:1;O2/26:3,LION:0144511
150160,SHexCer 18:1;O2/26:4,LION:0144512
150161,SHexCer 18:1;O2/26:5,LION:0144513
150162,SHexCer 18:1;O2/26:6,LION:0144514


In [52]:
lion_association_db = pd.concat([legacy_association_db, goslin_association_db])
lion_association_db

Unnamed: 0,NAME,ID
0,6-keto-PGF1alpha,LION:0014873
1,PGF2alpha,LION:0014874
2,PGE2,LION:0014875
3,PGD2,LION:0014876
4,PGA1,LION:0014877
...,...,...
150159,SHexCer 18:1;O2/26:3,LION:0144511
150160,SHexCer 18:1;O2/26:4,LION:0144512
150161,SHexCer 18:1;O2/26:5,LION:0144513
150162,SHexCer 18:1;O2/26:6,LION:0144514


In [53]:
lion_association_db.to_csv('../data/lion/lion_association_table.tsv', index=False, sep='\t')