In [None]:
import os
import json
import argparse
import pandas as pd
import requests
import multiprocessing as mp

from pygoslin.parser.Parser import LipidParser
from pygoslin.domain.LipidLevel import LipidLevel

In [None]:
def convert_swiss_lipids_lipid_name(name: tuple[str, str]) -> str | None:
    lipid_parser = LipidParser()
    try:
        lipid = lipid_parser.parse(name[0])
        return lipid.get_lipid_string()
    except Exception as _:
        try:
            lipid = lipid_parser.parse(name[1])
            return lipid.get_lipid_string()
        except Exception as _:
            return None


def parse_swiss_lipids_database(file_path) -> pd.DataFrame:
    """Load a tab-separated SwissLipids table and add a ``GOSLIN_NAME`` column.

    The columns ``Lipid ID``, ``Name``, ``Abbreviation*``, ``HMDB`` and
    ``CHEBI`` are renamed to ``ID``, ``NAME``, ``ABBREVIATION``, ``HMDB_ID``
    and ``CHEBI_ID``. Each ``(NAME, ABBREVIATION)`` pair is then passed to
    ``convert_swiss_lipids_lipid_name`` in a process pool with one worker per
    CPU reported by ``mp.cpu_count()``.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the TSV file.

    Returns:
        The loaded DataFrame with the renamed columns plus ``GOSLIN_NAME``,
        which holds the converter's result for each row (``None`` where
        neither spelling could be parsed).

    NOTE(review): the worker function is sent to the pool by reference. With
    the ``spawn`` start method this presumably requires it to be importable
    from a module rather than defined in a notebook cell - confirm on the
    target platform.
    """
    swiss_lipids_db = pd.read_csv(file_path, delimiter='\t').rename(columns={
        'Lipid ID': 'ID',
        'Name': 'NAME',
        'Abbreviation*': 'ABBREVIATION',
        'HMDB': 'HMDB_ID',
        'CHEBI': 'CHEBI_ID'
    })

    # Plain (NAME, ABBREVIATION) tuples keep the per-row payload small and picklable.
    name_pairs = swiss_lipids_db[['NAME', 'ABBREVIATION']].itertuples(index=False, name=None)

    with mp.Pool(mp.cpu_count()) as pool:
        # pool.map preserves input order, so results line up with the rows.
        swiss_lipids_db['GOSLIN_NAME'] = pool.map(convert_swiss_lipids_lipid_name, name_pairs)

    return swiss_lipids_db

In [None]:
# Input: the `lipids.tsv` file downloaded from SwissLipids, expected at the path below.
# NOTE(review): `swswisslipids_db` looks like a typo for `swisslipids_db`; the name is
# kept because the export cell reads the same variable.
swswisslipids_db = parse_swiss_lipids_database(file_path="../data/swisslipids/lipids.tsv")

In [None]:
# Write the parsed table back out as a tab-separated file, without the DataFrame index.
swswisslipids_db.to_csv(
    "../data/swisslipids/lipid_names_all.tsv",
    sep="\t",
    index=False,
)