In [1]:
import openai
from pathlib import Path
import json
import llm2geneset
import time
import pandas as pd

In [2]:
import re

def clean_elements(array):
    """Strip database identifiers from gene set descriptions.

    Every ``(GO:<digits>)``, ``R-HSA-<digits>`` and ``WP<digits>`` token is
    removed from each string, together with any whitespace directly
    adjacent to it.

    Args:
        array: Iterable of description strings.

    Returns:
        A new list holding the cleaned strings in their original order.
    """
    # One alternative per identifier style: (GO:xxx), R-HSA-xxx and WPxxx.
    id_pattern = r'\s*\(GO:\d+\)\s*|\s*R-HSA-\d+\s*|\s*WP\d+\s*'
    return [re.sub(id_pattern, '', text) for text in array]

In [3]:
# Names of the gene set libraries to process.
lib_names = [
    "KEGG_2021_Human",
    "Reactome_2022",
    "WikiPathway_2023_Human",
    "GO_Biological_Process_2023",
    "GO_Molecular_Function_2023",
    "GO_Cellular_Component_2023",
]
# Smaller selections for quick runs (uncomment one to override the list above):
# lib_names = ["KEGG_2021_Human"]
# lib_names = ["KEGG_2021_Human", "Reactome_2022", "WikiPathway_2023_Human"]

In [4]:
# Generate table for some basic stats on gene set libraries used:
# one row per gene set with its library name, description and gene count.
geneset_sz = []
for lib_name in lib_names:
    descr, genes = llm2geneset.read_gmt(f"libs_human/gmt/{lib_name}.txt")
    # Report how many gene sets this library contributes.
    print(len(genes))
    geneset_sz.extend(
        {"lib_name": lib_name, "descr": d, "num_genes": len(g)}
        for d, g in zip(descr, genes)
    )
df = pd.DataFrame(geneset_sz)
# index=False is the documented way to leave the row index out of the file
# (the previous index=None only worked because None is falsy).
df.to_csv("geneset_sz.tsv", sep="\t", index=False)

320
1818
801
5407
1147
474


In [5]:
aclient = openai.AsyncClient()
models = ["gpt-3.5-turbo-0125", "gpt-4o-2024-05-13"]
# "gpt-4-turbo-2024-04-09" has garbage random text, doesn't generat correctly using
# the async API
# models = ["gpt-4-turbo-2024-04-09"]
for model in models:
    for lib_name in lib_names:
        (descr, genes) = llm2geneset.read_gmt("libs_human/gmt/" + lib_name + ".txt")
        # Generate cleaned version of gene set description w/o identifiers.
        descr_cleaned = clean_elements(descr)
        
        # Generate genes, and keep track of time it takes (seconds).
        start_time = time.time()
        llm_genes = await llm2geneset.get_genes(aclient, 
                                                descr_cleaned, 
                                                model=model, use_sysmsg=True)
        end_time = time.time()
        gen_time = end_time - start_time

        llm_genes_norole = await llm2geneset.get_genes(aclient, descr_cleaned, 
                                                       model=model, use_sysmsg=False)

        gen_res = {}

        # Assemble and save generation results.
        gen_res["lib_name"] = lib_name
        gen_res["model"] = model
        gen_res["gen_time"] = gen_time
        gen_res["descr"] = descr
        gen_res["descr_cleaned"] = descr_cleaned
        gen_res["curated_genesets"] = genes
        gen_res["llm_genesets"] = llm_genes
        gen_res["llm_genes_norole"] = llm_genes_norole
    
        with open('libs_human/' + model + '/' + lib_name + '.json', 'w') as json_file:
            json.dump(gen_res, json_file, indent=4)

 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▉                  | 274/320 [00:06<00:01, 25.24it/s]

retrying
'gene'
List all the known genes directly and indirectly involved in the following biological process or cellular component """Ubiquitin mediated proteolysis""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Ubiquitin mediated proteolysis""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {
        "gene": "UBE2D1"
    },
    {
        "gene": "UBE2D2"
    },
    {
        "gene": "UBE2L3"
    },
    {
        "gene": "UBE2N"
    },
    {
        "gene": "UBE2V1"
    },
    {
        "gene": "UBE2W"
    },
    {
        "gene": "CDC34"
    },
    {
        "gene

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 320/320 [00:29<00:00, 10.77it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 320/320 [00:24<00:00, 13.17it/s]
 49%|████████████████████████████████████████████████████████████▊                                                               | 891/1818 [00:08<00:06, 140.69it/s]

retrying
list indices must be integers or slices, not str
List all the known genes directly and indirectly involved in the following biological process or cellular component """Platelet Homeostasis""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Platelet Homeostasis""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {"gene": "ITGA2B"},
    {"gene": "ITGB3"},
    {"gene": "GP1BA"},
    {"gene": "GP1BB"},
    {"gene": "GP5"},
    {"gene": "GP6"},
    {"gene": "GP9"},
    {"gene": "GPX1"},
    {"gene": "GPX3"},
    {"gene": "VWF"},
    {"gene": "thromboxane A2 receptor (

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1818/1818 [00:37<00:00, 48.91it/s]
 61%|██████████████████████████████████████████████████████████████████████████▋                                                | 1104/1818 [00:09<00:04, 148.20it/s]

retrying
No code blocks found
List all the known genes directly and indirectly involved in the following biological process or cellular component """Acyl Chain Remodeling Of CL""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Acyl Chain Remodeling Of CL""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {"gene": "Taz"},
    {"gene": "Lpgat1"},
    {"gene": "Ms


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉| 1817/1818 [00:43<00:03,  3.45s/it]

retrying
No code blocks found
List all the known genes directly and indirectly involved in the following biological process or cellular component """Fatty Acids""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Fatty Acids""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {"gene": "FASN"},
    {"gene": "ACACA"},
    {"gene": "ACACB"},
    {"gene": "ACADL"},
    {"gene": "ACADM"},
    {"gene": "ACADVL"},
    {"gene": "ACADS"},
    {"gene": "ACADSB"},
    {"gene": "ACAD8"},
    {"gene": "ACAD9"},
    {"gene": "ACSF2"},
    {"gene": "ACSL1"},
    {"gene": "ACSL3"},
    {"

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1818/1818 [00:59<00:00, 30.63it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 801/801 [02:23<00:00,  5.59it/s]
 72%|██████████████████████████████████████████████████████████████████████████████████████████▌                                  | 580/801 [00:06<00:01, 169.35it/s]

retrying
string indices must be integers, not 'str'
List all the known genes directly and indirectly involved in the following biological process or cellular component """Netrin UNC5B Signaling Pathway""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Netrin UNC5B Signaling Pathway""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
{
    "type": "array",
    "items": [
        {
            "gene": "NTN1"
        },
        {
            "gene": "UNC5B"
        }
    ]
}
```


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 801/801 [00:20<00:00, 39.16it/s]
 83%|██████████████████████████████████████████████████████████████████████████████████████████████████████▌                    | 4509/5407 [00:33<00:06, 140.54it/s]

retrying
'gene'
List all the known genes directly and indirectly involved in the following biological process or cellular component """Negative Regulation Of Myeloid Cell Differentiation""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Negative Regulation Of Myeloid Cell Differentiation""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {"gene": "CSF1"},
    {"gene": "CSF1R"},
    {"gene": "IL10"},
    {"gene": "IL10RA"},
    {"gene": "IL10RB"},
    {"gene": "IL4"},
    {"gene": "IL4R"},
    {"gene": "IL4R"},
    {"name": "STAT1"},
    {"gene": "SOCS1"},
    {"gene": "

 91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌          | 4947/5407 [00:36<00:02, 159.06it/s]

retrying
'gene'
List all the known genes directly and indirectly involved in the following biological process or cellular component """Muscle Cell Development""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Muscle Cell Development""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {"gene": "MYOD1"},
    {"gene": "MYOG"},
    {"gene": "MYF5"},
    {"gene": "MYF6"},
    {"gene": "MYH1"},
    {"gene": "MYH2"},
    {"gene": "MYH3"},
    {"gene": "MYH4"},
    {"gene": "MYH7"},
    {"gene": "MYH8"},
    {"gene": "MYL1"},
    {"gene": "MYL2"},
    {"gene": "MYL3"},
    {"gen

 98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████   | 5281/5407 [00:38<00:01, 89.32it/s]

retrying
No code blocks found
List all the known genes directly and indirectly involved in the following biological process or cellular component """Regulation Of Peptidase Activity""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Regulation Of Peptidase Activity""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {"gene": "SERPINA1"},
    {"gene": "TIMP1"},
    {"gene": "TIMP2"},
    {"gene": "TIMP3"},
    {"gene": "TIMP4"},
    {"gene": "CST3"},
    {"gene": "SERPINE1"},
    {"gene": "SERPINF2"},
    {"gene": "CSTB"},
    {"gene": "SPINT1"},
    {"gene": "TFPI"},
    

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉| 5403/5407 [01:12<00:13,  3.40s/it]

retrying
No code blocks found
List all the known genes directly and indirectly involved in the following biological process or cellular component """mRNA Metabolic Process""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """mRNA Metabolic Process""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {"gene": "A1CF"},
    {"gene": "AATF"},
    {"gene": "ABCE1"},
    {"gene": "ADAR"},
    {"gene": "ADARB1"},
    {"gene": "ADARB2"},
    {"gene": "ADAT1"},
    {"gene": "ADAT2"},
    {"gene": "ADNP"},
    {"gene": "ADNP2"},
    {"gene": "ADRM1"},
    {"gene": "AEN"},
    {"gene":

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉| 5404/5407 [01:21<00:15,  5.00s/it]

retrying
No code blocks found
List all the known genes directly and indirectly involved in the following biological process or cellular component """mRNA Modification""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """mRNA Modification""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {
        "gene": "ADAR"
    },
    {
        "gene": "ADARB1"
    },
    {
        "gene": "ADARB2"
    },
    {
        "gene": "ADAT1"
    },
    {
        "gene": "ADAT2"
    },
    {
        "gene": "ADPRHL2"
    },
    {
        "gene": "ALKBH8"
    },
    {
        "gene": "ALKBH9"


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5407/5407 [02:01<00:00, 44.32it/s]
 11%|█████████████▊                                                                                                              | 603/5407 [00:06<00:39, 122.65it/s]

retrying
string indices must be integers, not 'str'
List all the known genes directly and indirectly involved in the following biological process or cellular component """Positive Regulation Of Peptide Hormone Secretion""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Positive Regulation Of Peptide Hormone Secretion""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
{
    "type": "array",
    "items": [
        {"gene": "GH1"},
        {"gene": "GHRHR"},
        {"gene": "GNRH1"},
        {"gene": "IGF1"},
        {"gene": "INS"},
        {"gene": "LEP"},
        {"gene": "N

 12%|██████████████▍                                                                                                             | 630/5407 [00:06<00:38, 123.97it/s]

retrying
'gene'
List all the known genes directly and indirectly involved in the following biological process or cellular component """Intrinsic Apoptotic Signaling Pathway By P53 Class Mediator""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Intrinsic Apoptotic Signaling Pathway By P53 Class Mediator""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {"gene": "TP53"},
    {"gene": "BBC3"},
    {"gene": "BAX"},
    {"gene": "BID"},
    {"gene": "CDK5"},
    {"gene": "DR5"},
    {"gene": "FAS"},
    {"gene": "FBXO10"},
    {"gene": "HRK"},
    {"gene": "MDM2"},
    {"g

 29%|███████████████████████████████████▌                                                                                       | 1564/5407 [00:12<00:24, 155.25it/s]

retrying
'gene'
List all the known genes directly and indirectly involved in the following biological process or cellular component """Regulation Of T-helper 1 Cell Differentiation""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Regulation Of T-helper 1 Cell Differentiation""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {"gene": "TBX21"},
    {"gene": "STAT1"},
    {"gene": "IFNG"},
    {"gene": "IL12RB2"},
    {"gene": "IRF1"},
    {" gene": "IL12RB1"},
    {" gene": "IL12A"},
    {" gene": "IL18R1"},
    {" gene": "IL27RA"},
    {" gene": "IL23R"},
    {" gene":

 44%|██████████████████████████████████████████████████████▍                                                                    | 2392/5407 [00:18<00:19, 154.51it/s]

retrying
string indices must be integers, not 'str'
List all the known genes directly and indirectly involved in the following biological process or cellular component """Regulation Of Microtubule Polymerization""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Regulation Of Microtubule Polymerization""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string"
            }
        },
        "required": ["gene"]
    },
    "genes": [
        {

 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████             | 4837/5407 [00:35<00:05, 105.06it/s]

retrying
string indices must be integers, not 'str'
List all the known genes directly and indirectly involved in the following biological process or cellular component """Glycerol-3-Phosphate Metabolic Process""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Glycerol-3-Phosphate Metabolic Process""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
{
    "type": "array",
    "items": [
        {
            "gene": "GPD1"
        },
        {
            "gene": "GPD2"
        },
        {
            "gene": "GPDH"
        },
        {
            "gene": "ALDH4A1"
        },

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5407/5407 [01:10<00:00, 76.79it/s]
 91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎          | 1047/1147 [00:09<00:00, 123.77it/s]

retrying
list indices must be integers or slices, not str
List all the known genes directly and indirectly involved in the following biological process or cellular component """Mannosidase Activity""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Mannosidase Activity""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {"gene": "MAN1A1"},
    {"gene": "MAN1A2"},
    {"gene": "MAN1B1"},
    {"gene": "MAN1C1"},
    {"gene": "MAN2A1"},
    {"gene": "MAN2A2"},
    {"gene": "MAN2A2-CPT1B"},
    {"gene": "MAN2B1"},
    {"gene": "MAN2B2"},
    {"gene": "MANEA"},
    {"gene": "M

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1147/1147 [00:30<00:00, 37.91it/s]
  6%|████████                                                                                                                      | 73/1147 [00:02<00:13, 81.23it/s]

retrying
string indices must be integers, not 'str'
List all the known genes directly and indirectly involved in the following biological process or cellular component """Arachidonic Acid Monooxygenase Activity""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Arachidonic Acid Monooxygenase Activity""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string"
            }
        },
        "required": ["gene"]
    },
    "genes": [
        {"g

 38%|███████████████████████████████████████████████                                                                             | 435/1147 [00:05<00:04, 166.33it/s]

retrying
'gene'
List all the known genes directly and indirectly involved in the following biological process or cellular component """cAMP Response Element Binding""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """cAMP Response Element Binding""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {
        "name": "CREBBP"
    },
    {
        "name": "CREB1"
    },
    {
        "name": "CREB3"
    },
    {
        "name": "CREB5"
    },
    {
        "name": "ATF1"
    },
    {
        "name": "ATF2"
    },
    {
        "name": "CREM"
    },
    {
        "name": "ATF4

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1147/1147 [00:30<00:00, 37.06it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 474/474 [01:28<00:00,  5.35it/s]
 32%|███████████████████████████████████████▌                                                                                     | 150/474 [00:03<00:03, 102.88it/s]

retrying
No code blocks found
List all the known genes directly and indirectly involved in the following biological process or cellular component """Golgi Cisterna""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Golgi Cisterna""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {"gene": "GOLGA1"},
    {"gene": "GOLGA2"},
    {"gene": "GOLGA3"},
    {"gene": "GOLGA4"},
    {"gene": "GOLGA5"},
    {"gene": "GOLGA6"},
    {"gene": "GOLGB1"},
    {"gene": "GOLGB1P"},
    {"gene": "GOLGB1P16"},
    {"gene": "GOLGB1P22"},
    {"gene": "GOLGB1P


 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▍              | 419/474 [00:06<00:01, 43.11it/s]

retrying
'gene'
List all the known genes directly and indirectly involved in the following biological process or cellular component """Condensed Nuclear Chromosome""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Condensed Nuclear Chromosome""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {"gene": "HIST1H1A"},
    {"gene": "HIST1H1B"},
    {"gene": "HIST1H1C"},
    {"gene": "HIST1H1D"},
    {"gene": "HIST1H1E"},
    {"gene": "HIST1H1T"},
    {"gene": "HIST1H2AA"},
    {"gene": "HIST1H2AC"},
    {"gene": "HIST1H2AE"},
    {"gene": "HIST1H2AG"},
    {"gene": "HIST1H2A

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 474/474 [00:35<00:00, 13.34it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 320/320 [00:44<00:00,  7.26it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 320/320 [00:24<00:00, 12.94it/s]
  3%|███▉                                                                                                                          | 56/1818 [00:03<00:43, 40.61it/s]

retrying
No code blocks found
List all the known genes directly and indirectly involved in the following biological process or cellular component """PINK1-PRKN Mediated Mitophagy""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """PINK1-PRKN Mediated Mitophagy""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {
        "gene": "PINK1"
    },
    {
        "gene": "PRKN"
    },
    {
        "gene": "TOMM20"
    },
    {
        "gene": "TOMM22"
    },
    {
        "gene": "VDAC1"
    },
    {
        "gene": "MUL1"
    },
    {
        "gene": "M


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1818/1818 [00:36<00:00, 49.88it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1818/1818 [00:51<00:00, 35.58it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 801/801 [00:37<00:00, 21.14it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 801/801 [00:28<00:00, 28.07it/s]
 13%|███████████████▋                                                                                                            | 683/5407 [00:09<00:32, 145.44it/s]

retrying
No code blocks found
List all the known genes directly and indirectly involved in the following biological process or cellular component """Innate Immune Response In Mucosa""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Innate Immune Response In Mucosa""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {
        "gene": "MYD88"
    },
    {
        "gene": "TLR4"
    },
    {
        "gene": "NOD2"
    },
    {
        "gene": "IL1B"
    },
    {
        "gene": "IFNG"
    },
    {
        "gene": "TNF"
    },
    {
        "gene": "CXCL8"
    },
    {
     

 58%|███████████████████████████████████████████████████████████████████████▏                                                   | 3128/5407 [00:26<00:16, 139.27it/s]

retrying
No code blocks found
List all the known genes directly and indirectly involved in the following biological process or cellular component """Wound Healing""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Wound Healing""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {"gene": "TGFA"},
    {"gene": "TGFBR1"},
    {"gene": "TGFBR2"},
    {"gene": "EGF"},
    {"gene": "EGFR"},
    {"gene": "FGF1"},
    {"gene": "FGF2"},
    {"gene": "FGFR1"},
    {"gene": "FGFR2"},
    {"gene": "PDGFA"},
    {"gene": "PDGFB"},
    {"gene": "PDGFRA"},
    {"gene": "PDGFRB"},
    {

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5407/5407 [01:36<00:00, 56.19it/s]
  9%|██████████▉                                                                                                                  | 474/5407 [00:08<00:50, 96.90it/s]

retrying
'gene'
List all the known genes directly and indirectly involved in the following biological process or cellular component """Phosphatidylcholine Metabolic Process""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """Phosphatidylcholine Metabolic Process""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {
        "gene": "CHPT1"
    },
    {
        "gene": "CHKA"
    },
    {
        "gene": "CHKB"
    },
    {
        "gene": "PCYT1A"
    },
    {
        "gene": "PCYT1B"
    },
    {
        "gene": "CEPT1"
    },
    {
        "gene": "LPCAT1"
    },
    {
  

 31%|█████████████████████████████████████▋                                                                                     | 1655/5407 [00:17<00:27, 134.01it/s]

retrying
'gene'
List all the known genes directly and indirectly involved in the following biological process or cellular component """protein-RNA Complex Disassembly""". Use the following JSON schema:
```json
{
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "gene": {
                "type": "string",
            }
        },
        "required": ["gene"]
    }
}
```
The field `gene` is a gene involved in the following biological process or cellular component: """protein-RNA Complex Disassembly""". Use the HUGO Gene Nomenclature Committee (HGNC) gene abbreviations. Place the output in a JSON code block. Do not add any comments in the JSON code block.

```json
[
    {
        "gene": "DDX5"
    },
    {
        "gene": "DDX6"
    },
    {
        "Gene": "DHX9"
    },
    {
        "gene": "EXOSC10"
    },
    {
        "gene": "MOV10"
    },
    {
        "gene": "UPF1"
    },
    {
        "gene": "XRN1"
    },
    {
        "gene": "H

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5407/5407 [01:34<00:00, 57.03it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1147/1147 [00:46<00:00, 24.89it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1147/1147 [03:10<00:00,  6.02it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 474/474 [00:29<00:00, 16.17it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 474/474 [00:22<00:00, 20.87it/s]
