In [None]:
!pip install biopython

#For scientific papers, ensuring your code is reproducible is paramount. Start by clearly defining your environment dependencies using requirements.txt or environment.yml and explicitly stating the Python version. Structure your code logically with modular functions and add detailed comments explaining non-obvious choices, algorithm steps, and the rationale behind hyperparameters. Crucially, set random seeds at all points of randomness to ensure consistent results across runs. Finally, provide a main script that orchestrates the entire process from data loading to result saving, allowing others to easily replicate your findings.

Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m24.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85


In [None]:
!pip install nest_asyncio
!pip install pandas

import asyncio
import json
import os
import time

import aiohttp
import nest_asyncio
import pandas as pd
import requests
from tqdm import tqdm
from tqdm.asyncio import tqdm_asyncio

nest_asyncio.apply()



In [None]:
# The spreadsheet has to be uploaded manually to this location before running.
file_path = "/content/data kienese.xlsx"
df = pd.read_excel(file_path)

# Extract the UniProt ID column. Missing cells are dropped *before* the string
# conversion: astype(str) would otherwise turn them into the literal ID "nan",
# which would then be sent to the UniProt API like a real accession.
uniprot_ids_all = df['Uniprotid'].dropna().astype(str).str.strip()
uniprot_ids_unique = uniprot_ids_all.drop_duplicates()

# Row count (non-missing IDs) versus number of distinct accessions
total_count = len(uniprot_ids_all)
unique_count = len(uniprot_ids_unique)

total_count, unique_count

(2219, 914)

In [None]:
# Gene names are resolved from UniProt accessions through the UniProt REST API.
async def fetch_gene_name_with_retry(session, uniprot_id, max_retries=5):
    """Look up the primary gene name of one UniProt entry, retrying on failure.

    Args:
        session: object whose ``get(url, timeout=...)`` returns an async context
            manager yielding a response with ``status`` and an awaitable
            ``json()`` (an ``aiohttp.ClientSession`` in this notebook).
        uniprot_id: accession inserted into the entry URL.
        max_retries: maximum number of requests before giving up.

    Returns:
        ``(uniprot_id, value)`` where ``value`` is the first gene's name,
        ``"No gene name found"`` if the first gene has no ``geneName`` value,
        ``"No gene information"`` if the entry lists no genes, or
        ``"Failed after {max_retries} attempts"`` once every attempt failed.
    """
    url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.json"
    for attempt in range(max_retries):
        try:
            async with session.get(url, timeout=10) as response:
                if response.status == 200:
                    data = await response.json()
                    genes = data.get("genes", [])
                    if genes:
                        gene_name = genes[0].get("geneName", {}).get("value", "No gene name found")
                        return uniprot_id, gene_name
                    return uniprot_id, "No gene information"
                err_msg = f"HTTP {response.status}"
        except Exception as e:
            # Best effort: any transport/parsing problem counts as a failed attempt.
            err_msg = f"Exception: {e}"

        print(f"Warning: Failed to fetch {uniprot_id} (attempt {attempt+1}/{max_retries}): {err_msg}")
        # Exponential backoff, but only when another attempt will follow;
        # waiting after the final failure just delays the give-up result.
        if attempt < max_retries - 1:
            await asyncio.sleep(2 ** attempt)

    return uniprot_id, f"Failed after {max_retries} attempts"

async def fetch_all_with_retry(uniprot_ids, concurrency=20):
    """Resolve many UniProt accessions concurrently over one shared session.

    Args:
        uniprot_ids: iterable of accessions to look up.
        concurrency: value passed as the connector's ``limit_per_host``.

    Returns:
        A dict built from the ``(accession, value)`` pairs returned by
        ``fetch_gene_name_with_retry``.
    """
    pool = aiohttp.TCPConnector(limit_per_host=concurrency)
    async with aiohttp.ClientSession(connector=pool) as http:
        pending = []
        for accession in uniprot_ids:
            pending.append(fetch_gene_name_with_retry(http, accession))
        # Await everything together; tqdm renders the progress bar.
        pairs = await tqdm_asyncio.gather(*pending)
    return dict(pairs)

# running
unique_ids = uniprot_ids_unique.tolist()
gene_name_map = await fetch_all_with_retry(unique_ids)

# Filter out invalid gene names
valid_gene_names = [name for name in gene_name_map.values()
                    if name not in ["No gene name found", "No gene information", "Failed after 5 attempts", None]]

# Deduplication
unique_valid_gene_names = list(set(valid_gene_names))

print(unique_valid_gene_names)

print(f"Number of valid gene names:{len(unique_valid_gene_names)}")
# save as json

with open("gene_name_map_retry.json", "w", encoding="utf-8") as f:
    json.dump(unique_valid_gene_names, f, ensure_ascii=False, indent=2)

100%|██████████| 914/914 [00:08<00:00, 111.87it/s]


['Grik5', 'RAD50', 'Map7d1', 'Zfp36', 'SNX8', 'ERCC6L', 'Mst1r', 'ARHGAP24', 'ZWINT', 'ZNHIT1', 'FHL2', 'Afap1l1', 'BLM', 'Disc1', 'Tp53bp1', 'DLG1', 'WAS', 'SLC4A1', 'ESPL1', 'PPP1R12A', 'CNN3', 'Dennd1a', 'WIZ', 'Map2', 'Lipe', 'BRAF', 'Prm2', 'CDC42BPA', 'SLC3A2', 'ATAT1', 'Wasl', 'CHRM2', 'DGKD', 'HBP1', 'PLSCR4', 'MKI67', 'Caskin1', 'SNRK', 'SRPK1', 'FCGR2B', 'Sugt1', 'Tnp2', 'Itpr1', 'DLGAP5', 'Lcp1', 'GABRR1', 'SF1', 'NUP62', 'NINL', 'ACAP1', 'Lrrk2', 'MYH10', 'ANAPC4', 'PEBP1', 'MAPK3', 'MXD1', 'CTDP1', 'SYN', 'PRKDC', 'PRC1', 'BCL10', 'PHB2', 'H3C1', 'NKX3-1', 'PIKFYVE', 'DIABLO', 'MTHFR', 'MTSS1', 'KLF3', 'CDC25C', 'Cdv3', 'MAP3K5', 'NUMB', 'TRPM8', 'CERT1', 'TXNIP', 'BRSK2', 'FDX1', 'SIK3', 'ELAVL1', 'KIF11', 'Etv5', 'CHUK', 'ALYREF', 'FPR1', 'HSP90B1', 'Ugcg', 'Wnk1', 'GAK', 'Rasgrf1', 'Mip', 'MDV005', 'PHF8', 'DAPK1', 'MAPK12', 'sts5', 'MAPRE1', 'TRPC5', 'NR5A1', 'Sik1', 'Hoxa9', 'EWSR1', 'CDK5R2', 'YLPM1', 'Cd79b', 'AKAP8', 'DEK', 'H1-1', 'FZD6', 'LIG3', 'IL16', 'para', '

In [None]:
# Collect the accessions whose lookup returned the "No gene information" placeholder
ids_with_no_gene_info = []
for accession, label in gene_name_map.items():
    if label == "No gene information":
        ids_with_no_gene_info.append(accession)

# Report them one per line, followed by the total
print("IDs with 'No gene information':")
for accession in ids_with_no_gene_info:
    print(accession)

missing_total = len(ids_with_no_gene_info)
print(f"\nFound {missing_total} IDs with 'No gene information'.")

IDs with 'No gene information':
P84227
Q6LED0
P12906

Found 3 IDs with 'No gene information'.


UniProt returns no gene annotation for these three accessions, so their names had to be looked up manually:

P84227 Histone H3.2

Q6LED0 Histone H3.1

P12906 Middle T antigen

In [None]:

# Manually identified names for the accessions that UniProt returned without
# gene information. Keyed by accession so each name stays attached to the
# entry it was looked up for:
#   P84227 -> Histone H3.2, Q6LED0 -> Histone H3.1, P12906 -> Middle T antigen
manual_gene_name_by_id = {
    "P84227": "Histone H3.2",
    "Q6LED0": "Histone H3.1",
    "P12906": "Middle T antigen",
}
manual_gene_names = list(manual_gene_name_by_id.values())

# Add the manual names to the list of valid names (sorted for a stable order)
unique_valid_gene_names = sorted(set(unique_valid_gene_names) | set(manual_gene_names))

# Patch the ID -> name map, touching only accessions that are actually in it
for uniprot_id, manual_name in manual_gene_name_by_id.items():
    if uniprot_id in gene_name_map:
        gene_name_map[uniprot_id] = manual_name

# Re-filter and deduplicate. The failure placeholder is matched by prefix so it
# does not depend on the retry count used during the lookup.
valid_gene_names_updated = [
    name for name in gene_name_map.values()
    if name is not None
    and name not in ("No gene name found", "No gene information")
    and not name.startswith("Failed after ")
]
unique_valid_gene_names_updated = sorted(set(valid_gene_names_updated))

# Overwrite the JSON file with the corrected list
with open("gene_name_map_retry.json", "w", encoding="utf-8") as f:
    json.dump(unique_valid_gene_names_updated, f, ensure_ascii=False, indent=2)

print("Updated 'gene_name_map_retry.json' with manual gene name corrections.")
print(f"Number of unique valid gene names after update: {len(unique_valid_gene_names_updated)}")


Updated 'gene_name_map_retry.json' with manual gene name corrections.
Number of unique valid gene names after update: 905


In [None]:
# Extract the distinct kinase labels from the 'kinase' column.
# Series.unique() already returns each value once, in order of first
# appearance; passing it through set() would scramble that order differently
# in every session and make all downstream outputs order-unstable.
kinase_names = df['kinase'].dropna().unique().tolist()

print(f"The second column of the table is the number of kinases: {len(kinase_names)}")

# Sanity check: downstream cells expect a plain list
typeof = type(kinase_names)
print(typeof)

print("Kinases:", ", ".join(kinase_names))

The second column of the table is the number of kinases: 193
<class 'list'>
Kinases: PKN2, PKCt, PKCg, CDK4, FER, EGFR, CDK5, MST2, ErbB4, GCN2, RSK1, HCK, RIPK3, PFTAIRE1, Erk1, CK1e, CK1a, GPRK5, TAO2, p38g, PYK2, TSSK4, CDKL5, MRCKa, MELK, PAK5, TTBK2, ULK1, BUB1, LATS1, IKKb, DYRK1A, MYO3B, SRPK1, RSK2, smMLCK, skMLCK, JAK3, MARK3, NDR2, PHKg1, CK2a1, COT, PLK3, PKCb, PLK1, NDR1, NEK7, PKCz, AMPKa1, PAK1, EphB6, MARK1, CDK7, PAK6, p38a, AurA, PKD1, CK2a2, DYRK2, ABL2, MAP3K7, IKKe, MSK2, LATS2, MAP2K6, EphB1, SYK, Wnk1, p38b, NEK6, TYK2, GPRK4, BARK2, MAP2K3, ROCK1, Erk2, HIPK2, JNK1, DAPK1, LRRK2, PKACb, ZAP70, NEK2, PDGFRb, YES, DAPK3, AKT3, PINK1, SLK, LCK, JNK2, PKACa, SGK1, PIM3, PKCe, CDC7, RSK3, CaMK2d, GCK, CLK1, DNAPK, KIT, ATR, TBK1, AKT1, CaMK4, CDK9, MAPKAPK5, RIPK1, CaMK2b, PKCd, AurB, DYRK3, PKN1, VRK1, Erk3, CHK1, CK1d, TTK, TTBK1, SGK3, KIS, KDR, MASTL, JAK2, KHS1, GSK3A, PAK2, DCLK1, DYRK1B, CK1g2, PHKg2, RAF1, MAPKAPK2, IRAK4, FGFR1, ChaK2, MYT1, PIM1, DMPK1, SRC,

In [None]:

# Check whether a kinase label is the official gene name
def check_gene_name(symbol):
    """Classify a kinase label against the top UniProt hit for ``gene:<symbol>``.

    The search is not restricted to an organism, so the top hit may come from
    any species; "Alias" results should be reviewed before being trusted.

    Args:
        symbol: kinase label to look up.

    Returns:
        ``(symbol, status)`` where ``status`` is one of ``"Official"`` (exact,
        case-sensitive match with the top hit's primary gene name),
        ``"Alias → <name>"``, ``"Not Found"``, ``"Query Failed"`` (non-200
        response), ``"No geneName in top result"`` or ``"Error: <exception>"``.
    """
    url = "https://rest.uniprot.org/uniprotkb/search"
    # Let the HTTP client encode the query so labels containing spaces or
    # reserved characters cannot corrupt the URL. Only the first hit is read,
    # so a page size of 1 avoids downloading the default page of results.
    params = {
        "query": f"gene:{symbol}",
        "fields": "gene_names,accession",
        "format": "json",
        "size": 1,
    }
    try:
        resp = requests.get(url, params=params, timeout=10)
        if resp.status_code != 200:
            return symbol, "Query Failed"

        data = resp.json()
        results = data.get("results", [])
        if not results:
            return symbol, "Not Found"

        # The primary name of the first (top-ranked) entry is taken.
        gene_names = results[0].get("genes", [])
        if gene_names:
            main_name = gene_names[0].get("geneName", {}).get("value", "")
            if main_name:
                if symbol == main_name:
                    return symbol, "Official"
                return symbol, f"Alias → {main_name}"

        return symbol, "No geneName in top result"

    except Exception as e:
        # Best effort: report the problem for this label and let the caller go on.
        return symbol, f"Error: {e}"

# Classify every kinase label, one request at a time
results = []
progress = tqdm(kinase_names, desc="Progress", ncols=100, leave=False)
for kinase in progress:
    results.append(check_gene_name(kinase))
    time.sleep(0.1)  # Respect API rate limit

# Show one "label: status" line per kinase
for kinase, verdict in results:
    print(f"{kinase}: {verdict}")

Progress:   0%|                                                             | 0/193 [00:00<?, ?it/s]

PKN2: Official
PKCt: Not Found
PKCg: Alias → PRKCG
CDK4: Official
FER: Official
EGFR: Official
CDK5: Official
MST2: Alias → STK3
ErbB4: Alias → ERBB4
GCN2: Alias → EIF2AK4
RSK1: Alias → RPS6KA1
HCK: Official
RIPK3: Official
PFTAIRE1: Alias → cdk14
Erk1: Alias → MAPK3
CK1e: Alias → Csnk1e
CK1a: Official
GPRK5: Alias → GRK5
TAO2: Alias → Taok2
p38g: Not Found
PYK2: Alias → PTK2B
TSSK4: Official
CDKL5: Official
MRCKa: Alias → MRCKA
MELK: Official
PAK5: Alias → PAK6
TTBK2: Official
ULK1: Official
BUB1: Official
LATS1: Official
IKKb: Alias → IKBKB
DYRK1A: Official
MYO3B: Official
SRPK1: Official
RSK2: Alias → RPS6KA3
smMLCK: Not Found
skMLCK: Not Found
JAK3: Official
MARK3: Official
NDR2: Alias → STK38L
PHKg1: Alias → PHKG1
CK2a1: Alias → CSNK2A1
COT: Alias → CROT
PLK3: Official
PKCb: Alias → PRKCB
PLK1: Official
NDR1: Alias → STK38
NEK7: Official
PKCz: Alias → Prkcz
AMPKa1: Alias → prkaa1
PAK1: Official
EphB6: Alias → EPHB6
MARK1: Official
CDK7: Official
PAK6: Official
p38a: Official
AurA:

Not Found:
PKCt, p38g, smMLCK, skMLCK, DMPK1, AMPKa2, p70S6Kb

Alias (needs replacement):
PKCg → PRKCG, MST2 → STK3, ErbB4 → ERBB4, GCN2 → EIF2AK4, RSK1 → RPS6KA1, PFTAIRE1 → cdk14, Erk1 → MAPK3, CK1e → Csnk1e, GPRK5 → GRK5, TAO2 → Taok2, PYK2 → PTK2B, PAK5 → PAK6, NDR2 → STK38L, PHKg1 → PHKG1, CK2a1 → CSNK2A1, COT → CROT, PKCb → PRKCB, NDR1 → STK38, PKCz → Prkcz, AMPKa1 → prkaa1, EphB6 → EPHB6, AurA → AURKA, CK2a2 → CSNK2A2, IKKe → IKBKE, MSK2 → RPS6KA4, EphB1 → Ephb1, Wnk1 → WNK1, GPRK4 → GRK4, BARK2 → GRK3, Erk2 → MAPK1, JNK1 → mapk8, PKACb → Prkacb, PDGFRb → PDGFRB, YES → YES1, JNK2 → MAPK9, PKACa → PRKACA, PKCe → PRKCE, RSK3 → RPS6KA2, CaMK2d → CAMK2D, DNAPK → DNApk, CaMK4 → Camk4, CaMK2b → CAMK2B, PKCd → PRKCD, AurB → aurB, Erk3 → MAPK6, CHK1 → CHEK1, CK1d → CK1D, KIS → UHMK1, FGFR1 → Fgfr1, ChaK2 → TRPM6, MYT1 → PKMYT1, CHK2 → CHEK2, IKKa → CHUK, PKG2 → pkg2, BARK1 → GRK2, PKCa → PRKCA, PKCi → Hint1, RON → MST1R, JNK3 → MAPK10, MSK1 → RPS6KA5, EphB3 → EPHB3, NIK → MAP3K14, CaMK2a → CAMK2A, CDC2 → CDK1, FAK → PTK2, CaMK1a → camk1a, p70S6K → S6k, PKG1 → pkg1-4, LKB1 → STK11, MLK2 → MAP3K10, CaMKK2 → CAMKK2, KHS2 → khs_2, MST1 → STK4, PDHK1 → PDK1



Most of the kinases reported as "Not Found" can still be identified by searching the literature manually; only the two below need to be replaced:

PKCt→PRKCQ,
p70S6Kb→S6k2c

In [None]:
# Mapping from the kinase labels used in the spreadsheet to the names used for
# the literature search. The first two entries were chosen manually; the rest
# are the "Alias" results reported by check_gene_name (top UniProt hit).
# NOTE(review): that search is not restricted to an organism, and several
# values look like non-official or other-species symbols (e.g. "cdk14",
# "Csnk1e", "Taok2", "pkg1-4", "khs_2", "S6k") — verify before relying on them.

alias_mapping = {
    "PKCt": "PRKCQ",  # manual entry
    "p70S6Kb": "S6k2c",  # manual entry
    "PKCg": "PRKCG",
    "MST2": "STK3",
    "ErbB4": "ERBB4",
    "GCN2": "EIF2AK4",
    "RSK1": "RPS6KA1",
    "PFTAIRE1": "cdk14",
    "Erk1": "MAPK3",
    "CK1e": "Csnk1e",
    "GPRK5": "GRK5",
    "TAO2": "Taok2",
    "PYK2": "PTK2B",
    # NOTE(review): "PAK6" is itself one of the kinase labels (reported as
    # Official), so PAK5 and PAK6 collapse into a single name downstream —
    # confirm this is intended.
    "PAK5": "PAK6",
    "NDR2": "STK38L",
    "PHKg1": "PHKG1",
    "CK2a1": "CSNK2A1",
    # NOTE(review): CROT looks like a different gene than the COT kinase
    # (possibly MAP3K8) — verify.
    "COT": "CROT",
    "PKCb": "PRKCB",
    "NDR1": "STK38",
    "PKCz": "Prkcz",
    "AMPKa1": "prkaa1",
    "EphB6": "EPHB6",
    "AurA": "AURKA",
    "CK2a2": "CSNK2A2",
    "IKKe": "IKBKE",
    "MSK2": "RPS6KA4",
    "EphB1": "Ephb1",
    "Wnk1": "WNK1",
    "GPRK4": "GRK4",
    "BARK2": "GRK3",
    "Erk2": "MAPK1",
    "JNK1": "mapk8",
    "PKACb": "Prkacb",
    "PDGFRb": "PDGFRB",
    "YES": "YES1",
    "JNK2": "MAPK9",
    "PKACa": "PRKACA",
    "PKCe": "PRKCE",
    "RSK3": "RPS6KA2",
    "CaMK2d": "CAMK2D",
    "DNAPK": "DNApk",
    "CaMK4": "Camk4",
    "CaMK2b": "CAMK2B",
    "PKCd": "PRKCD",
    "AurB": "aurB",
    "Erk3": "MAPK6",
    "CHK1": "CHEK1",
    "CK1d": "CK1D",
    "KIS": "UHMK1",
    "FGFR1": "Fgfr1",
    "ChaK2": "TRPM6",
    "MYT1": "PKMYT1",
    "CHK2": "CHEK2",
    "IKKa": "CHUK",
    "PKG2": "pkg2",
    "BARK1": "GRK2",
    "PKCa": "PRKCA",
    # NOTE(review): Hint1 looks like a different protein than the PKCi kinase
    # itself (possibly PRKCI) — verify.
    "PKCi": "Hint1",
    "RON": "MST1R",
    "JNK3": "MAPK10",
    "MSK1": "RPS6KA5",
    "EphB3": "EPHB3",
    "NIK": "MAP3K14",
    "CaMK2a": "CAMK2A",
    "CDC2": "CDK1",
    "FAK": "PTK2",
    "CaMK1a": "camk1a",
    "p70S6K": "S6k",
    "PKG1": "pkg1-4",
    "LKB1": "STK11",
    "MLK2": "MAP3K10",
    "CaMKK2": "CAMKK2",
    "KHS2": "khs_2",
    "MST1": "STK4",
    "PDHK1": "PDK1"
}

# Resolve every kinase label; labels without an alias entry map to themselves
mapped_kinase_dict = {k: alias_mapping.get(k, k) for k in kinase_names}

# Persist the kinase map
json_path = "/content/kinase_mapped_dict.json"
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(mapped_kinase_dict, f, indent=2)

# Read the map back from the very path it was written to. A bare relative file
# name only resolves to the same file when the working directory is /content.
with open(json_path, "r", encoding="utf-8") as f:
    kinase_mapped_dict = json.load(f)

# Real names, in the same order as kinase_names
real_kinase_names = [kinase_mapped_dict.get(name, name) for name in kinase_names]

# (original label, real name) pairs
kinase_name_pairs = [(name, kinase_mapped_dict.get(name, name)) for name in kinase_names]

In [None]:
# Reload the label -> real-name table saved earlier
with open("kinase_mapped_dict.json", "r", encoding="utf-8") as mapping_file:
    kinase_mapped_dict = json.load(mapping_file)

# Build the list of (original name, real name) pairs
kinase_name_pairs = []
for original in kinase_names:
    kinase_name_pairs.append((original, kinase_mapped_dict.get(original, original)))

In [None]:
from Bio import Entrez
from tqdm import tqdm

# Abstract-fetching helper
def fetch_top3_abstracts_entrez(gene_name, retmax=10):
    """Return up to three PubMed abstracts for a search term.

    Searches PubMed for ``gene_name`` sorted by relevance, fetches the first
    ``retmax`` records and keeps the first three that carry an abstract.

    Args:
        gene_name: term passed to the PubMed search as-is.
        retmax: number of search hits to fetch and scan for abstracts.

    Returns:
        ``[gene_name, abs1, abs2, abs3]``; missing abstracts are ``None``.
        Any error is printed and reported as ``[gene_name, None, None, None]``.
    """
    try:
        search_handle = Entrez.esearch(db="pubmed", term=gene_name, retmax=retmax, sort="relevance")
        try:
            search_record = Entrez.read(search_handle)
        finally:
            # Close the handle even when parsing fails
            search_handle.close()

        ids = search_record["IdList"]
        if not ids:
            return [gene_name, None, None, None]

        fetch_handle = Entrez.efetch(db="pubmed", id=",".join(ids), rettype="abstract", retmode="xml")
        try:
            fetch_records = Entrez.read(fetch_handle)
        finally:
            fetch_handle.close()

        # Pure in-memory parsing from here on: no request is made inside this
        # loop, so it needs no rate-limit pause.
        abstracts = []
        for article in fetch_records["PubmedArticle"]:
            try:
                abstract_blocks = article["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]
                abstract = " ".join(
                    str(block) if isinstance(block, str)
                    else f"{block.get('Label', '')}: {block.get('Text', '')}"
                    for block in abstract_blocks
                )
            except KeyError:
                # Record without an abstract: skip it
                continue
            abstracts.append(abstract)

        # Keep the first three and pad with None when fewer are available
        top = abstracts[:3]
        top.extend([None] * (3 - len(top)))
        return [gene_name] + top

    except Exception as e:
        # Best effort: one failing term must not abort a whole batch
        print(f"Error during fetching '{gene_name}': {e}")
        return [gene_name, None, None, None]

# Batch fetching helper
def batch_fetch(gene_names):
    """Fetch abstracts for every name, showing a progress bar.

    Returns one ``fetch_top3_abstracts_entrez`` row per input name, in input order.
    """
    return [
        fetch_top3_abstracts_entrez(symbol)
        for symbol in tqdm(gene_names, desc="Fetching abstracts")
    ]

In [None]:

# NCBI asks for a contact address with every E-utilities request and warns when
# none is set. Take it from the ENTREZ_EMAIL environment variable so that no
# personal address has to be hard-coded in the notebook.
if not Entrez.email:
    Entrez.email = os.environ.get("ENTREZ_EMAIL")

# Fetch abstracts and store them keyed by kinase name. Several labels can map
# to the same real name; each distinct name is fetched once, in first-seen order.
abstracts_dict = {}
for gene in tqdm(list(dict.fromkeys(real_kinase_names)), desc="Fetching abstracts"):
    gene_name, abs1, abs2, abs3 = fetch_top3_abstracts_entrez(gene)
    abstracts_dict[gene_name] = {
        "Kin_abstract1": abs1,
        "Kin_abstract2": abs2,
        "Kin_abstract3": abs3
    }

# Save to JSON
output_path = "/content/kinase_abstract.json"
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(abstracts_dict, f, ensure_ascii=False, indent=2)

output_path

            Email address is not specified.

            To make use of NCBI's E-utilities, NCBI requires you to specify your
            email address with each request.  As an example, if your email address
            is A.N.Other@example.com, you can specify it as follows:
               from Bio import Entrez
               Entrez.email = 'A.N.Other@example.com'
            In case of excessive usage of the E-utilities, NCBI will attempt to contact
            a user at the email address provided before blocking access to the
            E-utilities.
Fetching abstracts: 100%|██████████| 193/193 [18:39<00:00,  5.80s/it]


'/content/kinase_abstract.json'

In [None]:
# Collect up to three abstracts per substrate gene, keyed by gene name
abstract_columns = ("Kin_abstract1", "Kin_abstract2", "Kin_abstract3")
abstracts_dict = {}
for substrate in tqdm(unique_valid_gene_names_updated, desc="Fetching abstracts"):
    # Each row is [name, abstract 1, abstract 2, abstract 3]
    name, *top_three = fetch_top3_abstracts_entrez(substrate)
    abstracts_dict[name] = dict(zip(abstract_columns, top_three))

# Write the collected abstracts out as JSON
output_path = "/content/substrate_abstract.json"
with open(output_path, "w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(abstracts_dict, ensure_ascii=False, indent=2))

output_path

In [None]:
# Read kinase_mapped_dict (original label -> real name) from the path it was
# written to; a bare file name only works when the working directory is /content.
with open("/content/kinase_mapped_dict.json", "r", encoding="utf-8") as f:
    kinase_mapped_dict = json.load(f)

# Read the abstracts dictionary (written to /content as well)
with open("/content/kinase_abstract.json", "r", encoding="utf-8") as f:
    kinase_abstracts = json.load(f)

# Re-key the abstracts by real name; keys without a mapping entry are kept as-is
mapped_abstracts = {
    kinase_mapped_dict.get(original_name, original_name): abstracts
    for original_name, abstracts in kinase_abstracts.items()
}

# Convert to a DataFrame with the name as the first column. The result gets its
# own variable so the source spreadsheet held in `df` is not overwritten and
# earlier cells stay re-runnable.
kinase_prompts_df = (
    pd.DataFrame.from_dict(mapped_abstracts, orient="index")
    .rename_axis("kinase_name")
    .reset_index()
)

# Save as CSV
kinase_prompts_df.to_csv("kinase prompts.csv", index=False, encoding="utf-8")


In [None]:
# Read the substrate abstracts (gene name -> abstract fields) from the path the
# fetch step wrote them to. gene_name_map_retry.json is not needed here: it
# holds a plain list of gene names, and the abstracts are already keyed by
# those names.
with open("/content/substrate_abstract.json", "r", encoding="utf-8") as f:
    substrate_abstracts = json.load(f)

# Convert to a DataFrame with the gene name as the first column. The result
# gets its own variable so the source spreadsheet held in `df` is not overwritten.
substrate_prompts_df = (
    pd.DataFrame.from_dict(substrate_abstracts, orient="index")
    .rename_axis("substrate_name")
    .reset_index()
)

# Save as CSV under its own name so the kinase file is not overwritten
substrate_prompts_df.to_csv("substrate prompts.csv", index=False, encoding="utf-8")


Once the abstract files and the name-mapping JSON files for kinases and substrates are available, a large language model can be used to help generate the final, complete CSV file.