In [1]:
import openai
from pathlib import Path
import json
import llm2geneset
import time
import pandas as pd
from rouge_score import rouge_scorer
from sklearn.metrics.pairwise import cosine_similarity
# OpenAI clients shared by the cells below: the blocking client is passed to
# llm2geneset.get_embeddings, the async client to the awaited
# llm2geneset.gs_proposal / llm2geneset.gsai calls.
client = openai.Client()
aclient = openai.AsyncClient()

In [5]:
# Generate gene sets using RAG.

scorer = rouge_scorer.RougeScorer(['rouge1','rouge2', 'rougeL'], use_stemmer=True)

models = ["gpt-3.5-turbo-0125"]
lib_names = ["WikiPathway_2023_Human"]#,
#            "Reactome_2022", 
#            "WikiPathway_2023_Human"]
ouput = []
for model in models:    
    for lib_name in lib_names:
        with open("libs_human/" + model + "/" + lib_name + ".json") as f:
            gen_res = json.load(f)
        
        test_sets = gen_res["curated_genesets"][0:5]
        test_descr = gen_res["descr_cleaned"][0:5]
        gt_embs = llm2geneset.get_embeddings(client, test_descr)

        res = await llm2geneset.gs_proposal(aclient, test_sets, model='gpt-3.5-turbo')

        # use GSAI to generate geneset name
        gsai_res = await llm2geneset.gsai(aclient, test_sets, model="gpt-4o", n_retry=1)
        gsai_names = [i['name'] for i in gsai_res]
        gsai_name_embs = llm2geneset.get_embeddings(client, gsai_names)
    

        for i, ref in enumerate(test_descr):
            gt_emb = [gt_embs[i]]
            gsai_name = gsai_names[i]
            gsai_name_emb = [gsai_name_embs[i]]
            gsai_score= scorer.score(ref, gsai_name)['rouge1'].fmeasure
            gsai_csim = cosine_similarity(gt_emb,gsai_name_emb).squeeze()

            llm2geneset_pred = res[i]
            for i in range(len(llm2geneset_pred)):
                name = llm2geneset_pred[i][0]
                name_emb = llm2geneset.get_embeddings(client, [name])
                hyperg_pval = llm2geneset_pred[i][1]
                score = scorer.score(ref, name)['rouge1'].fmeasure
                llm2geneset_csim = cosine_similarity(gt_emb,name_emb).squeeze()
            
                x={"library":lib_name,
                   "gt_name":ref,
                   "gsai_name":gsai_name,
                   "llm2genset_name":name,
                   "gsai_ROUGE1":gsai_score,
                   "llm2geneset_ROUGE1":score,
                   "gsai_csim":gsai_csim,
                   "llm2geneset_csim":llm2geneset_csim,
                   "llm2geneset_hyperg_pval":hyperg_pval}    

                ouput.append(x)
        
        #res_gsai = await llm2geneset.gsai(aclient,test_sets, model="gpt-4o")

100%|███████████████████████████████████████████████| 5/5 [00:16<00:00,  3.30s/it]
100%|███████████████████████████████████████████████| 5/5 [00:15<00:00,  3.16s/it]


In [6]:
# Collect the per-(gene set, proposed name) records into a single table.
df = pd.DataFrame(data=ouput)

In [8]:
# Preview the first eight result rows.
df.head(8)

Unnamed: 0,library,gt_name,gsai_name,llm2genset_name,gsai_ROUGE1,llm2geneset_ROUGE1,gsai_csim,llm2geneset_csim,llm2geneset_hyperg_pval
0,WikiPathway_2023_Human,Glutathione Metabolism,Glutathione metabolism and antioxidant defense...,Glutathione metabolism,0.5,1.0,0.8307785837339222,0.9999987732293052,3.572476e-30
1,WikiPathway_2023_Human,Glutathione Metabolism,Glutathione metabolism and antioxidant defense...,Oxidative stress response,0.5,0.0,0.8307785837339222,0.4503677880242467,0.0001636133
2,WikiPathway_2023_Human,Glutathione Metabolism,Glutathione metabolism and antioxidant defense...,Drug metabolism,0.5,0.5,0.8307785837339222,0.585371675702657,1.0
3,WikiPathway_2023_Human,Glutathione Metabolism,Glutathione metabolism and antioxidant defense...,Pentose phosphate pathway,0.5,0.0,0.8307785837339222,0.4104874293406948,0.01049345
4,WikiPathway_2023_Human,Alanine And Aspartate Metabolism,Amino acid metabolism and transamination proce...,Urea cycle,0.4,0.0,0.5870363395405924,0.4054454667768651,7.035005e-06
5,WikiPathway_2023_Human,Alanine And Aspartate Metabolism,Amino acid metabolism and transamination proce...,Glutamate metabolism,0.4,0.333333,0.5870363395405924,0.6089146864984893,2.528361e-11
6,WikiPathway_2023_Human,Alanine And Aspartate Metabolism,Amino acid metabolism and transamination proce...,Alanine metabolism,0.4,0.666667,0.5870363395405924,0.8266404008892922,0.006035137
7,WikiPathway_2023_Human,Alanine And Aspartate Metabolism,Amino acid metabolism and transamination proce...,Aspartate metabolism,0.4,0.666667,0.5870363395405924,0.8174110281066136,6.50186e-16
