In [None]:
#%%

import os
import sys
from openai import OpenAI
from dotenv import load_dotenv
import pandas as pd
from datasets import Dataset, DatasetDict
from ner_metrics_both import classification_report
from copy import deepcopy
import tqdm


# -- 加入llama3.1的infer模块 -- #
load_dotenv()
LLama31_infer_path = os.getenv('LLama31_infer_path')
sys.path.append(LLama31_infer_path)
from llama_infer import *

# -- 加入gpt-4o的infer模块
load_dotenv()
gpt_infer_path = os.getenv('GPT_infer_path')
OPENAI_APIKEY = os.getenv("OPENAI_APIKEY")
sys.path.append(gpt_infer_path)
from Inference_pubmed import *


# -- 加入gpt-4o的infer代码 -- #



# 不做删除，直接占位，之后给成O，保持和ann处理之后的长度一致！

def process_special_token(orignial_list):
    """
    Add separator-merged variants of tokenized entity names.

    A model may emit a name tokenized as ["a", "-", "b"] or ["a", "/", "b"],
    while the whitespace-split source text contains "a-b" / "a/b" as a single
    token. For every entity holding a standalone "-" or "/" token that has a
    neighbour on both sides, one merged variant is appended so either form
    can be matched against the text later.

    Args:
        orignial_list: list of entities, each a list of string tokens,
            e.g. [["osf", "/", "dce"], ["matlab"]]. It is not modified.

    Returns:
        A new list: a deep copy of the input entities, followed by one fully
        merged variant per entity that contained a mergeable separator.
        A separator at the very start or very end of an entity is left as is.
    """
    separators = ("-", "/")
    return_list = deepcopy(orignial_list)

    for entity in orignial_list:
        merged = []
        changed = False
        skip_next = False  # True when the current token was already glued to its left neighbour

        for position, token in enumerate(entity):
            if skip_next:
                skip_next = False
                continue

            has_right_neighbour = position + 1 < len(entity)
            if token in separators and merged and has_right_neighbour:
                # ["a", "-", "b"] -> ["a-b"]; the right operand is consumed here
                merged[-1] = merged[-1] + token + entity[position + 1]
                skip_next = True
                changed = True
            else:
                merged.append(token)

        # Append the merged form once per entity, however many separators it had
        if changed:
            return_list.append(merged)

    return return_list
            



    
            


# --- 子列表匹配，返回匹配的首、尾对应的元组列表 --- #
def find_sublist(main_list, sub_list):
    """
    Locate every window of `main_list` that contains the entity `sub_list`.

    Matching is done on concatenated strings rather than on token equality:
    a window of len(sub_list) consecutive tokens matches when the joined
    entity is a substring of the joined window. This tolerates imperfect
    tokenization such as surrounding punctuation ("(matlab)" vs "matlab").

    Args:
        main_list: tokens of the source text.
        sub_list: tokens of one entity name.

    Returns:
        A list of inclusive (start, end) token-index tuples, or -1 when
        nothing matches. An entity that joins to an empty string never
        matches (it would otherwise match every window).
    """
    needle = "".join(sub_list)
    if not needle:
        # "" is a substring of everything; treat an empty entity as "not found"
        return -1

    window = len(sub_list)
    index_list = [
        (start, start + window - 1)
        for start in range(len(main_list) - window + 1)
        if needle in "".join(main_list[start:start + window])
    ]
    return index_list if index_list else -1
    


# ------- llama3.1输出结果生成BIO tag，为了评估 ------- #
# input: 原始文本, 抽取结果
# output: ['O', 'B-PER', 'O', 'B-ORG', 'B-ORG', 'I-ORG', 'O', 'B-PER', 'I-PER', 'O']
def convert_txt_to_bio(text, entities):
    """
    Turn extracted entity names into a BIO tag sequence aligned with `text`.

    The text is lower-cased and split on single spaces; the text itself is
    never re-tokenized, so the result always has one tag per space-separated
    token. Entity names are pre-processed instead (lower-cased, split on
    spaces, separator-merged variants added by process_special_token) and
    matched against the token list with find_sublist.

    Args:
        text: the raw text the model was asked about.
        entities: iterable of mappings, each with a "name" key.

    Returns:
        A list of "B" / "I" / "O" strings, one per token of `text`.
    """
    tokens = text.lower().split(" ")

    # De-duplicate while keeping the order in which entities were given.
    # A set() would make the processing order depend on string-hash
    # randomization, and overlapping spans are last-write-wins below.
    software_names = list(dict.fromkeys(item["name"] for item in entities))
    # Blank names would match every position of the text, so drop them.
    software_names_list = [
        name.lower().split(" ") for name in software_names if name.strip()
    ]
    software_names_list = process_special_token(software_names_list)
    print("#"*5, software_names_list)

    bio_index = []
    for name_tokens in software_names_list:
        spans = find_sublist(tokens, name_tokens)
        if spans == -1:
            # Entity could not be located in the text; report and move on
            print("txt to bio","="*10, name_tokens)
        else:
            bio_index += spans

    bio_list = ["O"] * len(tokens)

    # Spans are inclusive on both ends; a later span overwrites an earlier one
    for start, end in bio_index:
        for i in range(start, end + 1):
            bio_list[i] = "B" if i == start else "I"
    return bio_list



# ---------- 调用gpt-4o和llama3.1来推理 -------- # 

def gpt_4o_infer(paper):
    """
    Ask the OpenAI chat model to extract software names from one paper.

    The guidelines and few-shot examples are read from the prompt files on
    every call and combined with the formatted paper into one user message.
    The request uses model "gpt-4o-mini", temperature 0 and the JSON-object
    response format.

    Side effect: `paper["abstract"]` is overwritten with title + abstract
    (no separator) before the prompt template is formatted.
    NOTE(review): calling this after another function that already prepended
    the title to the same dict would prepend it twice — confirm with caller.

    Args:
        paper: dict providing at least "title" and "abstract"; all of its
            keys are passed to the prompt template's format().

    Returns:
        The model reply parsed from JSON, or None if anything inside the
        request/parse step raised (the error and traceback are printed).
    """
    # Imported locally so the function does not depend on these modules
    # happening to be in scope through a star-import.
    import json
    import traceback

    print("* gpt-4o is inferring")
    system_role = SYSTEM_ROLE
    prompt_template = TPL_PROMPT
    with open("../../datasets/prompts/guidelines.txt", 'r', encoding='utf-8') as file_txt:
        guidelines = file_txt.read()
    with open("../../datasets/prompts/few_shots_Llama31.txt", 'r', encoding='utf-8') as file_txt:
        few_shots = file_txt.read()

    try:
        # The template uses {title} and {abstract}; the paper dict supplies them.
        # The title is folded into the abstract before formatting.
        paper["abstract"] = paper["title"] + paper["abstract"]
        prompt = prompt_template.format(**paper)

        # The reply is expected to be a JSON object with a "software" key.
        # Guidelines and few-shot examples are shared with the llama3.1 pipeline.
        Prompt_all = f"""# You are given a title and an abstract of an academic publication. Your task is to identify and extract the names of software mentioned in the abstract. Software names are typically proper nouns and may include specific tools, platforms, or libraries used in research. Please list the software names you find in the publication in a JSON object using a key "software". If you are unable to identify any software names, please return an empty list of "software". When identifying software names, please consider the following exclusion criteria 
                            Also, apply following \"Guidelines\" and refer following \"Gold Examples\" to help with accuracy \n"""\
                            f"# Guidelines: {guidelines} \n"\
                            f"# Gold Examples: {few_shots} \n"\
                            f"# INPUT: {prompt} \n"\
                            f"\n"\
                            f"# OUTPUT: \n"

        client = OpenAI(api_key = OPENAI_APIKEY)
        completion = client.chat.completions.create(
            model = "gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_role},
                {"role": "user", "content": Prompt_all}
            ],
            response_format={"type": "json_object"},
            temperature=0,
        )
        result = json.loads(completion.choices[0].message.content)
        return result

    except Exception as e:
        # Best-effort: report the failure with its full stack and signal it with None
        print(f'! error: {e}')
        traceback.print_exc()
        return None




def llama_31_infer(paper):
    """
    Run the llama 3.1 extraction on one paper and attach contexts.

    Calls extract() with the shared system role and prompt template, then
    get_contexts() on the "software" entry of its reply.

    Args:
        paper: the paper dict handed to extract() and get_contexts().

    Returns:
        {'software': [...]} — an empty list when extract() returns None or
        when attaching contexts raises (e.g. the reply is not a dict or has
        no "software" key).
    """
    print("* llama 3.1 is inferring")
    extracted = extract(SYSTEM_ROLE, TPL_PROMPT, paper)

    # Nothing extracted, or extraction failed
    if extracted is None:
        return {'software': []}

    try:
        with_contexts = get_contexts(extracted['software'], paper)
    except Exception as e:
        # Safety net for replies that do not follow the requested format,
        # so a missing "software" key does not abort the run
        print(f'! error: {e}')
        print(f'! failed to get context for {extracted}')
        with_contexts = []

    return {'software': with_contexts}

  from .autonotebook import tqdm as notebook_tqdm


* loaded configs
* OPENAI_API_BASE_URL=http://localhost:11434/v1/
* OPENAI_API_MODEL=llama3.1
* PUBMED_DATA_TSV=/data/pubmed/metadata_36m.tsv
* OUTPUT_DATABASE=./database/software_names
* HF_TOKEN=<redacted>
* PYTHONPATH=/home/rs2926/workspace/Softname_Extraction/code/Gpt_4o_mini/:/home/rs2926/workspace/Softname_Extraction/code/Llama3.1/:
* loaded all libraries
* loaded custom openai client at http://localhost:11434/v1/


In [None]:
#%%

# Train + test data folders
train_folder_path = "../../datasets/train_data"
test_folder_path = "../../datasets/test_gold"
train_files = sorted(os.listdir(train_folder_path))
test_files = sorted(os.listdir(test_folder_path))


# Full paths of every .txt / .ann file, train first, then test
train_path="../../datasets/train_data/"
test_path="../../datasets/test_gold/"
train_txt_files = sorted(train_path + name for name in train_files if name.endswith('.txt'))
test_txt_files = sorted(test_path + name for name in test_files if name.endswith('.txt'))
train_ann_files = sorted(train_path + name for name in train_files if name.endswith('.ann'))
test_ann_files = sorted(test_path + name for name in test_files if name.endswith('.ann'))

new_txt = [*train_txt_files, *test_txt_files]
new_ann = [*train_ann_files, *test_ann_files]

# Accumulators over all documents.
# Currently written for the llama3.1 output format; the gpt-4o path still needs adapting.
all_gold = []
llama_all_pred = []
gpt4o_all_pred = []

# Text and annotation files are paired purely by their position in the sorted lists
merged_list = zip(new_txt, new_ann)
gold_cant_match = 0


# For every (text, annotation) pair: build the gold BIO tags from the .ann
# offsets, run the model, build the predicted BIO tags, and accumulate both.
# NOTE(review): `tqdm` must be the callable here; the file-level `import tqdm`
# binds the module, so this presumably relies on a later star-import
# rebinding the name — confirm.
for txt, ann in tqdm(merged_list, total=len(new_ann)):
    current_paper = dict()  # holds "pmid", "title" and "abstract"

    file_name = os.path.basename(txt)
    file_name_without_extension = os.path.splitext(file_name)[0]  # drop the extension
    # Title and abstract are stored separately here; they are merged when the model is called.

    # ------ txt file ------ #
    txt_content = []
    current_paper["pmid"] = file_name_without_extension
    with open(txt, "r", encoding="utf-8") as x_data:
        for count, line in enumerate(x_data):
            # -- input for the model: first line is the title, the rest the abstract -- #
            processed_line = line.strip()
            if count == 0:
                current_paper["title"] = processed_line
                current_paper["abstract"] = ""
            else:
                current_paper["abstract"] += processed_line

            # -- tokens used to build the gold BIO list -- #
            txt_content.extend(processed_line.split(" "))

    # Character offset of every token, assuming exactly one separator character
    # between consecutive tokens. Computed once after reading the whole file so
    # tokens of earlier lines are not appended again for each further line.
    txt_with_index_content = []
    cur_index = 0
    for item in txt_content:
        txt_with_index_content.append((cur_index, item))
        cur_index = cur_index + len(item) + 1

    # ------ ann file ------ #
    current_gold_list = list()
    current_gold_dict = dict()  # start offset -> (end offset, last field of the line)
    with open(ann, "r", encoding="utf-8") as y_data:
        for line in y_data:
            current_mes = line.strip().split("\t")

            # Lines with three or fewer tab-separated fields are skipped (e.g. empty ann)
            if len(current_mes) <= 3:
                continue

            # Second field: "<label> <start> <end>", where <end> may be followed by
            # ";<start2>" and more pieces; only the first fragment is used.
            index_initial = current_mes[1].strip().split(" ")
            if len(index_initial) < 3:
                # Unparsable span: skip it rather than reuse the previous line's index
                print(f"! skipped unparsable span in {ann}: {current_mes[1]}")
                continue
            index = (int(index_initial[1]), int(index_initial[2].split(";")[0]))
            current_gold_list.append((index, current_mes[-1]))

            # index[0] is the entity start offset, index[1] its end offset,
            # current_mes[-1] the last field of the line
            current_gold_dict[index[0]] = (index[1], current_mes[-1])

    # ------ gold labels from the ann offsets ------ #
    gold_current_label = list()
    flag = False  # True while inside an entity
    last_index = 0
    for men_index, men_name in txt_with_index_content:
        if flag and men_index <= last_index:
            # still inside the current entity
            gold_current_label.append("I")
            continue

        # Outside any entity (or just left one): this token may start a new
        # entity, including one directly adjacent to the previous entity.
        flag = False
        if men_index in current_gold_dict:
            gold_current_label.append("B")
            flag = True
            last_index = current_gold_dict[men_index][0]
        else:
            gold_current_label.append("O")

    # -- model inference -- #
    # Per the original author's note, the infer call modifies current_paper so
    # that the title is prepended to "abstract"; the BIO conversion below relies on that.
    pred_llama_entities = llama_31_infer(current_paper)
    # pred_gpt4o_entities = gpt_4o_infer(current_paper)

    # -- pred: BIO list from the model output -- #
    pred_llama_bio_list = convert_txt_to_bio(current_paper["abstract"], pred_llama_entities["software"])
    # pred_gpt4o_bio_list = convert_txt_to_bio(current_paper["abstract"], pred_gpt4o_entities["software"])

    # Both sequences are concatenated across documents, so a per-document
    # length difference would shift every later tag; make it visible.
    if len(pred_llama_bio_list) != len(gold_current_label):
        print(f"! length mismatch in {file_name}: pred={len(pred_llama_bio_list)} gold={len(gold_current_label)}")

    llama_all_pred += pred_llama_bio_list
    # gpt4o_all_pred += pred_gpt4o_bio_list

    # -- gold: BIO list from the ann file -- #
    all_gold += gold_current_label

  0%|          | 0/920 [00:00<?, ?it/s]

* llama 3.1 is inferring


  0%|          | 1/920 [00:01<17:25,  1.14s/it]

##### [['microsoft', 'excel']]
* llama 3.1 is inferring


  0%|          | 2/920 [00:03<28:22,  1.86s/it]

##### [['sql-92', 'compatible', 'relational', 'database'], ['pro7', '-', 'communication', 'server'], ['osf', '/', 'dce'], ['protocol', 'definition', 'language', 'compiler'], ['pro7-communication', 'server'], ['osf/dce']]
* llama 3.1 is inferring


  0%|          | 3/920 [00:04<22:43,  1.49s/it]

##### []
* llama 3.1 is inferring


  0%|          | 4/920 [00:07<30:26,  1.99s/it]

##### []
* llama 3.1 is inferring


  1%|          | 5/920 [00:09<29:09,  1.91s/it]

##### [['full', 'window', 'stereo']]
* llama 3.1 is inferring


  1%|          | 6/920 [00:10<25:29,  1.67s/it]

##### [['microsoft', 'excel']]
* llama 3.1 is inferring


  1%|          | 7/920 [00:11<22:41,  1.49s/it]

##### [['apex']]
* llama 3.1 is inferring


  1%|          | 8/920 [00:12<21:02,  1.38s/it]

##### [['mopac'], ['modeller'], ['triton']]
* llama 3.1 is inferring


  1%|          | 9/920 [00:14<22:40,  1.49s/it]

##### [['microsoft', 'excel']]
* llama 3.1 is inferring


  1%|          | 10/920 [00:15<20:18,  1.34s/it]

##### []
* llama 3.1 is inferring


  1%|          | 11/920 [00:16<18:43,  1.24s/it]

##### [['matlab'], ['s-plus']]
* llama 3.1 is inferring


  1%|▏         | 12/920 [00:17<18:07,  1.20s/it]

##### []
* llama 3.1 is inferring


  1%|▏         | 13/920 [00:18<16:57,  1.12s/it]

##### [['polylink'], ['microsoft', 'windows']]
* llama 3.1 is inferring


  2%|▏         | 14/920 [00:19<18:21,  1.22s/it]

##### [['visual', 'basic'], ['microsoft', 'word'], ['molecular', 'biocomputing', 'suite'], ['biotechniques', 'software', 'library'], ['microsoft', 'word', 'add-in']]
* llama 3.1 is inferring


  2%|▏         | 15/920 [00:20<17:13,  1.14s/it]

##### [['visual', 'basic'], ['microsoft', 'powerpoint']]
* llama 3.1 is inferring


  2%|▏         | 16/920 [00:22<17:56,  1.19s/it]

##### [['cdart']]
* llama 3.1 is inferring


  2%|▏         | 17/920 [00:23<17:34,  1.17s/it]

##### [['bioperl'], ['genquire']]
* llama 3.1 is inferring


  2%|▏         | 18/920 [00:24<17:07,  1.14s/it]

##### [['cinema-mx']]
* llama 3.1 is inferring


  2%|▏         | 19/920 [00:25<17:44,  1.18s/it]

##### [['acdsee'], ['after', 'shot'], ['fotoangelo'], ['smart', 'send']]
* llama 3.1 is inferring


  2%|▏         | 20/920 [00:27<21:02,  1.40s/it]

##### [['visual', 'basic', '4.0']]
* llama 3.1 is inferring


  2%|▏         | 21/920 [00:29<25:31,  1.70s/it]

##### [['bibi']]
* llama 3.1 is inferring


  2%|▏         | 22/920 [00:30<22:34,  1.51s/it]

##### [['bioeditor']]
* llama 3.1 is inferring


  2%|▎         | 23/920 [00:32<22:25,  1.50s/it]

##### [['cerr'], ['aapm/rtog', 'archiving', 'mechanism'], ['c/c++'], ['fortran'], ['matlab'], ['java']]
* llama 3.1 is inferring


  3%|▎         | 24/920 [00:33<22:09,  1.48s/it]

##### [['modweb'], ['modbase'], ['modeller'], ['modloop'], ['snpweb'], ['modview'], ['moulder']]
* llama 3.1 is inferring


  3%|▎         | 25/920 [00:34<20:21,  1.36s/it]

##### [['gosurfer'], ['chipinfo'], ['dchip']]
* llama 3.1 is inferring


  3%|▎         | 26/920 [00:35<18:39,  1.25s/it]

##### [['clustal', 'series', 'of', 'programs'], ['clustal'], ['the', 'clustal', 'series', 'of', 'programs']]
* llama 3.1 is inferring


  3%|▎         | 27/920 [00:37<19:22,  1.30s/it]

##### [['solaris.x86'], ['gibbs', 'recursive', 'sampler'], ['linux'], ['solaris'], ['gibbs', 'motif', 'sampler']]
* llama 3.1 is inferring


  3%|▎         | 28/920 [00:38<20:43,  1.39s/it]

##### []
* llama 3.1 is inferring


  3%|▎         | 29/920 [00:40<19:21,  1.30s/it]

##### [['stivid'], ['vbscript']]
* llama 3.1 is inferring


  3%|▎         | 30/920 [00:41<20:22,  1.37s/it]

##### [['pc', 'emulator', 'under', 'windows'], ['the', 'hp49g', 'programmable', 'calculator'], ['microsoft', 'windows'], ['metastats']]
* llama 3.1 is inferring


  3%|▎         | 31/920 [00:44<26:53,  1.82s/it]

##### []
* llama 3.1 is inferring


  3%|▎         | 32/920 [00:45<23:52,  1.61s/it]

##### [['cadlive']]
* llama 3.1 is inferring


  4%|▎         | 33/920 [00:46<22:02,  1.49s/it]

##### [['unix', 'workstations'], ['ibis'], ['fortran', '77']]
* llama 3.1 is inferring


  4%|▎         | 34/920 [00:49<26:05,  1.77s/it]

##### [['homgl']]
* llama 3.1 is inferring


  4%|▍         | 35/920 [00:50<25:28,  1.73s/it]

##### [['the', 'microsoft', 'windows', '3.0', 'operating', 'system'], ['universal', 'data', 'acquisition', 'program'], ['windows', '3.0'], ['microsoft', 'windows', '3.0'], ['dynamic', 'link', 'libraries', '(dll)']]
* llama 3.1 is inferring


  4%|▍         | 36/920 [00:51<22:03,  1.50s/it]

##### [['nonmem']]
* llama 3.1 is inferring


  4%|▍         | 37/920 [00:52<20:21,  1.38s/it]

##### [['rounds'], ['help', 'system']]
* llama 3.1 is inferring


  4%|▍         | 38/920 [00:55<24:56,  1.70s/it]

##### [['chull.sas']]
* llama 3.1 is inferring


  4%|▍         | 39/920 [00:56<22:28,  1.53s/it]

##### [['moltalk']]
* llama 3.1 is inferring


  4%|▍         | 40/920 [00:59<27:01,  1.84s/it]

##### [['ensembl', 'core', 'software', 'libraries']]
* llama 3.1 is inferring


  4%|▍         | 41/920 [01:00<25:28,  1.74s/it]

##### [['svm-based', 'method', 'for', 'subcellular', 'localization', 'of', 'eukaryotic', 'proteins', 'using', 'dipeptide', 'composition', 'and', 'psi-blast'], ['eslpred']]
* llama 3.1 is inferring


  5%|▍         | 42/920 [01:01<23:03,  1.58s/it]

##### [['probelynx']]
* llama 3.1 is inferring


  5%|▍         | 43/920 [01:02<21:02,  1.44s/it]

##### [['genepalette']]
* llama 3.1 is inferring


  5%|▍         | 44/920 [01:03<19:32,  1.34s/it]

##### [['haplore'], ['haplotype', 'reconstruction']]
* llama 3.1 is inferring


  5%|▍         | 45/920 [01:04<18:03,  1.24s/it]

##### [['bioconductor'], ['rmageml']]
* llama 3.1 is inferring


  5%|▌         | 46/920 [01:06<18:08,  1.25s/it]

##### [['goget'], ['java', '2', 'enterprise', 'edition', 'technology'], ['gene', 'ontology', 'database'], ['goview']]
* llama 3.1 is inferring


  5%|▌         | 47/920 [01:07<17:41,  1.22s/it]

##### [['muscle']]
* llama 3.1 is inferring


  5%|▌         | 48/920 [01:08<16:47,  1.16s/it]

##### [['varmixt'], ['r', 'package']]
* llama 3.1 is inferring


  5%|▌         | 49/920 [01:09<16:44,  1.15s/it]

##### [['clann'], ['apple', 'macintosh'], ['linux'], ['windows', 'operating', 'systems']]
* llama 3.1 is inferring


  5%|▌         | 50/920 [01:10<16:13,  1.12s/it]

##### [['therm']]
* llama 3.1 is inferring


  6%|▌         | 51/920 [01:11<17:06,  1.18s/it]

##### [['paris', 'genome', 'rearrangement', 'server'], ['paris', 'genome', 'rearrangement']]
* llama 3.1 is inferring


  6%|▌         | 52/920 [01:13<17:14,  1.19s/it]

##### [['cgview']]
* llama 3.1 is inferring


  6%|▌         | 53/920 [01:14<16:27,  1.14s/it]

##### [['pcr'], ['clustal', 'x']]
* llama 3.1 is inferring


  6%|▌         | 54/920 [01:15<18:11,  1.26s/it]

##### [['ccp4', 'coordinate', 'library']]
* llama 3.1 is inferring


  6%|▌         | 55/920 [01:16<17:15,  1.20s/it]

##### [['vms'], ['overseer'], ['unix']]
* llama 3.1 is inferring


  6%|▌         | 56/920 [01:17<17:03,  1.18s/it]

##### [['microsoft', 'excel', 'for', 'windows']]
* llama 3.1 is inferring


  6%|▌         | 57/920 [01:19<17:16,  1.20s/it]

##### [['windows'], ['linux'], ['java', 'application'], ['cghpro'], ['array', 'cgh']]
* llama 3.1 is inferring


  6%|▋         | 58/920 [01:20<16:34,  1.15s/it]

##### []
* llama 3.1 is inferring


  6%|▋         | 59/920 [01:21<16:33,  1.15s/it]

##### [['hdbstat!']]
* llama 3.1 is inferring


  7%|▋         | 60/920 [01:22<17:14,  1.20s/it]

##### [['egs4'], ['fortran', '77'], ['bbn', 'tc2000'], ['pc/386/387']]
* llama 3.1 is inferring


  7%|▋         | 61/920 [01:23<17:05,  1.19s/it]

##### [['ucsc', 'gene', 'sorter']]
* llama 3.1 is inferring


  7%|▋         | 62/920 [01:25<17:54,  1.25s/it]

##### [['python'], ['qhull'], ['provat'], ['pymol']]
* llama 3.1 is inferring


  7%|▋         | 63/920 [01:26<19:32,  1.37s/it]

##### [['locsvmpsi'], ['locnet'], ['targetp'], ['eslpred'], ['psi-blast'], ['psortii'], ['svm'], ['subloc']]
* llama 3.1 is inferring


  7%|▋         | 64/920 [01:28<19:14,  1.35s/it]

##### [['lga'], ['psi-blast'], ['pdb'], ['al2ts'], ['as2ts']]
* llama 3.1 is inferring


  7%|▋         | 65/920 [01:29<17:52,  1.25s/it]

##### [['psi-blast'], ['modeller']]
* llama 3.1 is inferring


  7%|▋         | 66/920 [01:30<18:19,  1.29s/it]

##### [['department', 'of', 'informatics'], ['linux'], ['colotux'], ['siemens'], ['colonography']]
* llama 3.1 is inferring


  7%|▋         | 67/920 [01:32<20:13,  1.42s/it]

##### [['medusa']]
* llama 3.1 is inferring


  7%|▋         | 68/920 [01:33<19:28,  1.37s/it]

##### [['the', 'software'], ['microsoft', 'foundation', 'class', 'library', '(mfc)'], ['vc++', '6.0']]
* llama 3.1 is inferring


  8%|▊         | 69/920 [01:34<17:53,  1.26s/it]

##### [['microsoft', 'excel']]
* llama 3.1 is inferring


  8%|▊         | 70/920 [01:35<16:32,  1.17s/it]

##### [['r'], ['ocplus']]
* llama 3.1 is inferring


  8%|▊         | 71/920 [01:36<15:39,  1.11s/it]

##### [['mzmine']]
* llama 3.1 is inferring


  8%|▊         | 72/920 [01:37<15:20,  1.09s/it]

##### [['turbo', 'pascal'], ['chelator']]
* llama 3.1 is inferring


  8%|▊         | 73/920 [01:39<17:54,  1.27s/it]

##### [['jaf'], ['http://www.proteomecommons.org/current/511/'], ['proteomecommons.org', 'jaf'], ['java', 'analysis', 'framework', '(jaf)']]
* llama 3.1 is inferring


  8%|▊         | 74/920 [01:40<18:44,  1.33s/it]

##### [['ciphergen', 'proteinchip', 'software', '3.1']]
* llama 3.1 is inferring


  8%|▊         | 75/920 [01:42<20:17,  1.44s/it]

##### [['genechip', 'operating', 'software', '(mas', 'or', 'gcos)'], ['dchip', 'pm'], ['rma'], ['microarray', 'analysis', 'suite'], ['gc-rma'], ['pdnn'], ['dchip', 'pmmm']]
* llama 3.1 is inferring


  8%|▊         | 76/920 [01:43<19:08,  1.36s/it]

##### [['bioconductor'], ['orderedlist']]
* llama 3.1 is inferring


  8%|▊         | 77/920 [01:44<18:41,  1.33s/it]

##### [['the', 'migenas', 'integrated', 'bioinformatics', 'toolkit']]
* llama 3.1 is inferring


  8%|▊         | 78/920 [01:46<19:21,  1.38s/it]

##### [['masqot'], ['java'], ['masqot-gui'], ['gnu', 'lgpl']]
* llama 3.1 is inferring


  9%|▊         | 79/920 [01:47<18:04,  1.29s/it]

##### [['microsoft', 'excel']]
* llama 3.1 is inferring


  9%|▊         | 80/920 [01:49<19:38,  1.40s/it]

##### [['examiner', 'module'], ['project', 'manager', 'module'], ['extractor', 'module'], ['pet-tool'], ['mapper', 'module']]
* llama 3.1 is inferring


  9%|▉         | 81/920 [01:50<18:24,  1.32s/it]

##### [['m-gcat']]
* llama 3.1 is inferring


  9%|▉         | 82/920 [01:51<18:37,  1.33s/it]

##### [['opengl'], ['openmaf'], ['dicom']]
* llama 3.1 is inferring


  9%|▉         | 83/920 [01:52<17:35,  1.26s/it]

##### [['bhageerath']]
* llama 3.1 is inferring


  9%|▉         | 84/920 [01:53<16:26,  1.18s/it]

##### [['azyxxi'], ['microsoft']]
* llama 3.1 is inferring


  9%|▉         | 85/920 [01:54<16:19,  1.17s/it]

##### [['visual', 'basic'], ['matlab'], ['viskin']]
* llama 3.1 is inferring


  9%|▉         | 86/920 [01:56<16:42,  1.20s/it]

##### [['stampa'], ['gerbil'], ['haploview'], ['gevalt']]
* llama 3.1 is inferring


  9%|▉         | 87/920 [01:57<16:35,  1.19s/it]

##### [['blm', 'analyzer'], ['the', 'blm', 'analyzer']]
* llama 3.1 is inferring


 10%|▉         | 88/920 [01:58<16:08,  1.16s/it]

##### [['ms-windows'], ['wsxm']]
* llama 3.1 is inferring


 10%|▉         | 89/920 [01:59<16:54,  1.22s/it]

##### [['casmil']]
* llama 3.1 is inferring


 10%|▉         | 90/920 [02:00<16:30,  1.19s/it]

##### [['gepat'], ['genome', 'expression', 'pathway', 'analysis', 'tool']]
* llama 3.1 is inferring


 10%|▉         | 91/920 [02:02<18:09,  1.31s/it]

##### [['svm-fold']]
* llama 3.1 is inferring


 10%|█         | 92/920 [02:05<24:35,  1.78s/it]

##### [['tassel']]
* llama 3.1 is inferring


 10%|█         | 93/920 [02:06<21:27,  1.56s/it]

##### [['gromacs'], ['guimacs']]
* llama 3.1 is inferring


 10%|█         | 94/920 [02:07<20:49,  1.51s/it]

##### [['blast'], ['nwaycomp'], ['phylip'], ['clustalw'], ['primer3'], ['align']]
* llama 3.1 is inferring


 10%|█         | 95/920 [02:08<18:55,  1.38s/it]

##### [['microsoft', 'excel', '2004'], ['visual', 'basic', 'for', 'applications']]
* llama 3.1 is inferring


 10%|█         | 96/920 [02:10<18:40,  1.36s/it]

##### [['daisy']]
* llama 3.1 is inferring


 11%|█         | 97/920 [02:12<23:00,  1.68s/it]

##### [['pycogent']]
* llama 3.1 is inferring


 11%|█         | 98/920 [02:13<22:07,  1.62s/it]

##### [['visualization', 'toolkit'], ['windows'], ['linux'], ['unix'], ['cavass'], ['insight', 'toolkit'], ['mac']]
* llama 3.1 is inferring


 11%|█         | 99/920 [02:15<22:19,  1.63s/it]

##### [['i-adhore', '2.0'], ['unix'], ['linux'], ['i-adhore']]
* llama 3.1 is inferring


 11%|█         | 100/920 [02:17<21:31,  1.58s/it]

##### [['windows-based', 'software', 'tool'], ['ampe'], ['iso', '5725'], ['fuzzy', 'logic'], ['analytical', 'method', 'performance', 'evaluation', '(ampe)']]
* llama 3.1 is inferring


 11%|█         | 101/920 [02:18<19:18,  1.41s/it]

##### [['microsoft', 'visual', 'c/c++'], ['act', '4']]
* llama 3.1 is inferring


 11%|█         | 102/920 [02:20<24:28,  1.80s/it]

##### []
* llama 3.1 is inferring


 11%|█         | 103/920 [02:21<21:40,  1.59s/it]

##### [['swan']]
* llama 3.1 is inferring


 11%|█▏        | 104/920 [02:23<19:57,  1.47s/it]

##### [['fmrib', 'software', 'library'], ['bet']]
* llama 3.1 is inferring


 11%|█▏        | 105/920 [02:24<18:06,  1.33s/it]

##### [['microsoft', 'word'], ['endnote'], ['pubmed']]
* llama 3.1 is inferring


 12%|█▏        | 106/920 [02:25<18:11,  1.34s/it]

##### [['r', 'package', 'hcgene'], ['hcgene'], ['funcat', 'taxonomy'], ['gene', 'ontology']]
* llama 3.1 is inferring


 12%|█▏        | 107/920 [02:26<17:23,  1.28s/it]

##### [['matlab'], ['datarail']]
* llama 3.1 is inferring


 12%|█▏        | 108/920 [02:28<18:02,  1.33s/it]

##### [['graphcrunch']]
* llama 3.1 is inferring


 12%|█▏        | 109/920 [02:29<18:25,  1.36s/it]

##### [['perl'], ['c'], ['c++'], ['java'], ['python'], ['c#']]
* llama 3.1 is inferring


 12%|█▏        | 110/920 [02:30<17:30,  1.30s/it]

##### [['root'], ['ctmod'], ["cern's", 'application', 'development', 'framework']]
* llama 3.1 is inferring


 12%|█▏        | 111/920 [02:31<16:31,  1.23s/it]

##### [['dovis'], ['linux', 'cluster'], ['autodock']]
* llama 3.1 is inferring


 12%|█▏        | 112/920 [02:34<21:20,  1.58s/it]

##### [['census']]
* llama 3.1 is inferring


 12%|█▏        | 113/920 [02:35<19:16,  1.43s/it]

##### [['perl'], ['bioperl'], ['java', 'applet']]
* llama 3.1 is inferring


 12%|█▏        | 114/920 [02:36<18:21,  1.37s/it]

##### [['sculpter']]
* llama 3.1 is inferring


 12%|█▎        | 115/920 [02:37<16:58,  1.27s/it]

##### [['ushuffle']]
* llama 3.1 is inferring


 13%|█▎        | 116/920 [02:38<17:08,  1.28s/it]

##### [['varivis'], ['database', 'management', 'systems'], ['perl', 'cgi', 'scripts'], ['varivis', 'software', 'package']]
* llama 3.1 is inferring


 13%|█▎        | 117/920 [02:40<17:50,  1.33s/it]

##### [['patman']]
* llama 3.1 is inferring


 13%|█▎        | 118/920 [02:41<16:36,  1.24s/it]

##### [['toxmatch']]
* llama 3.1 is inferring


 13%|█▎        | 119/920 [02:44<23:03,  1.73s/it]

##### [['microsoft', 'visual', 'basic', '6.0']]
* llama 3.1 is inferring


 13%|█▎        | 120/920 [02:45<21:17,  1.60s/it]

##### [['skdm'], ['java', 'application']]
* llama 3.1 is inferring


 13%|█▎        | 121/920 [02:46<19:43,  1.48s/it]

##### [['potterswheel'], ['matlab', 'with', 'optimization', 'toolbox'], ['matlab', 'toolbox']]
* llama 3.1 is inferring


 13%|█▎        | 122/920 [02:47<19:03,  1.43s/it]

##### [['procope'], ['java', 'software', 'suite']]
* llama 3.1 is inferring


 13%|█▎        | 123/920 [02:49<21:21,  1.61s/it]

##### [['gibbsmarkov', 'with', 'significance', 'analysis'], ['gimsan']]
* llama 3.1 is inferring


 13%|█▎        | 124/920 [02:51<20:11,  1.52s/it]

##### [['domaingraph']]
* llama 3.1 is inferring


 14%|█▎        | 125/920 [02:52<19:33,  1.48s/it]

##### [['unafold'], ['mac', 'os', 'x'], ['linux'], ['unix']]
* llama 3.1 is inferring


 14%|█▎        | 126/920 [02:55<23:14,  1.76s/it]

##### []
* llama 3.1 is inferring


 14%|█▍        | 127/920 [02:56<20:43,  1.57s/it]

##### [['fluxplus'], ['microsoft', 'gwbasic'], ['ibm-pc']]
* llama 3.1 is inferring


 14%|█▍        | 128/920 [02:57<19:46,  1.50s/it]

##### []
* llama 3.1 is inferring


 14%|█▍        | 129/920 [02:58<18:59,  1.44s/it]

##### [['mazda']]
* llama 3.1 is inferring


 14%|█▍        | 130/920 [03:00<18:47,  1.43s/it]

##### [['fax06'], ['max06'], ['caldose_x']]
* llama 3.1 is inferring


 14%|█▍        | 131/920 [03:01<17:06,  1.30s/it]

##### []
* llama 3.1 is inferring


 14%|█▍        | 132/920 [03:02<16:16,  1.24s/it]

##### [['java', 'starlogo', '2.0'], ['acacia']]
* llama 3.1 is inferring


 14%|█▍        | 133/920 [03:03<15:58,  1.22s/it]

##### [['pazar', 'database'], ['orca', 'toolkit']]
* llama 3.1 is inferring


 15%|█▍        | 134/920 [03:04<15:22,  1.17s/it]

##### [['alc']]
* llama 3.1 is inferring


 15%|█▍        | 135/920 [03:05<15:55,  1.22s/it]

##### [['pconpy']]
* llama 3.1 is inferring


 15%|█▍        | 136/920 [03:07<16:16,  1.25s/it]

##### [['windows', 'script', 'host', '(wsh)'], ['activex', 'data', 'objects'], ['javascript'], ['ole', 'automation'], ['sql']]
* llama 3.1 is inferring


 15%|█▍        | 137/920 [03:08<15:53,  1.22s/it]

##### [['ez-rhizo']]
* llama 3.1 is inferring


 15%|█▌        | 138/920 [03:09<15:54,  1.22s/it]

##### [['microsoft', 'word']]
* llama 3.1 is inferring


 15%|█▌        | 139/920 [03:10<16:37,  1.28s/it]

##### [['mcnp(x)', 'monte', 'carlo', 'computer', 'code'], ['sesame--simulation', 'of', 'external', 'source', 'accident', 'with', 'medical', 'images'], ['sesame']]
* llama 3.1 is inferring


 15%|█▌        | 140/920 [03:12<17:25,  1.34s/it]

##### [['gene', 'expression', 'omnibus', 'database'], ['tbrowser'], ['david', 'knowledgebase'], ['transcriptomebrowser'], ['java', 'application'], ['markov', 'clustering', 'algorithm']]
* llama 3.1 is inferring


 15%|█▌        | 141/920 [03:13<17:36,  1.36s/it]

##### [['decyder', 'v6.5'], ['dymension', '3'], ['progenesis', 'samespots', 'v3.0']]
* llama 3.1 is inferring


 15%|█▌        | 142/920 [03:14<16:32,  1.28s/it]

##### [['prep', '+', '07']]
* llama 3.1 is inferring


 16%|█▌        | 143/920 [03:16<15:55,  1.23s/it]

##### [['ajax'], ['google', 'maps', 'api'], ['genome', 'projector']]
* llama 3.1 is inferring


 16%|█▌        | 144/920 [03:17<15:13,  1.18s/it]

##### [['associationviewer']]
* llama 3.1 is inferring


 16%|█▌        | 145/920 [03:18<15:10,  1.18s/it]

##### [['genome', 'reverse', 'compiler']]
* llama 3.1 is inferring


 16%|█▌        | 146/920 [03:19<15:53,  1.23s/it]

##### [['va-batts'], ['qct'], ['fe']]
* llama 3.1 is inferring


 16%|█▌        | 147/920 [03:22<20:48,  1.61s/it]

##### [['therm'], ['bisen'], ['biochemical', 'simulation', 'environment']]
* llama 3.1 is inferring


 16%|█▌        | 148/920 [03:23<20:12,  1.57s/it]

##### [['jaspar'], ['transfac'], ['cotrasif']]
* llama 3.1 is inferring


 16%|█▌        | 149/920 [03:24<18:17,  1.42s/it]

##### [['mixture', 'subroutine', 'in', 'nonmem'], ['nonmem']]
* llama 3.1 is inferring


 16%|█▋        | 150/920 [03:25<17:12,  1.34s/it]

##### [['therm'], ['microsoft', 'fortran', 'version', '5.0']]
* llama 3.1 is inferring


 16%|█▋        | 151/920 [03:26<16:20,  1.28s/it]

##### [['ingeneue']]
* llama 3.1 is inferring


 17%|█▋        | 152/920 [03:29<22:27,  1.75s/it]

##### [['ensembl', 'software'], ['the', 'microbe', 'browser']]
* llama 3.1 is inferring


 17%|█▋        | 153/920 [03:30<19:50,  1.55s/it]

##### [['3d-dart'], ['3dna'], ['python']]
* llama 3.1 is inferring


 17%|█▋        | 154/920 [03:32<18:31,  1.45s/it]

##### [['open', 'source', 'tools'], ['the', 'geant4', 'application', 'for', 'tomographic', 'emission', 'simulation', 'toolkit']]
* llama 3.1 is inferring


 17%|█▋        | 155/920 [03:33<18:28,  1.45s/it]

##### [['modular', 'preoperative', 'planning', 'software'], ['insight', 'toolkit', '(kitware,', 'inc.)']]
* llama 3.1 is inferring


 17%|█▋        | 156/920 [03:35<19:35,  1.54s/it]

##### [['rtracklayer']]
* llama 3.1 is inferring


 17%|█▋        | 157/920 [03:36<18:06,  1.42s/it]

##### [['statoolkit']]
* llama 3.1 is inferring


 17%|█▋        | 158/920 [03:37<16:34,  1.31s/it]

##### [['entrez'], ['pubmed'], ['pipeline', 'pilot']]
* llama 3.1 is inferring


 17%|█▋        | 159/920 [03:38<15:29,  1.22s/it]

##### [['matlab'], ['fmri'], ['pet']]
* llama 3.1 is inferring


 17%|█▋        | 160/920 [03:39<15:23,  1.22s/it]

##### [['esi-ms'], ['maldi-ms'], ['glycoworkbench']]
* llama 3.1 is inferring


 18%|█▊        | 161/920 [03:42<21:17,  1.68s/it]

##### []
* llama 3.1 is inferring


 18%|█▊        | 162/920 [03:43<18:44,  1.48s/it]

##### []
* llama 3.1 is inferring


 18%|█▊        | 163/920 [03:44<16:42,  1.32s/it]

##### [['microsoft', 'excel']]
* llama 3.1 is inferring


 18%|█▊        | 164/920 [03:45<16:02,  1.27s/it]

##### [['gnu', 'general', 'public', 'license'], ['mspecs']]
* llama 3.1 is inferring


 18%|█▊        | 165/920 [03:47<16:43,  1.33s/it]

##### [['r/bioconductor'], ['crlmm'], ["illumina's", 'infinium', 'whole-genome', 'genotyping', 'beadchips']]
* llama 3.1 is inferring


 18%|█▊        | 166/920 [03:47<15:07,  1.20s/it]

##### [['java', 'tool', 'kit', 'for', 'building', 'genomics', 'visualization', 'applications'], ['genoviz', 'software', 'development', 'kit']]
* llama 3.1 is inferring


 18%|█▊        | 167/920 [03:49<14:59,  1.19s/it]

##### [['saint'], ['libsbml'], ['google', 'web', 'toolkit'], ['tomcat']]
* llama 3.1 is inferring


 18%|█▊        | 168/920 [03:50<16:56,  1.35s/it]

##### []
* llama 3.1 is inferring


 18%|█▊        | 169/920 [03:52<16:20,  1.31s/it]

##### [['ptmsearchplus']]
* llama 3.1 is inferring


 18%|█▊        | 170/920 [03:53<16:37,  1.33s/it]

##### [['limsa'], ['secd']]
* llama 3.1 is inferring


 19%|█▊        | 171/920 [03:54<17:13,  1.38s/it]

##### [['easymifs'], ['easymifs'], ['sitehound']]
* llama 3.1 is inferring


 19%|█▊        | 172/920 [03:56<18:30,  1.48s/it]

##### [['natbox'], ['r', 'statistical', 'language']]
* llama 3.1 is inferring


 19%|█▉        | 173/920 [03:58<17:51,  1.43s/it]

##### [['lipid'], ['visual', 'basic', 'for', 'applications'], ['ms', 'excel']]
* llama 3.1 is inferring


 19%|█▉        | 174/920 [03:59<16:45,  1.35s/it]

##### [['survival', 'online']]
* llama 3.1 is inferring


 19%|█▉        | 175/920 [04:00<16:19,  1.32s/it]

##### [['matlab'], ['matlab', 'component', 'runtime'], ['gene', 'armada']]
* llama 3.1 is inferring


 19%|█▉        | 176/920 [04:01<15:11,  1.23s/it]

##### []
* llama 3.1 is inferring


 19%|█▉        | 177/920 [04:02<15:39,  1.26s/it]

##### [['cone-beam', 'ct'], ['simplant'], ['materialise', 'dental'], ['surgiguide'], ['ct', 'images']]
* llama 3.1 is inferring


 19%|█▉        | 178/920 [04:03<14:38,  1.18s/it]

##### [['mapnext']]
* llama 3.1 is inferring


 19%|█▉        | 179/920 [04:05<14:53,  1.21s/it]

##### [['seqbuster']]
* llama 3.1 is inferring


 20%|█▉        | 180/920 [04:06<15:36,  1.27s/it]

##### [['poptree2'], ['windows', 'interface']]
* llama 3.1 is inferring


 20%|█▉        | 181/920 [04:07<15:09,  1.23s/it]

##### [['svmprat']]
* llama 3.1 is inferring


 20%|█▉        | 182/920 [04:08<14:26,  1.17s/it]

##### [['therm']]
* llama 3.1 is inferring


 20%|█▉        | 183/920 [04:09<13:56,  1.13s/it]

##### []
* llama 3.1 is inferring


 20%|██        | 184/920 [04:10<13:51,  1.13s/it]

##### [['nemo']]
* llama 3.1 is inferring


 20%|██        | 185/920 [04:12<15:16,  1.25s/it]

##### [['windows,', 'macintosh,', 'or', 'linux', 'operating', 'system'], ['pc', 'windows', 'operating', 'systems'], ['jcms']]
* llama 3.1 is inferring


 20%|██        | 186/920 [04:13<15:50,  1.29s/it]

##### [['consensuscluster']]
* llama 3.1 is inferring


 20%|██        | 187/920 [04:14<15:41,  1.28s/it]

##### [['gosemsim'], ['bioconductor', 'project'], ['r', 'package']]
* llama 3.1 is inferring


 20%|██        | 188/920 [04:15<14:43,  1.21s/it]

##### []
* llama 3.1 is inferring


 21%|██        | 189/920 [04:16<13:55,  1.14s/it]

##### [['solid'], ['perl']]
* llama 3.1 is inferring


 21%|██        | 190/920 [04:18<14:20,  1.18s/it]

##### [['rnastructure']]
* llama 3.1 is inferring


 21%|██        | 191/920 [04:19<14:10,  1.17s/it]

##### [['diacontrol']]
* llama 3.1 is inferring


 21%|██        | 192/920 [04:20<14:49,  1.22s/it]

##### [['armone']]
* llama 3.1 is inferring


 21%|██        | 193/920 [04:22<18:04,  1.49s/it]

##### [['distmatcomp']]
* llama 3.1 is inferring


 21%|██        | 194/920 [04:23<16:44,  1.38s/it]

##### [['therm'], ['qspect']]
* llama 3.1 is inferring


 21%|██        | 195/920 [04:25<15:53,  1.31s/it]

##### [['fall', 'tips', 'toolkit'], ['fall', 'tips']]
* llama 3.1 is inferring


 21%|██▏       | 196/920 [04:26<14:53,  1.23s/it]

##### []
* llama 3.1 is inferring


 21%|██▏       | 197/920 [04:27<14:46,  1.23s/it]

##### [['cochlear', 'implant', 'processor', 'programming'], ['fitting', 'to', 'outcomes', 'expert']]
* llama 3.1 is inferring


 22%|██▏       | 198/920 [04:28<13:59,  1.16s/it]

##### []
* llama 3.1 is inferring


 22%|██▏       | 199/920 [04:29<14:50,  1.24s/it]

##### [['emr'], ['road'], ['sarma']]
* llama 3.1 is inferring


 22%|██▏       | 200/920 [04:30<13:33,  1.13s/it]

##### [['learning', 'assessment', 'toolkit']]
* llama 3.1 is inferring


 22%|██▏       | 201/920 [04:32<15:58,  1.33s/it]

##### [['kaks_calculator', '2.0'], ['gamma-series', 'methods'], ['slidingwindow', 'strategies']]
* llama 3.1 is inferring


 22%|██▏       | 202/920 [04:33<15:36,  1.30s/it]

##### [['goal'], ['gene', 'ontology', 'analyzer']]
* llama 3.1 is inferring


 22%|██▏       | 203/920 [04:35<16:33,  1.39s/it]

##### [['homstrad'], ['tcoffee'], ['mafft'], ['mtrap'], ['clustalw2'], ['prefab', '4.0']]
* llama 3.1 is inferring


 22%|██▏       | 204/920 [04:36<15:34,  1.31s/it]

##### [['guidance']]
* llama 3.1 is inferring


 22%|██▏       | 205/920 [04:37<15:44,  1.32s/it]

##### [['compasss', '(complex', 'pattern', 'of', 'sequence', 'search', 'software)'], ['compasss', 'suite'], ['compasss']]
* llama 3.1 is inferring


 22%|██▏       | 206/920 [04:38<15:07,  1.27s/it]

##### [['jcoda']]
* llama 3.1 is inferring


 22%|██▎       | 207/920 [04:40<14:49,  1.25s/it]

##### [['modevo'], ['networkevolution'], ['cytoscape'], ['apcluster']]
* llama 3.1 is inferring


 23%|██▎       | 208/920 [04:41<14:03,  1.18s/it]

##### []
* llama 3.1 is inferring


 23%|██▎       | 209/920 [04:42<14:12,  1.20s/it]

##### [['merger'], ['profiler'], ['viewer'], ['profiler-merger-viewer']]
* llama 3.1 is inferring


 23%|██▎       | 210/920 [04:43<13:36,  1.15s/it]

##### [['kbsim']]
* llama 3.1 is inferring


 23%|██▎       | 211/920 [04:44<13:14,  1.12s/it]

##### []
* llama 3.1 is inferring


 23%|██▎       | 212/920 [04:45<13:38,  1.16s/it]

##### [['bionetgen', 'language', '(bngl)'], ['dynstoc'], ['rulemonkey']]
* llama 3.1 is inferring


 23%|██▎       | 213/920 [04:46<13:34,  1.15s/it]

##### [['mrmap']]
* llama 3.1 is inferring


 23%|██▎       | 214/920 [04:48<14:12,  1.21s/it]

##### [['bioruby']]
* llama 3.1 is inferring


 23%|██▎       | 215/920 [04:49<13:34,  1.16s/it]

##### [['decision', 'peptide-driven'], ['dpd', 'software']]
* llama 3.1 is inferring


 23%|██▎       | 216/920 [04:50<13:06,  1.12s/it]

##### [['v-xtractor']]
* llama 3.1 is inferring


 24%|██▎       | 217/920 [04:51<13:07,  1.12s/it]

##### [['metagenomethreader']]
* llama 3.1 is inferring


 24%|██▎       | 218/920 [04:52<13:17,  1.14s/it]

##### [['metabolic', 'design']]
* llama 3.1 is inferring


 24%|██▍       | 219/920 [04:55<17:48,  1.52s/it]

##### [['chembench']]
* llama 3.1 is inferring


 24%|██▍       | 220/920 [04:56<15:54,  1.36s/it]

##### [['cdms']]
* llama 3.1 is inferring


 24%|██▍       | 221/920 [04:57<16:30,  1.42s/it]

##### [['biodosimetry', 'assessment', 'tool', '(bat)'], ['microsoft', 'visual', 'basic', '6'], ['armed', 'forces', 'radiobiology', 'research', "institute's", 'biological', 'dosimetry', 'research', 'program']]
* llama 3.1 is inferring


 24%|██▍       | 222/920 [04:59<17:17,  1.49s/it]

##### [['the', 'aqcel', 'method'], ['the', 'application', 'software', 'aqcel'], ['the', 'conventional', 'method'], ['aqcel']]
* llama 3.1 is inferring


 24%|██▍       | 223/920 [05:00<15:51,  1.37s/it]

##### [['omicsanalyzer'], ['cytoscape'], ['java']]
* llama 3.1 is inferring


 24%|██▍       | 224/920 [05:01<14:49,  1.28s/it]

##### [['google', 'maps'], ['celldesigner'], ['cellpublisher']]
* llama 3.1 is inferring


 24%|██▍       | 225/920 [05:02<14:08,  1.22s/it]

##### [['mist'], ['mitre', 'identification', 'scrubber', 'toolkit']]
* llama 3.1 is inferring


 25%|██▍       | 226/920 [05:03<13:55,  1.20s/it]

##### [['paragon'], ['excel'], ['compid'], ['mascot']]
* llama 3.1 is inferring


 25%|██▍       | 227/920 [05:04<12:40,  1.10s/it]

##### [['fall', 'tips']]
* llama 3.1 is inferring


 25%|██▍       | 228/920 [05:06<15:03,  1.31s/it]

##### []
* llama 3.1 is inferring


 25%|██▍       | 229/920 [05:08<17:07,  1.49s/it]

##### [['consensx']]
* llama 3.1 is inferring


 25%|██▌       | 230/920 [05:09<16:02,  1.39s/it]

##### [['cagrid', 'workflow', 'toolkit']]
* llama 3.1 is inferring


 25%|██▌       | 231/920 [05:12<20:45,  1.81s/it]

##### []
* llama 3.1 is inferring


 25%|██▌       | 232/920 [05:14<22:57,  2.00s/it]

##### [['therm']]
* llama 3.1 is inferring


 25%|██▌       | 233/920 [05:15<20:01,  1.75s/it]

##### [['itk', 'software', 'framework'], ['julide']]
* llama 3.1 is inferring


 25%|██▌       | 234/920 [05:17<18:31,  1.62s/it]

##### [['netpath-win'], ['netpath'], ['microsoft', 'windows']]
* llama 3.1 is inferring


 26%|██▌       | 235/920 [05:18<16:30,  1.45s/it]

##### []
* llama 3.1 is inferring


 26%|██▌       | 236/920 [05:19<15:35,  1.37s/it]

##### [['onto-toolkit'], ['onto-perl'], ['galaxy']]
* llama 3.1 is inferring


 26%|██▌       | 237/920 [05:20<14:48,  1.30s/it]

##### [['arrayqualitymetrics']]
* llama 3.1 is inferring


 26%|██▌       | 238/920 [05:21<14:10,  1.25s/it]

##### [['caes'], ['chinese', 'acupuncture', 'expert', 'system']]
* llama 3.1 is inferring


 26%|██▌       | 239/920 [05:22<14:06,  1.24s/it]

##### [['nuclearquant'], ['leica', 'bond', 'max', 'system']]
* llama 3.1 is inferring


 26%|██▌       | 240/920 [05:23<13:48,  1.22s/it]

##### [['gepoclu']]
* llama 3.1 is inferring


 26%|██▌       | 241/920 [05:25<14:21,  1.27s/it]

##### [['dicom'], ['qt', 'graphical', 'user', 'interface'], ['nirviz'], ['visualization', 'toolkit', 'library']]
* llama 3.1 is inferring


 26%|██▋       | 242/920 [05:26<13:14,  1.17s/it]

##### [['blast']]
* llama 3.1 is inferring


 26%|██▋       | 243/920 [05:28<17:33,  1.56s/it]

##### [['geant4', 'simulation', 'toolkit']]
* llama 3.1 is inferring


 27%|██▋       | 244/920 [05:29<16:30,  1.46s/it]

##### [['dchip', 'survival', 'analysis', 'module'], ['dchip', 'software'], ['visual', 'c++'], ['dchip']]
* llama 3.1 is inferring


 27%|██▋       | 245/920 [05:31<15:04,  1.34s/it]

##### []
* llama 3.1 is inferring


 27%|██▋       | 246/920 [05:32<17:05,  1.52s/it]

##### [['mzml'], ['tool-chain'], ['mzmatch'], ['r', 'library'], ['peakml', 'viewer'], ['mzdata'], ['peakml'], ['java', 'library'], ['mzxml']]
* llama 3.1 is inferring


 27%|██▋       | 247/920 [05:34<16:07,  1.44s/it]

##### [['anyexpress']]
* llama 3.1 is inferring


 27%|██▋       | 248/920 [05:35<14:54,  1.33s/it]

##### [['geneious'], ['species', 'delimitation']]
* llama 3.1 is inferring


 27%|██▋       | 249/920 [05:36<14:12,  1.27s/it]

##### [['gc×gc/tofms'], ['guineu']]
* llama 3.1 is inferring


 27%|██▋       | 250/920 [05:37<13:55,  1.25s/it]

##### [['bc-genexminer'], ['mysql', 'relational', 'database'], ['r', 'statistical', 'software']]
* llama 3.1 is inferring


 27%|██▋       | 251/920 [05:38<13:15,  1.19s/it]

##### [['splash'], ['caret'], ['splash']]
* llama 3.1 is inferring


 27%|██▋       | 252/920 [05:39<12:41,  1.14s/it]

##### []
* llama 3.1 is inferring


 28%|██▊       | 253/920 [05:40<13:14,  1.19s/it]

##### [['mirpara'], ['mirbase'], ['hts'], ['svm'], ['mirpara']]
* llama 3.1 is inferring


 28%|██▊       | 254/920 [05:42<12:58,  1.17s/it]

##### [['binoch']]
* llama 3.1 is inferring


 28%|██▊       | 255/920 [05:43<13:25,  1.21s/it]

##### [['sim4cc'], ['leaff'], ['leaff'], ['sim4db'], ['sim4db']]
* llama 3.1 is inferring


 28%|██▊       | 256/920 [05:44<13:14,  1.20s/it]

##### [['windows'], ['spyder'], ['linux'], ['mac']]
* llama 3.1 is inferring


 28%|██▊       | 257/920 [05:45<12:55,  1.17s/it]

##### [['nsmap']]
* llama 3.1 is inferring


 28%|██▊       | 258/920 [05:46<12:52,  1.17s/it]

##### [['hmmer3'], ['pfam'], ['hmmer']]
* llama 3.1 is inferring


 28%|██▊       | 259/920 [05:47<12:46,  1.16s/it]

##### [['clotho']]
* llama 3.1 is inferring


 28%|██▊       | 260/920 [05:49<13:25,  1.22s/it]

##### [['wxpython'], ['relaxgui'], ['python'], ['nmr']]
* llama 3.1 is inferring


 28%|██▊       | 261/920 [05:50<14:20,  1.31s/it]

##### [['labkey', "server's", 'nab', 'tool'], ['atlas', 'science', 'portal'], ['labkey', 'server'], ['excel', 'macro']]
* llama 3.1 is inferring


 28%|██▊       | 262/920 [05:51<13:46,  1.26s/it]

##### [['ontocat']]
* llama 3.1 is inferring


 29%|██▊       | 263/920 [05:53<13:33,  1.24s/it]

##### [['mm-usc*pack']]
* llama 3.1 is inferring


 29%|██▊       | 264/920 [05:54<13:09,  1.20s/it]

##### [['metaxa']]
* llama 3.1 is inferring


 29%|██▉       | 265/920 [05:55<13:58,  1.28s/it]

##### [['version', '01.10'], ['version', '3.02'], ['flotrac/vigileo']]
* llama 3.1 is inferring


 29%|██▉       | 266/920 [05:57<14:03,  1.29s/it]

##### [['snvbox'], ['linux', 'system'], ['mysql', 'database'], ['c++'], ['python'], ['chasm']]
* llama 3.1 is inferring


 29%|██▉       | 267/920 [05:58<14:01,  1.29s/it]

##### [['the', 'proteored', 'miape', 'web', 'toolkit']]
* llama 3.1 is inferring


 29%|██▉       | 268/920 [05:59<13:08,  1.21s/it]

##### [['orbitview']]
* llama 3.1 is inferring


 29%|██▉       | 269/920 [06:00<13:08,  1.21s/it]

##### [['wommbat'], ['linux'], ['windows'], ['mac']]
* llama 3.1 is inferring


 29%|██▉       | 270/920 [06:02<14:00,  1.29s/it]

##### [['msoar'], ['notung'], ['multiparanoid'], ['multimsoar'], ['multimsoar', '2.0']]
* llama 3.1 is inferring


 29%|██▉       | 271/920 [06:03<15:53,  1.47s/it]

##### [['human', 'proteins'], ['fold', 'and', 'function', 'assignment', 'system'], ['microbial', 'virulence', 'factors'], ['nucleic', 'acids', 'research'], ['metagenomic', 'sequences'], ['ffas03'], ['ffas'], ['protein', 'science'], ['complete', 'proteomes']]
* llama 3.1 is inferring


 30%|██▉       | 272/920 [06:05<14:59,  1.39s/it]

##### [['geoviz', 'toolkit']]
* llama 3.1 is inferring


 30%|██▉       | 273/920 [06:06<13:51,  1.28s/it]

##### []
* llama 3.1 is inferring


 30%|██▉       | 274/920 [06:07<12:43,  1.18s/it]

##### [['galaxy'], ['python']]
* llama 3.1 is inferring


 30%|██▉       | 275/920 [06:08<12:47,  1.19s/it]

##### [['shap'], ['java']]
* llama 3.1 is inferring


 30%|███       | 276/920 [06:10<14:33,  1.36s/it]

##### [['ecancercare(bladder)']]
* llama 3.1 is inferring


 30%|███       | 277/920 [06:11<13:28,  1.26s/it]

##### []
* llama 3.1 is inferring


 30%|███       | 278/920 [06:12<12:40,  1.19s/it]

##### [['zoneminder'], ['zm']]
* llama 3.1 is inferring


 30%|███       | 279/920 [06:13<12:35,  1.18s/it]

##### [['enrichment', 'map'], ['cytoscape']]
* llama 3.1 is inferring


 30%|███       | 280/920 [06:14<12:39,  1.19s/it]

##### [['the', 'chembl', 'database'], ['cytoscape']]
* llama 3.1 is inferring


 31%|███       | 281/920 [06:15<12:05,  1.14s/it]

##### [['matlab'], ['cobra', 'toolbox']]
* llama 3.1 is inferring


 31%|███       | 282/920 [06:17<13:17,  1.25s/it]

##### [['syngo.via', 'ct', 'vascular'], ['syngo.via', '(siemens', 'healthcare,', 'forchheim,', 'germany)']]
* llama 3.1 is inferring


 31%|███       | 283/920 [06:18<13:19,  1.26s/it]

##### [['matlab'], ['afni'], ['rest'], ['spm']]
* llama 3.1 is inferring


 31%|███       | 284/920 [06:19<14:36,  1.38s/it]

##### [['drupal'], ['interpro'], ['gbrowse'], ['gene', 'ontology'], ['ncbi', 'blast'], ['tripal'], ['kyoto', 'encyclopedia', 'of', 'genes', 'and', 'genomes'], ['chado']]
* llama 3.1 is inferring


 31%|███       | 285/920 [06:21<13:50,  1.31s/it]

##### [['javascript'], ['krona'], ['web', 'browser'], ['html5']]
* llama 3.1 is inferring


 31%|███       | 286/920 [06:22<12:46,  1.21s/it]

##### [['prots']]
* llama 3.1 is inferring


 31%|███       | 287/920 [06:23<12:09,  1.15s/it]

##### [['neuritequant'], ['imagej']]
* llama 3.1 is inferring


 31%|███▏      | 288/920 [06:24<12:39,  1.20s/it]

##### [['eyemap']]
* llama 3.1 is inferring


 31%|███▏      | 289/920 [06:25<12:27,  1.18s/it]

##### [['sspace'], ['sopra'], ['mip', 'scaffolder']]
* llama 3.1 is inferring


 32%|███▏      | 290/920 [06:27<13:26,  1.28s/it]

##### [['linux'], ['bdtcomparator'], ['windows', 'operating', 'systems']]
* llama 3.1 is inferring


 32%|███▏      | 291/920 [06:28<12:46,  1.22s/it]

##### [['rest-gca'], ['matlab'], ['rest']]
* llama 3.1 is inferring


 32%|███▏      | 292/920 [06:29<12:03,  1.15s/it]

##### [['ntrfinder']]
* llama 3.1 is inferring


 32%|███▏      | 293/920 [06:30<12:00,  1.15s/it]

##### [['jamie']]
* llama 3.1 is inferring


 32%|███▏      | 294/920 [06:31<11:39,  1.12s/it]

##### [['logviewer'], ['rawxtract']]
* llama 3.1 is inferring


 32%|███▏      | 295/920 [06:32<12:54,  1.24s/it]

##### [['metscape'], ['cytoscape', 'plugin', 'manager'], ['ehmn', 'databases'], ['kegg'], ['metscape', '2', 'bioinformatics', 'tool']]
* llama 3.1 is inferring


 32%|███▏      | 296/920 [06:35<15:52,  1.53s/it]

##### [['monte', 'carlo', '(mc)', 'simulation'], ['maximum', 'likelihood-expectation', 'maximization', '(mlem)'], ['geant4'], ['integral', 'of', 'the', 'signal', 'in', 'each', 'mass', 'lesion', '(integrated', 'mass', 'signal,', 'ims)'], ['signal-difference-to-noise', 'ratio', '(sdnr)'], ['modulation', 'transfer', 'function', '(mtf)']]
* llama 3.1 is inferring


 32%|███▏      | 297/920 [06:36<15:02,  1.45s/it]

##### [['windose'], ['impact', 'ct', 'patients', 'dosimetry', 'calculator'], ['ct-expo']]
* llama 3.1 is inferring


 32%|███▏      | 298/920 [06:37<13:42,  1.32s/it]

##### []
* llama 3.1 is inferring


 32%|███▎      | 299/920 [06:38<13:01,  1.26s/it]

##### [['ptools'], ['c++'], ['python']]
* llama 3.1 is inferring


 33%|███▎      | 300/920 [06:39<12:26,  1.20s/it]

##### [['convan']]
* llama 3.1 is inferring


 33%|███▎      | 301/920 [06:40<12:29,  1.21s/it]

##### [['mcscan'], ['mcscanx']]
* llama 3.1 is inferring


 33%|███▎      | 302/920 [06:41<12:11,  1.18s/it]

##### [['mapcheck', '2'], ['3dvh', 'software']]
* llama 3.1 is inferring


 33%|███▎      | 303/920 [06:42<11:50,  1.15s/it]

##### [['metextract']]
* llama 3.1 is inferring


 33%|███▎      | 304/920 [06:44<11:58,  1.17s/it]

##### [['pyelph'], ['python']]
* llama 3.1 is inferring


 33%|███▎      | 305/920 [06:45<12:43,  1.24s/it]

##### [['the', 'ieee', '11073', 'personal', 'health', 'device', '(phd)', 'group'], ['iso/ieee', '11073', 'phd', 'message', 'generation', 'toolkit']]
* llama 3.1 is inferring


 33%|███▎      | 306/920 [06:46<12:41,  1.24s/it]

##### [['homeml', 'repository'], ['homeml', 'toolkit'], ['homeml', 'application']]
* llama 3.1 is inferring


 33%|███▎      | 307/920 [06:48<14:26,  1.41s/it]

##### [['meme/mast'], ['meme'], ['r', 'package'], ['iteme'], ['mdscan'], ['meet'], ['clustalw'], ['match'], ['q-residuals'], ['muscle']]
* llama 3.1 is inferring


 33%|███▎      | 308/920 [06:49<13:50,  1.36s/it]

##### [['kurzweil', '3000']]
* llama 3.1 is inferring


 34%|███▎      | 309/920 [06:51<13:21,  1.31s/it]

##### [['copicat']]
* llama 3.1 is inferring


 34%|███▎      | 310/920 [06:52<13:05,  1.29s/it]

##### [['anntools']]
* llama 3.1 is inferring


 34%|███▍      | 311/920 [06:53<13:36,  1.34s/it]

##### [['ngs', 'qc', 'toolkit'], ['roche', '454'], ['illumina'], ['perl']]
* llama 3.1 is inferring


 34%|███▍      | 312/920 [06:54<11:25,  1.13s/it]

##### [['excel']]
* llama 3.1 is inferring


 34%|███▍      | 313/920 [06:55<12:02,  1.19s/it]

##### [['ecomics']]
* llama 3.1 is inferring


 34%|███▍      | 314/920 [06:56<12:01,  1.19s/it]

##### [['biogem'], ['windows'], ['ruby'], ['linux'], ['mac', 'os', 'x']]
* llama 3.1 is inferring


 34%|███▍      | 315/920 [06:58<12:05,  1.20s/it]

##### [['flotrac/vigileo', 'system'], ['third', 'generation', 'software', 'version', '3.02']]
* llama 3.1 is inferring


 34%|███▍      | 316/920 [06:59<11:39,  1.16s/it]

##### [['matlab'], ['sos']]
* llama 3.1 is inferring


 34%|███▍      | 317/920 [07:00<11:59,  1.19s/it]

##### [['microsoft', 'excel'], ['freedom', 'evo', 'liquid', 'handler', 'software']]
* llama 3.1 is inferring


 35%|███▍      | 318/920 [07:01<11:30,  1.15s/it]

##### [['ceawatch']]
* llama 3.1 is inferring


 35%|███▍      | 319/920 [07:02<11:39,  1.16s/it]

##### [['cellulose-builder'], ['the', 'bash', 'programming', 'language']]
* llama 3.1 is inferring


 35%|███▍      | 320/920 [07:03<11:15,  1.13s/it]

##### [['ms-excel']]
* llama 3.1 is inferring


 35%|███▍      | 321/920 [07:04<10:53,  1.09s/it]

##### [['fcstrans']]
* llama 3.1 is inferring


 35%|███▌      | 322/920 [07:05<10:46,  1.08s/it]

##### [['icd-10', 'toolkit'], ['icd-10']]
* llama 3.1 is inferring


 35%|███▌      | 323/920 [07:07<11:13,  1.13s/it]

##### [['tina', 'manual', 'landmarking', 'tool']]
* llama 3.1 is inferring


 35%|███▌      | 324/920 [07:08<11:14,  1.13s/it]

##### [['visual', 'exploration', 'and', 'statistics', 'to', 'promote', 'annotation'], ['vespa']]
* llama 3.1 is inferring


 35%|███▌      | 325/920 [07:09<11:45,  1.19s/it]

##### [['rosetta'], ['saber'], ['rosettadesign']]
* llama 3.1 is inferring


 35%|███▌      | 326/920 [07:10<11:48,  1.19s/it]

##### [['face', 'software'], ['face']]
* llama 3.1 is inferring


 36%|███▌      | 327/920 [07:11<11:19,  1.15s/it]

##### [['therm']]
* llama 3.1 is inferring


 36%|███▌      | 328/920 [07:13<12:20,  1.25s/it]

##### [['isoquant'], ['windows', '7']]
* llama 3.1 is inferring


 36%|███▌      | 329/920 [07:14<12:03,  1.22s/it]

##### [['psi-search']]
* llama 3.1 is inferring


 36%|███▌      | 330/920 [07:17<18:03,  1.84s/it]

##### [['bayesian', 'regularization', 'method'], ['prior'], ['one-way', 'analysis', 'of', 'variance'], ['dna', 'microarrays'], ['background'], ['multiple', 'tests', 'correction'], ['next-generation', 'sequencing', '(rna-seq)'], ['empirical', 'measurements'], ['diagnostic', 'plots'], ['quantitative', 'mass', 'spectrometry'], ['protein', 'arrays'], ['data', 'sets'], ['two-sample', 't-tests'], ['regularized', 'variance'], ['t-test'], ['r', 'source', 'code'], ['probabilistic', 'mixture', 'model', 'treatment'], ['cyber-t']]
* llama 3.1 is inferring


 36%|███▌      | 331/920 [07:19<16:39,  1.70s/it]

##### [['firsst4'], ['asa24'], ['firsst4']]
* llama 3.1 is inferring


 36%|███▌      | 332/920 [07:20<14:42,  1.50s/it]

##### [['tps'], ['eclipse'], ['diamond']]
* llama 3.1 is inferring


 36%|███▌      | 333/920 [07:21<13:28,  1.38s/it]

##### [['fice']]
* llama 3.1 is inferring


 36%|███▋      | 334/920 [07:22<13:52,  1.42s/it]

##### [['assign-sbt', 'v3.6+'], ['assign-sbt', 'v3.2.7'], ['conexio', 'genomics']]
* llama 3.1 is inferring


 36%|███▋      | 335/920 [07:23<13:09,  1.35s/it]

##### [['nrpb-sr250', 'software'], ['nrpb-sr250']]
* llama 3.1 is inferring


 37%|███▋      | 336/920 [07:24<12:18,  1.26s/it]

##### [['stepstone', 'interactive', 'medical', 'software']]
* llama 3.1 is inferring


 37%|███▋      | 337/920 [07:26<11:48,  1.21s/it]

##### [['pagit']]
* llama 3.1 is inferring


 37%|███▋      | 338/920 [07:27<11:19,  1.17s/it]

##### []
* llama 3.1 is inferring


 37%|███▋      | 339/920 [07:28<11:18,  1.17s/it]

##### [['amber'], ['dendrimer', 'building', 'toolkit'], ['dbt']]
* llama 3.1 is inferring


 37%|███▋      | 340/920 [07:30<14:55,  1.54s/it]

##### [['simupop', 'simulation', 'environment'], ['gene-environment', 'interaction', 'simulator', '2', '(gens2)'], ['python', 'language']]
* llama 3.1 is inferring


 37%|███▋      | 341/920 [07:31<13:31,  1.40s/it]

##### [['ephla', 'software'], ['ephla', 'method'], ['ephla', 'program'], ['ephla']]
* llama 3.1 is inferring


 37%|███▋      | 342/920 [07:32<12:31,  1.30s/it]

##### [['minfi', 'bioconductor', 'package'], ['swan']]
* llama 3.1 is inferring


 37%|███▋      | 343/920 [07:34<12:08,  1.26s/it]

##### [['therm'], ['primer-blast']]
* llama 3.1 is inferring


 37%|███▋      | 344/920 [07:35<12:06,  1.26s/it]

##### []
* llama 3.1 is inferring


 38%|███▊      | 345/920 [07:36<11:42,  1.22s/it]

##### [['kmwin'], ['sas'], ['spss'], ['r']]
* llama 3.1 is inferring


 38%|███▊      | 346/920 [07:37<11:10,  1.17s/it]

##### []
* llama 3.1 is inferring


 38%|███▊      | 347/920 [07:39<12:29,  1.31s/it]

##### [['fusionfinder'], ['rna-seq', 'read', 'data'], ['perl-based', 'software'], ['single-end', '(se)', 'or', 'paired-end', '(pe)']]
* llama 3.1 is inferring


 38%|███▊      | 348/920 [07:40<11:57,  1.25s/it]

##### [['metabosearch']]
* llama 3.1 is inferring


 38%|███▊      | 349/920 [07:41<11:11,  1.18s/it]

##### [['milquant']]
* llama 3.1 is inferring


 38%|███▊      | 350/920 [07:42<12:22,  1.30s/it]

##### [['lxtoo']]
* llama 3.1 is inferring


 38%|███▊      | 351/920 [07:44<13:15,  1.40s/it]

##### []
* llama 3.1 is inferring
