In [1]:
import os
from getpass import getpass

import pandas as pd
import torch
from datasets import load_dataset
from elasticsearch import Elasticsearch
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import SentenceTransformersTokenTextSplitter
from langchain.text_splitter import SpacyTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.vectorstores import ElasticsearchStore
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Wildcard import of the project helpers (presumably the source of run_config,
# prepare_llm and testset_to_validation). It deliberately stays ahead of the
# imports below so that those keep winning on any name clash, as before.
from utils import *

from dotenv import load_dotenv
from ragas import evaluate
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)


# Pull the variables declared in the local .env file into the process environment.
load_dotenv()

# --- Hugging Face credentials and dataset location ---
HUGGINGFACE_TOKEN = os.environ.get('HUGGINGFACE_TOKEN')
HUGGINGFACE_USERNAME = os.environ.get('HUGGINGFACE_USERNAME')
HUGGINGFACE_DATASET_NAME = os.environ.get('HUGGINGFACE_DATASET_NAME')

# --- Elastic Cloud connection ---
ELASTIC_CLOUD_ID = os.environ.get('ELASTIC_CLOUD_ID')
ELASTIC_API_KEY = os.environ.get('ELASTIC_API_KEY')

# --- Validation dataset and the token used to access it ---
QA_VALIDATION_DATASET = os.environ.get('QA_VALIDATION_DATASET')
QA_VALIDATION_TOKEN = os.environ.get('QA_VALIDATION_TOKEN')

# --- OpenAI ---
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

# Any variable missing from the environment ends up as None here.

# Name of the embedding model used throughout the notebook.
model_name = "NeuML/pubmedbert-base-embeddings"

from langchain_community.embeddings import HuggingFaceEmbeddings
from pprint import pprint

# Validation dataset (without RAG answers): https://huggingface.co/datasets/prio7777777/pubmed-qa-validation

In [2]:
'''
Example on how to run a validation for a given configuration
NOTE: this has not been tested holistically, but the code should work
'''


'\nExample on how to run a validation for a given configuration\nNOTE: this has not been tested holistically, but the code should work\n'

In [3]:
# Embedding model used to encode text for the Elasticsearch vector stores.
model_name = "NeuML/pubmedbert-base-embeddings"

# Prefer the first CUDA device, but fall back to the CPU so the notebook still
# runs (more slowly) on machines without a usable GPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Identifier handed to prepare_llm below.
model_id = "GPT-3.5-Turbo" # "meta-llama/Llama-2-7b-chat-hf"

# The same device is used both for loading the model and for encoding.
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={'device': device},
    encode_kwargs={'device': device}
)

# Elasticsearch indexes that the chunking experiment iterates over.
# NOTE(review): the names look like they encode splitter type, chunk size and
# overlap — confirm against the indexing notebook.
indexes = [
    'pubmedbert-sentence-transformer-50',
    'pubmedbert-sentence-transformer-100',
    'pubmedbert-sentence-transformer-200',
    'pubmedbert-sentence-transformer-400',
    'pubmedbert-recursive-character-400-overlap-50',
]

## define the LLM model to use | later this can be overwritten by the user
llm = prepare_llm(HUGGINGFACE_TOKEN,model_id=model_id,use_openai=True)
# llm = Ollama(model = "llama2:latest")

## create configuration for the run_config function
# save_path: passed to run_config and testset_to_validation in the evaluation loop.
# save_path_result: passed to testset_to_validation only.
save_path = '../data/chunking_test.csv'
save_path_result = "../data/chunking_test_formatted.csv"


  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytree_node(


#### Evaluate the influence of chunking size and chunk overlap

In [4]:
# Empty table that the chunking experiment fills in: one row per evaluated
# configuration, one column per RAGAs metric.
chunking_configuration_results = pd.DataFrame(
    columns=[
        'configuration',       # filled with the Elasticsearch index name
        'answer_relevancy',
        'context_precision',
        'context_recall',
        'faithfulness',
    ],
)


In [5]:
# Run the RAG pipeline and the RAGAs evaluation once per index in `indexes`,
# writing one CSV per index plus one aggregated CSV with a row per index.

# Directory that receives the per-index and the aggregated metric files.
# Created up front so a missing folder cannot fail the run after the slow part.
results_dir = '../data/chunking_configurations'
os.makedirs(results_dir, exist_ok=True)

# Arguments for testset_to_validation; identical for every index, so built once.
config_2 = {
    'QA_VALIDATION_TOKEN': QA_VALIDATION_TOKEN,
    'QA_VALIDATION_DATASET': QA_VALIDATION_DATASET,
    'save_path': save_path,
    'save_path_result': save_path_result,
}

# One record per evaluated index. DataFrame.append returns a new frame instead
# of modifying in place (and no longer exists in pandas 2.x), so the rows are
# collected here and the DataFrame is built once after the loop.
metric_rows = []

for index_name in indexes:
    # Vector store bound to the index under test.
    elastic_vector_search = ElasticsearchStore(
        es_cloud_id=ELASTIC_CLOUD_ID,
        index_name=index_name,
        embedding=embeddings,
        es_api_key=ELASTIC_API_KEY,
    )

    # Arguments for run_config; only index_name changes between iterations.
    config_1 = {
        "index_name": index_name,
        'evaluation_dataset_path': QA_VALIDATION_DATASET,
        'HUGGINGFACE_TOKEN': HUGGINGFACE_TOKEN,
        'HUGGINGFACE_DATASET_NAME': HUGGINGFACE_DATASET_NAME,
        'llm': llm,
        'QA_VALIDATION_TOKEN': QA_VALIDATION_TOKEN,
        'save_path': save_path,
        'max_retrieved_docs': 3,
        'query_transformation_strategy':'read-write-retrieve',
        'OPENAI_API_KEY': OPENAI_API_KEY
    }

    # NOTE: every iteration passes the same save_path, so with save=True the
    # file is presumably overwritten each time — confirm in run_config.
    answers = run_config(elastic_vector_search=elastic_vector_search,
                         use_ensemble_retriever=False,
                         verbose=False,
                         save=True,
                         **config_1)

    ## this is a Dataset on which the RAGAs metrics can be applied
    result_dataset = testset_to_validation(save=True,**config_2)

    ## get ragas metrics
    # NOTE(review): the saved output of this cell shows evaluate() raising
    # ValueError because the dataset has no 'ground_truth' column, which
    # context_precision requires. result_dataset presumably needs that column
    # added or renamed inside testset_to_validation — confirm and fix there.
    resulted_metrics = evaluate(
        result_dataset,
        metrics=[
            context_precision,
            faithfulness,
            answer_relevancy,
            context_recall,
        ],
    )

    metric_rows.append({
        'configuration': index_name,
        'answer_relevancy': resulted_metrics['answer_relevancy'],
        'context_precision': resulted_metrics['context_precision'],
        'context_recall': resulted_metrics['context_recall'],
        'faithfulness': resulted_metrics['faithfulness'],
    })

    ## save individual results
    resulted_metrics.to_pandas().to_csv(f'{results_dir}/{index_name}_results.csv')


## save the results
# Reuse the column layout of the existing (empty) frame so the schema stays
# defined in one place and the result is well-formed even when no index ran.
chunking_configuration_results = pd.DataFrame(
    metric_rows,
    columns=chunking_configuration_results.columns,
)
chunking_configuration_results.to_csv(f'{results_dir}/chunking_configuration_results.csv')

  warn_deprecated(
generate RAG answers: 100%|██████████| 67/67 [04:55<00:00,  4.40s/it]


ValueError: The metric [context_precision] that that is used requires the following additional columns ['ground_truth'] to be present in the dataset. Looks like you're trying to use 'context_precision' without ground_truth. Please use consider using  `context_utilization' instead.

In [2]:

## Single-configuration run against one index.
## `embeddings` and `model_id` are not defined in this cell: they must already
## exist in the kernel before it is executed.

## index to query and the CSV file that run_config receives as save_path
index_name = 'pubmedbert-sentence-transformer-400'
save_path = '../data/rag_validation_answers_400.csv'

## vector store bound to that index
elastic_vector_search = ElasticsearchStore(
    index_name=index_name,
    embedding=embeddings,
    es_cloud_id=ELASTIC_CLOUD_ID,
    es_api_key=ELASTIC_API_KEY,
)

## LLM that answers the questions | later this can be overwritten by the user
llm = prepare_llm(HUGGINGFACE_TOKEN, model_id=model_id, use_openai=True)

## keyword arguments forwarded to run_config
config_1 = dict(
    index_name=index_name,
    evaluation_dataset_path=QA_VALIDATION_DATASET,
    HUGGINGFACE_TOKEN=HUGGINGFACE_TOKEN,
    HUGGINGFACE_DATASET_NAME=HUGGINGFACE_DATASET_NAME,
    llm=llm,
    QA_VALIDATION_TOKEN=QA_VALIDATION_TOKEN,
    save_path=save_path,
    max_retrieved_docs=3,
)

## Generates an answer for every question of the validation dataset (questions
## generated with RAGas from the new dataset) and, with save=True, stores
## question and result per row as a csv file under save_path.
## Takes about 20-30 mins on T4 GPU. verbose=True prints the full chain trace
## for every question.
answers = run_config(
    elastic_vector_search=elastic_vector_search,
    use_ensemble_retriever=False,
    verbose=True,
    config_name='new_dataset_400',
    save=True,
    **config_1,
)


{'name': 'instance-0000000000', 'cluster_name': '3799852da3b9401fb819ca73ae522ae6', 'cluster_uuid': 'okQgd_O8RiqhrBY6O1QGQg', 'version': {'number': '8.12.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '48a287ab9497e852de30327444b0809e55d46466', 'build_date': '2024-02-19T10:04:32.774273190Z', 'build_snapshot': False, 'lucene_version': '9.9.2', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}
Successfully loaded RAG pipeline




Successfully loaded evaluation dataset


generate RAG answers:   0%|          | 0/67 [00:00<?, ?it/s]



[1m> Entering new RetrievalQA chain...[0m


  warn_deprecated(




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" although artificial intelligence ( ai ) has had a profound impact on areas such as image recognition, comparable advances in drug discovery are rare. this article quantifies the stages of drug discovery in which improvements in the time taken, success rate or affordability will have the most profound overall impact on bringing new drugs to market. changes in clinical success rates will have the most profound impact on improving success in drug discovery ; in other words, the quality of decisions regarding which compound to take forward ( and how to conduct clinical trials ) are more important than speed or cost. although current advances in ai focus on how to make a giv

generate RAG answers:   1%|▏         | 1/67 [00:03<04:11,  3.80s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" artificial intelligence ( ai ) is changing healthcare and the practice of medicine as data - driven science and machine - learning technologies, in particular, are contributing to a variety of medical and clinical tasks. such advancements have also raised many questions, especially about public trust. as a response to these concerns there has been a concentrated effort from public bodies, policy - makers and technology companies leading the way in ai to address what is identified as a \ " public trust deficit \ ". this paper argues that a focus 

generate RAG answers:   3%|▎         | 2/67 [00:07<04:04,  3.76s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" an inverse relationship between risk of schizophrenia and premorbid iq is a robust empirical finding. cognitive impairment may be a core feature of schizophrenia in addition to the clinical symptoms that have historically defined the disorder. "

" an inverse relationship between risk of schizophrenia and premorbid iq is a robust empirical finding. cognitive impairment may be a core feature of schizophrenia in addition to the clinical symptoms that have historically defined the disorder. "

" the authors sought to clarify the relationship betwee

generate RAG answers:   4%|▍         | 3/67 [00:08<02:42,  2.53s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" the substantial reduction of radiation exposure using ( ultra - ) low dose programs in native computed tomographic imaging has led to considerable changes in imaging diagnostics and treatment planning in urolithiasis in recent years. in addition, especially in germany, ultrasound diagnostics is highly available in terms of equipment and with increasing expertise. this can largely replace the previous radiation - associated procedures in emergency and follow - up diagnostics, but also in intraoperative imaging, e. g., in percutaneous stone therap

generate RAG answers:   6%|▌         | 4/67 [00:10<02:20,  2.24s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" direct pcr can be used to successfully generate full str profiles from dna present on the surface of objects. str profiles are only of use in cases where a potential donor profile is available for comparison, and dna is of sufficient dna quality and quantity to generate a reliable profile. often, no donor information is available and only trace dna is present on items. as a result, alternative techniques are required to generate genetic data that can provide investigative leads. massively parallel sequencing ( mps ) offers the ability to detect 

generate RAG answers:   7%|▋         | 5/67 [00:12<02:13,  2.16s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
facilitate better classification and enable enhanced risk prediction for relevant outcomes. "

facilitate better classification and enable enhanced risk prediction for relevant outcomes. "

facilitate better classification and enable enhanced risk prediction for relevant outcomes. "
Human: How can ML techniques overcome the limitations of conventional risk scores for predicting VAs?[0m


generate RAG answers:   9%|▉         | 6/67 [00:14<02:04,  2.03s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" the information generated through drug profiling can be used to infer a common source between one or several seizures as well as drug trafficking routes to provide insights into drug markets. although well established, it is time - consuming and ineffective to compare all drug profiles manually. in recent years, there has been a push to automate processes to enable a more efficient comparison of illicit drug specimens. various chemometric methods have been employed to compare and interpret forensic case data promptly. the intelligence that is pr

generate RAG answers:  10%|█         | 7/67 [00:18<02:55,  2.92s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" we report for the first time the use of experimental electron density ( ed ) in the protein data bank for modeling of noncovalent interactions ( ncis ) for protein - ligand complexes. our methodology is based on reduced electron density gradient ( rdg ) theory describing intermolecular ncis by ed and its first derivative. we established a database named experimental nci database ( exptnci ; http : / / ncidatabase. stonewise. cn / # / nci ) containing ed saddle points, indicating \ u223c200, 000 ncis from over 12, 000 protein - ligand complexes. 

generate RAG answers:  12%|█▏        | 8/67 [00:20<02:18,  2.35s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" this article will give a brief history, review the latest guidelines, discuss risk factors and sources, and discuss screening, diagnosis, and management of lead poisoning in children. additionally, the role of the nurse practitioner ( np ) caring for children will be reviewed. "

" this article will give a brief history, review the latest guidelines, discuss risk factors and sources, and discuss screening, diagnosis, and management of lead poisoning in children. additionally, the role of the nurse practitioner ( np ) caring for children will be 

generate RAG answers:  13%|█▎        | 9/67 [00:23<02:37,  2.71s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" artificial intelligence and algorithms are increasingly able to replace human workers in cognitively sophisticated tasks, including ones related to justice. many governments and international organizations are discussing policies related to the application of algorithmic judges in courts. in this paper, we investigate the public perceptions of algorithmic judges. across two experiments ( n \ u2009 = \ u20091, 822 ), and an internal meta - analysis ( n \ u2009 = \ u20093, 039 ), our results show that even though court users acknowledge several ad

generate RAG answers:  15%|█▍        | 10/67 [00:24<02:08,  2.25s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" in a year when disagreements over scientific matters like covid - 19 continue to occupy political discourse, the surfacing of a spate of high - profile research errors is regrettable. it's crucial that the public trusts science at a time when so many topics - artificial intelligence, climate change, and pandemics - cast shadows of uncertainty on the future. errors, intentional or not, erode confidence in science. it's not surprising that science integrity has become a focal point for major institutions in the united states, from the white house 

generate RAG answers:  16%|█▋        | 11/67 [00:26<01:59,  2.14s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" advances in informatics and information technology are sure to alter the practice of medical imaging and image - guided therapies substantially over the next decade. each element of the imaging continuum will be affected by substantial increases in computing capacity coincident with the seamless integration of digital technology into our society at large. this article focuses primarily on areas where this it transformation is likely to have a profound effect on the practice of radiology. "

" advances in informatics and information technology ar

generate RAG answers:  18%|█▊        | 12/67 [00:30<02:23,  2.61s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" mass gathering ( mg ) events are associated with public health risks. during the period january 14 to march 4, 2019, kumbh mela in prayagraj, india was attended by an estimated 120 million visitors. an onsite disease surveillance was established to identify and respond to disease outbreaks. "

" mass gathering ( mg ) events are associated with public health risks. during the period january 14 to march 4, 2019, kumbh mela in prayagraj, india was attended by an estimated 120 million visitors. an onsite disease surveillance was established to ident

generate RAG answers:  19%|█▉        | 13/67 [00:32<02:05,  2.33s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" create an index of global reach for healthcare hashtags and tweeters therein, filterable by topic of interest. "

" create an index of global reach for healthcare hashtags and tweeters therein, filterable by topic of interest. "

" web - based public reporting by means of dashboards has become an essential tool for governments worldwide to monitor covid - 19 information and communicate it to the public. the actionability of such dashboards is determined by their fitness for purpose - meeting a specific information need - and fitness for use - pl

generate RAG answers:  21%|██        | 14/67 [00:34<02:02,  2.30s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" machine learning models may outperform traditional statistical regression algorithms for predicting clinical outcomes. proper validation of building such models and tuning their underlying algorithms is necessary to avoid over - fitting and poor generalizability, which smaller datasets can be more prone to. in an effort to educate readers interested in artificial intelligence and model - building based on machine - learning algorithms, we outline important details on cross - validation techniques that can enhance the performance and generalizabi

generate RAG answers:  22%|██▏       | 15/67 [00:36<02:05,  2.41s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" artificial intelligence systems for health care, like any other medical device, have the potential to fail. however, specific qualities of artificial intelligence systems, such as the tendency to learn spurious correlates in training data, poor generalisability to new deployment settings, and a paucity of reliable explainability mechanisms, mean they can yield unpredictable errors that might be entirely missed without proactive investigation. we propose a medical algorithmic audit framework that guides the auditor through a process of considerin

generate RAG answers:  24%|██▍       | 16/67 [00:39<02:02,  2.40s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
show efficacy for psychotic symptoms, but adjunctive buspirone may be associated with improvement in extrapyramidal symptoms and cognitive deficits in schizophrenia. due to the preliminary nature of this meta - analysis, larger sample size and higher quality rcts are needed to confirm these finding. "

show efficacy for psychotic symptoms, but adjunctive buspirone may be associated with improvement in extrapyramidal symptoms and cognitive deficits in schizophrenia. due to the preliminary nature of this meta - analysis, larger sample size and highe

generate RAG answers:  25%|██▌       | 17/67 [00:40<01:48,  2.16s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" the next step in the evolution of electronic medical record ( emr ) use is the integration of artificial intelligence ( ai ) into health care. with the benefit of roughly 15 years of electronic medical records ( emr ) data from millions of patients, health systems can now leverage this historical information via the assistance of complex mathematical algorithms to formulate computer - based medical decisions. with ai spending in health care forecasted to increase from $ 2. 1 billion currently to $ 36 billion by 2025, < sup > 1 < / sup > we sit o

generate RAG answers:  27%|██▋       | 18/67 [00:44<02:09,  2.65s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" class - prediction accuracy provides a quick but superficial way of determining classifier performance. it does not inform on the reproducibility of the findings or whether the selected or constructed features used are meaningful and specific. furthermore, the class - prediction accuracy oversummarizes and does not inform on how training and learning have been accomplished : two classifiers providing the same performance in one validation can disagree on many future validations. it does not provide explainability in its decision - making process

generate RAG answers:  28%|██▊       | 19/67 [00:46<01:56,  2.42s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" proteins play a crucial role in many biological processes, where their interaction with other proteins are integral. abnormal protein - protein interactions ( ppis ) have been linked to various diseases including cancer, and thus targeting ppis holds promise for drug development. however, experimental confirmation of the peculiarities of ppis is challenging due to their dynamic and transient nature. as a complement to experimental technologies, multiple computational molecular docking ( md ) methods have been developed to predict the structures 

generate RAG answers:  30%|██▉       | 20/67 [00:49<02:06,  2.70s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" several factors are involved in obtaining the competence of providing spiritual care in nursing students. the purpose of this study was to explain the relationship between moral intelligence and the professional self - concept with the competency of the nursing students in providing spiritual care to promote nursing education. "

" several factors are involved in obtaining the competence of providing spiritual care in nursing students. the purpose of this study was to explain the relationship between moral intelligence and the professional self 

generate RAG answers:  31%|███▏      | 21/67 [00:51<01:54,  2.49s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" several factors are involved in obtaining the competence of providing spiritual care in nursing students. the purpose of this study was to explain the relationship between moral intelligence and the professional self - concept with the competency of the nursing students in providing spiritual care to promote nursing education. "

" several factors are involved in obtaining the competence of providing spiritual care in nursing students. the purpose of this study was to explain the relationship between moral intelligence and the professional self 

generate RAG answers:  33%|███▎      | 22/67 [00:53<01:45,  2.34s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" the recognition of child physical abuse can be challenging and often requires a multidisciplinary assessment. deep learning models, based on clinical characteristics, laboratory studies, and imaging findings, were developed to facilitate unbiased identification of children who may have been abused. "

" the recognition of child physical abuse can be challenging and often requires a multidisciplinary assessment. deep learning models, based on clinical characteristics, laboratory studies, and imaging findings, were developed to facilitate unbiased

generate RAG answers:  34%|███▍      | 23/67 [00:55<01:35,  2.18s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" improving the rate of polyp detection is an important measure to prevent colorectal cancer ( crc ). real - time automatic polyp detection systems, through deep learning methods, can learn and perform specific endoscopic tasks previously performed by endoscopists. the purpose of this study was to explore whether a high - performance, real - time automatic polyp detection system could improve the polyp detection rate ( pdr ) in the actual clinical environment. "

" improving the rate of polyp detection is an important measure to prevent colorectal

generate RAG answers:  36%|███▌      | 24/67 [00:57<01:34,  2.19s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" computer - aided detection ( cade ) helps increase colonoscopic polyp detection. however, little is known about other performance metrics like the number and duration of false - positive ( fp ) activations or how stable the detection of a polyp is. "

" computer - aided detection ( cade ) helps increase colonoscopic polyp detection. however, little is known about other performance metrics like the number and duration of false - positive ( fp ) activations or how stable the detection of a polyp is. "

" improving the rate of polyp detection is an

generate RAG answers:  37%|███▋      | 25/67 [01:00<01:30,  2.16s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" cannabis is a commonly used recreational drug in young adults. the worldwide prevalence in 18 - to 25 - year - olds is approximately 35 %. significant differences in cognitive performance have been reported previously for groups of cannabis users. however, the groups are often heterogeneous in terms of cannabis use. here, we study daily cannabis users with a confirmed diagnosis of cannabis use disorder ( cud ) to examine cognitive performance on measures of memory, executive function and risky decision - making. "

" cannabis is a commonly used 

generate RAG answers:  39%|███▉      | 26/67 [02:07<14:50, 21.71s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" using a convenience sample of adolescents ( < i > n < / i > \ u2009 = \ u20091609 ; 63. 5 % female ; < i > m < / i > < sub > < i > age < / i > < / sub > \ u2009 = \ u200916. 54 ), this study explored whether ei predicted adolescent life satisfaction and whether friendship quality and optimism mediated this relationship during the covid - 19 pandemic. the structural equation modeling revealed that ei predicted adolescent life satisfaction, friendship quality, and optimism, friendship quality partially mediated the relationship between ei and life

generate RAG answers:  40%|████      | 27/67 [02:09<10:33, 15.83s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" the purpose of this study was to develop a deep learning model to diagnose breast cancer by embedding a diagnostic algorithm that examines the asymmetry of bilateral breast tissue. this retrospective study was approved by the institutional review board. a total of 115 patients who underwent breast surgery and had pathologically confirmed breast cancer were enrolled in this study. two image pairs [ 230 pairs of bilateral breast digital breast tomosynthesis ( dbt ) images with 115 malignant tumors and contralateral tissue ( m / n ), and 115 bilate

generate RAG answers:  42%|████▏     | 28/67 [02:10<07:29, 11.53s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" the impaired regulation of emotional responses has significant social consequences for patients with attention deficit hyperactivity disorder ( adhd ) and can be thought to increase the risk for technological addictions. "

" the impaired regulation of emotional responses has significant social consequences for patients with attention deficit hyperactivity disorder ( adhd ) and can be thought to increase the risk for technological addictions. "

" this study addresses if deficits in cognitive, attention, and inhibitory control performance in adu

generate RAG answers:  43%|████▎     | 29/67 [02:12<05:23,  8.51s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
held an accurate test of 99. 73 % for binary classification with the ( normal / covid - 19 ) scenario. comparing our results with related studies shows that our proposed models were superior to the previous cnn machine learning models in terms of various performance metrics. our pre - trained model associated with the ct dataset achieved 100 % of the f1score and the loss value was approximately 0. 00268. "

held an accurate test of 99. 73 % for binary classification with the ( normal / covid - 19 ) scenario. comparing our results with related stud

generate RAG answers:  45%|████▍     | 30/67 [02:13<03:51,  6.26s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" to operationalise fairness in the adoption of medical artificial intelligence ( ai ) algorithms in terms of access to computational resources, the proposed approach is based on a two - dimensional ( 2d ) convolutional neural networks ( cnn ), which provides a faster, cheaper and accurate - enough detection of early alzheimer's disease ( ad ) and mild cognitive impairment ( mci ), without the need for use of large training data sets or costly high - performance computing ( hpc ) infrastructures. "

" to operationalise fairness in the adoption of 

generate RAG answers:  46%|████▋     | 31/67 [02:16<03:07,  5.21s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" artificial intelligence ( ai ) is rapidly fuelling a fundamental transformation in the practice of pathology. however, clinical integration remains challenging, with no ai algorithms to date in routine adoption within typical anatomic pathology ( ap ) laboratories. this survey gathered current expert perspectives and expectations regarding the role of ai in ap from those with first - hand computational pathology and ai experience. "

" artificial intelligence ( ai ) is rapidly fuelling a fundamental transformation in the practice of pathology. h

generate RAG answers:  48%|████▊     | 32/67 [02:18<02:33,  4.39s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" in order to meet people's demand for intelligent management of daily life and health, manufacturing and developing humidity monitoring equipment with convenience, high sensitivity, easy miniaturization, and low cost is particularly important in the era of rapid development of artificial intelligence and the internet of things. polyaniline ( pani ) is an attractive humidity sensing material due to its designable functional properties. however, pani modified polyoxometalates ( poms ) for humidity sensing are still rare. as a proof of concept, a no

generate RAG answers:  49%|████▉     | 33/67 [02:21<02:17,  4.04s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" this article was migrated. the article was marked as recommended. machine learning approaches form the basis of \ " artificial intelligence \ " and have been increasingly applied in health services settings. it has been shown that such approaches may produce more accurate predictions in some contexts, compared to conventional statistical approaches, and may also reduce the costs of decision - making through automation. \ u00a0 nevertheless, there are both general limitations to developing and implementing machine learning approaches that must be

generate RAG answers:  51%|█████     | 34/67 [02:24<02:02,  3.70s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" the rapid progress of the combination of medicine and engineering provides better chances for the clinical treatment and healthcare engineering. traumatic brain injury ( tbi ) and its related symptoms have become a major global health problem. at present, these techniques has been widely used in the rehabilitation of tbi. in this review article, we summarizes the progress of the combination of medicine and industry in the rehabilitation of traumatic brain injury in recent years, mainly from the following aspects : artificial intelligence ( ai ),

generate RAG answers:  52%|█████▏    | 35/67 [02:26<01:43,  3.24s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" to determine how to optimize the delivery of machine learning techniques in a clinical setting to detect intracranial hemorrhage ( ich ) on non - contrast - enhanced ct images to radiologists to improve workflow. "

" to determine how to optimize the delivery of machine learning techniques in a clinical setting to detect intracranial hemorrhage ( ich ) on non - contrast - enhanced ct images to radiologists to improve workflow. "

" to determine how to optimize the delivery of machine learning techniques in a clinical setting to detect intracrani

generate RAG answers:  54%|█████▎    | 36/67 [02:29<01:34,  3.03s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" the question of whether some non - human animal species are more intelligent than others is a reoccurring theme in comparative psychology. to convincingly address this question, exact comparability of behavioral methodology and data across species is required. the current article explores one of the rare cases in which three vertebrate species ( humans, macaques, and crows ) experienced identical experimental conditions during the investigation of a core cognitive capability - the abstract categorization of absolute numerical quantity. we found 

generate RAG answers:  55%|█████▌    | 37/67 [02:31<01:23,  2.80s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
sequencing technology can significantly increase the molecular genetic etiologic diagnosis rate of the patients with unexplained id or dd. combined use of these technologies can serve as a useful examinational method in assisting differential diagnosis of children with unexplained id or dd. "

sequencing technology can significantly increase the molecular genetic etiologic diagnosis rate of the patients with unexplained id or dd. combined use of these technologies can serve as a useful examinational method in assisting differential diagnosis of ch

generate RAG answers:  57%|█████▋    | 38/67 [02:33<01:11,  2.47s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" here we ask : what is productive signaling? how to define it, how to measure it, and most of all, what are the parameters that determine it? further, what determines the strength of signaling from an upstream to a downstream node in a specific cell? these questions have either not been considered or not entirely resolved. the requirements for the signal to propagate downstream to activate ( repress ) transcription have not been considered either. yet, the questions are pivotal to clarify, especially in diseases such as cancer where determination

generate RAG answers:  58%|█████▊    | 39/67 [02:36<01:10,  2.51s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" cancer therapeutics cause various treatment - related changes that may impact patient follow - up and disease monitoring. although atypical responses such as pseudoprogression may be misinterpreted as treatment nonresponse, other changes, such as hyperprogressive disease seen with immunotherapy, must be recognized early for timely management. radiation necrosis in the brain is a known response to radiotherapy and must be distinguished from local tumor recurrence. radiotherapy can also cause adverse effects such as pneumonitis and local tissue to

generate RAG answers:  60%|█████▉    | 40/67 [02:39<01:12,  2.68s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" several studies have linked codeletion of chromosome arms 1p / 19q in low - grade gliomas ( lgg ) with positive response to treatment and longer progression - free survival. hence, predicting 1p / 19q status is crucial for effective treatment planning of lgg. in this study, we predict the 1p / 19q status from mr images using convolutional neural networks ( cnn ), which could be a non - invasive alternative to surgical biopsy and histopathological analysis. our method consists of three main steps : image registration, tumor segmentation, and clas

generate RAG answers:  61%|██████    | 41/67 [02:41<01:07,  2.60s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" information has the power to protect against unexpected events and control any crisis such as the covid - 19 pandemic. since covid - 19 has already rapidly spread all over the world, only technology - driven data management can provide accurate information to manage the crisis. this study aims to explore the potential of big data technologies for controlling covid - 19 transmission and managing it effectively. "

" information has the power to protect against unexpected events and control any crisis such as the covid - 19 pandemic. since covid -

generate RAG answers:  63%|██████▎   | 42/67 [02:46<01:22,  3.28s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" demonstrated is the code division multiple access ( cdma ) - mode coded access optical sensor ( caos ) smart camera suited for bright target scenarios. deploying a silicon cmos sensor and a silicon point detector within a digital micro - mirror device ( dmd ) - based spatially isolating hybrid camera design, this smart imager first engages the dmd starring mode with a controlled factor of 200 high optical attenuation of the scene irradiance to provide a classic unsaturated cmos sensor - based image for target intelligence gathering. next, this c

generate RAG answers:  64%|██████▍   | 43/67 [02:47<01:05,  2.75s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" through a comprehensive theoretical basis and actual test analysis of the application system design and functional efficiency of the cloud platform, this paper puts forward an artificial intelligence environmental data monitoring and wetland environmental simulation method based on gis remote sensing images. first, the basic storage and computing functions have been enhanced at the physical layer. second, the middleware layer is more flexible in the use of management methods and strategies. there are many strategies and methods that can be used 

generate RAG answers:  66%|██████▌   | 44/67 [02:51<01:10,  3.06s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" prediction and early detection of kidney damage induced by nonsteroidal anti - inflammatories ( nsaids ) would provide the best chances of maximizing the anti - inflammatory effects while minimizing the risk of kidney damage. unfortunately, biomarkers for detecting nsaid - induced kidney damage in cats remain to be discovered. to identify potential urinary biomarkers for monitoring nsaid - based treatments, we applied an untargeted metabolomics approach to urine collected from cats treated repeatedly with meloxicam or saline for up to 17 days. a

generate RAG answers:  67%|██████▋   | 45/67 [02:53<00:58,  2.64s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" inactive or old, healed tuberculosis ( tb ) on chest radiograph ( cr ) is often found in high tb incidence countries, and to avoid unnecessary evaluation and medication, differentiation from active tb is important. this study develops a deep learning ( dl ) model to estimate activity in a single chest radiographic analysis. "

" inactive or old, healed tuberculosis ( tb ) on chest radiograph ( cr ) is often found in high tb incidence countries, and to avoid unnecessary evaluation and medication, differentiation from active tb is important. this 

generate RAG answers:  69%|██████▊   | 46/67 [02:54<00:45,  2.16s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" open - source artificial intelligence models are finding free application in various industries, including computer science and medicine. their clinical potential, especially in assisting diagnosis and therapy, is the subject of increasingly intensive research. due to the growing interest in ai for diagnostics, we conducted a study evaluating the abilities of ai models, including chatgpt, microsoft bing, and scholar ai, in classifying single - curve scoliosis based on radiological descriptions. fifty - six posturographic images depicting single 

generate RAG answers:  70%|███████   | 47/67 [02:56<00:41,  2.07s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
- the - art classification models. the proposed framework records an accuracy of 99. 98 % ( four classes ) and 100 % ( five classes ). "

- the - art classification models. the proposed framework records an accuracy of 99. 98 % ( four classes ) and 100 % ( five classes ). "

" class - prediction accuracy provides a quick but superficial way of determining classifier performance. it does not inform on the reproducibility of the findings or whether the selected or constructed features used are meaningful and specific. furthermore, the class - predic

generate RAG answers:  72%|███████▏  | 48/67 [02:57<00:35,  1.89s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" the covid - 19 pandemic prompted the scientific community to share timely evidence, also in the form of pre - printed papers, not peer reviewed yet. "

" the covid - 19 pandemic prompted the scientific community to share timely evidence, also in the form of pre - printed papers, not peer reviewed yet. "

" the covid - 19 pandemic prompted the scientific community to share timely evidence, also in the form of pre - printed papers, not peer reviewed yet. "
Human: What kinds of papers were shared by the scientific community during the COVID-19 pand

generate RAG answers:  73%|███████▎  | 49/67 [03:00<00:40,  2.23s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" to provide an overview of the role of cardiovascular ( cv ) imaging in facilitating and advancing the field of precision medicine in cv disease. "

" to provide an overview of the role of cardiovascular ( cv ) imaging in facilitating and advancing the field of precision medicine in cv disease. "

" recently, medical radiology has undergone significant improvements in patient management due to advancements in image acquisition by the last generation of machines, data processing, and the integration of artificial intelligence. in this way, cardiov

generate RAG answers:  75%|███████▍  | 50/67 [03:03<00:38,  2.24s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" we present an overview of current clinical musculoskeletal imaging applications for artificial intelligence, as well as potential future applications and techniques. "

" we present an overview of current clinical musculoskeletal imaging applications for artificial intelligence, as well as potential future applications and techniques. "

" despite the rapid growth of interest and diversity in applications of artificial intelligence ( ai ) to biomedical research, there are limited objective ways to characterize the potential for use of ai in clin

generate RAG answers:  76%|███████▌  | 51/67 [03:04<00:32,  2.03s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" given the prevalence of depression worldwide and its major impact on society, several studies employed artificial intelligence modelling to automatically detect and assess depression. however, interpretation of these models and cues are rarely discussed in detail in the ai community, but have received increased attention lately. in this study, we aim to analyse the commonly selected features using a proposed framework of several feature selection methods and their effect on the classification results, which will provide an interpretation of the 

generate RAG answers:  78%|███████▊  | 52/67 [03:06<00:30,  2.03s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" to assess whether previously observed brain and cognitive differences between children with type 1 diabetes and control subjects without diabetes persist, worsen, or improve as children grow into puberty and whether differences are associated with hyperglycemia. "

" to assess whether previously observed brain and cognitive differences between children with type 1 diabetes and control subjects without diabetes persist, worsen, or improve as children grow into puberty and whether differences are associated with hyperglycemia. "

" to determine if

generate RAG answers:  79%|███████▉  | 53/67 [03:08<00:25,  1.84s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" some encouraging uses for ai in medicine will lead to potentially novel legal liability issues. complex algorithms involve an opacity that creates problems for the medical and legal professions alike. as iatrogenic injury is common in medical malpractice, the medical profession is understandably concerned when ai is introduced in diagnostic and therapeutic devices and events and outcome cannot be fully explained due to the \ " black box \ " effect. a concern about machine learning algorithms is the black box issue and understanding how conclusio

generate RAG answers:  81%|████████  | 54/67 [03:09<00:23,  1.84s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
markers. thirteen studies explored predicting covid - 19 severity, while the rest predicted diagnosis. seventy percent of the articles used deep learning models, while 30 % used traditional ml algorithms. most studies reported high sensitivity, specificity, and accuracy for the ml models ( exceeding 90 % ). the overall concern about the risk of bias was \ " unclear \ " in 56 % of the studies. this was mainly due to concerns about selection bias. ml may help identify covid - 19 patients in the early phase of the pandemic, particularly in the contex

generate RAG answers:  82%|████████▏ | 55/67 [03:11<00:19,  1.65s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" artificial intelligence ( ai ) has the potential to assist in endoscopy and improve decision making, particularly in situations where humans may make inconsistent judgments. the performance assessment of the medical devices operating in this context is a complex combination of bench tests, randomized controlled trials, and studies on the interaction between physicians and ai. we review the scientific evidence published about gi genius, the first ai - powered medical device for colonoscopy to enter the market, and the device that is most widely t

generate RAG answers:  84%|████████▎ | 56/67 [03:13<00:20,  1.87s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" to compare the artificial intelligence algorithms as powerful machine learning methods for evaluating patients with suspected sepsis using data from routinely available blood tests performed on arrival at the hospital. results were compared with those obtained from the classical logistic regression method. "

" to compare the artificial intelligence algorithms as powerful machine learning methods for evaluating patients with suspected sepsis using data from routinely available blood tests performed on arrival at the hospital. results were compar

generate RAG answers:  85%|████████▌ | 57/67 [03:14<00:16,  1.67s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" to review publications in the field of human factors and organisational issues ( hf & oi ) in the year 2022 and to assess major contributions to the subject. "

" to review publications in the field of human factors and organisational issues ( hf & oi ) in the year 2022 and to assess major contributions to the subject. "

" the work system is defined by its components, their roles and the relationships between them. any work system gravitates around the human resource and the interdependencies between human factor and the other components of it.

generate RAG answers:  87%|████████▋ | 58/67 [03:16<00:16,  1.84s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" the quality of decision and assessment of risk are key determinants of successful sport performance. athletes differ fundamentally in their decision - making ability according to their athletic expertise level. moreover, given the influence of emotions on decision - making, it is likely that a trait reflecting emotional functioning, trait emotional intelligence, may also influence decision - making. therefore, the aim of this research was to investigate the respective contribution of athletic expertise and trait emotional intelligence to non - a

generate RAG answers:  88%|████████▊ | 59/67 [03:19<00:16,  2.12s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" there is an avalanche of biomedical data generation and a parallel expansion in computational capabilities to analyze and make sense of these data. starting with genome sequencing and widely employed deep sequencing technologies, these trends have now taken hold in all omics disciplines and increasingly call for multi - omics integration as well as data interpretation by artificial intelligence technologies. here, we focus on mass spectrometry ( ms ) - based proteomics and describe how machine learning and, in particular, deep learning now predi

generate RAG answers:  90%|████████▉ | 60/67 [03:24<00:20,  2.87s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" the diagnosis, evaluation, and treatment planning of pancreatic pathologies usually require the combined use of different imaging modalities, mainly, computed tomography ( ct ), magnetic resonance imaging ( mri ), and positron emission tomography ( pet ). artificial intelligence ( ai ) has the potential to transform the clinical practice of medical imaging and has been applied to various radiological techniques for different purposes, such as segmentation, lesion detection, characterization, risk stratification, or prediction of response to trea

generate RAG answers:  91%|█████████ | 61/67 [03:26<00:16,  2.70s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" pancreatic cancer is one of the most lethal neoplasms among common cancers worldwide, and pcls are well - known precursors of this type of cancer. artificial intelligence ( ai ) could help to improve and speed up the detection and classification of pancreatic lesions. the aim of this review is to summarize the articles addressing the diagnostic yield of artificial intelligence applied to medical imaging ( computed tomography [ ct ] and / or magnetic resonance [ mr ] ) for \ u00a0the \ u00a0detection of pancreatic cancer and pancreatic cystic les

generate RAG answers:  93%|█████████▎| 62/67 [03:28<00:12,  2.49s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
current capabilities of ai and proposals for how these capabilities should be leveraged in the clinical realm will be reviewed, while the questions that need to be answered before this vision becomes a reality are posed. "

current capabilities of ai and proposals for how these capabilities should be leveraged in the clinical realm will be reviewed, while the questions that need to be answered before this vision becomes a reality are posed. "

current capabilities of ai and proposals for how these capabilities should be leveraged in the clinical r

generate RAG answers:  94%|█████████▍| 63/67 [03:30<00:08,  2.21s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" this study aimed to explore whether spiritual intelligence could help midwives cope with job - related stress. a cross - sectional study was done with 143 midwives in babol, iran. the sampling method was non - random and convenience samples were used. amram and dreyer's spiritual intelligence and health and safety executive occupational stress questionnaires were used. the response rate of subjects was 90. 51 %. results showed that the most predictors of job stress were total spiritual intelligence ( \ u03b2 \ u2009 = \ u20090. 507, p \ u2009 = 

generate RAG answers:  96%|█████████▌| 64/67 [03:31<00:06,  2.03s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" the interrupted time - series ( its ) concept is performed using linear regression to evaluate the impact of policy changes in public health at a specific time. objectives of this study were to verify, with an artificial intelligence - based nonlinear approach, if the estimation of its data could be facilitated, in addition to providing a computationally explicit equation. "

" an important task in developing accurate public health intervention evaluation methods based on historical interrupted time series ( its ) records is to determine the exa

generate RAG answers:  97%|█████████▋| 65/67 [03:33<00:03,  1.88s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" precision medicine development is driven by the possibilities of next generation sequencing, information technology and artificial intelligence and thus, raises a number of ethical questions. empirical studies have investigated such issues from the perspectives of health care professionals, researchers and patients. we synthesize the results from these studies in this review. "

" precision medicine development is driven by the possibilities of next generation sequencing, information technology and artificial intelligence and thus, raises a numb

generate RAG answers:  99%|█████████▊| 66/67 [03:35<00:01,  1.97s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
" the objective was to identify barriers and facilitators to the implementation of artificial intelligence ( ai ) applications in clinical radiology in the netherlands. "

" the objective was to identify barriers and facilitators to the implementation of artificial intelligence ( ai ) applications in clinical radiology in the netherlands. "

" artificial intelligence has the potential to disrupt the way clinical radiology is practiced globally. however, there are barriers that radiologists should be aware of prior to implementing artificial intell

generate RAG answers: 100%|██████████| 67/67 [03:36<00:00,  3.24s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`


Evaluating:   0%|          | 0/268 [00:00<?, ?it/s]

Exception in thread Thread-8:
Traceback (most recent call last):
  File "c:\Users\priot\anaconda3\envs\nlp\lib\threading.py", line 932, in _bootstrap_inner
    self.run()
  File "c:\Users\priot\anaconda3\envs\nlp\lib\site-packages\ragas\executor.py", line 75, in run
    results = self.loop.run_until_complete(self._aresults())
  File "c:\Users\priot\anaconda3\envs\nlp\lib\asyncio\base_events.py", line 616, in run_until_complete
    return future.result()
  File "c:\Users\priot\anaconda3\envs\nlp\lib\site-packages\ragas\executor.py", line 63, in _aresults
    raise e
  File "c:\Users\priot\anaconda3\envs\nlp\lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
  File "c:\Users\priot\anaconda3\envs\nlp\lib\asyncio\tasks.py", line 619, in _wait_for_one
    return f.result()  # May raise f.exception().
  File "c:\Users\priot\anaconda3\envs\nlp\lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwar

ExceptionInRunner: The runner thread which was running the jobs raised an exeception. Read the traceback above to debug it. You can also pass `raise_exceptions=False` incase you want to show only a warning message instead.

In [3]:

# Keyword arguments forwarded to testset_to_validation (token, dataset name and
# the input/output paths).
config_2 = dict(
    QA_VALIDATION_TOKEN=QA_VALIDATION_TOKEN,
    QA_VALIDATION_DATASET=QA_VALIDATION_DATASET,
    save_path=save_path,
    save_path_result='../data/validation_400_gpt_3-5-turbo.csv',
)

# Build the Dataset that the RAGAs metrics are applied to.
result_dataset = testset_to_validation(save=True, **config_2)

# Score the dataset with the four RAGAs metrics.
resulted_metrics = evaluate(
    result_dataset,
    metrics=[context_precision, faithfulness, answer_relevancy, context_recall],
)

pprint(resulted_metrics)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`


Evaluating:   0%|          | 0/268 [00:00<?, ?it/s]

{'answer_relevancy': 0.8942793135767543,
 'context_precision': 0.9253731342358208,
 'context_recall': 0.9253731343283582,
 'faithfulness': 0.825542328042328}


In [9]:
# Look up a single metric score by name from the evaluation result.
resulted_metrics['answer_relevancy']

0.8942793135767543

In [15]:
from langchain_community.document_loaders import HuggingFaceDatasetLoader

# Load the PubMed abstracts dataset, using the 'abstract' column as page content.
# The access token is taken from the environment (HUGGINGFACE_TOKEN, read via
# load_dotenv()/os.getenv in the first cell) rather than being hard-coded, so no
# credential is stored in the notebook.
# NOTE(review): the token that used to be written here was committed with the
# notebook and should be revoked; make sure HUGGINGFACE_TOKEN has access to this
# dataset.
loader = HuggingFaceDatasetLoader(
    "MaraEliana/pubmed-abstracts",
    use_auth_token=HUGGINGFACE_TOKEN,
    page_content_column='abstract',
)
data = loader.load()



Generating train split:   0%|          | 0/69698 [00:00<?, ? examples/s]

In [None]:
# query = "What is the role of artificial intelligence in nephrology?"
# results = elastic_vector_search.similarity_search(query,k=50)


# titles_elastic = [res.metadata["Title"] for res in results]
# for res in results:
#     print(res.metadata['Title'])

### For Mara

Using Ollama, generate a larger validation dataset of 5000 items (set `TEST_SET_SIZE` to 5000).
Replace `ChatOpenAI` with the Llama model in the test-set generation cell below (the `generator_llm` and `critic_llm` lines).
Save the resulting CSV locally and send it to me so I can upload it to Hugging Face.

In [None]:
# Number of documents sampled from `data` and also the number of test items
# requested from the test-set generator.
TEST_SET_SIZE = 100

In [None]:
# %pip install ragas 
from ragas.testset import TestsetGenerator
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from ragas.llms import LangchainLLM
import random
#https://docs.ragas.io/en/latest/howtos/customisations/llms.html

# Sample with a locally seeded RNG so the same subset of documents is drawn on
# every run, without touching the global `random` state.
SAMPLE_SEED = 42
sub_data = random.Random(SAMPLE_SEED).sample(data, TEST_SET_SIZE)

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# Never print the key itself: cell output is saved with the notebook.
print("OPENAI_API_KEY set:", bool(OPENAI_API_KEY))

# Add custom llms and embeddings
generator_llm = LangchainLLM(llm=ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=OPENAI_API_KEY))
critic_llm = LangchainLLM(llm=ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=OPENAI_API_KEY)) ## should be gpt-4 but we dont have access
embeddings_model = embeddings

# Change resulting question type distribution
testset_distribution = {
    "simple": 0.25,
    "reasoning": 0.25,
    "multi_context": 0.25,
    "conditional": 0.25,
}

# percentage of conversational question
chat_qa = 0.1


test_generator = TestsetGenerator(
    generator_llm=generator_llm,
    critic_llm=critic_llm,
    embeddings_model=embeddings_model,
    testset_distribution=testset_distribution,
    chat_qa=chat_qa,
)

# test_size is the number of test items to generate; it is set to the same
# value as the number of sampled documents.
testset = test_generator.generate(sub_data, test_size=TEST_SET_SIZE)

In [None]:
# Convert the generated test set with to_pandas() and show it as the cell output.
test_df = testset.to_pandas()
test_df


In [None]:
# test_df.to_csv('testset.csv',index=False)

# index = 4

In [None]:
# from pprint import pprint

# pprint(test_df.iloc[index]['question'])

# ## this is the answer
# pprint(test_df.iloc[index]['ground_truth'])


In [None]:
# ## relevant contexts split by \n
# pprint(test_df.iloc[index]['ground_truth_context'][0])