In [0]:
# NOTE(review): %pylab star-imports numpy and matplotlib.pyplot into the global
# namespace. None of the cells visible in this notebook appear to use those
# names — confirm whether `%matplotlib inline` (or nothing) would be enough.
%pylab inline

In [0]:
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd

In [0]:
# Example: load a DSS dataset as a Pandas dataframe
# NOTE(review): looks like notebook-template boilerplate — neither `mydataset`
# nor `mydataset_df` is referenced by any other visible cell. Confirm that a
# dataset named "mydataset" exists, otherwise a full re-run will stop here.
mydataset = dataiku.Dataset("mydataset")
mydataset_df = mydataset.get_dataframe()

In [8]:
import dataiku
import pandas as pd
from IliadEmbeddingWrapper import DSSLLMEmbeddingWrapper
from ragas import evaluate
from ragas.metrics import answer_relevancy, context_precision, context_recall, faithfulness
from ragas.dataset_schema import EvaluationDataset
from ragas.testset import TestsetGenerator
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from langchain.schema import Document as LangChainDocument

class DataikuInitializer:
    """Entry point to the Dataiku API for this notebook.

    Keeps an API client and its default project, and offers small helpers
    that look up LLMs, embedding models and datasets by identifier.
    """

    def __init__(self):
        api = dataiku.api_client()
        self.client = api
        self.project = api.get_default_project()

    def get_langchain_llm(self, llm_id):
        """Return the project LLM ``llm_id`` converted via ``as_langchain_llm()``."""
        return self.project.get_llm(llm_id).as_langchain_llm()

    def get_custom_embeddings(self, embedding_model_id):
        """Return the project model ``embedding_model_id`` wrapped in ``DSSLLMEmbeddingWrapper``."""
        return DSSLLMEmbeddingWrapper(self.project.get_llm(embedding_model_id))

    def get_dataset(self, dataset_name):
        """Return a ``dataiku.Dataset`` handle for ``dataset_name``."""
        handle = dataiku.Dataset(dataset_name)
        return handle

class DocumentCreator:
    """Loads a chunked-text dataset, samples rows, and converts them to LangChain documents.

    The frame handed to ``create_langchain_documents`` must expose a
    ``chunk_text`` column (used as the document body) and a ``metadata``
    column (copied as-is into each document's metadata).
    """

    def __init__(self, dataset_name):
        self.dataset_name = dataset_name

    def load_and_sample_documents(self, sample_size=100, random_state=None):
        """Read the dataset and return a random sample of its rows.

        Args:
            sample_size: Maximum number of rows to draw. It is clamped to the
                number of rows available, because ``DataFrame.sample`` raises
                ``ValueError`` when asked for more rows than exist.
            random_state: Optional seed forwarded to ``DataFrame.sample`` so the
                same rows can be drawn again. ``None`` (default) keeps the
                sample unseeded.

        Returns:
            pandas.DataFrame with at most ``sample_size`` rows.
        """
        df = dataiku.Dataset(self.dataset_name).get_dataframe()
        # Leave ``None`` untouched so pandas applies its own default for ``n``.
        n_rows = sample_size if sample_size is None else min(sample_size, len(df))
        return df.sample(n=n_rows, random_state=random_state)

    def create_langchain_documents(self, df, min_chars=50):
        """Build one LangChain document per row with enough text.

        Args:
            df: Frame with ``chunk_text`` and ``metadata`` columns.
            min_chars: Rows are kept only when ``chunk_text`` is strictly longer
                than this many characters. Rows with missing text are dropped
                as well, since their length does not compare greater.

        Returns:
            ``(documents, filtered_df)``: the list of documents and the frame
            restricted to the rows they were built from. Each document carries
            ``{"id": <row index label as str>, "metadata": <row metadata>}``.
        """
        text_lengths = df["chunk_text"].str.len()
        df = df[text_lengths > min_chars]
        documents = [
            LangChainDocument(
                page_content=row["chunk_text"],
                metadata={"id": str(index), "metadata": row["metadata"]},
            )
            for index, row in df.iterrows()
        ]
        return documents, df

class TestsetGeneratorWrapper:
    """Wraps ragas testset generation using the provided LLM and embeddings."""

    def __init__(self, llm, embeddings):
        self.generator_embeddings = LangchainEmbeddingsWrapper(embeddings)
        self.generator_llm = LangchainLLMWrapper(llm)
        self.generator = TestsetGenerator(llm=self.generator_llm, embedding_model=self.generator_embeddings)

    def generate_testset(self, documents, testset_size=10):
        """Generate a testset from ``documents``.

        Args:
            documents: Sequence of LangChain documents.
            testset_size: Requested number of samples; capped at
                ``len(documents)``.

        Returns:
            Whatever ``generate_with_langchain_docs`` returns on success. When
            ``documents`` is empty, or the generator raises ``ValueError``, an
            empty ``EvaluationDataset`` is returned instead (best-effort
            fallback), so callers should be ready for an empty result.
        """
        if not documents:
            # Nothing to generate from: skip the generator call (which would
            # otherwise be asked for a testset of size 0) and use the fallback.
            print("Error details: ", "no documents provided for testset generation")
            return EvaluationDataset.from_list([])

        effective_size = min(testset_size, len(documents))
        try:
            return self.generator.generate_with_langchain_docs(documents, testset_size=effective_size)
        except ValueError as e:
            print("Error details: ", e)
            # Fallback strategy: hand back an empty dataset rather than raising.
            return EvaluationDataset.from_list([])

class RAGResponseGenerator:
    """Generates a response for each question by invoking the provided LLM.

    NOTE(review): no retrieval step is visible in this class — the question is
    sent to the LLM unchanged and ``retrieved_contexts`` is filled from
    ``reference_contexts``. Confirm this is the intended evaluation setup.
    """

    def __init__(self, llm):
        self.llm = llm

    def generate_response(self, question):
        """Return the result of ``self.llm.invoke(question)``."""
        return self.llm.invoke(question)

    def apply_responses_to_dataframe(self, df):
        """Return a copy of ``df`` with ``response`` and ``retrieved_contexts`` columns added.

        The input frame is left untouched so the calling cell can be re-run
        without seeing columns added by a previous run.

        Args:
            df: Frame with a ``user_input`` column. If it is missing,
                ``chunk_text`` (or an empty string) is used as the question.

        Returns:
            A new pandas.DataFrame. ``retrieved_contexts`` mirrors
            ``reference_contexts`` when that column exists; otherwise every
            row gets an empty list, keeping the column list-typed.
        """
        out = df.copy()

        # Check if expected 'user_input' column exists, otherwise warn.
        if "user_input" not in out.columns:
            print("Warning: 'user_input' column not found. Using 'chunk_text' as fallback for responses.")
            out["user_input"] = out.get("chunk_text", "")

        out["response"] = out["user_input"].apply(self.generate_response)

        if "reference_contexts" in out.columns:
            out["retrieved_contexts"] = out["reference_contexts"]
        else:
            # One fresh empty list per row (not a shared object, not a string).
            out["retrieved_contexts"] = out["user_input"].apply(lambda _: [])
        return out

class EvaluationPipeline:
    """Runs the ragas evaluation on a dataset and returns the results as a pandas DataFrame.

    Args:
        llm: LLM handed to ragas ``evaluate``.
        embeddings: Embeddings handed to ragas ``evaluate``.
        metrics: Optional iterable of ragas metrics. Defaults to
            ``answer_relevancy``, ``context_precision``, ``faithfulness`` and
            ``context_recall``.
        run_config: Optional object forwarded as ``run_config`` to ragas
            ``evaluate`` (e.g. to raise per-job timeouts). When ``None`` the
            argument is not passed at all, so ragas applies its own defaults.
    """

    def __init__(self, llm, embeddings, metrics=None, run_config=None):
        self.llm = llm
        self.embeddings = embeddings
        if metrics is None:
            self.metrics = [answer_relevancy, context_precision, faithfulness, context_recall]
        else:
            # Copy so later changes to the caller's list do not alter this pipeline.
            self.metrics = list(metrics)
        self.run_config = run_config

    def evaluate(self, dataset):
        """Evaluate ``dataset`` and return the result converted with ``to_pandas()``."""
        extra_kwargs = {}
        if self.run_config is not None:
            extra_kwargs["run_config"] = self.run_config

        # Inside a method body ``evaluate`` resolves to the module-level ragas
        # function, not to this method.
        result = evaluate(
            dataset=dataset,
            llm=self.llm,
            embeddings=self.embeddings,
            metrics=self.metrics,
            **extra_kwargs,
        )
        return result.to_pandas()


In [0]:
def _evaluate(LLM_ID, EMBEDDING_ID, input_dataset, output_dataset, sample_size=100, testset_size=10):
    """Run the full pipeline: sample chunks, generate a testset, answer it, score it, persist it.

    Args:
        LLM_ID: Identifier of the project LLM used for generation and scoring.
        EMBEDDING_ID: Identifier of the project embedding model.
        input_dataset: Name of the Dataiku dataset holding the text chunks.
        output_dataset: Name of the Dataiku dataset the scores are written to.
        sample_size: Number of rows sampled from ``input_dataset``.
        testset_size: Number of test samples requested from the generator.

    Returns:
        The evaluation results DataFrame that was written to ``output_dataset``.
    """
    # Initialize Dataiku and retrieve models
    initializer = DataikuInitializer()
    langchain_llm = initializer.get_langchain_llm(LLM_ID)
    custom_embeddings = initializer.get_custom_embeddings(EMBEDDING_ID)

    # Load and prepare documents (the filtered frame is not needed here)
    doc_creator = DocumentCreator(input_dataset)
    df_sample = doc_creator.load_and_sample_documents(sample_size=sample_size)
    documents, _ = doc_creator.create_langchain_documents(df_sample)

    # Generate testset from documents
    testset_generator = TestsetGeneratorWrapper(langchain_llm, custom_embeddings)
    generated_dataset = testset_generator.generate_testset(documents, testset_size=testset_size)
    df_generated = generated_dataset.to_pandas()
    print("Generated Testset:")
    print(df_generated.head())

    # Generate responses for each user input in the generated dataset
    response_generator = RAGResponseGenerator(langchain_llm)
    df_with_responses = response_generator.apply_responses_to_dataframe(df_generated)
    print("DataFrame with Responses:")
    print(df_with_responses.head())

    # Convert to EvaluationDataset for evaluation
    data_records = df_with_responses.to_dict("records")
    ragas_dataset = EvaluationDataset.from_list(data_records)

    # Evaluate the results
    evaluation_pipeline = EvaluationPipeline(langchain_llm, custom_embeddings)
    evaluation_results = evaluation_pipeline.evaluate(ragas_dataset)

    # Write evaluation results back to a Dataiku dataset
    ragas_evaluation = initializer.get_dataset(output_dataset)
    ragas_evaluation.write_with_schema(evaluation_results)
    return evaluation_results

# Run the RAGAS evaluation on the chunked input data

In [2]:
# --- Run configuration ---
# Model identifiers resolved through project.get_llm(...).
LLM_ID = "custom:iliad-plugin-conn-prod:gpt-4o"
EMBEDDING_ID = "custom:iliad-plugin-conn-prod:text-embedding-ada-002"

# Dataset names: chunks are read from the first, scores are written to the second.
output_dataset = "input_data_response_evaluated"
input_dataset = "input_data_chunked"



In [0]:
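# NOTE: legacy single-cell version of the pipeline, kept commented out for reference;
# the class-based cells in this notebook cover the same steps.
# # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE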
# import dataiku
# import pandas as pd
# import math
# from dataiku import pandasutils as pdu
# from IliadEmbeddingWrapper import DSSLLMEmbeddingWrapper
# from ragas import evaluate
# from ragas.metrics import (
#             answer_relevancy,
#             context_precision,
#             context_recall,
#             faithfulness,
#         )
# from ragas.dataset_schema import EvaluationDataset


# # Creating langchain_llm
# client = dataiku.api_client()
# project = client.get_default_project()
# LLM_ID = LLM_MODEL
# llm_model = project.get_llm(LLM_ID)
# langchain_llm = llm_model.as_langchain_llm()

# # Creating custom embeddings
# embedding_model_id = EMBEDDING_MODEL
# emb_model = project.get_llm(embedding_model_id)
# custom_embeddings = DSSLLMEmbeddingWrapper(emb_model)



# from ragas.testset import TestsetGenerator
# from ragas.embeddings import LangchainEmbeddingsWrapper
# from ragas.llms import LangchainLLMWrapper
# from langchain.schema import Document as LangChainDocument

# generator_embeddings = LangchainEmbeddingsWrapper(custom_embeddings)
# generator_llm = LangchainLLMWrapper(langchain_llm)

# intake_forms = dataiku.Dataset("input_data_chunked")
# intake_forms_df = intake_forms.get_dataframe()
# intake_forms_df = intake_forms_df

# documents = [
#                 LangChainDocument(
#                     page_content=row["chunk_text"],
#                     metadata={"id": str(index), "metadata": row["metadata"]}
#                 )
#                 for index, row in intake_forms_df.iterrows()
#             ]



# generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)

# dataset = generator.generate_with_langchain_docs(documents, testset_size=30)
# df_out = dataset.to_pandas()

# print(df_out)

# import pandas as pd

# # Assuming you have a function that takes a question and returns a response from your RAG pipeline:
# def generate_response(llm, question):
#     return llm.invoke(question)

# # Generate the response for each user_input and add it as a new column.
# df_out["response"] = df_out["user_input"].apply(lambda q: generate_response(langchain_llm, q))

# # Now your DataFrame includes a "response" column.
# print(df_out.head())
# df_out["retrieved_contexts"] = df_out["reference_contexts"]

# # Convert DataFrame to list of dictionaries and create an EvaluationDataset
# data_records = df_out.to_dict("records")
# ragas_dataset = EvaluationDataset.from_list(data_records)
# # evaluation_dataset = dataset.to_evaluation_dataset()

# # Run the evaluation with your custom embeddings, llm and column mapping
# evaluation_df = evaluate(
#     dataset=ragas_dataset,
#     llm=langchain_llm,
#     embeddings=custom_embeddings,
#     metrics = [answer_relevancy, context_precision, faithfulness, context_recall]
    
# )

# # Convert evaluation results to a pandas DataFrame and write the output
# ragas_evaluation_df = evaluation_df.to_pandas()
# ragas_evaluation = dataiku.Dataset("ragas_evaluation")
# ragas_evaluation.write_with_schema(ragas_evaluation_df)

In [3]:
# Initialize Dataiku and retrieve models
# (mirrors the first step of _evaluate, using the LLM_ID / EMBEDDING_ID globals)
initializer = DataikuInitializer()

langchain_llm = initializer.get_langchain_llm(LLM_ID)
custom_embeddings = initializer.get_custom_embeddings(EMBEDDING_ID)

/opt/dataiku/python/dataikuapi/dss/langchain/llm.py:138: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.8/migration/
  extra = pydantic.Extra.forbid
/opt/dataiku/python/dataikuapi/dss/langchain/llm.py:302: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.8/migration/
  extra = pydantic.Extra.forbid
/opt/dataiku/python/dataikuapi/dss/langchain/embeddings.py:24: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.8/migration/
  extra = pydantic.Ex

In [4]:
# Load and prepare documents
# NOTE(review): the sample is drawn without a seed, so every run picks different
# rows; sample_size=20 here differs from the 100 used inside _evaluate — confirm
# which value is intended.
doc_creator = DocumentCreator(input_dataset)
df_sample = doc_creator.load_and_sample_documents(sample_size=20)
# df_out is the sampled frame restricted to rows whose chunk_text is longer than 50 characters.
documents, df_out = doc_creator.create_langchain_documents(df_sample)

In [5]:
# Preview only the first few documents; echoing the whole list floods the cell output.
documents[:3]

[Document(metadata={'id': '1183', 'metadata': '{"file_name": "2024 RINVOQ AD Patient Engagement Map11.21.24.xlsx", "image_links": "", "chunk_id": "2024 RINVOQ AD Patient Engagement Map11.21.24.xlsx_chunk_86", "chunk_order": 86}'}, page_content=".800.274.6867 or use this Coverage Checklist: https://abbv.ie/b6cb82.  Msg&Data rates may apply.  Text HELP for help, STOP to opt out.', 'Destination URL 1': 'https://abbv.ie/rinvoq-pi', 'Destination URL 2': 'https://abbv.ie/b6cb82', 'Destination URL 3': nan, 'Char Count': nan, 'Included?': nan}, {'SMS': 'Double Opt-In (DOI)/Confirmation SMS', 'Overview': 'First text message sent after a patient opts in to receive Treatment Support Messages with a request to confirm the text opt-in (text YES) and the options for more information (text HELP) or to stop receiving texts (text STOP)', 'Message Audience': nan, 'Trigger': nan, 'Content': 'To receive Complete Treatment Support messages, reply YES to confirm.  Msg. freq varies.  Msg&Data rates may apply

In [6]:
# Generate testset from documents
testset_generator = TestsetGeneratorWrapper(langchain_llm, custom_embeddings)
generated_dataset = testset_generator.generate_testset(documents, testset_size=10)
df_generated = generated_dataset.to_pandas()
print("Generated Testset:")
# Last expression -> rich table rendering of a short preview instead of a plain-text dump.
df_generated.head()

Generating personas: 100%|██████████| 3/3 [00:04<00:00,  1.64s/it]                                           
Generating Scenarios: 100%|██████████| 2/2 [00:43<00:00, 21.52s/it]
Generating Samples: 100%|██████████| 10/10 [00:21<00:00,  2.16s/it]


Generated Testset:
                                          user_input                                 reference_contexts                                          reference                      synthesizer_name
0  Could you provide a detailed explanation of ho...  [.800.274.6867 or use this Coverage Checklist:...  The Coverage Checklist can be accessed via the...  single_hop_specifc_query_synthesizer
1  What information is available about the upadac...  [.  STOP to end.", 'Destination URL 2': "Respo...  The RINVOQ (upadacitinib) Complete Rebate Prog...  single_hop_specifc_query_synthesizer
2                  How can I get support for RINVOQ?  [.  <Variable Name>, You may be able to save o...  You can get support for RINVOQ by calling 1-80...  single_hop_specifc_query_synthesizer
3  Wht are the conditions for using RINVOQ in adu...  [.\nAdults with moderate to severe Crohn’s dis...  RINVOQ is used in adults with moderate to seve...  single_hop_specifc_query_synthesizer
4  How does the 

In [9]:
# Generate responses for each user input in the generated dataset
response_generator = RAGResponseGenerator(langchain_llm)
# Pass a copy so df_generated stays exactly as the previous cell produced it,
# whatever the helper does to its argument; this keeps the cell safe to re-run.
df_with_responses = response_generator.apply_responses_to_dataframe(df_generated.copy())
print("DataFrame with Responses:")
# Last expression -> rich table rendering instead of a plain-text dump.
df_with_responses.head()


DataFrame with Responses:
                                          user_input                                 reference_contexts                                          reference                      synthesizer_name                                           response                                 retrieved_contexts
0  Could you provide a detailed explanation of ho...  [.800.274.6867 or use this Coverage Checklist:...  The Coverage Checklist can be accessed via the...  single_hop_specifc_query_synthesizer  The Coverage Checklist is a tool often used in...  [.800.274.6867 or use this Coverage Checklist:...
1  What information is available about the upadac...  [.  STOP to end.", 'Destination URL 2': "Respo...  The RINVOQ (upadacitinib) Complete Rebate Prog...  single_hop_specifc_query_synthesizer  As of my last update, specific details about p...  [.  STOP to end.", 'Destination URL 2': "Respo...
2                  How can I get support for RINVOQ?  [.  <Variable Name>, You may be abl

In [11]:
# Convert to EvaluationDataset for evaluation
# (one dict per row, keyed by the DataFrame column names)
data_records = df_with_responses.to_dict("records")
ragas_dataset = EvaluationDataset.from_list(data_records)

# Evaluate the results
# With no metrics argument, EvaluationPipeline scores answer_relevancy,
# context_precision, faithfulness and context_recall.
evaluation_pipeline = EvaluationPipeline(langchain_llm, custom_embeddings)
evaluation_results = evaluation_pipeline.evaluate(ragas_dataset)

# Write evaluation results back to a Dataiku dataset
# NOTE(review): the recorded run logged OutputParserException and TimeoutError for
# several jobs yet still wrote 10 rows — presumably those scores are missing;
# verify the written values before relying on them.
ragas_evaluation = initializer.get_dataset(output_dataset)
ragas_evaluation.write_with_schema(evaluation_results)

  self._single_turn_ascore(sample=sample, callbacks=group_cm),
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  self._single_turn_ascore(sample=sample, callbacks=group_cm),
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  self._single_turn_ascore(sample=sample, callbacks=group_cm),
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  self._single_turn_ascore(sample=sample, callbacks=group_cm),
  return await self._ascore(row, callbacks)
  return await self._ascore(row, callbacks)
  self._single_turn_ascore(sample=sample, callbacks=group_cm),
  return await self._asco

  return await self._ascore(row, callbacks)
  self._single_turn_ascore(sample=sample, callbacks=group_cm),
  return await self._ascore(row, callbacks)
Evaluating:  82%|████████▎ | 33/40 [01:11<00:43,  6.19s/it]ERROR:ragas.executor:Exception raised in Job[10]: OutputParserException(Failed to parse NLIStatementOutput from completion {"statements": [{"statement": "If you are looking for support with RINVOQ (upadacitinib), there are several steps you can take.", "reason": "The context does not provide specific steps for support with RINVOQ.", "verdict": 0}, {"statement": "Your first point of contact should be your healthcare provider.", "reason": "The context does not mention contacting a healthcare provider as the first point of contact.", "verdict": 0}, {"statement": "Your healthcare provider can provide you with information about the medication.", "reason": "The context does not mention healthcare providers providing information about the medication.", "verdict": 0}, {"statement": "Your

ERROR:ragas.executor:Exception raised in Job[38]: OutputParserException(Failed to parse NLIStatementOutput from completion {"statements": [{"statement": "Total Inventory Management (TIM) can significantly aid Nurse Ambassadors in providing emergency help for patients taking RINVOQ by ensuring that the medication is readily available.", "reason": "The context does not provide information about Total Inventory Management (TIM) aiding Nurse Ambassadors in providing emergency help or ensuring medication availability.", "verdict": 0}, {"statement": "Total Inventory Management (TIM) can significantly aid Nurse Ambassadors in providing emergency help for patients taking RINVOQ by ensuring that the medication is efficiently distributed.", "reason": "The context does not mention Total Inventory Management (TIM) or its role in efficiently distributing medication for emergency help.", "verdict": 0}, {"statement": "Total Inventory Management (TIM) allows for real-time tracking of RINVOQ inventory 

Evaluating:  92%|█████████▎| 37/40 [02:55<01:02, 20.92s/it]ERROR:ragas.executor:Exception raised in Job[18]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[34]: OutputParserException(Failed to parse NLIStatementOutput from completion {"statements": [{"statement": "If a patient is experiencing heart attack symptoms while taking RINVOQ and is facing challenges with insurance coverage, it is crucial to prioritize immediate medical attention for the heart attack symptoms.", "reason": "The context emphasizes the importance of seeking emergency help right away if experiencing heart attack symptoms while taking RINVOQ. However, it does not mention insurance coverage challenges in relation to prioritizing medical attention.", "verdict": 0}, {"statement": "If experiencing heart attack symptoms, the patient should seek emergency medical care immediately.", "reason": "The context explicitly states to get emergency help right away if experiencing symptoms of a heart attack while takin

10 rows successfully written (dQj9l751lO)


