In [None]:
from devtools import debug
from dotenv import load_dotenv

load_dotenv(verbose=True)

%load_ext autoreload
%autoreload 2

!export PYTHONPATH=":./python"

In [1]:
#!pip3 install -U langchain-community faiss-cpu langchain-openai tiktoken
#!pip3 install -U giskard

In [2]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import giskard
import pandas as pd
from langchain_community.document_loaders import PyPDFLoader

In [3]:
# Prepare vector store (in-memory Chroma) with the IPCC report

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

from python.ai_core.embeddings import EmbeddingsFactory
from python.ai_core.llm import get_llm
from python.ai_core.prompts import def_prompt
from python.ai_core.vector_store import VectorStoreFactory

# Build the vector store through the project's factory.
# Presumably the id selects an in-memory Chroma backend — confirm in
# python.ai_core.vector_store.
vs_factory = VectorStoreFactory(
    id="Chroma_in_memory",
    collection_name="giskard_test",
    embeddings_factory=EmbeddingsFactory(),
)

# Source document: the IPCC AR6 Synthesis Report (longer report), loaded by URL.
DOC = (
    "https://www.ipcc.ch/report/ar6/syr/downloads/report/IPCC_AR6_SYR_LongerReport.pdf"
)
# Chunks of up to 1000 characters with a 100-character overlap;
# add_start_index asks the splitter to record each chunk's start offset.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=100, add_start_index=True
)
# Load the PDF, split it into chunks and index the chunks in the vector store.
documents = PyPDFLoader(DOC).load()
texts = text_splitter.split_documents(documents)
vs_factory.add_documents(texts)


# Prepare QA chain
# System message: the {context} placeholder receives the retrieved excerpts.
system_prompt = """You are the Climate Assistant, a helpful AI assistant made by Giskard.
Your task is to answer common questions on climate change.
You will be given a question and relevant excerpts from the IPCC Climate Change Synthesis Report (2023).
Please provide short and clear answers based on the provided context. Be polite and helpful.

Context:
{context}"""

# User message: the {question} placeholder receives the user's question.
user_prompt = """
Question:
{question}

Your answer:
"""

# Chat model that produces the answers; the id is resolved by the project's get_llm.
llm = get_llm(llm_id="gpt_35_openai")


# Stuff the retrieved documents into the prompt, then wrap that step with the
# vector-store retriever to get the end-to-end retrieval QA chain.
# NOTE(review): create_retrieval_chain takes the retriever query from the
# "input" key, while this prompt reads {question} — callers presumably have to
# supply both keys; confirm against def_prompt.
prompt = def_prompt(system=system_prompt, user=user_prompt)
question_answer_chain = create_stuff_documents_chain(llm, prompt)
climate_qa_chain = create_retrieval_chain(
    vs_factory.vector_store.as_retriever(), question_answer_chain
)

# Example call (left disabled): chain.invoke({"input": query})



[32m2024-07-08 22:46:02.566[0m | [1mINFO    [0m | [36mpython.ai_core.vector_store[0m:[36mvector_store[0m:[36m113[0m - [1mget vector store  : Chroma_in_memory/giskard_test_multilingual_MiniLM_local[0m
[32m2024-07-08 22:46:23.933[0m | [1mINFO    [0m | [36mpython.ai_core.llm[0m:[36mget_llm[0m:[36m405[0m - [1mget LLM : gpt_35_openai - configurable: True[0m
[32m2024-07-08 22:46:26.208[0m | [1mINFO    [0m | [36mpython.ai_core.llm[0m:[36mget_configurable[0m:[36m361[0m - [1mCannot load gemini_pro_google: No module named 'langchain_google_vertexai'[0m


In [4]:
def model_predict(df: pd.DataFrame):
    """Answer every question in ``df`` with the climate QA retrieval chain.

    Giskard calls this wrapper with a pandas.DataFrame containing the input
    variables of the model and expects a list of outputs, one for each row.

    Args:
        df: DataFrame with a "question" column holding the questions to answer.

    Returns:
        A list with the chain's answer for each row of ``df``, in row order.
    """
    answers = []
    for question in df["question"]:
        # The retrieval chain takes the retriever query from "input", while the
        # QA prompt template is filled from "question": pass the text as both.
        result = climate_qa_chain.invoke({"input": question, "question": question})
        # The chain returns a dict (input, context, answer); Giskard only needs
        # the generated answer, not the whole payload.
        answers.append(result["answer"])
    return answers

In [5]:
import giskard
import os
from giskard.llm.client.openai import OpenAIClient


# Configure the LLM that Giskard itself uses: select the OpenAI API and
# register a gpt-4-turbo-preview client as the default.
giskard.llm.set_llm_api("openai")
oc = OpenAIClient(model="gpt-4-turbo-preview")
giskard.llm.set_default_client(oc)

# Wrap the prediction function so Giskard can call it.
# feature_names lists the DataFrame column that model_predict reads.
giskard_model = giskard.Model(
    model=model_predict,
    model_type="text_generation",
    name="Climate Change Question Answering",
    description="This model answers any question about climate change based on IPCC reports",
    feature_names=["question"],
)

2024-07-08 23:46:49,651 pid:8938 MainThread giskard.models.automodel INFO     Your 'prediction_function' is successfully wrapped by Giskard's 'PredictionFunctionModel' wrapper class.


In [6]:
# Run Giskard's LLM scan against the wrapped model. This is expensive: a
# previous run estimated ~365 model calls plus 148 evaluation LLM calls.
scan_results = giskard.scan(giskard_model)

2024-07-08 23:48:28,930 pid:8938 MainThread httpx        INFO     HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
🔎 Running scan…
Estimated calls to your model: ~365
Estimated LLM calls for evaluation: 148

2024-07-08 23:48:30,189 pid:8938 MainThread giskard.scanner.logger INFO     Running detectors: ['LLMBasicSycophancyDetector', 'LLMCharsInjectionDetector', 'LLMHarmfulContentDetector', 'LLMImplausibleOutputDetector', 'LLMInformationDisclosureDetector', 'LLMOutputFormattingDetector', 'LLMPromptInjectionDetector', 'LLMStereotypesDetector', 'LLMFaithfulnessDetector']
Running detector LLMBasicSycophancyDetector…


KeyboardInterrupt: 

In [None]:
# Show the scan report as the cell's rich output.
display(scan_results)

In [None]:
# Or save it to a file: writes the scan report as HTML to scan_results.html
# (a relative path, so it is resolved against the kernel's working directory).
scan_results.to_html("scan_results.html")