# Evaluation of OpenContext Documentation with Arize

In [None]:
%pip install --quiet langchain pypdf pymongo langchain-openai tiktoken google-cloud-secret-manager

In [None]:
%pip install --upgrade google-auth



In [None]:
import os

# Show the metadata of the installed LangChain package in the cell output.
%pip show langchain

# Print the Python version of the running kernel.
from platform import python_version
print(python_version())

Name: langchain
Version: 0.1.11
Summary: Building applications with LLMs through composability
Home-page: https://github.com/langchain-ai/langchain
Author: 
Author-email: 
License: MIT
Location: /usr/local/lib/python3.10/dist-packages
Requires: aiohttp, async-timeout, dataclasses-json, jsonpatch, langchain-community, langchain-core, langchain-text-splitters, langsmith, numpy, pydantic, PyYAML, requests, SQLAlchemy, tenacity
Required-by: 
3.10.12


In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# Pinned to the release shown in this cell's recorded install log, because the
# notebook imports release-specific module paths (phoenix.experimental.evals,
# phoenix.trace.langchain).
%pip install arize-phoenix==3.9.0

Collecting arize-phoenix
  Using cached arize_phoenix-3.9.0-py3-none-any.whl (1.2 MB)
Collecting ddsketch (from arize-phoenix)
  Downloading ddsketch-2.0.4-py3-none-any.whl (18 kB)
Collecting hdbscan<1.0.0,>=0.8.33 (from arize-phoenix)
  Downloading hdbscan-0.8.33.tar.gz (5.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting openinference-instrumentation-langchain>=0.1.11 (from arize-phoenix)
  Downloading openinference_instrumentation_langchain-0.1.11-py3-none-any.whl (13 kB)
Collecting openinference-instrumentation-llama-index>=1.1.1 (from arize-phoenix)
  Downloading openinference_instrumentation_llama_index-1.1.1-py3-none-any.whl (15 kB)
Collecting openinference-instrumentation-openai>=0.1.3 (from arize-phoenix)
  Downloading

In [None]:
from urllib.request import urlopen

import nest_asyncio
import numpy as np
import pandas as pd
import phoenix as px
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers import KNNRetriever
from phoenix.experimental.evals import (
    HallucinationEvaluator,
    OpenAIModel,
    QAEvaluator,
    RelevanceEvaluator,
    run_evals,
)
from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents
from phoenix.trace import DocumentEvaluations, SpanEvaluations
from phoenix.trace.langchain import LangChainInstrumentor
from tqdm import tqdm

nest_asyncio.apply()  # needed for concurrent evals in notebook environments



In [None]:
import os
from google.cloud import secretmanager
from google.colab import auth
from google.colab import drive

In [None]:
#drive.mount('/content/gdrive')

In [None]:
def load_secrets(secrets_name, project_id):
  """Return the latest version of a Secret Manager secret as text.

  Args:
    secrets_name: Name of the secret inside the project.
    project_id: Google Cloud project that owns the secret.

  Returns:
    The secret payload decoded as UTF-8.
  """
  # The Colab user is authenticated on every call, before the client is built.
  auth.authenticate_user()
  manager = secretmanager.SecretManagerServiceClient()
  # Resource path of the most recent version of the requested secret.
  latest_version = "projects/{}/secrets/{}/versions/latest".format(project_id, secrets_name)
  payload = manager.access_secret_version(request={"name": latest_version}).payload
  return payload.data.decode('UTF-8')

In [None]:
# Google Cloud project whose Secret Manager entries are read below.
project_id = 'botchagalupep1'

# OpenAI key: kept in a variable and also exported through the environment.
openai_api_key = load_secrets(secrets_name="openai_api_key", project_id=project_id)
os.environ['OPENAI_API_KEY'] = openai_api_key

# MongoDB Atlas connection string and LangSmith API key.
MONGODB_ATLAS_CLUSTER_URI = load_secrets(secrets_name="MDB_CLUSTER0_URI", project_id=project_id)
langsmith_api_key = load_secrets(secrets_name="langsmith_api_key", project_id=project_id)

PermissionDenied: 403 Permission 'secretmanager.versions.access' denied for resource 'projects/botchagalupep1/secrets/openai_api_key/versions/latest' (or it may not exist).

In [None]:
# Connect the notebook to LangSmith: enable tracing and set the API endpoint.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# The key is the `langsmith_api_key` variable, which this notebook fills from
# Secret Manager through load_secrets(); it is not hard-coded here.
os.environ['LANGCHAIN_API_KEY'] = langsmith_api_key

# Project name under which the traces are filed.
os.environ['LANGCHAIN_PROJECT'] = 'opencontext-1'

# Print the non-secret settings to verify them.
print(os.environ.get('LANGCHAIN_TRACING_V2'))
print(os.environ.get('LANGCHAIN_ENDPOINT'))
#print(os.environ.get('LANGCHAIN_API_KEY'))  # Deliberately left disabled: printing would expose the API key in the saved output.
print(os.environ.get('LANGCHAIN_PROJECT'))

In [None]:
from pymongo import MongoClient

# Names of the database, the collection and the vector search index.
DB_NAME = "Cluster0"
COLLECTION_NAME = "OpenContext0"
ATLAS_VECTOR_SEARCH_INDEX_NAME = "vector_index"

# MongoDB Python client built from the connection string loaded earlier,
# plus a handle on the target collection.
client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)
MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]

In [None]:
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
from langchain_openai import OpenAIEmbeddings

# Vector store over "<database>.<collection>", queried through the named
# search index and embedding text with OpenAI's text-embedding-3-small model.
vector_search = MongoDBAtlasVectorSearch.from_connection_string(
    connection_string=MONGODB_ATLAS_CLUSTER_URI,
    namespace=f"{DB_NAME}.{COLLECTION_NAME}",
    embedding=OpenAIEmbeddings(
        model="text-embedding-3-small",
        dimensions=1536,
        disallowed_special=(),
    ),
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
)

# Similarity Search with Score

In [None]:
# Close any Phoenix app before launching a new one
# (presumably so that re-running this cell does not stack sessions; confirm).
px.close_app()
session = px.launch_app()
# Instrument LangChain; the later cells read the resulting spans back through
# px.Client() (presumably this is what makes the chain runs traceable; confirm).
LangChainInstrumentor().instrument()

# Question Answering

## https://github.com/Arize-ai/phoenix/tree/main/tutorials

# Use this example to evaluate your model

In [None]:
# Evaluation questions asked against the OpenContext documentation.
queries = [
    'What is a CodeComponent',  # spelling fixed (was 'CodeCompnent')
    'What is a SaaS User',
    'What is a Service',
    'What is a Location',
    'What is a Datacenter',
    'List all the entities',
    'List code examples',
    'List yaml examples names',
    'list the yaml example crates-erp'
]

# Use the langchain_openai package (already installed and used for the
# embeddings) instead of the deprecated langchain.chat_models import path.
from langchain_openai import ChatOpenAI

# temperature=0 keeps the answers as repeatable as the model allows.
llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0, max_tokens=5000)

# Retrieval-augmented QA chain over the Atlas vector store; the source
# documents are returned alongside each answer.
qa_chain = RetrievalQA.from_chain_type(
    llm, retriever=vector_search.as_retriever(), return_source_documents=True)

for query in queries:
  # .invoke() replaces the deprecated direct call qa_chain({...}); the result
  # dict still carries both the input 'query' and the generated 'result'.
  answer = qa_chain.invoke({"query": query})
  print("Query:", answer['query'])
  print("Result:", answer['result'])

In [None]:
# Read the traced data back from Phoenix: one frame for the evaluators that
# run on queries_df, one for the retrieved documents.
phoenix_client = px.Client()
queries_df = get_qa_with_reference(phoenix_client)
retrieved_documents_df = get_retrieved_documents(phoenix_client)

In [None]:
# One "gpt-4-turbo-preview" judge model backs all three evaluators.
eval_model = OpenAIModel(model_name="gpt-4-turbo-preview")
hallucination_evaluator = HallucinationEvaluator(eval_model)
qa_correctness_evaluator = QAEvaluator(eval_model)
relevance_evaluator = RelevanceEvaluator(eval_model)

# Evals over the question/answer frame; the results are unpacked in the same
# order as the evaluators are listed.
span_evaluators = [hallucination_evaluator, qa_correctness_evaluator]
hallucination_eval_df, qa_correctness_eval_df = run_evals(
    dataframe=queries_df,
    evaluators=span_evaluators,
    provide_explanation=True,
)

# Evals over the retrieved documents; a single evaluator, so only the first
# result is kept.
relevance_eval_df = run_evals(
    dataframe=retrieved_documents_df,
    evaluators=[relevance_evaluator],
    provide_explanation=True,
)[0]

# Log all three result sets to Phoenix in one call.
evaluations_to_log = [
    SpanEvaluations(eval_name="Hallucination", dataframe=hallucination_eval_df),
    SpanEvaluations(eval_name="QA Correctness", dataframe=qa_correctness_eval_df),
    DocumentEvaluations(eval_name="Relevance", dataframe=relevance_eval_df),
]
px.Client().log_evaluations(*evaluations_to_log)

In [None]:
# Fetch the evaluations from Phoenix; as the last expression of the cell, the
# result is shown as the cell output.
px.Client().get_evaluations()


In [None]:
# Pull the recorded spans into a dataframe for inspection.
trace_df = px.Client().get_spans_dataframe()
# Preview only the first rows instead of dumping the whole frame into the
# notebook output; the guard covers the case where no frame came back.
trace_df.head() if trace_df is not None else None

In [None]:
import pandas as pd
from phoenix.trace.dsl import SpanQuery

# Root spans (no parent): input holds the user's question, output holds the
# LLM's answer.
query_for_root_span = (
    SpanQuery()
    .where("parent_id is None")
    .select(input="input.value", output="output.value")
)

# RETRIEVER spans: parent_id is selected under the name span_id, which makes
# the parent_id values the index of the output dataframe. The content of the
# retrieved documents is concatenated into a "reference" column.
query_for_retrieved_documents = (
    SpanQuery()
    .where("span_kind == 'RETRIEVER'")
    .select(span_id="parent_id")
    .concat("retrieval.documents", reference="document.content")
)

# Run both queries, then perform an inner join of the two result frames on
# their index.
span_frames = px.Client().query_spans(
    query_for_root_span,
    query_for_retrieved_documents,
)
pd.concat(span_frames, axis=1, join="inner")