### LLM Observability and evaluations

In [None]:
!pip install arize-phoenix
#!pip install --upgrade scikit-learn
#!pip install langchain
#!pip install transformers
#!pip install torch
#!pip install pydantic==1.10.8
#!pip install typing-inspect==0.8.0 typing_extensions==4.5.0
!pip -q install cohere

In [None]:
import phoenix as px
import langchain
import transformers
import os
from phoenix.trace.langchain import LangChainInstrumentor
import cohere
from dotenv import load_dotenv

In [None]:
# Load variables from a .env file into the process environment so that the
# API key lookup in the following cell can find COHERE_API_KEY.
load_dotenv()

In [None]:
# Cohere API key taken from the environment; None when the variable is unset.
key = os.environ.get('COHERE_API_KEY')

In [None]:
# Create the Cohere client from the key read from the environment.
# NOTE(review): `co` is not referenced by any of the visible cells - confirm
# it is still needed.
co = cohere.Client(key)

In [None]:
# Launch the Phoenix app and keep the handle it returns in `session`.
session = px.launch_app()

In [None]:
# Instrument LangChain for tracing. By default, the traces will be exported
# to the locally running Phoenix server.
LangChainInstrumentor().instrument()

In [None]:
from langchain.llms.huggingface_pipeline import HuggingFacePipeline

In [None]:
from langchain.llms import HuggingFacePipeline

# Model id of the checkpoint to wrap for LangChain.
model_name = "bigscience/bloomz-560m"

# Build a text-generation pipeline around the checkpoint. Generation is
# capped at 64 new tokens per call and device placement is left to
# device_map="auto".
hf = HuggingFacePipeline.from_model_id(
    task="text-generation",
    model_id=model_name,
    device_map="auto",
    pipeline_kwargs=dict(max_new_tokens=64),
)

In [None]:
from langchain.prompts import PromptTemplate

# Minimal question/answer prompt with a single {question} placeholder.
template = "Question: {question}\n\nAnswer: "
prompt = PromptTemplate.from_template(template)

# Pipe the rendered prompt into the Hugging Face pipeline.
chain = prompt | hf

# Smoke-test the chain with one question and print whatever it returns.
question = "Who is Obama?"
print(chain.invoke(dict(question=question)))

In [None]:
import pandas as pd

In [None]:
# Load the dataset, decoding the file as Windows-1252 (cp1252).
# NOTE(review): read_csv treats the first row as the header by default -
# confirm all-data.csv actually has a header row.
# NOTE(review): MY_CUSTOM_TEMPLATE references {review} and {response};
# presumably the frame must expose columns with those names - verify.
df = pd.read_csv('all-data.csv', encoding='cp1252')

In [None]:
# Preview the first rows of the loaded frame to check columns and values.
df.head()

In [None]:
# Prompt template for three-way sentiment classification.
# {review} and {response} are placeholders filled in per record
# (presumably from same-named dataframe columns by llm_classify - confirm).
# The answers allowed by the final instruction must match the `rails` list
# given to the classifier, so "neutral" is listed alongside "positive" and
# "negative"; previously the prompt forbade an answer the rails allowed.
MY_CUSTOM_TEMPLATE = '''
    You are evaluating the positive, negative and neutral of the responses to query.
    [BEGIN DATA]
    ************
    [review]: {review}
    ************
    [Response]: {response}
    [END DATA]


    Please focus on the tone of the response.
    Your answer must be a single word, either "positive", "negative" or "neutral"
    '''

In [None]:
from phoenix.experimental.evals import llm_classify

# Allowed output labels. The rails constrain the classifier output to this
# fixed set: they are meant to strip noise such as ",,," or "..." from the
# raw model answer so that only one of these three labels is returned.
rails = ['neutral', 'positive', 'negative']

# Classify every row of `df` with the custom template.
# With provide_explanation=True the result is a DataFrame with the columns
# 'label' and 'explanation'.
# NOTE(review): `hf` is a LangChain HuggingFacePipeline; confirm llm_classify
# accepts it as `model` rather than requiring a Phoenix eval model wrapper.
# NOTE(review): despite its name, relevance_classifications holds the
# sentiment labels produced by this template, not relevance judgements.
relevance_classifications = llm_classify(
    dataframe=df,
    model=hf,
    template=MY_CUSTOM_TEMPLATE,
    rails=rails,
    provide_explanation=True,
)
#relevance_classifications is a Dataframe with columns 'label' and 'explanation'

In [None]:
from phoenix.experimental.evals import llm_generate

In [None]:
# Bare expression: in a notebook this only displays the imported object;
# llm_generate is not called here.
llm_generate