In [1]:
def evaluate_with_llm(model, prompt, generated_text):
    """Ask an LLM to score ``generated_text`` on five quality aspects.

    One evaluation prompt per aspect is sent to ``model``; each reply is
    converted to a float.

    Args:
        model: A runnable that can be piped after a ``ChatPromptTemplate``
            and whose ``invoke`` result exposes a ``.content`` attribute.
        prompt: The original prompt/question the text is supposed to answer.
        generated_text: The text to be evaluated.

    Returns:
        dict mapping ``'relevance'``, ``'clarity'``, ``'coherence'``,
        ``'details'`` and ``'suitability'`` to the float score parsed from
        the model's reply (the prompts ask for a value between 0 and 1).

    Raises:
        ValueError: If a reply contains no parsable number at all.
    """
    # Local import so the block does not depend on a file-level import.
    import re

    # Matches the first int/float literal (optional sign and exponent).
    number_pattern = re.compile(r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?")

    def parse_score(reply):
        """Convert a model reply to a float, tolerating surrounding text."""
        try:
            # Fast path: the model obeyed "numerical score only".
            return float(reply)
        except ValueError:
            # Models often answer "Score: 0.8" or "0.8." despite the
            # instruction; salvage the first number instead of crashing.
            found = number_pattern.search(reply)
            if found is None:
                raise
            return float(found.group())

    # Template for creating evaluation queries
    def create_evaluation_query(template, **kwargs):
        query = ChatPromptTemplate.from_template(template)
        chain = query | model
        return parse_score(chain.invoke(kwargs).content)

    # Aspect name -> evaluation prompt, in the order the queries are issued.
    templates = {
        # Evaluate Relevance
        'relevance': "Given the context provided by the following prompt: '{prompt}', please evaluate on a scale from 0 to 1, where 1 is highly relevant and 0 is not relevant at all, how relevant is this generated response: '{generated_text}'? Provide a numerical score only.",
        # Evaluate Clarity
        'clarity': "How clear and easily understandable is this text: '{generated_text}'? Rate its clarity on a scale from 0 to 1, where 1 indicates that the text is very clear and 0 indicates that the text is very unclear. Provide a numerical score only.",
        # Evaluate Coherence
        'coherence': "On a scale from 0 to 1, with 1 being highly coherent and 0 being not coherent at all, how well do the ideas in this generated text: '{generated_text}' flow together? Consider if the text makes logical sense as a whole. Provide a numerical score only.",
        # Evaluate Detail and Exhaustiveness
        'details': "Assessing the detail and exhaustiveness relative to the prompt '{prompt}', how thoroughly does this generated text: '{generated_text}' cover the topic? Rate on a scale from 0 to 1, where 1 is very detailed and exhaustive, and 0 is not detailed at all. Provide a numerical score only.",
        # Evaluate Suitability as an Answer
        'suitability': "Evaluate the suitability of this generated text: '{generated_text}' as an answer to the original prompt '{prompt}'. On a scale from 0 to 1, where 1 is a perfect answer and 0 is completely unsuitable, provide a numerical score only.",
    }

    evaluations = {}
    for aspect, template in templates.items():
        # Both variables are always supplied, even to templates that only
        # reference one of them (as the original calls did).
        evaluations[aspect] = create_evaluation_query(
            template, prompt=prompt, generated_text=generated_text
        )

    return evaluations

In [2]:
def critique(model, prompt, generated_text):
    """Compute a weighted-average quality score for ``generated_text``.

    Obtains per-aspect scores from ``evaluate_with_llm``, prints them, and
    combines them using fixed per-aspect weights (aspects without an entry
    in the weight table count with weight 1).

    Returns:
        A two-element list ``[weighted_average, evaluations]`` where
        ``evaluations`` is the per-aspect score dict. The average is 0 when
        the total weight is not positive.
    """
    evaluation_weights = {
        'relevance': 3,
        'clarity': 1,
        'coherence': 0.5,
        'details': 1.5,
        'suitability': 2,
    }

    evaluations = evaluate_with_llm(model, prompt, generated_text)
    print("Evaluations:", evaluations)

    # Weight of each evaluated aspect, aligned with the dict's iteration order.
    weights = [evaluation_weights.get(name, 1) for name in evaluations]

    # Numerator and denominator of the weighted mean.
    weighted_sum = sum(
        evaluations[name] * weight for name, weight in zip(evaluations, weights)
    )
    total_weight = sum(weights)

    if total_weight > 0:
        weighted_average = weighted_sum / total_weight
    else:
        weighted_average = 0

    return [weighted_average, evaluations]

In [6]:
import cohere
from langchain_community.utilities import SerpAPIWrapper
import os
# Module-level Cohere client, used by get_reranked_result for reranking.
# The API key is read from the COHERE_API_KEY environment variable; the
# os.environ[...] lookup raises KeyError if that variable is not set.
co = cohere.Client(os.environ["COHERE_API_KEY"])


def get_reranked_result(query, top_n=1):
    """Return the texts of the ``top_n`` reranked documents for ``query``.

    Queries the module-level ``kb`` with ``query``, extracts the document
    texts from the matches via ``extract_documents_texts``, and reranks
    them with the module-level Cohere client ``co``.

    Args:
        query: The query string used both for lookup and for reranking.
        top_n: Number of results requested from the reranker (default 1).

    Returns:
        A list with the ``'text'`` field of each rerank result's document,
        in the order the reranker yields them.
    """
    candidates = extract_documents_texts(kb.query([Query(text=query)]))
    ranked = co.rerank(
        model="rerank-english-v2.0",
        query=query,
        documents=candidates,
        top_n=top_n,
    )
    # Keep only the 'text' field of the document attached to each result.
    return [hit.document['text'] for hit in ranked]

In [7]:
from typing import List, Dict, Any, Tuple
from collections import defaultdict

class QueryDetail:
    """State for one query: its content, critique result and flags.

    Attributes are filled in by ``add_response``; until then they hold the
    empty/zero/False defaults assigned in ``__init__``.
    """

    def __init__(self, query: str):
        self.query = query
        # Response text(s) collected for this query.
        self.content: List[str] = []
        # Weighted critique score and the per-aspect breakdown behind it.
        self.critique_score: float = 0.0
        self.critique_details: Dict[str, Any] = {}
        # Whether retrieval was used / whether a search fallback was triggered.
        self.retrieval_needed: bool = False
        self.search_needed: bool = False

    def add_response(self, model, search) -> None:
        """Produce a response for the query, critique it, and search if weak.

        The response comes from reranked retrieval when
        ``is_retrieval_needed`` says so; it is then scored with
        ``critique``. A score below 0.5 sets ``search_needed`` and triggers
        ``search_and_add_results``.
        """
        if not is_retrieval_needed(model, self.query):
            # NOTE(review): hard-coded placeholder text, not a model output.
            answer = "Some generated answer"
            self.retrieval_needed = False
        else:
            answer = " ".join(get_reranked_result(self.query, top_n=3))
            self.retrieval_needed = True

        self.content.append(answer)

        # critique returns a [score, details] pair.
        self.critique_score, self.critique_details = critique(model, self.query, answer)
        self.search_needed = self.critique_score < 0.5

        if not self.search_needed:
            return
        self.search_and_add_results(search)

    def search_and_add_results(self, search) -> None:
        """Run ``search`` on the query and extend ``content`` with the results.

        The raw search output is parsed with ``str_to_json``; a falsy parse
        result is treated as an empty list.
        """
        parsed = str_to_json(search.run(self.query))
        self.content.extend(parsed or [])

class QueryProcessor:
    """Runs a batch of queries through response, critique and consolidation."""

    def __init__(self, model, search, queries: List[str]):
        self.model = model
        self.search = search
        # Wrap every raw query string in its own QueryDetail record.
        self.queries = [QueryDetail(text) for text in queries]

    def process_queries(self) -> List[QueryDetail]:
        """Process every query and return the list of QueryDetail objects.

        For each query, ``add_response`` is called. If that flagged
        ``search_needed``, the accumulated content is merged with
        ``consolidate``, replaces the content as a single item, and is
        critiqued again to refresh the score and details.
        """
        for detail in self.queries:
            detail.add_response(self.model, self.search)
            if not detail.search_needed:
                continue

            merged = consolidate(self.model, detail.content)
            detail.content = [merged]
            # Re-score the consolidated answer; critique yields [score, details].
            detail.critique_score, detail.critique_details = critique(
                self.model, detail.query, merged
            )
        return self.queries

def advanced_rag_query(model, query: str, num_queries: int) -> List[QueryDetail]:
    """Expand ``query`` into sub-queries and process each one.

    Creates a ``SerpAPIWrapper`` search tool, asks ``generate_queries`` for
    sub-queries (keeping at most ``num_queries`` of them), and runs them
    through a ``QueryProcessor``.

    Returns:
        The list of processed ``QueryDetail`` objects.
    """
    # The search tool is created first, before any query generation happens.
    web_search = SerpAPIWrapper()
    sub_queries = generate_queries(model, query, num_queries)[:num_queries]
    return QueryProcessor(model, web_search, sub_queries).process_queries()