In [None]:
# Install the Cohere rerank postprocessor integration and the llama-index
# package into the kernel's environment.
# NOTE(review): the import cell below also uses chromadb, nest_asyncio, nltk and
# llama_index.vector_stores.chroma, none of which are installed here —
# presumably they are already present in the environment; confirm.
%pip install llama-index-postprocessor-cohere-rerank
%pip install llama-index

In [None]:
import os.path
import shutil
import logging
import sys
import chromadb
import openai
import time
import nltk
import nest_asyncio
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.extractors import (
    TitleExtractor,
    QuestionsAnsweredExtractor,
    KeywordExtractor,
    BaseExtractor,
    SummaryExtractor)
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.response.notebook_utils import (
    display_source_node,
    display_response,
)
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from llama_index.postprocessor.cohere_rerank import CohereRerank
from sherpa_reader import LLMSherapaReader
from llama_index.core import SimpleDirectoryReader
from document_sorter import DocumentSorter

# Allow nested use of the asyncio event loop: a later cell calls
# asyncio.run(...) from inside the notebook kernel, which typically already has
# a loop running.
nest_asyncio.apply()


In [None]:
# Directory used for ChromaDB's on-disk storage. The cells below delete this
# directory if it exists and then open a persistent Chroma client on it.
PERSIST_DIR = "./chromadb"

## Set LLM

In [None]:
# Chat model shared by the query engine and by the question/context-pair
# generation used for retrieval evaluation.
llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0.1,
    max_tokens=1024,
)

### Delete Previous DB

In [None]:
# Start from a clean slate: remove whatever a previous run left in PERSIST_DIR.
if os.path.exists(PERSIST_DIR):
    shutil.rmtree(PERSIST_DIR)

## Instantiate ChromaDB

In [None]:
# Open a persistent Chroma client whose data lives under PERSIST_DIR.
chroma_client = chromadb.PersistentClient(path=PERSIST_DIR)
# Fetch the "class_materials2" collection, creating it if it does not exist yet.
chroma_collection = chroma_client.get_or_create_collection("class_materials2")
# Wrap the Chroma collection in the llama-index vector store adapter.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
# Storage context built on that vector store; passed to the index cell later on.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

## Load Data

In [None]:
# Read every file under "data", sending .pdf files through LLMSherapaReader,
# then let DocumentSorter split the result into four groups. Only `info` is
# consumed by the cells visible in this notebook.
print("Loading Data")
pdf_reader = LLMSherapaReader()
directory_reader = SimpleDirectoryReader("data", file_extractor={".pdf": pdf_reader})
documents = directory_reader.load_data()
info, questions, garbage, broken = DocumentSorter().sort(documents)
print("Data Loaded")

## Data Loading & Ingestion Pipeline

In [None]:
# Ingestion: split documents into per-sentence nodes that carry a window of
# neighbouring sentences in metadata, embed them, and hand the pipeline the
# Chroma-backed vector store.
sentence_window_parser = SentenceWindowNodeParser.from_defaults(
    # how many sentences on either side to capture
    window_size=3,
    # the metadata key that holds the window of surrounding sentences
    window_metadata_key="window",
    # the metadata key that holds the original sentence
    original_text_metadata_key="original_sentence",
)
ingest_embed_model = OpenAIEmbedding(model_name="text-embedding-3-large")

pipeline = IngestionPipeline(
    transformations=[
        sentence_window_parser,
        # Optional LLM-based metadata extractors, currently disabled:
        # SummaryExtractor(summaries=["prev", "self", "next"], llm=llm),
        # KeywordExtractor(keywords=3, llm=llm),
        ingest_embed_model,
    ],
    vector_store=vector_store,
)

# Only the documents sorted into `info` are ingested.
nodes_post_pipe = pipeline.run(documents=info)


In [None]:
# Dump the text of every ingested node to a file for manual inspection.
# The encoding is pinned to UTF-8 so text containing non-ASCII characters does
# not fail on platforms whose default encoding cannot represent it.
with open("nodes_post_pipe.txt", "w", encoding="utf-8") as file:
    for node in nodes_post_pipe:
        # Blank line between nodes keeps them visually separated.
        file.write(node.text + "\n\n")

## Indexing

In [None]:
# Build the index on top of the vector store populated by the ingestion
# pipeline, using the same embedding model name as at ingestion time.
index = VectorStoreIndex.from_vector_store(
    vector_store,
    embed_model=OpenAIEmbedding(model_name="text-embedding-3-large"),
    storage_context=storage_context,
)

## Querying

In [None]:
# Postprocessors, listed in the order they are passed to the query engine:
# first the "window" metadata replacement, then the Cohere reranker (top 3).
window_post_processor = MetadataReplacementPostProcessor(
    target_metadata_key="window"
)
# The Cohere key is read from the environment; it is None when the variable is unset.
cohere_api_key = os.getenv("COHERE_API_KEY")
cohere_rerank = CohereRerank(api_key=cohere_api_key, top_n=3)

# Retrieve 10 candidates by similarity, then run the postprocessors on them.
query_engine = index.as_query_engine(
    similarity_top_k=10,
    llm=llm,
    node_postprocessors=[window_post_processor, cohere_rerank],
)

# Smoke-test query, rendered together with its source nodes and their metadata.
response = query_engine.query("what information can you tell me about the textbook?")
display_response(
    response=response,
    source_length=1000,
    show_source=True,
    show_source_metadata=True,
)

## Evaluate RAG Embeddings

In [None]:
from llama_index.core.evaluation import generate_question_context_pairs
from llama_index.core.evaluation import RetrieverEvaluator

In [None]:
# Plain top-3 retriever built straight from the index (no postprocessors are
# passed here), scored on MRR and hit rate.
retriever = index.as_retriever(similarity_top_k=3)

retrieval_metrics = ["mrr", "hit_rate"]
retriever_evaluator = RetrieverEvaluator.from_metric_names(
    retrieval_metrics,
    retriever=retriever,
)

# Have the LLM generate one question per ingested node to form the evaluation set.
qa_dataset = generate_question_context_pairs(
    nodes_post_pipe,
    llm=llm,
    num_questions_per_chunk=1,
)

In [None]:
# Run the retriever evaluator over the generated question/context dataset.
# The top-level `await` relies on the notebook's support for awaiting in cells.
eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)

In [None]:
# Average each retrieval metric over all evaluation results.
mrr_values = [result.metric_dict["mrr"].score for result in eval_results]
mrr_score = sum(mrr_values) / len(mrr_values)
print(f"mrr_score: {mrr_score}")

hit_rate_values = [result.metric_dict["hit_rate"].score for result in eval_results]
hit_rate_score = sum(hit_rate_values) / len(hit_rate_values)
print(f"hit_rate_score: {hit_rate_score}")

## Evaluate Model Responses

In [None]:
# NOTE(review): spacy is not imported in any cell visible here; presumably it
# is needed indirectly by the evaluation code below — confirm before keeping.
%pip install spacy

In [None]:

from llama_index.llms.openai import OpenAI
from llama_index.core.evaluation import FaithfulnessEvaluator

In [None]:
# gpt-4
gpt4 = OpenAI(temperature=0, model="gpt-4")

evaluator_gpt4 = FaithfulnessEvaluator(llm=gpt4)

In [None]:
from llama_index.core.evaluation import DatasetGenerator


# Build a question generator from the `info` documents and ask it for
# evaluation questions.
question_generator = DatasetGenerator.from_documents(info)
# 30 is passed positionally — presumably the number of questions to generate
# (the scoring cell below also slices the result to 30); confirm against the API.
eval_questions = question_generator.generate_questions_from_nodes(30)

In [None]:
import asyncio


def evaluate_query_engine(query_engine, questions):
    """Query the engine with every question and count the passing responses.

    All questions are issued concurrently through ``query_engine.aquery``; each
    response is then graded, one at a time, by the module-level
    ``evaluator_gpt4``.

    Args:
        query_engine: Object exposing an awaitable ``aquery(question)`` method.
        questions: Iterable of questions to ask.

    Returns:
        A ``(total_correct, total)`` tuple: the number of responses whose
        evaluation result has a truthy ``passing`` attribute, and the number of
        responses received.
    """

    async def _query_all():
        # Gather inside a coroutine so asyncio.run() is handed a real coroutine
        # (it rejects a bare gather() future unless asyncio has been patched)
        # and the awaitables are created while the event loop is running.
        return await asyncio.gather(*(query_engine.aquery(q) for q in questions))

    results = asyncio.run(_query_all())
    print("finished query")

    total_correct = 0
    for response in results:
        # evaluate with gpt 4
        if evaluator_gpt4.evaluate_response(response=response).passing:
            total_correct += 1

    return total_correct, len(results)

In [None]:
# Grade the query engine on (at most) the first 30 generated questions and
# report how many responses passed the GPT-4 faithfulness check.
correct, total = evaluate_query_engine(query_engine, eval_questions[:30])

print(f"score: {correct}/{total}")

In [None]:
from llama_index.core.evaluation import RelevancyEvaluator, FaithfulnessEvaluator, BatchEvalRunner

queries = list(qa_dataset.queries.values())[:10]


faithfulness_evaluator = FaithfulnessEvaluator()
relevancy_evaluator = RelevancyEvaluator()

runner = BatchEvalRunner(
{"faithfulness": faithfulness_evaluator, "relevancy": relevancy_evaluator},
workers=8,
)
eval_results = await runner.aevaluate_queries(
    query_engine, queries=queries
)
faithfulness_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['faithfulness'])
print(f"faithfulness_score: {faithfulness_score}")

relevancy_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['relevancy'])
print(f"relevancy_score: {relevancy_score}")
