# Quick-start code for the TUM Hackathon

In [None]:
# Load the Allplan manual from the data package and collect the text of each loaded page.
# PyPDFLoader is imported from `langchain_community`, the package this notebook
# already uses for FAISS; the old `langchain.document_loaders` path is deprecated.
from langchain_community.document_loaders import PyPDFLoader
from ai_eval.config import global_config as glob

filename = "Allplan_2020_Manual.pdf"

loader = PyPDFLoader(f"{glob.DATA_PKG_DIR}/{filename}")
raw_data = loader.load()

# Keep only the text content of each loaded document.
texts = [page.page_content for page in raw_data]

print(f"Number of docs: {len(texts)}")

## Preprocess and load data (needed by the RAG cells below, which use `documents`):

In [None]:
# Fetch the manual through the project's Preprocessor and split it into chunks.
from ai_eval.resources.preprocessor import Preprocessor
from ai_eval.config import global_config as glob

filename = "Allplan_2020_Manual.pdf"

pre = Preprocessor()

# Location of the PDF inside the data package; read with source="local".
pdf_path = f"{glob.DATA_PKG_DIR}/{filename}"
docs = pre.fetch_documents(blob_path=pdf_path, source="local")

# `documents` holds the chunked output of the fetched documents.
documents = pre.chunk_documents(documents=docs)

print(f"Number of processed document chunks: {len(documents)}")

## Get annotated data:

In [None]:
# Read the annotated question/answer data from the data package.
from ai_eval.services.file import JSONService
from ai_eval.config import global_config as glob

# Named `json_service` rather than `json` so the notebook namespace does not
# shadow the standard-library `json` module.
json_service = JSONService(
    path="generated_qa_data_tum.json",
    root_path=glob.DATA_PKG_DIR,
    verbose=True,
)

qa_data = json_service.doRead()
print(f"Number of evaluation data samples: {len(qa_data)}")

### Fit the RAG model on the generated data and create the evaluation dataset

In [None]:
# NOTE: the alias `eval` shadows the builtin eval(); it is kept because a later
# cell refers to `eval.EvalDatasetBuilder`.
from ai_eval.resources import eval_dataset_builder as eval

# Number of annotated samples used for the evaluation run.
MAX_EVAL_SAMPLES = 20

# Take the subset once instead of building three full-length lists and
# truncating each of them afterwards.
eval_samples = qa_data[:MAX_EVAL_SAMPLES]

ground_truth_contexts = [item["context"] for item in eval_samples]
sample_queries = [item["question"] for item in eval_samples]
expected_responses = [item["answer"] for item in eval_samples]

Example: using Vertex AI models

In [None]:
# Instantiate the Vertex AI chat and embedding models.
# `glob` is the project config imported in an earlier cell.
from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings

# Both models are created for the same GCP project.
gcp_project = glob.GCP_PROJECT

chat_model = ChatVertexAI(
    project=gcp_project,
    model_name="gemini-2.5-flash",
    temperature=0.1,
    max_retries=2,
)

embedding_model = VertexAIEmbeddings(
    project=gcp_project,
    model_name="text-multilingual-embedding-002",
)

In [None]:
from langchain_community.vectorstores import FAISS
from ai_eval.resources.rag_template import FAISSRAG, TFIDFRAG

# Build a FAISS vector store from the chunked documents and the embedding model.
# In this notebook it is only referenced by the commented-out FAISSRAG variant below.
vectorstore = FAISS.from_documents(documents, embedding_model)

# 1. Create your RAG instance (swap the two lines below to switch variants)
#rag = FAISSRAG(chat_model, documents, k=3, vectorstore=vectorstore)    # some vanilla example
rag = TFIDFRAG(chat_model, documents, k=5)                 # our (naive) hackathon baseline

query = "What is Allplan?"

# Call the retrieval step on its own for the sample query.
the_relevant_docs = rag.retrieve(question=query)

# answer() is unpacked into the generated answer and the documents it returned with it.
answer, relevant_docs = rag.answer(question=query)
# Bare expression as the last line of the cell so the notebook displays both values.
answer, relevant_docs

In [None]:
# 2. Wrap the RAG instance in the evaluation dataset builder
builder = eval.EvalDatasetBuilder(rag)

# 3. Build the evaluation dataset from the contexts, queries and expected
#    responses selected earlier
dataset_inputs = {
    "input_contexts": ground_truth_contexts,
    "sample_queries": sample_queries,
    "expected_responses": expected_responses,
}
evaluation_dataset = builder.build_evaluation_dataset(**dataset_inputs)

In [None]:
from ai_eval.resources import deepeval_scorer as deep 

# Create a scorer for the evaluation dataset built in the previous cell.
scorer = deep.DeepEvalScorer(evaluation_dataset)

# Compute the scores and print the returned results.
results = scorer.calculate_scores()
print(results)

In [None]:
# Per-metric weights handed to get_overall_metrics(); the four values sum to 1.0.
custom_weights = {
    "Answer Relevancy": 0.4,
    "Faithfulness": 0.3,
    "Contextual Recall": 0.2,
    "Contextual Precision": 0.1,
}

metrics = scorer.get_overall_metrics(metric_weights=custom_weights)

# Bare expression so the notebook displays the result.
metrics

In [None]:
# Request the scorer's summary with save_to_file=True.
# NOTE(review): presumably this also writes the summary to disk — confirm the output location in DeepEvalScorer.
scorer.get_summary(save_to_file=True)

In [None]:
# Optional: uncomment the lines below to write the overall metrics to a JSON benchmark file.
# from ai_eval.services.file import JSONService
# from ai_eval.config import global_config as glob

# json = JSONService(path="simple_benchmark_tfidf_N100.json", root_path=glob.DATA_PKG_DIR, verbose=True)

# json.doWrite(scorer.get_overall_metrics())