In [1]:
import os
import sys
import ast
from dotenv import load_dotenv, find_dotenv
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_postgres import PGVector
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from datasets import Dataset

# Add the directory two levels up (relative to the working directory) to the
# module search path.
sys.path.append('../..')
# Read the local .env file so its entries are available through os.environ.
_ = load_dotenv(find_dotenv())

# Password for the vector database; a missing variable raises KeyError here.
PG_VECTOR_PWD = os.environ["PG_VECTOR_PWD"]

# Embedding model handed to the vector store below and reused for evaluation.
model_embedding = HuggingFaceEmbeddings(model_name="multi-qa-mpnet-base-dot-v1")

# Connection URL, written piecewise so each component is easy to spot.
connection = (
    "postgresql+psycopg://"
    f"vector_user:{PG_VECTOR_PWD}"
    "@localhost:5431/vector_db"
)
collection_name = "udlbook"

vector_store = PGVector(
    connection=connection,
    collection_name=collection_name,
    embeddings=model_embedding,
    use_jsonb=True,
)

# Retriever over the collection, configured with k=3 results per query.
retriever = vector_store.as_retriever(search_kwargs=dict(k=3))

# Chat model used for answering and, later, as the evaluation judge.
# temperature=0 is set; no token limit or request timeout is configured.
# NOTE(review): hosted model ids get retired over time - confirm this one is
# still served by Groq before re-running.
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0,
    max_retries=2,
    max_tokens=None,
    timeout=None,
)

# Prompt text for the question-answering chain. The `{question}` and
# `{context}` placeholders are filled from the mapping at the head of
# `qa_rag_chain`.
# NOTE(review): "upto" in the prompt is a typo for "up to"; it is left as-is
# because editing the text changes what the model receives.
prompt = """You are an expert in the book 'Understanding Deep Learning'. 
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Keep the answer upto 5 lines unless the user asks for more information

Question:
{question}

Context:
{context}

Answer:
"""

# Wrap the raw string as a chat prompt template so it can be piped into the model.
prompt_template = ChatPromptTemplate.from_template(prompt)

def format_docs(docs):
    """Merge the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank
        line; an empty string when ``docs`` yields nothing.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Inputs for the prompt: the incoming question is passed through unchanged,
# and is also sent to the retriever, whose documents are flattened to text.
retrieval_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build inputs -> fill prompt -> call the model -> plain string.
qa_rag_chain = retrieval_inputs | prompt_template | llm | StrOutputParser()

# One-off invocation to check that the chain runs end to end.
query = 'What is machine learning'
result = qa_rag_chain.invoke(query)

  from tqdm.autonotebook import tqdm, trange


In [17]:
import pandas as pd
from ragas import evaluate
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)
from ragas.run_config import RunConfig

In [3]:
# Load the evaluation set; later cells read its `question`, `contexts` and
# `ground_truth` columns.
df_data = pd.read_csv(filepath_or_buffer="../files/evaluation_dataset.csv")

In [77]:
# Independent copy limited to the columns used for evaluation, so later edits
# do not touch `df_data`.
base_columns = ['question', 'contexts', 'ground_truth']
baseset = df_data[base_columns].copy()

In [78]:
# Turn each `contexts` cell from its string form (as read from the CSV) into
# the Python literal it spells out.
# Only string cells are parsed, so re-running this cell after the column has
# already been converted is a no-op; `ast.literal_eval` raises ValueError when
# handed a value that is not a string or AST node.
baseset['contexts'] = baseset['contexts'].apply(
    lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell
)

In [13]:
# Run the RAG chain once per evaluation question, echoing each question and
# answer, and keep the answers in row order.
answers_lst = []

for question in baseset['question']:
    print(question)
    result = qa_rag_chain.invoke(question)
    print(result)
    answers_lst.append(result)
    print()  # blank line between question/answer pairs

How are node embeddings updated in a simple Graph CNN layer?
In a simple Graph CNN layer, node embeddings are updated by: 
1. aggregating neighboring nodes to form a vector, 
2. applying a linear transformation to the aggregated vector and the original node, 
3. adding these together with a bias, 
4. and finally applying a nonlinear activation function.

What is an adjacency matrix in graph representation, and how is it defined?
An adjacency matrix in graph representation is a matrix A where entry (m, n) is set to one if there is an edge between nodes m and n, and zero otherwise. It is an N × N matrix representing the graph structure. For undirected graphs, this matrix is always symmetric.

What is the relationship between the least squares criterion and the fit of a linear model?
The least squares criterion minimizes the sum of the squares of the deviations between the model prediction and the true output values. A good fit results in small squared deviations, while a bad fit results 

In [81]:
# Attach the generated answers as the `answer` column (one entry per row).
baseset = baseset.assign(answer=answers_lst)

In [86]:
# Select the four evaluation columns and convert them to a `datasets.Dataset`.
# NOTE(review): `contexts` holds the passages stored in the evaluation CSV, not
# the documents `retriever` returned while the answers were generated - confirm
# that this is the intended basis for the context and faithfulness scores.
eval_columns = ['question', 'contexts', 'answer', 'ground_truth']
datatest = Dataset.from_pandas(baseset[eval_columns])

In [88]:
# Score the generated answers with four ragas metrics, using the same chat
# model and embedding model as the RAG chain.
#
# A previous run of this cell logged `TimeoutError` for several jobs, so its
# aggregate scores may not cover every question/metric pair. A longer per-job
# timeout and fewer concurrent workers give each judge call more room.
# NOTE(review): the values below are starting points - tune them to the
# provider's rate limits.
evaluation_run_config = RunConfig(
    timeout=300,    # seconds allowed per job
    max_workers=4,  # concurrent jobs
)

result = evaluate(
    dataset=datatest,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
    ],
    llm=llm,
    embeddings=model_embedding,
    run_config=evaluation_run_config,
)

result

Evaluating:   0%|          | 0/36 [00:00<?, ?it/s]

Exception raised in Job[29]: TimeoutError()
Exception raised in Job[19]: TimeoutError()
Exception raised in Job[20]: TimeoutError()
Exception raised in Job[3]: TimeoutError()
Exception raised in Job[23]: TimeoutError()


{'context_precision': 1.0000, 'faithfulness': 0.9125, 'answer_relevancy': 0.9474, 'context_recall': 1.0000}