In [2]:
import os
import sys
from urllib.parse import quote

from dotenv import load_dotenv, find_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_postgres import PGVector

# Add the directory two levels above the working directory to the import path.
sys.path.append("../..")

# Load variables from the nearest .env file into the process environment.
_ = load_dotenv(find_dotenv())

# Password for the Postgres vector database user; a missing variable raises
# KeyError here rather than failing later at connection time.
PG_VECTOR_PWD = os.environ["PG_VECTOR_PWD"]

In [3]:
# Embedding model used to vectorise incoming queries.
# NOTE(review): presumably the same model that was used when the collection
# was populated — confirm, otherwise similarity scores are meaningless.
model_embedding = HuggingFaceEmbeddings(model_name='multi-qa-mpnet-base-dot-v1')

# Percent-encode the password before placing it in the URL: reserved characters
# such as '@', '/', ':', '#' or '%' would otherwise be parsed as URL structure
# and produce a wrong host/credential split or an invalid URL.
connection = (
    "postgresql+psycopg://"
    f"vector_user:{quote(PG_VECTOR_PWD, safe='')}"
    "@localhost:5431/vector_db"
)
collection_name = "udlbook"

# Handle on the existing pgvector collection that the retriever queries.
vector_store = PGVector(
    embeddings=model_embedding,
    collection_name=collection_name,
    connection=connection,
    use_jsonb=True,
)

  from tqdm.autonotebook import tqdm, trange


In [4]:
# Wrap the vector store as a retriever; k=3 requests three documents per query.
retriever = vector_store.as_retriever(
    search_kwargs=dict(k=3),
)

In [5]:
def _make_groq_llm(model_name):
    """Create a Groq chat model with the settings shared by every comparison run.

    Only the model id differs between the models being compared; keeping the
    remaining arguments in one place guarantees they stay identical.

    Args:
        model_name: Groq model identifier, e.g. ``"gemma2-9b-it"``.

    Returns:
        A ``ChatGroq`` instance configured with temperature 0 and 2 retries.
    """
    return ChatGroq(
        model=model_name,
        temperature=0,
        # Left at None — presumably defers to the library/service defaults.
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )


# NOTE(review): hosted model ids get retired over time — confirm these three
# are still served before re-running the notebook.
llm_llama3_1 = _make_groq_llm("llama-3.1-70b-versatile")
llm_gemma2 = _make_groq_llm("gemma2-9b-it")
llm_mixtral = _make_groq_llm("mixtral-8x7b-32768")

In [6]:
# Instruction text for the RAG chain. The {question} and {context}
# placeholders are filled in when the chain is invoked. The text is assembled
# line by line so that whitespace (including the trailing space on the first
# line) is explicit.
prompt = (
    "You are an expert in the book 'Understanding Deep Learning'. \n"
    "Use the following pieces of retrieved context to answer the question.\n"
    "If you don't know the answer, just say that you don't know.\n"
    "Keep the answer upto 5 lines unless the user asks for more information\n"
    "\n"
    "Question:\n"
    "{question}\n"
    "\n"
    "Context:\n"
    "{context}\n"
    "\n"
    "Answer:\n"
)

# Chat prompt built from the template string above.
prompt_template = ChatPromptTemplate.from_template(prompt)

In [7]:
def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents in input order, separated by a blank line; an empty
        string when ``docs`` is empty.
    """
    separator = "\n\n"
    texts = [item.page_content for item in docs]
    return separator.join(texts)

def _build_qa_rag_chain(llm):
    """Assemble the question-answering RAG chain around the given chat model.

    The incoming query string is fanned out two ways: it is sent to the
    retriever (whose documents are flattened to text by ``format_docs``) to
    produce ``context``, and passed through unchanged as ``question``. Both
    are fed to ``prompt_template`` and the rendered prompt goes to ``llm``.

    Args:
        llm: Chat model that receives the rendered prompt.

    Returns:
        A runnable chain; invoking it with a query string returns the
        model's response message.
    """
    return (
        {
            "context": retriever | format_docs,
            "question": RunnablePassthrough(),
        }
        | prompt_template
        | llm
    )


# One chain per model; everything except the LLM is shared so the answers
# can be compared directly.
qa_rag_chain_llama3_1 = _build_qa_rag_chain(llm_llama3_1)
qa_rag_chain_gemma2 = _build_qa_rag_chain(llm_gemma2)
qa_rag_chain_mixtral = _build_qa_rag_chain(llm_mixtral)

In [8]:
# First comparison question, answered by the Llama 3.1 chain.
query = "What is machine learning"
result_l = qa_rag_chain_llama3_1.invoke(query)
# Show the answer text first, then the full response object with its metadata.
print(result_l.content, result_l, sep="\n")

Machine learning is a subset of AI that learns to make decisions by fitting mathematical models to observed data.
content='Machine learning is a subset of AI that learns to make decisions by fitting mathematical models to observed data.' response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 704, 'total_tokens': 725, 'completion_time': 0.084, 'prompt_time': 0.165740096, 'queue_time': 0.005318881999999997, 'total_time': 0.249740096}, 'model_name': 'llama-3.1-70b-versatile', 'system_fingerprint': 'fp_b3ae7e594e', 'finish_reason': 'stop', 'logprobs': None} id='run-a1742dc8-0c35-479f-b239-a5a10d7e1831-0' usage_metadata={'input_tokens': 704, 'output_tokens': 21, 'total_tokens': 725}


In [None]:
# Same question through the Gemma 2 chain: answer text, then the full response.
result_g = qa_rag_chain_gemma2.invoke(query)
print(result_g.content, result_g, sep="\n")

In [None]:
# Same question through the Mixtral chain: answer text, then the full response.
result_m = qa_rag_chain_mixtral.invoke(query)
print(result_m.content, result_m, sep="\n")

In [10]:
# Second comparison question, reused by the following three cells.
query = "What is the difference between machine learning and deep learning?"

In [None]:
# Llama 3.1 answer to the current query: answer text, then the full response.
result_l = qa_rag_chain_llama3_1.invoke(query)
print(result_l.content, result_l, sep="\n")

In [None]:
# Gemma 2 answer to the current query: answer text, then the full response.
result_g = qa_rag_chain_gemma2.invoke(query)
print(result_g.content, result_g, sep="\n")

In [None]:
# Mixtral answer to the current query: answer text, then the full response.
result_m = qa_rag_chain_mixtral.invoke(query)
print(result_m.content, result_m, sep="\n")

In [14]:
# Third comparison question; it explicitly asks for a longer answer than the
# prompt's default length limit.
query = "Why does deep learning work? Please be detailed"

In [None]:
# Llama 3.1 answer to the current query: answer text, then the full response.
result_l = qa_rag_chain_llama3_1.invoke(query)
print(result_l.content, result_l, sep="\n")

In [None]:
# Gemma 2 answer to the current query: answer text, then the full response.
result_g = qa_rag_chain_gemma2.invoke(query)
print(result_g.content, result_g, sep="\n")

In [None]:
# Mixtral answer to the current query: answer text, then the full response.
result_m = qa_rag_chain_mixtral.invoke(query)
print(result_m.content, result_m, sep="\n")

In [18]:
# Fourth comparison question, reused by the following three cells.
query = "What are transformers and what is the difference with convolutional network?"

In [None]:
# Llama 3.1 answer to the current query: answer text, then the full response.
result_l = qa_rag_chain_llama3_1.invoke(query)
print(result_l.content, result_l, sep="\n")

In [None]:
# Gemma 2 answer to the current query: answer text, then the full response.
result_g = qa_rag_chain_gemma2.invoke(query)
print(result_g.content, result_g, sep="\n")

In [None]:
# Mixtral answer to the current query: answer text, then the full response.
result_m = qa_rag_chain_mixtral.invoke(query)
print(result_m.content, result_m, sep="\n")