In [3]:
# All imports for this setup step live together so the cell's dependencies are visible at a glance.
import os

from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_nomic import NomicEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone

# Load environment variables from the .env file, overriding any existing variables.
load_dotenv(override=True)

# LangChain embeddings object backed by Nomic.
embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")

# Pinecone vector store built on top of an existing index.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index_name = "nlp-module"  # change if desired
index = pc.Index(index_name)
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

# Retriever: finds similar documents in the Pinecone index for a given query.
# With the "similarity_score_threshold" search type, a query whose matches all
# fall below `score_threshold` yields an empty document list.
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 10, "score_threshold": 0.5},
)

# Chat model (served through Groq) that generates the responses.
llm = ChatGroq(model="llama3-8b-8192", stop_sequences=None, temperature=0)

# Function to format the retrieved documents, gotten from the retriever

def format_docs(docs):
    """Flatten retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` of every document, in order, separated by a
        blank line. An empty iterable produces an empty string.

    Side effects:
        Prints the raw ``docs`` value followed by an empty line (debug aid).
    """
    print("docs:", docs)
    print()
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)


# Custom prompt with two variables, "context" and "question".
# Note: prompt_template expects a dictionary/JSON with the keys "context" and "question" as input.

# The prompt classes are imported from langchain_core, the same package this
# notebook already uses for the output parser and the runnables.
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate

prompt_template = ChatPromptTemplate.from_messages([
    HumanMessagePromptTemplate(
        prompt=PromptTemplate(
            input_variables=["context", "question"],
            # Adjacent string literals are concatenated into a single template;
            # {question} and {context} are filled in when the prompt is invoked.
            template=(
                "You are an assistant for question-answering tasks. Use the following "
                "pieces of retrieved context to answer the question. If you don't know "
                "the answer, just say that you don't know. Use three sentences maximum "
                "and keep the answer concise.\n\n"
                "Question: {question}\n"
                "Context: {context}\n"
                "Answer:"
            ),
        )
    )
])

# A simple function that logs the input and returns it

def logger(input):
    """Print the value passing through and hand the very same object back.

    Args:
        input: Any value; it is printed and returned unmodified.

    Returns:
        The object that was passed in.
    """
    passthrough = input
    print(passthrough)
    return passthrough


from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# RAG pipeline using the modified prompt: gather the prompt inputs, fill in
# the prompt, call the LLM, then run the result through the string output parser.
# Example: respnse = rag_chain.invoke("Tell me about the paper: Attention is all you Need")
prompt_inputs = {
    # Documents found by the retriever, flattened into one text block by format_docs.
    "context": retriever | format_docs,
    # The value handed to invoke() is forwarded as the question.
    "question": RunnablePassthrough(),
}
rag_chain = prompt_inputs | prompt_template | llm | StrOutputParser()

In [9]:
from IPython.display import Markdown, display

# Ask the chain a question and render its answer as Markdown.
user_question = "when is the Arabic course taught? tell me all the people that teach it"
respnse = rag_chain.invoke(user_question)
display(Markdown(respnse))

No relevant docs were retrieved using the relevance score threshold 0.5


docs: []



Based on the provided context, I don't have any information about the Arabic course being taught. Therefore, I cannot provide an answer to the question.

If you provide the context, I'll be happy to help you with the question.

In [None]:
# All imports for this setup step live together so the cell's dependencies are visible at a glance.
import os

from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_nomic import NomicEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone

# Load environment variables from the .env file, overriding any existing variables.
load_dotenv(override=True)

# LangChain embeddings object backed by Nomic.
embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")

# Pinecone vector store built on top of an existing index, scoped to one namespace.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index_name = "musab-bilal-rag"  # change if desired
index = pc.Index(index_name)
vector_store = PineconeVectorStore(index=index, embedding=embeddings, namespace="prog-ann")

# Retriever: finds similar documents in the Pinecone index for a given query,
# using the "similarity_score_threshold" search type with the settings below.
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.5},
)

# Chat model (served through Groq) that generates the responses.
llm = ChatGroq(model="llama3-8b-8192", stop_sequences=None, temperature=0)

# Function to format the retrieved documents, gotten from the retriever

def format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        docs: Iterable of objects that each carry a ``page_content`` attribute.

    Returns:
        Every ``page_content`` in the original order, with a blank line
        between consecutive entries; ``""`` when there are no documents.

    Side effects:
        Prints the raw ``docs`` value and then an empty line (debug aid).
    """
    print("docs:", docs)
    print()
    chunks = []
    for retrieved in docs:
        chunks.append(retrieved.page_content)
    return "\n\n".join(chunks)


# Custom prompt with two variables, "context" and "question".
# Note: prompt_template expects a dictionary/JSON with the keys "context" and "question" as input.

# The prompt classes are imported from langchain_core, the same package this
# notebook already uses for the output parser and the runnables.
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate

prompt_template = ChatPromptTemplate.from_messages([
    HumanMessagePromptTemplate(
        prompt=PromptTemplate(
            input_variables=["context", "question"],
            # Adjacent string literals are concatenated into a single template;
            # {question} and {context} are filled in when the prompt is invoked.
            template=(
                "You are an assistant for question-answering tasks. Use the following "
                "pieces of retrieved context to answer the question. If you don't know "
                "the answer, just say that you don't know.\n\n"
                "Keep the answer concise but answer completely.\n\n"
                "Give the exact line of context if you are asked for some sort of justification for your response.\n\n"
                "Question: {question}\n"
                "Context: {context}\n"
                "Answer:"
            ),
        )
    )
])

# A simple function that logs the input and returns it

def logger(input):
    """Pass-through hook for inspecting values inside the chain.

    Printing is currently switched off, so the value is simply returned.

    Args:
        input: Any value flowing through the chain.

    Returns:
        The object that was passed in, unchanged.
    """
    passthrough = input
    # print(passthrough)  # re-enable to inspect the value
    return passthrough


from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# RAG pipeline using the modified prompt: gather the prompt inputs, fill in
# the prompt, call the LLM, then run the result through the string output parser.
# Example: respnse = rag_chain.invoke("Tell me about the paper: Attention is all you Need")
prompt_inputs = {
    # Documents found by the retriever, flattened by format_docs and then
    # routed through the logger pass-through hook.
    "context": retriever | format_docs | logger,
    # The value handed to invoke() is forwarded as the question.
    "question": RunnablePassthrough(),
}
rag_chain = prompt_inputs | prompt_template | llm | StrOutputParser()

In [14]:
from IPython.display import Markdown, display

# Ask the chain a question and render its answer as Markdown.
user_question = "How many credit hours in Master's Islamic Banking and how do you know this?"
respnse = rag_chain.invoke(user_question)
display(Markdown(respnse))

docs: [Document(id='txt139', metadata={}, page_content='Moreover, the students are encouraged to pick a topic for their thesis/ project with a positive societal impact and include ethical perspectives in their research work. Use of technology and innovative pedagogy\n The MS Finance program incorporates software in student learning including Advanced Excel, VBA, Stata, R, Python etc. Apart from this, workshops, and multiple training sessions on new software are organized for\n graduate students to support in their research endeavors. ---\n PROGRAM\n ANNOUNCEMENT 2024-25 42 Semester-wise sequence of courses edit Course Cr Pre- Course\n Semester -1 (Fall) code hours requisite type\n Master of Science (MS)\n Islamic Banking and Finance\n Islamic Economics ECO510 3 - Cor e\n The MS Islamic Banking and Finance (MS IBF) is a 36 credit hours program which can be completed in a\n minimum duration of 1.5 years.'), Document(id='txt18', metadata={}, page_content='The program is designed to provid

The Master's program in Islamic Banking and Finance has 42 credit hours. I know this because it is stated in the context: "The MS Islamic Banking and Finance (MS IBF) is a 42 credit hours program which can be completed in a minimum duration of 1.5 years." (Line 14)