In [23]:
# @title
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_google_genai import ChatGoogleGenerativeAI  
from langchain.prompts import PromptTemplate


In [2]:
# Load variables from a local .env file into the process environment.
# The API key read from os.environ in a later cell is presumably defined there — verify.
from dotenv import load_dotenv
load_dotenv()

True

In [3]:

# Read the source PDF (path is relative to the notebook's working directory).
# `docs` holds the loaded Document objects that are split into chunks below.
loader = PyPDFLoader("../azure.pdf")
docs = loader.load()

In [4]:

# Chunking configuration: chunks of at most 500 characters, with a
# 100-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)

In [5]:

# Split the loaded PDF documents into chunks using the splitter configured above.
documents = text_splitter.split_documents(docs)

In [6]:
# Spot-check: show the raw text of one chunk to sanity-check the split.
documents[10].page_content

'convincing me to write this title. The book improved in manifold ways through valuable comments from all the reviewers, time and again. Adrian Raposo did a commendable job helping develop the content as well as coordinating the overall project management. This book would not have been in its current shape had it not received the perfect touch of the technical editor, Abhishek Kotian, and also all the proofreaders.\nSpecial thanks to my colleagues, Kamal and Mahananda. Kamal took time to get'

In [22]:
# Spot-check: inspect the container type returned by split_documents.
type(documents)

list

In [7]:
import os

# Read the Gemini API key from the environment.
# The original lowercase name is tried first so existing .env files keep working;
# the conventional upper-case GOOGLE_API_KEY is accepted as a fallback because
# environment variable names are case-sensitive on POSIX systems.
gemini_api_key = os.environ.get('google_api_key') or os.environ.get('GOOGLE_API_KEY')
if not gemini_api_key:
    # Still a KeyError (as with os.environ[...]), but with an actionable message.
    raise KeyError(
        "Gemini API key not found: set 'google_api_key' or 'GOOGLE_API_KEY' "
        "in the environment or in the .env file"
    )

In [8]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding model used to vectorise the chunks (and, via the retriever, the queries).
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=gemini_api_key,
)

In [9]:
# Chat model shared by the relevance grader and the answering chain.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=gemini_api_key,
)

In [10]:
# Embed the chunks with `embeddings` and build a FAISS vector index over them.
vector_db = FAISS.from_documents(documents,embeddings)

In [11]:
# Expose the index as a retriever, using as_retriever()'s default search settings.
retriever = vector_db.as_retriever()

In [26]:
from langchain_core.output_parsers import JsonOutputParser

In [24]:
# Prompt for the relevance grader: given one retrieved chunk and the user
# question, the model must answer with a JSON object {"score": "yes" | "no"}.
# The Llama-3 chat control tokens that used to wrap this text were removed:
# the grader runs on Gemini, which receives them as literal noise.
# NOTE: `prompt` is rebound to the answering chat prompt in a later cell.
prompt = PromptTemplate(
    template="""You are a grader assessing relevance of a retrieved document to a user question.
If the document contains keywords related to the user question, grade it as relevant.
It does not need to be a stringent test. The goal is to filter out erroneous retrievals.

Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question.
Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.

Here is the retrieved document:

{document}

Here is the user question: {question}
""",
    input_variables=["question", "document"],
)

In [44]:
# Relevance grader pipeline: grading prompt -> Gemini -> parsed JSON dict.
retrieval_grader = prompt | llm | JsonOutputParser()
question = "what is Evaluation metric"
# `invoke` is the supported retriever entry point; `get_relevant_documents`
# is deprecated in current LangChain releases.
# NOTE: this rebinds `docs` (previously the loaded PDF documents) to the
# retrieved chunks; the next cell relies on that.
docs = retriever.invoke(question)
# Grade the second retrieved chunk.
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_txt}))

{'score': 'yes'}


In [45]:
# Show the text of the chunk that was just graded (`docs` now holds the retrieved chunks).
docs[1].page_content

'evaluate model  86\nevaluate recommender  135evaluation metrics, classification\nabout  92\naccuracy  93\nArea Under the Curve (AUC)  95\nF1 score  94false negative  93false positive  92\nmatric  96\nprecision  94\nrecall  94receiver operating characteristics  \n(ROC) graph  95\nthreshold  94true negative  93true positive  92\nExecute Python Script module  143-145Execute R Script module  149, 150experiment\npreparing, to publish  156\nF'

In [12]:
# System prompt for the answering chain. `{context}` is the placeholder for
# the retrieved document text; the user's question is supplied separately as
# the human message when the chat prompt is built.
template = """
You are a PDF document expert specializing in extracting accurate answers from complex texts.
Utilize the provided context to deliver precise and concise answers.

Context:
{context}

Provide a well-informed and detailed answer based on the context:

"""

In [13]:
from langchain_core.prompts import ChatPromptTemplate

# Chat prompt for answering: the system message carries the instructions and
# retrieved context, the human message carries the user's question.
# NOTE: this rebinds `prompt`, replacing the relevance-grader prompt defined earlier.
prompt = ChatPromptTemplate.from_messages([
    ("system", template),
    ("human", "{input}"),
])

In [14]:
# Answering step: passes the retrieved documents to the LLM through the chat
# prompt's {context} placeholder.
chain = create_stuff_documents_chain(llm,prompt)

In [15]:
# Full RAG pipeline: retrieve chunks for the question, then run the answering chain on them.
rag_chain = create_retrieval_chain(retriever, chain)

In [48]:
# One-off smoke test of the pipeline. The returned dict contains the original
# 'input', the retrieved 'context' documents, and the generated 'answer'.
results = rag_chain.invoke({"input": "What is  Evaluation metrics?"})
print(results)

{'input': 'What is  Evaluation metrics?', 'context': [Document(metadata={'source': '../azure.pdf', 'page': 120}, page_content='metrics that are defined. Often, one metric may not be sufficient to take a decision. To start with, you may look at accuracy, but at times it might be deceptive. Consider a case where you are making a prediction for a rare disease where in reality, 99 percent negative cases and 1 percent of positive cases appear. If your classification model predicts all the cases as true negatives, then the accuracy is still 99 percent. In this case, the F1 score might be useful as it would give you a clear'), Document(metadata={'source': '../azure.pdf', 'page': 116}, page_content="Consider a case where you need to predict the housing price not as a number, but as \ncategories, such as greater than 100K or less than 100K. In this case, though you are predicting the housing price, you are indeed predicting a class or category for the \nhousing price and hence, it's a classific

In [19]:
# Interactive question loop: read a question, run it through the RAG chain,
# print the answer; repeat until the user types 'exit'.
while True:
    try:
        # Strip surrounding whitespace so inputs like "exit " still terminate the loop.
        user_question = input("Enter your question (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the cell was interrupted: leave the loop cleanly
        # instead of ending in a traceback.
        print("Exiting...")
        break
    if user_question.lower() == 'exit':
        print("Exiting...")
        break
    if not user_question:
        # Don't spend a retrieval + LLM call on an empty question.
        continue
    print(user_question)
    results = rag_chain.invoke({"input": user_question})
    print("Answer:", results['answer'])

what is linear regression
Answer: Linear regression is a regression algorithm used in ML Studio. It aims to fit a line to a dataset, making it a popular and historically significant regression method. 

ML studio pros and cons
Answer: ## ML Studio: Pros and Cons

**Pros:**

* **Ease of Use:** ML Studio provides a visual, drag-and-drop interface, making it easy to build, test, and deploy predictive models without extensive coding knowledge.
* **Comprehensive Platform:** It offers a complete platform for predictive analytics, encompassing development, testing, and deployment.
* **Cloud-Based:** Being browser-based and hosted on Azure, ML Studio offers accessibility from any modern browser and eliminates the need for local software installations.
* **Collaborative Environment:** ML Studio enables sharing your work with others, facilitating collaboration and knowledge sharing. 

**Cons:**

* **Limited Flexibility:** While providing a user-friendly interface, ML Studio's out-of-the-box modu