In [3]:

from langchain_groq import ChatGroq
from dotenv import load_dotenv

import os

load_dotenv()
groq_api_key = os.getenv('GROQ_API_KEY')

llm = ChatGroq(
    model="llama-3.1-8b-instant",
    api_key=groq_api_key,
    max_tokens=500
)


In [4]:
llm.invoke([{"role": "user", "content": "What is langchain?"}])

AIMessage(content="LangChain is an open-source framework for building conversational AI systems. It's built on top of the Python programming language, and it allows developers to create more sophisticated conversational models by leveraging LLMs (Large Language Models) like the ones provided by models like Llama, LLaMA, and more.\n\nLangChain is designed to work seamlessly with LLMs and offers a variety of tools and features that make it easier to integrate these powerful models into real-world applications. Some of the key features of LangChain include:\n\n1. **Conversational APIs**: LangChain provides a simple way to create conversational APIs that can be used to interact with LLMs. This allows developers to build conversational interfaces that can understand and respond to user input in a more natural way.\n2. **Task Management**: LangChain includes a task management system that allows developers to break down complex conversational tasks into smaller, more manageable pieces. This m

In [6]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# The model name is loaded from the sentence-transformers library
model_name = "sentence-transformers/all-MiniLM-l6-v2"

embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
)

text = "This is a test document."
doc_embeddings = embeddings.embed_documents([text, "Another document"])
query_embedding = embeddings.embed_query(text)



  embeddings = HuggingFaceEmbeddings(


In [7]:
query_embedding

[-0.038338541984558105,
 0.12346471846103668,
 -0.02864297851920128,
 0.05365270376205444,
 0.008845366537570953,
 -0.03983934596180916,
 -0.07300589233636856,
 0.04777132719755173,
 -0.030462471768260002,
 0.05497974902391434,
 0.08505292981863022,
 0.03665666654706001,
 -0.005319973453879356,
 -0.002233141800388694,
 -0.06071099638938904,
 -0.027237920090556145,
 -0.01135166734457016,
 -0.042437683790922165,
 0.00912993960082531,
 0.10081552714109421,
 0.07578728348016739,
 0.06911715865135193,
 0.009857431054115295,
 -0.0018377641681581736,
 0.02624903991818428,
 0.03290243074297905,
 -0.07177437096834183,
 0.028384247794747353,
 0.06170954555273056,
 -0.052529532462358475,
 0.033661652356386185,
 0.07446812838315964,
 0.07536034286022186,
 0.03538404777646065,
 0.06713404506444931,
 0.010798045434057713,
 0.08167017996311188,
 0.016562897711992264,
 0.03283063694834709,
 0.036325663328170776,
 0.0021727988496422768,
 -0.09895738214254379,
 0.0050467848777771,
 0.05089650675654411,


In [8]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("Ng_MLY07 2.pdf")
docs = loader.load()

In [9]:
docs

[Document(metadata={'producer': 'Skia/PDF m68', 'creator': 'PyPDF', 'creationdate': '', 'source': 'Ng_MLY07 2.pdf', 'total_pages': 7, 'page': 0, 'page_label': '1'}, page_content=''),
 Document(metadata={'producer': 'Skia/PDF m68', 'creator': 'PyPDF', 'creationdate': '', 'source': 'Ng_MLY07 2.pdf', 'total_pages': 7, 'page': 1, 'page_label': '2'}, page_content='Machine Learning Yearning is a   \n deeplearning.ai project.  \n  \n  \n  \n  \n  \n  \n  \n © 2018 Andrew Ng. All Rights Reserved.  \n  \n \xa0 \n \xa0 \n Page 2 Machine Learning Yearning-Draft Andrew Ng'),
 Document(metadata={'producer': 'Skia/PDF m68', 'creator': 'PyPDF', 'creationdate': '', 'source': 'Ng_MLY07 2.pdf', 'total_pages': 7, 'page': 2, 'page_label': '3'}, page_content='Comparing to  \n human-level  \n performance  \n \xa0 \n \xa0 \n   \n Page 3 Machine Learning Yearning-Draft Andrew Ng'),
 Document(metadata={'producer': 'Skia/PDF m68', 'creator': 'PyPDF', 'creationdate': '', 'source': 'Ng_MLY07 2.pdf', 'total_pages'

In [10]:
extracted_text = [
    {
        "source": doc.metadata["source"],
        "page_content": doc.page_content
        
    }
    for doc in docs
]

    

In [11]:
extracted_text

[{'source': 'Ng_MLY07 2.pdf', 'page_content': ''},
 {'source': 'Ng_MLY07 2.pdf',
  'page_content': 'Machine Learning Yearning is a   \n deeplearning.ai project.  \n  \n  \n  \n  \n  \n  \n  \n © 2018 Andrew Ng. All Rights Reserved.  \n  \n \xa0 \n \xa0 \n Page 2 Machine Learning Yearning-Draft Andrew Ng'},
 {'source': 'Ng_MLY07 2.pdf',
  'page_content': 'Comparing to  \n human-level  \n performance  \n \xa0 \n \xa0 \n   \n Page 3 Machine Learning Yearning-Draft Andrew Ng'},
 {'source': 'Ng_MLY07 2.pdf',
  'page_content': '33 Why we compare to human-level  \n performance   \n \xa0 \n Many machine learning systems aim to automate things that humans do well. Examples  \n include image recognition, speech recognition, and email spam classification. Learning  \n algorithms have also improved so much that we are now surpassing human-level  \n performance on more and more of these tasks.   \n Further, there are several reasons building an ML system is easier if you are trying to do a  \n task

In [23]:
from langchain_text_splitters import RecursiveCharacterTextSplitter 

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=50
)

text_splitter

<langchain_text_splitters.character.RecursiveCharacterTextSplitter at 0x73d88038d760>

In [31]:
from langchain_core.documents import Document

all_chunks = []
for doc in docs:
    chunks = text_splitter.split_text(doc.page_content)
    #print(f"Document has been split into {len(chunks)} chunks.")
    for i,chunk in enumerate(chunks):
        chunk_doc = Document(
            page_content=chunk,
            metadata={"source": doc.metadata["source"], "chunk": i}
        )
        all_chunks.append(chunk_doc)

In [34]:
all_chunks

[Document(metadata={'source': 'Ng_MLY07 2.pdf', 'chunk': 0}, page_content='Machine Learning Yearning is a   \n deeplearning.ai project.'),
 Document(metadata={'source': 'Ng_MLY07 2.pdf', 'chunk': 1}, page_content='deeplearning.ai project.  \n  \n  \n  \n  \n  \n  \n  \n © 2018 Andrew Ng. All Rights Reserved.'),
 Document(metadata={'source': 'Ng_MLY07 2.pdf', 'chunk': 2}, page_content='© 2018 Andrew Ng. All Rights Reserved.'),
 Document(metadata={'source': 'Ng_MLY07 2.pdf', 'chunk': 3}, page_content='Page 2 Machine Learning Yearning-Draft Andrew Ng'),
 Document(metadata={'source': 'Ng_MLY07 2.pdf', 'chunk': 0}, page_content='Comparing to  \n human-level  \n performance'),
 Document(metadata={'source': 'Ng_MLY07 2.pdf', 'chunk': 1}, page_content='human-level  \n performance  \n \xa0 \n \xa0 \n   \n Page 3 Machine Learning Yearning-Draft Andrew Ng'),
 Document(metadata={'source': 'Ng_MLY07 2.pdf', 'chunk': 0}, page_content='33 Why we compare to human-level  \n performance'),
 Document(met

In [35]:
print(f"Created {len(all_chunks)} chunks")


Created 100 chunks


In [36]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# The model name is loaded from the sentence-transformers library
model_name = "sentence-transformers/all-MiniLM-l6-v2"

embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
)

In [None]:
from langchain_community.vectorstores import FAISS

victor_store = FAISS.from_documents(all_chunks, embeddings)

victor_store.save_local("faiss_medical")
# ret = FAISS.load_local("faiss_medical", embeddings)


In [38]:
retriver = victor_store.as_retriever(search_type="similarity", search_kwargs={"k": 2})

In [44]:
response = retriver.invoke('whats human-level performance')
response[1].page_content

'34 How to define human-level performance'

In [45]:
query = "whats human-level performance?"

docs_and_scores = retriver.vectorstore.similarity_search_with_score(query, k=4)

for doc, score in docs_and_scores:
    print("--------")
    print("Score:", score)
    print("Metadata:", doc.metadata)
    print(doc.page_content[:400])

--------
Score: 0.117998704
Metadata: {'source': 'Ng_MLY07 2.pdf', 'chunk': 17}
human-level performance.
--------
Score: 0.25098738
Metadata: {'source': 'Ng_MLY07 2.pdf', 'chunk': 0}
Comparing to  
 human-level  
 performance
--------
Score: 0.25807592
Metadata: {'source': 'Ng_MLY07 2.pdf', 'chunk': 0}
34 How to define human-level performance
--------
Score: 0.2812174
Metadata: {'source': 'Ng_MLY07 2.pdf', 'chunk': 9}
previous chapter for comparing to human-level performance apply:


In [None]:
from langchain_core.prompts import PromptTemplate

prompt_template = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, say that you don't know.
Use three sentences maximum and keep the answer concise.

Question: {question}
Context: {context}
Answer:
"""

prompt = PromptTemplate.from_template(prompt_template)

print(prompt)


input_variables=['context', 'input'] input_types={} partial_variables={} template="You are an assistant for question-answering tasks.\nUse the following pieces of retrieved context to answer the question.\nIf you don't know the answer, say that you don't know.\nUse three sentences maximum and keep the answer concise.\n\nQuestion: {input}\nContext: {context}\nAnswer:\n"


In [53]:
def get_context(question):
    docs = retriver.invoke(question)

    return "\n\n".join(doc.page_content for doc in docs)
context = get_context("whats Error analysis?")
context

'• It is hard to know what the optimal error rate and reasonable desired error\n\n• Error analysis can draw on human intuition. \u200b By discussing images with a team of'

In [56]:
from langchain_core.runnables import RunnableParallel,RunnablePassthrough

runable = (RunnableParallel(
    {'context':get_context,
    'question':RunnablePassthrough()
    }
))

chain = runable | prompt | llm

response = chain.invoke('whats human-level performance?')
response.content

'Human-level performance refers to a level of performance that matches or exceeds that of a typical human in a particular task or domain. It is often used as a benchmark for artificial intelligence and machine learning systems, indicating that they have achieved a level of competence similar to that of a human. Achieving human-level performance is considered a significant milestone in AI development.'

In [67]:
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_classic.chains.retrieval import create_retrieval_chain

combine_docs_chain = create_stuff_documents_chain(llm, prompt)

response = create_retrieval_chain(retriver, combine_docs_chain)
response

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x73d735ffb260>, search_kwargs={'k': 2}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks.\nUse the following pieces of retrieved context to answer the question.\nIf you don't know the answer, say that you don't know.\nUse three sentences maximum and keep the answer concise.\n\nQuestion: {input}\nConte

In [69]:
response.invoke({
    "input": "What is Machine Learning Yearning?",
    "context": get_context("What is Machine Learning Yearning?")
})


{'input': 'What is Machine Learning Yearning?',
 'context': [Document(id='c2aa4ddc-8298-44bb-9184-2e17f675396e', metadata={'source': 'Ng_MLY07 2.pdf', 'chunk': 0}, page_content='Machine Learning Yearning is a   \n deeplearning.ai project.'),
  Document(id='bb8ac637-ddcd-450a-a964-53f91be4adeb', metadata={'source': 'Ng_MLY07 2.pdf', 'chunk': 1}, page_content='Many machine learning systems aim to automate things that humans do well. Examples')],
 'answer': 'Machine Learning Yearning is a project by deeplearning.ai that helps learners understand how to approach machine learning problems. It aims to provide a framework for tackling complex machine learning challenges. The project focuses on the process of machine learning, not just the techniques.'}