In [2]:
import os
from dotenv import load_dotenv, find_dotenv
from langchain_groq import ChatGroq

# Load variables from the nearest .env file into the process environment.
_ = load_dotenv(find_dotenv())

# api for groq
GROQ_API = os.getenv("GROQ_API")

# api for openai
# OpenAIEmbeddings() is constructed later without an explicit key, so the key
# has to be present in the environment. Copying os.getenv(...) back into
# os.environ does nothing when the key exists and raises an opaque
# "TypeError: str expected, not NoneType" when it does not, so fail early
# with a readable message instead.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )

# Groq-hosted chat model used by the rest of the notebook.
# NOTE(review): confirm this model id is still served by Groq before relying on it.
chat_groq_model = ChatGroq(
    model='llama-3.1-70b-versatile',
    api_key=GROQ_API,
    temperature=0.7,   # moderately creative sampling
    max_retries=3,     # retry transient API failures up to three times
)

In [17]:
from langchain_community.document_loaders import TextLoader,PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma

In [21]:
# load data
# The loader returns a list of Documents; the 'page' metadata key in the output
# below suggests one Document per PDF page.
text_documents = PyPDFLoader('data/John_F_Kennedy.pdf').load()

# split documents
# CharacterTextSplitter only cuts at `separator`, and its default is "\n\n".
# The extracted PDF text is broken with single "\n" characters, so with the
# default separator the pages would pass through largely unsplit and
# chunk_size / chunk_overlap would have no practical effect. Splitting on "\n"
# lets the splitter actually pack lines into ~1000-character chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
)

chunk_of_text = text_splitter.split_documents(text_documents)

In [22]:
# Preview only the first few chunks; echoing the whole list floods the cell output.
chunk_of_text[:3]

[Document(metadata={'source': 'data/John_F_Kennedy.pdf', 'page': 0}, page_content="John F. Kennedy\nOval Office portrait, 1963\n35th President of the United States\nIn office\nJanuary 20, 1961 – November 22, 1963\nVice President Lyndon B. Johnson\nPreceded by Dwight D. Eisenhower\nSucceeded by Lyndon B. Johnson\nUnited States Senator\nfrom Massachusetts\nIn office\nJanuary 3, 1953 – December 22, 1960\nPreceded by Henry Cabot Lodge Jr.\nSucceeded by Benjamin A. Smith II\nMember of the U.S. House of Representatives\nfrom Massachusetts's 11th district\nIn office\nJanuary 3, 1947 – January 3, 1953\nPreceded by James Michael Curley\nSucceeded by Tip O'Neill\nPersonal details\nBorn John Fitzgerald Kennedy\nMay 29, 1917\nBrookline, Massachusetts,\nU.S.\nJohn F. Kennedy\nJohn Fitzgerald Kennedy (May 29, 1917 –\nNovember 22, 1963), often referred to as JFK, was an\nAmerican politician who served as the 35th president\nof the United States from 1961 until his assassination\nin 1963. He was the y

In [19]:
# Embed every chunk with OpenAI embeddings and index the vectors in a Chroma store.
openai_embeddings = OpenAIEmbeddings()
vector_db = Chroma.from_documents(
    documents=chunk_of_text,
    embedding=openai_embeddings,
)

In [9]:
# Ask the Chroma store for the chunks closest to the query and print the top hit.
question = "who is dswithbappy?"
response = vector_db.similarity_search(query=question)
best_match = response[0]
print(best_match.page_content)

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


dswithbappy focuses on providing content on Data Science, Artificial Intelligence, Machine Learning, 
Deep Learning, Computer Vision, Natural language processing, Python programming, etc. in English. 
"Real creativity won't make things more complex. Instead, I will simplify them." 
I have been working in the Data Science field, spearheading in machine learning, 
deep learning, and computer vision. Typically this is my YouTube channel where I clarify 
different themes on machine learning, deep learning, and AI with numerous real-world issue scenarios. 
My primary point is to create everybody commonplace of ML and AI. Please subscribe and support the channel. 
As I come up with new innovations. These recordings are free and I guarantee to create more curiously substance as we go ahead.
ðŸ˜€Please donate if you want to support the channel through Buy me a coffee: https://www.buymeacoffee.com/dswithbappy

Please feel free to drop a mail for any Item unboxing and any other collaboration.


## FAISS

In [10]:
from langchain_community.vectorstores import FAISS


# Build a FAISS index over the same chunks, again embedded with OpenAI embeddings.
vector_db_F = FAISS.from_documents(
    documents=chunk_of_text,
    embedding=OpenAIEmbeddings(),
)

In [11]:
# Wrap the FAISS store in a retriever; no search_kwargs are passed, so the
# library's default search settings apply.
retriever = vector_db_F.as_retriever()

In [12]:
# Query with the `question` variable defined in the Chroma search cell; the
# previous version passed the literal string "question" as the query text.
retriever.invoke(question)

[Document(id='fabdfd4c-4e9c-4035-b34e-35242fec04d7', metadata={'source': 'data/prince.txt'}, page_content='dswithbappy focuses on providing content on Data Science, Artificial Intelligence, Machine Learning, \nDeep Learning, Computer Vision, Natural language processing, Python programming, etc. in English. \n"Real creativity won\'t make things more complex. Instead, I will simplify them." \nI have been working in the Data Science field, spearheading in machine learning, \ndeep learning, and computer vision. Typically this is my YouTube channel where I clarify \ndifferent themes on machine learning, deep learning, and AI with numerous real-world issue scenarios. \nMy primary point is to create everybody commonplace of ML and AI. Please subscribe and support the channel. \nAs I come up with new innovations. These recordings are free and I guarantee to create more curiously substance as we go ahead.\nðŸ˜€Please donate if you want to support the channel through Buy me a coffee: https://www

In [14]:
# Same FAISS store, but cap each lookup at the single closest chunk (k=1).
retriever_k = vector_db_F.as_retriever(
    search_kwargs={"k": 1},
)

In [15]:
# Query with the `question` variable defined in the Chroma search cell; the
# previous version passed the literal string "question" as the query text.
retriever_k.invoke(question)

[Document(id='fabdfd4c-4e9c-4035-b34e-35242fec04d7', metadata={'source': 'data/prince.txt'}, page_content='dswithbappy focuses on providing content on Data Science, Artificial Intelligence, Machine Learning, \nDeep Learning, Computer Vision, Natural language processing, Python programming, etc. in English. \n"Real creativity won\'t make things more complex. Instead, I will simplify them." \nI have been working in the Data Science field, spearheading in machine learning, \ndeep learning, and computer vision. Typically this is my YouTube channel where I clarify \ndifferent themes on machine learning, deep learning, and AI with numerous real-world issue scenarios. \nMy primary point is to create everybody commonplace of ML and AI. Please subscribe and support the channel. \nAs I come up with new innovations. These recordings are free and I guarantee to create more curiously substance as we go ahead.\nðŸ˜€Please donate if you want to support the channel through Buy me a coffee: https://www