In [27]:
from langchain_community.embeddings import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    model="all-minilm"
) # By default uses the llama2 model:b

In [28]:
r1=embeddings.embed_documents([
    "Hello world",
    "Bye world"
])

In [29]:
len(r1[0])

384

In [30]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")


In [31]:
# from langchain_huggingface import HuggingFaceEmbeddings

# embeddings = HuggingFaceEmbeddings(
#     model_name="all-MiniLM-L6-v2"
# )

# result = embeddings.embed_documents(["Hello world", "Bye world"])
# print(result)


In [43]:
# VectorStore FAISS = Facebook AI Similarity Search

from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings


loader=TextLoader("sample.txt")
documents=loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs=text_splitter.split_documents(documents)

for doc in docs:
    print(doc)



page_content='OpenAI’s GPT-4 Turbo model offers a cost-effective alternative to GPT-4 with higher performance and a significantly extended context length of up to 128,000 tokens. While it supports most of the same capabilities as GPT-4, Turbo has usage limits depending on the API plan. For example, the free-tier ChatGPT users can send up to 40 messages every 3 hours, while Plus users are allotted up to 100 messages per 3 hours. These limits are enforced to balance performance across users and may change depending on demand and system stability. Additionally, developers integrating Turbo via the API may encounter rate limits such as tokens-per-minute (TPM) and requests-per-minute (RPM), which can be adjusted by applying for rate limit increases.' metadata={'source': 'sample.txt'}


In [None]:
embeddings = OllamaEmbeddings(
    model="all-minilm"
)

vectorstore = FAISS.from_documents(docs, embeddings)
vectorstore.save_local("faiss_index")
# vectorstore = FAISS.load_local("faiss_index", embeddings)

In [46]:
query = "How does GPT-4 Turbo compare to GPT-4 in terms of training data, update frequency, and architectural differences?"
docs = vectorstore.similarity_search(query)
print(docs[0].page_content)


OpenAI’s GPT-4 Turbo model offers a cost-effective alternative to GPT-4 with higher performance and a significantly extended context length of up to 128,000 tokens. While it supports most of the same capabilities as GPT-4, Turbo has usage limits depending on the API plan. For example, the free-tier ChatGPT users can send up to 40 messages every 3 hours, while Plus users are allotted up to 100 messages per 3 hours. These limits are enforced to balance performance across users and may change depending on demand and system stability. Additionally, developers integrating Turbo via the API may encounter rate limits such as tokens-per-minute (TPM) and requests-per-minute (RPM), which can be adjusted by applying for rate limit increases.


In [52]:
# Retriver QA

retriver= vectorstore.as_retriever()
docs=retriver.invoke(query)
docs[0].page_content


docs_and_scores = vectorstore.similarity_search_with_score(query)
print(docs_and_scores)

[(Document(id='b6f85280-cf15-4713-8531-fcea561aa7e4', metadata={'source': 'sample.txt'}, page_content='OpenAI’s GPT-4 Turbo model offers a cost-effective alternative to GPT-4 with higher performance and a significantly extended context length of up to 128,000 tokens. While it supports most of the same capabilities as GPT-4, Turbo has usage limits depending on the API plan. For example, the free-tier ChatGPT users can send up to 40 messages every 3 hours, while Plus users are allotted up to 100 messages per 3 hours. These limits are enforced to balance performance across users and may change depending on demand and system stability. Additionally, developers integrating Turbo via the API may encounter rate limits such as tokens-per-minute (TPM) and requests-per-minute (RPM), which can be adjusted by applying for rate limit increases.'), np.float32(12.031931))]


In [55]:
## Using Chromadb as a vectorstore

from langchain.vectorstores import Chroma
from langchain.embeddings import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    model="all-minilm"
)

docsearch = Chroma.from_documents(documents, embeddings)    

query = "?"
docs = docsearch.similarity_search(query)
print(docs[0].page_content)

OpenAI’s GPT-4 Turbo model offers a cost-effective alternative to GPT-4 with higher performance and a significantly extended context length of up to 128,000 tokens. While it supports most of the same capabilities as GPT-4, Turbo has usage limits depending on the API plan. For example, the free-tier ChatGPT users can send up to 40 messages every 3 hours, while Plus users are allotted up to 100 messages per 3 hours. These limits are enforced to balance performance across users and may change depending on demand and system stability. Additionally, developers integrating Turbo via the API may encounter rate limits such as tokens-per-minute (TPM) and requests-per-minute (RPM), which can be adjusted by applying for rate limit increases.
