# HuggingFace Tutorial

In [1]:
print("HuggingFace")

HuggingFace


In [2]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
## assigning HT token in my environment
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')

### Sentence transormer on huggingface

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
text_docs = "This is my test documents"

query = embeddings.embed_query(text_docs)

In [6]:
## dimention of this vector
len(query)

384

In [7]:
text_docs2 = """
This is line one.
This is another one line.
This is another third docs for testing.
"""
query2 = embeddings.embed_documents(text_docs2)

In [8]:
len(query2)

85

In [9]:
query3 = embeddings.embed_documents([text_docs, "This is another docs for testings."])
print(query3)
print(len(query3))

[[-0.019978979602456093, 0.09490571171045303, -0.0383366197347641, 0.041137173771858215, 0.04151945933699608, -0.05657500773668289, -0.021200846880674362, 0.09389421343803406, -0.053266000002622604, 0.03349548950791359, 0.07310359179973602, -0.010477759875357151, 0.030516160652041435, 0.0031025498174130917, -0.08897648006677628, -0.03770797699689865, -0.04630402848124504, -0.02641266956925392, -0.029461082071065903, 0.05675427243113518, 0.002856799867004156, 0.06412481516599655, -0.052435267716646194, -0.03617994487285614, 0.009556945413351059, 0.04497779533267021, -0.030463887378573418, 0.010382283478975296, 0.05111128091812134, -0.06933780014514923, -0.015598238445818424, 0.041700299829244614, 0.07288982719182968, 0.027851222082972527, 0.10267244279384613, -0.015121329575777054, 0.056198153644800186, -0.020520120859146118, 0.03983450308442116, 0.015093679539859295, -0.02232820726931095, -0.11222531646490097, 0.05854326859116554, 0.026832660660147667, 0.04934829846024513, 0.0032935952

## VectoreStore FAISS

In [10]:
# this will help to load the text file
from langchain_community.document_loaders import TextLoader
# this will help to store text vectore in FAISS store databse
from langchain_community.vectorstores import FAISS
## this will help to convert text to vectore
from langchain_community.embeddings import OllamaEmbeddings
# this will help to split the text
from langchain_text_splitters import CharacterTextSplitter

## load the text file
loader = TextLoader("data/story.txt")


In [11]:
## load and read the text
document =loader.load()
## split the text documnets
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=30)
## now it split the text
docs = text_splitter.split_documents(document)

In [12]:
## i will not be using OllamaEmbedding here because i have not installed Ollama in my machine
# so this is been usign HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings()
db = FAISS.from_documents(docs, embeddings)
db

<langchain_community.vectorstores.faiss.FAISS at 0x7daba046bb00>

In [13]:
## querying
query  = "rabbit woke up and stretched, and what is he realized"
result = db.similarity_search(query)
result

[Document(metadata={'source': 'data/story.txt'}, page_content="time, in a lush green forest, there lived a speedy rabbit and a slow-moving turtle. The rabbit, confident in his speed, often boasted about how fast he could run. The turtle, tired of the rabbit's bragging, challenged him to a race.\nThe other animals gathered to watch. They were curious to see if the slow turtle could beat the quick rabbit. With a wink and a grin, the rabbit accepted the challenge. They decided to race to the big oak tree on the other side of the meadow.\non the day of the race, the rabbit zoomed off as soon as the signal was given. He dashed ahead, laughing at how far he was pulling away from the turtle. Feeling overly confident, the rabbit decided to take a nap under a shady tree, believing he had plenty of time to spare.\nMeanwhile, the turtle ambled along at his slow but steady pace. He didn’t stop or get distracted. As he passed by the sleeping rabbit, he thought about how determination and perseveran

In [14]:
## page content
result[0].page_content

"time, in a lush green forest, there lived a speedy rabbit and a slow-moving turtle. The rabbit, confident in his speed, often boasted about how fast he could run. The turtle, tired of the rabbit's bragging, challenged him to a race.\nThe other animals gathered to watch. They were curious to see if the slow turtle could beat the quick rabbit. With a wink and a grin, the rabbit accepted the challenge. They decided to race to the big oak tree on the other side of the meadow.\non the day of the race, the rabbit zoomed off as soon as the signal was given. He dashed ahead, laughing at how far he was pulling away from the turtle. Feeling overly confident, the rabbit decided to take a nap under a shady tree, believing he had plenty of time to spare.\nMeanwhile, the turtle ambled along at his slow but steady pace. He didn’t stop or get distracted. As he passed by the sleeping rabbit, he thought about how determination and perseverance mattered, even if he was much slower.\nAfter a while, the r

## Retriver

In [15]:
retriver = db.as_retriever()
result = retriver.invoke(query)
result[0].page_content

"time, in a lush green forest, there lived a speedy rabbit and a slow-moving turtle. The rabbit, confident in his speed, often boasted about how fast he could run. The turtle, tired of the rabbit's bragging, challenged him to a race.\nThe other animals gathered to watch. They were curious to see if the slow turtle could beat the quick rabbit. With a wink and a grin, the rabbit accepted the challenge. They decided to race to the big oak tree on the other side of the meadow.\non the day of the race, the rabbit zoomed off as soon as the signal was given. He dashed ahead, laughing at how far he was pulling away from the turtle. Feeling overly confident, the rabbit decided to take a nap under a shady tree, believing he had plenty of time to spare.\nMeanwhile, the turtle ambled along at his slow but steady pace. He didn’t stop or get distracted. As he passed by the sleeping rabbit, he thought about how determination and perseverance mattered, even if he was much slower.\nAfter a while, the r

## Similarity search with score

In [16]:
## it will result with similarity score
docs_and_score = db.similarity_search_with_score(query)
docs_and_score

[(Document(metadata={'source': 'data/story.txt'}, page_content="time, in a lush green forest, there lived a speedy rabbit and a slow-moving turtle. The rabbit, confident in his speed, often boasted about how fast he could run. The turtle, tired of the rabbit's bragging, challenged him to a race.\nThe other animals gathered to watch. They were curious to see if the slow turtle could beat the quick rabbit. With a wink and a grin, the rabbit accepted the challenge. They decided to race to the big oak tree on the other side of the meadow.\non the day of the race, the rabbit zoomed off as soon as the signal was given. He dashed ahead, laughing at how far he was pulling away from the turtle. Feeling overly confident, the rabbit decided to take a nap under a shady tree, believing he had plenty of time to spare.\nMeanwhile, the turtle ambled along at his slow but steady pace. He didn’t stop or get distracted. As he passed by the sleeping rabbit, he thought about how determination and persevera

In [17]:
embedding_vector=embeddings.embed_query(query)
print(len(embedding_vector))

768


In [18]:
## query by vectoy
docs = db.similarity_search_by_vector(embedding_vector)
docs[0].page_content

"time, in a lush green forest, there lived a speedy rabbit and a slow-moving turtle. The rabbit, confident in his speed, often boasted about how fast he could run. The turtle, tired of the rabbit's bragging, challenged him to a race.\nThe other animals gathered to watch. They were curious to see if the slow turtle could beat the quick rabbit. With a wink and a grin, the rabbit accepted the challenge. They decided to race to the big oak tree on the other side of the meadow.\non the day of the race, the rabbit zoomed off as soon as the signal was given. He dashed ahead, laughing at how far he was pulling away from the turtle. Feeling overly confident, the rabbit decided to take a nap under a shady tree, believing he had plenty of time to spare.\nMeanwhile, the turtle ambled along at his slow but steady pace. He didn’t stop or get distracted. As he passed by the sleeping rabbit, he thought about how determination and perseverance mattered, even if he was much slower.\nAfter a while, the r

## saving and loading

In [19]:
## save this faiss db in local machine
db.save_local('faiss_index')

In [20]:
## load this saved faiss db form local machine
new_db  =FAISS.load_local('faiss_index', embeddings, allow_dangerous_deserialization=True)

In [21]:
docs = new_db.similarity_search_by_vector(embedding_vector)
docs[0].page_content

"time, in a lush green forest, there lived a speedy rabbit and a slow-moving turtle. The rabbit, confident in his speed, often boasted about how fast he could run. The turtle, tired of the rabbit's bragging, challenged him to a race.\nThe other animals gathered to watch. They were curious to see if the slow turtle could beat the quick rabbit. With a wink and a grin, the rabbit accepted the challenge. They decided to race to the big oak tree on the other side of the meadow.\non the day of the race, the rabbit zoomed off as soon as the signal was given. He dashed ahead, laughing at how far he was pulling away from the turtle. Feeling overly confident, the rabbit decided to take a nap under a shady tree, believing he had plenty of time to spare.\nMeanwhile, the turtle ambled along at his slow but steady pace. He didn’t stop or get distracted. As he passed by the sleeping rabbit, he thought about how determination and perseverance mattered, even if he was much slower.\nAfter a while, the r

## ChromaDB

In [22]:
## in my local pc sqllite3 was not supported so then below code is the solution of this problems
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

## building same vector db
from langchain_chroma import Chroma

In [23]:
## building a sample vectores
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [24]:
loader = TextLoader('data/story.txt')
data=loader.load()
data

[Document(metadata={'source': 'data/story.txt'}, page_content="time, in a lush green forest, there lived a speedy rabbit and a slow-moving turtle. The rabbit, confident in his speed, often boasted about how fast he could run. The turtle, tired of the rabbit's bragging, challenged him to a race.\nThe other animals gathered to watch. They were curious to see if the slow turtle could beat the quick rabbit. With a wink and a grin, the rabbit accepted the challenge. They decided to race to the big oak tree on the other side of the meadow.\non the day of the race, the rabbit zoomed off as soon as the signal was given. He dashed ahead, laughing at how far he was pulling away from the turtle. Feeling overly confident, the rabbit decided to take a nap under a shady tree, believing he had plenty of time to spare.\nMeanwhile, the turtle ambled along at his slow but steady pace. He didn’t stop or get distracted. As he passed by the sleeping rabbit, he thought about how determination and perseveran

In [25]:
## text spltter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
splits = text_splitter.split_documents(data)
splits

[Document(metadata={'source': 'data/story.txt'}, page_content="time, in a lush green forest, there lived a speedy rabbit and a slow-moving turtle. The rabbit, confident in his speed, often boasted about how fast he could run. The turtle, tired of the rabbit's bragging, challenged him to a race.\nThe other animals gathered to watch. They were curious to see if the slow turtle could beat the quick rabbit. With a wink and a grin, the rabbit accepted the challenge. They decided to race to the big oak tree on the other side of the meadow."),
 Document(metadata={'source': 'data/story.txt'}, page_content='on the day of the race, the rabbit zoomed off as soon as the signal was given. He dashed ahead, laughing at how far he was pulling away from the turtle. Feeling overly confident, the rabbit decided to take a nap under a shady tree, believing he had plenty of time to spare.\nMeanwhile, the turtle ambled along at his slow but steady pace. He didn’t stop or get distracted. As he passed by the s

In [26]:
## create vector store
embeddings = HuggingFaceEmbeddings()
vectordb = Chroma.from_documents(documents=splits, embedding=embeddings)
vectordb

<langchain_chroma.vectorstores.Chroma at 0x7daba21a2750>

In [27]:
query = 'rabbit woke up and stretched, and what is he realized'
docs = vectordb.similarity_search(query)
docs[0].page_content

'on the day of the race, the rabbit zoomed off as soon as the signal was given. He dashed ahead, laughing at how far he was pulling away from the turtle. Feeling overly confident, the rabbit decided to take a nap under a shady tree, believing he had plenty of time to spare.\nMeanwhile, the turtle ambled along at his slow but steady pace. He didn’t stop or get distracted. As he passed by the sleeping rabbit, he thought about how determination and perseverance mattered, even if he was much slower.'

In [28]:
## save the vectordb in in local machine in particular directory
vectordb = Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory="./chroma_db")

In [29]:
new_vectordb = Chroma(persist_directory='./chroma_db', embedding_function=embeddings)
query_result = new_vectordb.similarity_search(query=query)
print(query_result[0].page_content)

on the day of the race, the rabbit zoomed off as soon as the signal was given. He dashed ahead, laughing at how far he was pulling away from the turtle. Feeling overly confident, the rabbit decided to take a nap under a shady tree, believing he had plenty of time to spare.
Meanwhile, the turtle ambled along at his slow but steady pace. He didn’t stop or get distracted. As he passed by the sleeping rabbit, he thought about how determination and perseverance mattered, even if he was much slower.


In [30]:
## similarity search with socre
result_with_score = new_vectordb.similarity_search_with_score(query=query)
result_with_score

[(Document(metadata={'source': 'data/story.txt'}, page_content='on the day of the race, the rabbit zoomed off as soon as the signal was given. He dashed ahead, laughing at how far he was pulling away from the turtle. Feeling overly confident, the rabbit decided to take a nap under a shady tree, believing he had plenty of time to spare.\nMeanwhile, the turtle ambled along at his slow but steady pace. He didn’t stop or get distracted. As he passed by the sleeping rabbit, he thought about how determination and perseverance mattered, even if he was much slower.'),
  0.8649135701278006),
 (Document(metadata={'source': 'data/story.txt'}, page_content='on the day of the race, the rabbit zoomed off as soon as the signal was given. He dashed ahead, laughing at how far he was pulling away from the turtle. Feeling overly confident, the rabbit decided to take a nap under a shady tree, believing he had plenty of time to spare.\nMeanwhile, the turtle ambled along at his slow but steady pace. He didn

In [31]:
retriever = new_vectordb.as_retriever(query=query)
retr_result = retriever.invoke(query)
retr_result[0].page_content

'on the day of the race, the rabbit zoomed off as soon as the signal was given. He dashed ahead, laughing at how far he was pulling away from the turtle. Feeling overly confident, the rabbit decided to take a nap under a shady tree, believing he had plenty of time to spare.\nMeanwhile, the turtle ambled along at his slow but steady pace. He didn’t stop or get distracted. As he passed by the sleeping rabbit, he thought about how determination and perseverance mattered, even if he was much slower.'