In [14]:
!pip install langchain chromadb tiktoken transformers sentence_transformers pypdf

Collecting pypdf
  Downloading pypdf-3.17.2-py3-none-any.whl (277 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m277.9/277.9 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pypdf
Successfully installed pypdf-3.17.2


In [9]:
import tiktoken
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Shared tiktoken encoder ('cl100k_base') used to measure text length in tokens.
tokenizer = tiktoken.get_encoding('cl100k_base')


def tiktoken_len(text):
    """Return the number of tokens `text` encodes to with the module-level `tokenizer`."""
    return len(tokenizer.encode(text))

In [21]:
#from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader

# Path of the PDF manual to index.
# NOTE(review): absolute '/content/...' path — presumably a Colab upload
# location; adjust when running elsewhere.
pdf_path = '/content/DJI_Osmo_Pocket_3_User_Manual_v1.0_en.pdf'

# Read the PDF and split it into documents (load_and_split called with its defaults).
loader = PyPDFLoader(pdf_path)
pages = loader.load_and_split()

In [22]:
# Split the loaded pages into chunks. Chunk length is measured with
# tiktoken_len (token count) instead of the splitter's default length function.
splitter_options = dict(
    length_function=tiktoken_len,
    chunk_size=500,
    chunk_overlap=0,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

docs = text_splitter.split_documents(pages)

In [23]:
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model settings: run on CPU and normalize the produced vectors.
# NOTE(review): the model id suggests a Korean SBERT model while the indexed
# PDF file name ends in `_en` — confirm this model suits the document.
model_name = 'jhgan/ko-sbert-nli'
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=True)

hf = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

In [24]:
# Embed every chunk with `hf` and index it in a Chroma store
# (no persist_directory is given for this one).
db = Chroma.from_documents(documents=docs, embedding=hf)

In [26]:
# query it
query = '4k recording'

# Keep the search hits in their own variable so `docs` keeps holding the full
# chunk list for any later indexing step instead of being replaced by the hits.
hits = db.similarity_search(query)

print(hits[0].page_content)

DJI Osmo Pocket 3 User Manual© 2023 DJI All Rights Reserved.  29Audio-to-video Sync When enabled, the transmitter will record audio files independently 
while the camera is recording video. The recorded audio format 
of the transmitter is 24-bit mono WAV. When recording for an 
extended period, the audio file will be separated automatically every 
30 minutes. The transmitter stops recording when the storage is full.
Low Cut When enabled, the transmitter will automatically filter low-frequency 
(below 150 Hz) sounds, thereby making recordings cleaner.
32-Bit Float Recording When enabled, the transmitter can independently record audio 
files in 32-bit float, which provides larger dynamic range in post-
production audio correction.
Format Transmitter 1/
Transmitter 2Tap to format transmitter 1 or transmitter 2. Formatting will 
permanently delete all data on transmitter 1 or transmitter 2. Make 
sure to back up all required data before formatting.
Transmitter Version Displays the firmware

In [27]:
# save to disk
# Index the complete chunk set. `docs` is not reliable at this point because an
# earlier query cell may have rebound it to a handful of search hits, which
# would persist only those hits; split `pages` again to be sure.
# NOTE(review): re-running this cell presumably adds the chunks to
# './chroma_db' again — confirm, and clear the directory if duplicates matter.
chunks = text_splitter.split_documents(pages)
db2 = Chroma.from_documents(chunks, hf, persist_directory='./chroma_db')

# Sanity-check search against the persisted store.
db2_hits = db2.similarity_search(query)

In [28]:
# Reopen the store from './chroma_db', passing the same embedding function
# (`hf`) so queries are embedded the same way as the stored chunks.
db3 = Chroma(persist_directory='./chroma_db', embedding_function=hf)

# Dedicated name for the hits; `docs` is reserved for the chunk list.
db3_hits = db3.similarity_search(query)

print(db3_hits[0].page_content)

DJI Osmo Pocket 3 User Manual© 2023 DJI All Rights Reserved.  29Audio-to-video Sync When enabled, the transmitter will record audio files independently 
while the camera is recording video. The recorded audio format 
of the transmitter is 24-bit mono WAV. When recording for an 
extended period, the audio file will be separated automatically every 
30 minutes. The transmitter stops recording when the storage is full.
Low Cut When enabled, the transmitter will automatically filter low-frequency 
(below 150 Hz) sounds, thereby making recordings cleaner.
32-Bit Float Recording When enabled, the transmitter can independently record audio 
files in 32-bit float, which provides larger dynamic range in post-
production audio correction.
Format Transmitter 1/
Transmitter 2Tap to format transmitter 1 or transmitter 2. Formatting will 
permanently delete all data on transmitter 1 or transmitter 2. Make 
sure to back up all required data before formatting.
Transmitter Version Displays the firmware

In [29]:
# Top-3 hits with relevance scores; each entry is a (document, score) pair.
# Stored under its own name so `docs` is not rebound to a list of tuples.
scored_hits = db3.similarity_search_with_relevance_scores(query, k=3)
best_doc, best_score = scored_hits[0]

# The printed labels are Korean for "most similar document" and "document similarity".
print('가장 유사한 문서: \n\n {}\n\n'.format(best_doc.page_content))
print('문서 유사도:\n {}'.format(best_score))

가장 유사한 문서: 

 DJI Osmo Pocket 3 User Manual© 2023 DJI All Rights Reserved.  29Audio-to-video Sync When enabled, the transmitter will record audio files independently 
while the camera is recording video. The recorded audio format 
of the transmitter is 24-bit mono WAV. When recording for an 
extended period, the audio file will be separated automatically every 
30 minutes. The transmitter stops recording when the storage is full.
Low Cut When enabled, the transmitter will automatically filter low-frequency 
(below 150 Hz) sounds, thereby making recordings cleaner.
32-Bit Float Recording When enabled, the transmitter can independently record audio 
files in 32-bit float, which provides larger dynamic range in post-
production audio correction.
Format Transmitter 1/
Transmitter 2Tap to format transmitter 1 or transmitter 2. Formatting will 
permanently delete all data on transmitter 1 or transmitter 2. Make 
sure to back up all required data before formatting.
Transmitter Version Display

# FAISS

In [30]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.4


In [31]:
from langchain.vectorstores import FAISS

# Rebuild the inputs for the FAISS index: reload the PDF, split it with the
# same splitter settings as before, and create an embedding object `ko` with
# the same model settings used for `hf`.
pages = loader.load_and_split()

splitter_options = dict(
    length_function=tiktoken_len,
    chunk_size=500,
    chunk_overlap=0,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
docs = text_splitter.split_documents(pages)

model_name = 'jhgan/ko-sbert-nli'
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=True)
ko = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

In [32]:
# Embed every chunk with `ko` and build a FAISS index from the vectors.
# Note that this rebinds `db`, which previously referred to the Chroma store.
db = FAISS.from_documents(documents=docs, embedding=ko)

In [33]:
query = 'Who is DJI'

# Keep the hits separate from `docs`, the chunk list the FAISS index was built
# from, so the chunk list stays available for later cells.
faiss_hits = db.similarity_search(query)
print(faiss_hits[0].page_content)


If you have any questions about this document, please 
contact DJI by sending a message to DocSupport@dji.com .
DJI and OSMO are trademarks of DJI. 
Copyright © 2023 DJI OSMO All Rights Reserved.WE ARE HERE FOR YOU
Contact 
DJI SUPPORT
https://www.dji.com/osmo-pocket-3/downloadsThis content is subject to change.


In [36]:
# Same query as above, but ask for relevance scores alongside the hits; the
# bare name on the last line displays the result as the cell output.
docs_and_scores = db.similarity_search_with_relevance_scores(query=query)
docs_and_scores

[(Document(page_content='If you have any questions about this document, please \ncontact DJI by sending a message to DocSupport@dji.com .\nDJI and OSMO are trademarks of DJI. \nCopyright © 2023 DJI OSMO All Rights Reserved.WE ARE HERE FOR YOU\nContact \nDJI SUPPORT\nhttps://www.dji.com/osmo-pocket-3/downloadsThis content is subject to change.', metadata={'source': '/content/DJI_Osmo_Pocket_3_User_Manual_v1.0_en.pdf', 'page': 39}),
  0.42279477925215225),
 (Document(page_content='DJI Osmo Pocket 3 User Manual© 2023 DJI All Rights Reserved. \u200327DJI Mic 2 Transmitter Status LEDs\n1 2\nDJI Mic 2 Transmitter Connection\nFollow the steps below to connect to DJI Mic 2 (hereinafter referred to as “transmitter”).\n1. Swipe down from the top of the screen and enter the control menu. Tap the settings icon \nand select Wireless Mic > TX1/TX2, and the camera is ready to link with a transmitter.\n2. Press and hold the power button of the transmitter for two seconds to power on.1 Recording Status

In [37]:
# Write the FAISS index to the local folder 'faiss_index'.
db.save_local(folder_path='faiss_index')

In [None]:
new_db =