In [1]:
!pip install chromadb tiktoken transformers sentence_transformers openai langchain pypdf

Collecting chromadb
  Downloading chromadb-0.4.19-py3-none-any.whl (505 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m506.0/506.0 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tiktoken
  Downloading tiktoken-0.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
Collecting sentence_transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting openai
  Downloading openai-1.3.9-py3-none-any.whl (221 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m221.4/221.4 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.0.350-py3-none-any.whl (809 kB)
[2K     [9

In [2]:
import openai
import os

In [3]:
# SECURITY: never hardcode API keys in a notebook -- they leak through version control
# and shared copies. Any key that was previously committed in this cell should be
# treated as compromised and revoked.
from getpass import getpass

# Reuse a key already present in the environment; otherwise prompt for it without
# echoing the value or storing it in the notebook source.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')

In [4]:
import tiktoken

# Module-level encoder shared by `tiktoken_len`, built once from the 'cl100k_base' encoding.
tokenizer = tiktoken.get_encoding('cl100k_base')


def tiktoken_len(text):
    """Return the number of tokens `tokenizer` produces when encoding `text`."""
    return len(tokenizer.encode(text))

In [5]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader

In [7]:
# Location of the manual to index. NOTE(review): this looks like a Colab-style absolute
# path; adjust PDF_PATH when running in another environment.
PDF_PATH = '/content/DJI_Osmo_Pocket_3_User_Manual_v1.0_en.pdf'
CHUNK_SIZE = 500     # maximum chunk length, measured by tiktoken_len (tokens)
CHUNK_OVERLAP = 50   # overlap between consecutive chunks, in the same unit

loader = PyPDFLoader(PDF_PATH)
# Use load() (one Document per PDF page) rather than load_and_split(): the latter
# first runs LangChain's default character-based splitter, so the token-based
# splitter below would be re-splitting text that had already been split once.
pages = loader.load()

# Split the pages into chunks whose size is measured in tokens via tiktoken_len.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=tiktoken_len,
)
texts = text_splitter.split_documents(pages)

In [10]:
from langchain.embeddings import HuggingFaceEmbeddings

# Sentence-embedding model used to vectorise the document chunks.
# NOTE(review): the model id suggests a Korean SBERT checkpoint, while the indexed
# manual ('..._en.pdf') and the sample query are English -- confirm this is intended.
model_name = 'jhgan/ko-sbert-nli'
model_kwargs = dict(device='cpu')                # run the model on the CPU
encode_kwargs = dict(normalize_embeddings=True)  # ask the encoder for normalised vectors

hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

In [11]:
# Embed every chunk with `hf` and index the vectors in a Chroma vector store.
docsearch = Chroma.from_documents(documents=texts, embedding=hf)

In [13]:
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Chat model that generates the answers.
# NOTE(review): binding the model to the name `openai` shadows the `openai` module
# imported earlier in the notebook; the name is kept because other cells may rely on it.
openai = ChatOpenAI(
    model_name='gpt-3.5-turbo',
    temperature=0,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],  # print the answer to stdout as it streams
)

# Retriever over the Chroma index using MMR search with k=3 and fetch_k=10
# (per the LangChain docs: fetch_k candidates are considered, k documents are returned).
retriever = docsearch.as_retriever(
    search_type='mmr',
    search_kwargs=dict(k=3, fetch_k=10),
)

# Retrieval QA chain of type 'stuff'; return_source_documents=True makes the result
# include the retrieved chunks under 'source_documents' alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=openai,
    chain_type='stuff',
    retriever=retriever,
    return_source_documents=True,
)

In [16]:
# Ask a single question. The chain's input key is 'query'; the answer is streamed to
# stdout by the model's callback and also returned in the result dict.
query = 'Is Pocket 3 capable of 4K shooting?'
result = qa({'query': query})

Yes, the DJI Osmo Pocket 3 is capable of shooting in 4K resolution. It can shoot in 4K at 3840x2160 resolution at various frame rates such as 24, 25, and 30fps.

In [17]:
# Display the full response dict: the 'query', the generated 'result' text, and the
# retrieved 'source_documents' that were used as context.
result

{'query': 'Is Pocket 3 capable of 4K shooting?',
 'result': 'Yes, the DJI Osmo Pocket 3 is capable of shooting in 4K resolution. It can shoot in 4K at 3840x2160 resolution at various frame rates such as 24, 25, and 30fps.',
 'source_documents': [Document(page_content='DJI Osmo Pocket 3 User Manual© 2023 DJI All Rights Reserved. \u200317Anti-Flicker Tap to select the anti-flicker frequency to reduce flicker caused \nby fluorescent lights or TV screens when shooting indoors. \nSelect the anti-flicker frequency according to the power grid \nfrequency in the region. The default anti-flicker frequency is set \nas Auto.\nTimecode Tap to set Timecode for the camera. Either reset the timecode \nor sync with the system time. The camera timecode can be \nsynchronized by a time code synchronizer using the USB-C port.\nNaming Management Tap to edit the naming rules for storage folders and files.\nScrn Off When Rec Tap to set the time. After recording starts, the screen will turn off \nafter the se