In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoTokenizer, AutoModel
import torch
import chromadb
import os
import gc
import numpy as np

# Configure the CUDA caching allocator through its environment variable:
# expandable segments plus a 512 MB split cap, to limit fragmentation when
# loading and running a large model.
# NOTE(review): this runs after `import torch`; it only takes effect if it is
# set before the first CUDA allocation in this process - confirm on the
# installed PyTorch version, or export it in the shell before starting Python.
# (There is no `torch.backends.cuda.max_split_size_mb` setting; the split cap
# is configured solely via this variable.)
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:512"

# Start from a clean slate: drop unreachable Python objects first so that any
# GPU blocks they were holding can then be handed back by empty_cache().
gc.collect()
torch.cuda.empty_cache()

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def _masked_mean_pool(last_hidden_state, attention_mask):
    """Average token vectors over real (non-padding) positions only.

    Args:
        last_hidden_state: tensor indexed as (batch, sequence, hidden).
        attention_mask: tensor indexed as (batch, sequence); 1 marks a real
            token and 0 marks padding, as produced by the tokenizer.

    Returns:
        A float32 tensor indexed as (batch, hidden).
    """
    mask = attention_mask.unsqueeze(-1).to(torch.float32)
    summed = (last_hidden_state.to(torch.float32) * mask).sum(dim=1)
    # clamp guards against division by zero for an all-padding row.
    token_counts = mask.sum(dim=1).clamp(min=1.0)
    return summed / token_counts


def process_pdf(pdf_file_path, batch_size=4,
                model_path="/home/jj/G2_Llama-3.1/", max_length=256):
    """Split a PDF into text chunks and embed each chunk with a local model.

    The PDF pages are concatenated, split into overlapping chunks of up to
    2000 characters, tokenized in batches and run through the model; each
    chunk's embedding is the mean of its final hidden states over the
    non-padding tokens.

    Args:
        pdf_file_path: path of the PDF to load.
        batch_size: number of chunks sent through the model at once.
        model_path: directory passed to ``from_pretrained`` for both the
            tokenizer and the model.
        max_length: token limit per chunk; longer chunks are truncated.
            NOTE(review): chunks hold up to 2000 characters, so with the
            default of 256 tokens the tail of a long chunk is probably not
            reflected in its embedding - consider raising this or lowering
            the chunk size.

    Returns:
        ``(embeddings, chunks)`` where ``embeddings`` is a float32 CPU tensor
        with one row per chunk and ``chunks`` is the list of chunk strings.
        If the PDF yields no text, an empty ``(0, 0)`` tensor and an empty
        list are returned without loading the model. If a ``RuntimeError``
        (for example CUDA out of memory) occurs while embedding, the error
        is printed and ``(None, None)`` is returned.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    loader = PyPDFLoader(pdf_file_path)
    pages = loader.load()
    text = " ".join(page.page_content for page in pages)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200,
        length_function=len,
        separators=["\n\n", "\n", " ", ""],
    )
    chunks = text_splitter.split_text(text)

    # Nothing to embed: skip the expensive model load and avoid calling
    # torch.cat on an empty list further down.
    if not chunks:
        return torch.empty((0, 0)), chunks

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Batched tokenization needs a pad token; reuse EOS for padding.
    tokenizer.pad_token = tokenizer.eos_token

    model = AutoModel.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True,
    ).to(device)
    model.config.pad_token_id = tokenizer.pad_token_id
    # Inference only: no gradients are computed, so gradient checkpointing
    # would add nothing. Make sure dropout and similar layers are disabled.
    model.eval()

    total_batches = (len(chunks) + batch_size - 1) // batch_size
    embeddings_list = []

    try:
        batch_starts = range(0, len(chunks), batch_size)
        for batch_number, start in enumerate(batch_starts, start=1):
            batch_chunks = chunks[start:start + batch_size]
            inputs = tokenizer(
                batch_chunks,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=max_length,
            ).to(device)

            with torch.no_grad():
                # Same mixed-precision behaviour as the deprecated
                # torch.cuda.amp.autocast(): active on CUDA, off on CPU.
                with torch.autocast(device_type=device.type,
                                    enabled=device.type == "cuda"):
                    outputs = model(**inputs)
                # Pool outside autocast, ignoring padding, so a chunk's
                # vector does not depend on what else is in its batch.
                batch_embeddings = _masked_mean_pool(
                    outputs.last_hidden_state, inputs["attention_mask"]
                )
                embeddings_list.append(batch_embeddings.cpu())

            # Release this batch's GPU tensors before the next forward pass.
            del outputs
            del inputs
            gc.collect()
            torch.cuda.empty_cache()

            print(f"Processed batch {batch_number}/{total_batches}")

    except RuntimeError as e:
        print(f"Error during processing: {e}")
        return None, None

    embeddings = torch.cat(embeddings_list, dim=0)
    return embeddings, chunks

# --- Build the vector index -------------------------------------------------
client = chromadb.Client()
# get_or_create keeps the cell re-runnable in the same kernel, where a plain
# create_collection would fail because the collection already exists.
collection = client.get_or_create_collection("pdf_embeddings")
# Remove entries left over from a previous run so the index only reflects
# the PDF processed below.
stale_ids = collection.get()["ids"]
if stale_ids:
    collection.delete(ids=stale_ids)

pdf_file_path = "/home/jj/Downloads/G2/Hand-Introduction_English.pdf"
embeddings, chunks = process_pdf(pdf_file_path, batch_size=4)

# process_pdf signals an embedding failure with (None, None); stop here with
# a clear message instead of failing later on enumerate(None).
if embeddings is None:
    raise RuntimeError(
        f"Could not embed {pdf_file_path}; see the error printed above."
    )

if chunks:
    # One batched insert instead of one call per chunk. The float() cast
    # makes the rows NumPy-convertible even if the model returned bfloat16.
    collection.add(
        embeddings=embeddings.detach().float().numpy().tolist(),
        documents=list(chunks),
        ids=[f"chunk_{i}" for i in range(len(chunks))],
    )

# --- Embed the query the same way the chunks were embedded -------------------
query = "What is the technical core of this system??"
model_path = "/home/jj/G2_Llama-3.1/"
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModel.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
).to(device)
model.config.pad_token_id = tokenizer.pad_token_id
model.eval()

query_inputs = tokenizer(
    query,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=256,
).to(device)

# torch.autocast replaces the deprecated torch.cuda.amp.autocast(); it is
# active on CUDA and disabled on CPU, matching the previous behaviour.
with torch.no_grad(), torch.autocast(device_type=device.type,
                                     enabled=device.type == "cuda"):
    query_outputs = model(**query_inputs)
    # A single sequence has no padding, so a plain mean over the token axis
    # already averages real tokens only.
    query_embedding = query_outputs.last_hidden_state.mean(dim=1)

# Cast to float32 before leaving torch: NumPy cannot represent bfloat16.
query_embedding_list = query_embedding.float().cpu().numpy().flatten().tolist()

# --- Retrieve and show the closest chunks -----------------------------------
results = collection.query(
    query_embeddings=[query_embedding_list],
    n_results=3,
)

for doc in results['documents'][0]:
    print("\nRelevant text chunk:")
    print(doc)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00,  7.24it/s]
  with torch.no_grad(), torch.cuda.amp.autocast():


Processed batch 1/2
Processed batch 2/2


Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00,  8.72it/s]



Relevant text chunk:
Figure 1. System flow chart
The following is a detailed description of each function.
- User interface: Users can operate through the interface and scan their hands through the camera. When not in 
use, the user can cancel the operation through the interface button, as shown in Figure 2.
Translated from Chinese (Simplified) to English - www.onlinedoctranslator.com Figure 2: User Interface
- Image recognition real-time data processing: The images generated by the scan are processed by the system 
MediapipeImage recognition analyzes the palm, identifies the points at the finger joints and connects the two 
points into a line, calculates the joint angles and generates data, as shown in Figure 3.
Figure 3MediapipeImage recognition (the numbers in the figure are the bending angles)
- Remote control: Utilizing data generated by image recognitionWi-FiBy performing wireless remote 
transmission, users can perform contactless operations regardless of the distance, as shown

  with torch.no_grad(), torch.cuda.amp.autocast():
