# RAG

In [1]:
import os

# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import unicodedata

import torch
import pandas as pd
from tqdm import tqdm

import re

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
    BitsAndBytesConfig
)
from accelerate import Accelerator

from langchain_community.document_loaders import JSONLoader
from langchain_huggingface import HuggingFacePipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever

# Select the GPU when CUDA is available, otherwise fall back to the CPU,
# and print the choice so the cell output records which device was picked.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"device: {device}")

  from .autonotebook import tqdm as notebook_tqdm


device: cuda


## Config

In [2]:
# Chunking parameters. They are the defaults of process_json's
# chunk_size / chunk_overlap arguments.
CHUNK_SIZE = 256
CHUNK_OVERLAP = 128

# Hugging Face model id used to embed documents for the FAISS index.
EMBEDDING_MODEL = "intfloat/e5-large"

# Number of documents requested from the similarity and MMR retrievers.
K = 2

# Hugging Face model id of the causal language model that writes the answers.
LLM = "google/gemma-2-9b-it"

# Prompt given to the LLM. {context} receives the formatted retrieved
# captions and {question} receives the user's question.
PROMPT_TEMPLATE = """
You are an AI visual assistant surveillance operator that can analyze real-time traffic analysis and accident detection.

Respond to user's questions as accurately as possible.
Be careful not to answer with false information.

Using the provided caption information, describe the scene in a detailed manner.
{context}

Question: {question}

Answer:
"""

# Model loading mode read by setup_llm_pipeline; any other value loads the
# model without extra dtype/quantization arguments.
QUANTIZATION = "bf16" # "qlora", "bf16", "fp16"

# Upper bound on the number of tokens generated per answer.
MAX_NEW_TOKENS = 512

## Vector DB

In [3]:
def process_json(file_path, chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP):
    """Load the caption entries of a metadata JSON file as documents.

    Args:
        file_path: Path of the JSON file to read.
        chunk_size: Accepted for interface compatibility; not used here.
        chunk_overlap: Accepted for interface compatibility; not used here.

    Returns:
        A new list holding the documents produced by the loader for the
        jq expression ``.frame.[].caption`` (no further splitting is done).
    """
    caption_loader = JSONLoader(
        file_path=file_path,
        jq_schema=".frame.[].caption",
        text_content=False,
    )
    # Hand back a fresh list so callers never share the loader's own list.
    return list(caption_loader.load())

def create_vector_db(chunks, model_path=EMBEDDING_MODEL):
    """Build a FAISS vector store from documents.

    Args:
        chunks: Non-empty list of documents to embed and index.
        model_path: Hugging Face model id or local path of the embedding model.

    Returns:
        The FAISS vector store containing one entry per document.

    Raises:
        ValueError: If ``chunks`` is empty; there is nothing to index and the
            failure is reported here instead of deep inside the index build.
    """
    if not chunks:
        raise ValueError("create_vector_db() requires at least one document")

    # Use the GPU only when one is actually present; a hard-coded 'cuda'
    # makes the notebook unusable on CPU-only machines.
    embedding_device = 'cuda' if torch.cuda.is_available() else 'cpu'

    embeddings = HuggingFaceEmbeddings(
        model_name=model_path,
        model_kwargs={'device': embedding_device},
        # Normalized embeddings are requested from the encoder.
        encode_kwargs={'normalize_embeddings': True},
    )
    return FAISS.from_documents(chunks, embedding=embeddings)
    
def _resolve_json_path(path, base_directory):
    """Return the NFC-normalized location of ``path``, resolving relative paths against ``base_directory``."""
    normalized_path = unicodedata.normalize('NFC', path)
    if os.path.isabs(normalized_path):
        return normalized_path
    # normpath already collapses a leading "./". The earlier lstrip('./')
    # removed *every* leading '.' and '/' character, which silently renamed
    # dot-files (".x.json" -> "x.json") and dropped "../" components.
    return os.path.normpath(os.path.join(base_directory, normalized_path))


def process_jsons_from_dataframe(unique_paths, base_directory):
    """Build a vector store and an ensemble retriever for each JSON file.

    Args:
        unique_paths: Iterable of JSON file paths. Relative paths are resolved
            against ``base_directory``; absolute paths are used as given.
        base_directory: Directory used to resolve relative paths.

    Returns:
        A dict keyed by each file's base name without its final extension.
        Every value is a dict with the keys ``'db'`` (the FAISS store) and
        ``'retriever'`` (the ensemble retriever). Paths that share the same
        base name overwrite one another.
    """
    json_databases = {}

    for path in tqdm(unique_paths, desc="Processing JSONs"):
        full_path = _resolve_json_path(path, base_directory)
        json_title = os.path.splitext(os.path.basename(full_path))[0]

        print(f"Processing {json_title}...")

        chunks = process_json(full_path)
        db = create_vector_db(chunks)

        # Two dense retrievers over the same store (plain similarity and MMR)
        # plus a BM25 retriever built from the same documents.
        retriever_similarity = db.as_retriever(
            search_type="similarity",
            search_kwargs={'k': K}
        )

        retriever_mmr = db.as_retriever(
            search_type="mmr",
            search_kwargs={'k': K}
        )

        retriever_bm25 = BM25Retriever.from_documents(chunks)

        # All three retrievers are given the same weight.
        retriever = EnsembleRetriever(
            retrievers=[retriever_similarity, retriever_mmr, retriever_bm25],
            weights=[0.5, 0.5, 0.5]
        )

        json_databases[json_title] = {
                'db': db,
                'retriever': retriever
        }
    return json_databases

## DB 생성

In [8]:
# Directory that holds the metadata JSON files.
# NOTE(review): this is a machine-specific absolute path; change it before
# running the notebook elsewhere.
base_directory = '/home/jiyul/SPS_JY/TeletoVision_demo/models/data/' # Your Base Directory
# Map every entry name found in base_directory to itself.
db_list = {filename: filename for filename in os.listdir(base_directory)}
# Bare expression: displays the mapping as the cell output.
db_list

{'cam_07.mp4-meta_db.json': 'cam_07.mp4-meta_db.json',
 'demo_1.mp4-verb.json': 'demo_1.mp4-verb.json'}

In [9]:
# Alternative relative base directory, kept disabled:
# base_directory = 'data/' # Your Base Directory
# Index a single metadata file. db_list maps each name to itself, so this is
# just the file name, which is then resolved against base_directory.
unique_paths = [db_list['cam_07.mp4-meta_db.json']]
json_databases = process_jsons_from_dataframe(unique_paths, base_directory)

Processing JSONs:   0%|          | 0/1 [00:00<?, ?it/s]

Processing cam_07.mp4-meta_db...


Processing JSONs: 100%|██████████| 1/1 [00:04<00:00,  4.85s/it]


## MODEL Import

In [11]:
def setup_llm_pipeline():
    """Load the configured causal LM and wrap it as a LangChain LLM.

    Reads the module-level settings ``LLM`` (model id), ``QUANTIZATION``
    (``"qlora"``, ``"bf16"`` or ``"fp16"``) and ``MAX_NEW_TOKENS``.

    Returns:
        A ``HuggingFacePipeline`` around a text-generation pipeline that
        returns only the newly generated text.
    """
    model_id = LLM

    # Build only the arguments for the selected mode, so the 4-bit config is
    # not constructed unless it is actually requested.
    if QUANTIZATION == "qlora":
        load_kwargs = {
            "quantization_config": BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
            )
        }
    elif QUANTIZATION == "bf16":
        load_kwargs = {"torch_dtype": torch.bfloat16}
    elif QUANTIZATION == "fp16":
        # Pass a real torch.dtype, like the bf16 branch does; string dtype
        # names are not accepted by every transformers release.
        load_kwargs = {"torch_dtype": torch.float16}
    else:
        # Unknown mode: load with the library defaults, as before.
        load_kwargs = {}

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        # NOTE(review): this allows code shipped with the checkpoint to run;
        # confirm it is needed for the configured model.
        trust_remote_code=True,
        **load_kwargs
    )

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.use_default_system_prompt = False

    # No sampling options (temperature / do_sample) are passed, so the
    # pipeline's default generation settings apply.
    text_generation_pipeline = pipeline(
        model=model,
        tokenizer=tokenizer,
        task="text-generation",
        return_full_text=False,
        max_new_tokens=MAX_NEW_TOKENS,
    )

    return HuggingFacePipeline(pipeline=text_generation_pipeline)

In [12]:
# Load the model once; the resulting LangChain LLM is used by the RAG chain below.
llm = setup_llm_pipeline()

Loading checkpoint shards: 100%|██████████| 4/4 [00:15<00:00,  3.86s/it]


### example

In [20]:
# Bare expression: displays the per-file stores and retrievers built earlier.
json_databases

{'cam_07.mp4-meta_db': {'db': <langchain_community.vectorstores.faiss.FAISS at 0x7fd6589e4a30>,
  'retriever': EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7fd6589e4a30>, search_kwargs={'k': 2}), VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7fd6589e4a30>, search_type='mmr', search_kwargs={'k': 2}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x7fd658adc220>)], weights=[0.5, 0.5, 0.5])}}

In [17]:
# Query the ensemble retriever directly (no LLM involved) to inspect which
# captions come back for a sample query.
prompt_sample = 'person lying on the ground'
source = 'cam_07.mp4-meta_db'

docs = json_databases[source]['retriever'].invoke(prompt_sample)

for doc in docs:
    # Reported frame number is seq_num minus one
    # (presumably seq_num is 1-based; TODO confirm against the loader).
    print(f"frame : {doc.metadata['seq_num']-1}")
    print(doc.page_content)
    print("=========================================================")

frame : 22
A man is lying on the ground in the middle of a busy street.
frame : 15
A motorcyclist is laying on the ground in the middle of the street.
frame : 17
A motorcycle rider is laying on the ground in the middle of the street.
frame : 24
A motorcycle rider is laying on the ground with his motorcycle on its side.


## Langchain 을 이용한 추론

In [19]:
# docs = json_databases[source]['retriever'].invoke(prompt_sample)

def format_docs(docs):
    """Render retrieved documents as the context text for the prompt.

    Each document contributes two newline-terminated lines: a
    ``frame : N`` header, where N is the document's ``seq_num`` metadata
    minus one, followed by the document's page content.

    Args:
        docs: Iterable of documents exposing ``metadata['seq_num']`` and
            ``page_content``.

    Returns:
        The concatenated text, or an empty string when ``docs`` is empty.
    """
    pieces = []
    for item in docs:
        pieces.append(f"frame : {item.metadata['seq_num']-1}")
        pieces.append(item.page_content)
    # Terminate every piece with a newline, header and caption alike.
    return "".join(piece + '\n' for piece in pieces)

# Collected question/answer pairs for this session.
results = []

question = 'Find the frame with a person lying on the floor'
source = 'cam_07.mp4-meta_db'

# Retriever built earlier for the selected metadata file.
retriever = json_databases[source]['retriever']

template = PROMPT_TEMPLATE
prompt = PromptTemplate.from_template(template)

# Define the RAG chain: the question is sent to the retriever, whose documents
# are formatted into {context}, and is also passed through unchanged as
# {question}; the filled prompt goes to the LLM and the output is parsed
# to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print(f"Question: {question}")
full_response = rag_chain.invoke(question)

print(f"Answer: {full_response}\n")

results.append({
    "Question": question,
    "Answer": full_response
})

Question: Find the frame with a person lying on the floor
Answer: Frames 16, 17, 22, and 24 all show a person lying on the floor. 





