In [1]:
# RAG + Cache + Guardrails

In [1]:
import os
import json
from typing import Dict,Any
import openai
from openai import OpenAI
import chromadb
from chromadb.utils import embedding_functions
openai.api_key = os.getenv('OPENAI_API_KEY')
class RagSystem:
    '''
        ChromaDB + openai 임베딩
    '''
    def __init__(self,name='temp'):
        self.client = chromadb.Client()
        self.collection = self.client.get_or_create_collection(name = name)
        self.embed_fn = embedding_functions.OpenAIEmbeddingFunction(
            api_key = openai.api_key,
            model_name = 'text-embedding-3-small'
        )
        if len(self.collection.get()) == 0:
            docs = [
                ('대한민국 수도','서울입니다.'),
                ('미국 수도','워싱턴 DC 입니다.'),
                ('AI 정의','인간의 지능을 모방한 기술입니다.')
            ]
            for doc_id ,(title,text) in enumerate(docs):
                self.collection.add(
                    documents=[text],
                    metadata = [{'title':title}],
                    ids = [str(doc_id)],
                    embedding_function = self.embed_fn
                )
    def query(self, question:str) -> str:
        embedding = self.embed_fn(question)
        results = self.collection.query(
            query_embeddings = [embedding],
            n_results=1
        )
        if results['documents'][0]:
            doc_text = results['documents'][0][0]
            return f'RAG 기반 답변 : {doc_text}'
        else: # 문서에 없으면
            client = OpenAI()
            response = client.chat.completions.create(
                model='gpt-5-nano',                    
                messages=[{
                    'role':'user',
                    'content' : question
                }] 
            )       
            return response.choices[0].message.content

In [4]:
import chromadb
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
import os


def call_llm(question:str) -> str:
    client = OpenAI()
    response = client.chat.completions.create(
        model='gpt-5-nano',                    
        messages=[{
            'role':'user',
            'content' : question
        }] 
    )       
    return response.choices[0].message.content

class SimpleCache:
  def __init__(self):
    self.cache = {}  # 딕셔너리
    self.hits = 0
    self.misses = 0
  def get(self, key):
    if key in self.cache:
      self.hits += 1
      return self.cache[key]
    self.misses += 1
    return None
  def set(self,key,value):
    self.cache[key] = value
  def state(self):
    total = self.hits + self.misses
    hit_rate = self.hits / total*100 if total > 0 else 0
    return{
        'hits':self.hits,
        'misses':self.misses,
        'hit_rate':hit_rate
    }

class SemanticCache:
  def __init__(self,name = 'semantic_cache'):
    self.client = chromadb.Client()
    self.embed_fn = OpenAIEmbeddingFunction(
      api_key=os.getenv('OPENAI_API_KEY'),
      model_name="text-embedding-3-small"
    )
    self.collection = self.client.get_or_create_collection(
        name = name,
        embedding_function=self.embed_fn,
        metadata={'hnsw:space':'cosine'}
    )
  def get(self,query,threshold=0.20):
    results = self.collection.query(
        query_texts=[query],
        n_results = 1
    )
    # print(f'get results : {results}')

    if results['distances'][0] and results['distances'][0][0] < threshold:
      return results['metadatas'][0][0]['response']
    return None
  def set(self, query, response):
    import uuid  # unique id 를 자동 생성
    self.collection.add(
        documents=[query],
        metadatas=[{'response':response}],
        ids=[str(uuid.uuid4())]
    )

class MulltiLevelCache:
  def __init__(self) -> None:
    self.l1_cach = SimpleCache()  # 메모리방식 dictionary   완전일치
    self.l2_cach = SemanticCache() # ChoromaDB 벡터DB  유사도방식
  def stats(self):
    print(f'L1 catch: {self.l1_cach.cache}')
  def get(self,key):
    cached = self.l1_cach.get(key)
    if cached:
      print('L1 cache')
      return cached
    cached = self.l2_cach.get(key)
    if cached:
      print('L2 cache')
      self.l1_cach.set(key,cached)
      return cached
    # LLM 호출
    print('LLM')
    response = call_llm(key)
    self.l1_cach.set(key,response)
    self.l2_cach.set(key,response)
    return response
class GuardrailsSystem: # 사용자 방식
    def __init__(self):
        self.bad_words = ['욕설','비속어','나쁜말']
    def validate_input(self, text:str):
        if len(text.strip()) == 0:
            return False, '입력이 비어 있습니다'
        for b in self.bad_words:
            if b in text:
                return False, '입력에 허용되지 않는 단어가 포함되어 있습니다.'
        return True, 'ok'
    def validate_output(self, text:str):
        for b in self.bad_words:
            if b in text:
                return False, '출력에 허용되지 않는 단어가 포함되어 있습니다.'
        return True, 'ok'

class CacheSystem:
    def __init__(self):
        self.cache = MulltiLevelCache()
    def get(self, question:str):
        return self.cache.get(question)
    def set(self, question:str, answer:str):
        self.cache.l1_cach.set(question,answer)
        self.cache.l2_cach.set(question,answer)

class LLMApplication:
    def __init__(self):
        self.rag = RagSystem()
        self.cache = CacheSystem()
        self.guardrails = GuardrailsSystem()
    def query(self, question:str):
        # 1.입력 검증
        valid,msg = self.guardrails.validate_input(question)
        if not valid:
            return "error"
        # 2.캐쉬 확인
        cached  = self.cache.get(question)
        if cached:
            return cached
        # 3.RAG 실행
        response = self.rag.query(question)
        # 4.출력 검증
        valid,msg = self.guardrails.validate_input(response)
        if not valid:
            return "error"
        self.cache.set(question,response)
        return response   

In [5]:
# 실행
app = LLMApplication()
question = "대한민국의 수도는 어디인가요?"
answer = app.query(question)
print(answer)

LLM
서울특별시(일반적으로 서울)입니다.
