In [None]:
# Install the notebook's dependencies. `%pip` (unlike `!pip`) always installs
# into the environment of the running kernel; `-q` keeps the log short.
%pip install -q langchain transformers sentence-transformers faiss-cpu

Collecting sentence-transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.met

In [None]:
import torch
from transformers import DPRQuestionEncoderTokenizer, DPRQuestionEncoder, DPRContextEncoderTokenizer, DPRContextEncoder
from sentence_transformers import SentenceTransformer, util
import faiss
import numpy as np
from transformers import GPT2Tokenizer, GPT2LMHeadModel

In [None]:
# Prepare a small toy corpus; each list entry is one retrievable document.
# NOTE: every entry needs a trailing comma. Without it Python silently
# concatenates adjacent string literals into a single document.
documents = [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "Madrid is the capital of Spain.",
    "Rome is the capital of Italy.",
    "London is the capital of the United Kingdom.",
    "goorm is the capital of the AI Kingdom",
]

In [None]:
# Load the DPR context (document) encoder and its tokenizer.
# Both come from the same checkpoint, so the name is spelled once.
CONTEXT_CHECKPOINT = "facebook/dpr-ctx_encoder-single-nq-base"
context_tokenizer = DPRContextEncoderTokenizer.from_pretrained(CONTEXT_CHECKPOINT)
context_encoder = DPRContextEncoder.from_pretrained(CONTEXT_CHECKPOINT)

In [None]:
# Convert each document into an embedding with the DPR context encoder.
document_embeddings = []
# Inference only: disable autograd so no computation graph is built or kept
# alive for each forward pass.
with torch.no_grad():
    for doc in documents:
        inputs = context_tokenizer(doc, return_tensors="pt")
        outputs = context_encoder(**inputs)
        # Tensors produced under no_grad carry no graph, so they convert to
        # NumPy directly.
        document_embeddings.append(outputs.pooler_output.numpy())

In [None]:
# Stack the list of per-document embeddings into a single NumPy array.
# Note: this rebinds `document_embeddings` from a Python list to an ndarray;
# the index cell below reads its `.shape[1]`.
document_embeddings = np.vstack(document_embeddings)

https://github.com/facebookresearch/faiss


FAISS (Facebook AI Similarity Search) is a library for fast similarity search over dense vectors: given a query embedding, it quickly finds the stored embeddings (for example, of multimedia documents) that are most similar to it. It addresses the limitations of traditional query search engines, which are optimized for hash-based lookups, and provides more scalable similarity search.

In [None]:
# Create the FAISS index and add the document embeddings.
# The DPR encoders are trained to score question/passage pairs by dot product,
# so an exact inner-product index (IndexFlatIP) ranks documents with the same
# similarity the model was trained on; L2 distance can order them differently.
index = faiss.IndexFlatIP(document_embeddings.shape[1])
index.add(document_embeddings)

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.
Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRContextEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification mod

In [None]:
# Load the DPR question encoder and its tokenizer (same checkpoint for both).
QUESTION_CHECKPOINT = "facebook/dpr-question_encoder-single-nq-base"
question_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(QUESTION_CHECKPOINT)
question_encoder = DPRQuestionEncoder.from_pretrained(QUESTION_CHECKPOINT)

# Load the generation model and its tokenizer (same checkpoint for both).
GENERATION_CHECKPOINT = "gpt2-medium"
generation_tokenizer = GPT2Tokenizer.from_pretrained(GENERATION_CHECKPOINT)
generation_model = GPT2LMHeadModel.from_pretrained(GENERATION_CHECKPOINT)

In [None]:
# RAG model class definition
class RAGModel:
    """Minimal retrieve-then-generate pipeline.

    ``retrieve`` embeds a question with the question encoder and looks up the
    single best-matching document through the vector index. ``generate`` feeds
    the question plus that document to the generation model. ``run`` chains
    the two steps.

    Args:
        question_encoder: Model called with the question tokenizer's outputs
            as keyword arguments; its result must expose ``pooler_output``.
        context_encoder: Stored on the instance; no method here uses it.
        generation_model: Object exposing ``generate(input_ids, ...)``.
        index: Object exposing ``search(embedding, k=...)`` that returns
            ``(scores, ids)``.
        tokenizer: Tokenizer of the generation model. It must be callable and
            provide ``decode`` and ``eos_token_id``.
        question_tokenizer: Optional tokenizer for questions. When ``None``,
            the notebook-level ``question_tokenizer`` is looked up at call
            time (the original behaviour).
        documents: Optional sequence of documents whose positions match the
            ids stored in ``index``. When ``None``, the notebook-level
            ``documents`` is looked up at call time (the original behaviour).
    """

    def __init__(self, question_encoder, context_encoder, generation_model, index, tokenizer,
                 question_tokenizer=None, documents=None):
        self.question_encoder = question_encoder
        self.context_encoder = context_encoder
        self.generation_model = generation_model
        self.index = index
        self.tokenizer = tokenizer
        self.question_tokenizer = question_tokenizer
        self.documents = documents

    def retrieve(self, question):
        """Return the document whose embedding best matches ``question``.

        Raises:
            IndexError: If the index returns an id that does not address a
                document, e.g. the ``-1`` FAISS reports when it has no result.
                Without this check ``-1`` would silently select the last
                document.
        """
        # Prefer explicitly injected dependencies; fall back to the notebook
        # globals so existing five-argument construction keeps working.
        tokenize = self.question_tokenizer if self.question_tokenizer is not None else question_tokenizer
        corpus = self.documents if self.documents is not None else documents

        # Encode the question into an embedding (inference only, no autograd).
        question_inputs = tokenize(question, return_tensors="pt")
        with torch.no_grad():
            question_outputs = self.question_encoder(**question_inputs)
        question_embedding = question_outputs.pooler_output.detach().numpy()

        # Look up the single nearest document in the index.
        _, neighbor_ids = self.index.search(question_embedding, k=1)
        best = int(neighbor_ids[0][0])
        if not 0 <= best < len(corpus):
            raise IndexError(
                f"index returned document id {best}, but only {len(corpus)} documents are available"
            )
        return corpus[best]

    def generate(self, question, retrieved_doc, max_length=50):
        """Generate text from the question followed by the retrieved document.

        Args:
            question: The user question.
            retrieved_doc: Document text appended to the question as context.
            max_length: Forwarded to ``generation_model.generate``; defaults
                to the previously hard-coded 50.

        Returns:
            The decoded first output sequence with special tokens skipped.
        """
        # Combine the question and the retrieved document into one prompt.
        input_text = question + " " + retrieved_doc
        # Calling the tokenizer (instead of ``encode``) also yields the
        # attention mask, which ``generate`` asks for explicitly.
        encoded = self.tokenizer(input_text, return_tensors="pt")

        # Pass the mask and an explicit pad token id so generation does not
        # have to guess them (and warn about it).
        outputs = self.generation_model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            pad_token_id=self.tokenizer.eos_token_id,
            max_length=max_length,
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def run(self, question):
        """Retrieve a document for ``question``, print it, and return the generated answer."""
        retrieved_doc = self.retrieve(question)
        print(f"Retrieved Document: {retrieved_doc}")
        return self.generate(question, retrieved_doc)

# Create the RAG model instance from the encoders, generator and index loaded above.
rag_model = RAGModel(
    question_encoder=question_encoder,
    context_encoder=context_encoder,
    generation_model=generation_model,
    index=index,
    tokenizer=generation_tokenizer,
)

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/493 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
# Test helper
def test_rag_model(question):
    """Run the notebook-level ``rag_model`` on ``question`` and print the result.

    ``rag_model.run`` is called first, then the question and the generated
    answer are printed on separate lines. Nothing is returned.
    """
    generated = rag_model.run(question)
    print(f"Question: {question}", f"Answer: {generated}", sep="\n")

def generate_pure_answer(question, max_length=50):
    """Generate text from ``question`` alone, with no retrieved context.

    Uses the notebook-level ``generation_tokenizer`` and ``generation_model``.

    Args:
        question: Prompt text given to the generation model.
        max_length: Forwarded to ``generate``; defaults to the previously
            hard-coded 50.

    Returns:
        The decoded first output sequence with special tokens skipped.
    """
    # Calling the tokenizer (instead of ``encode``) also yields the attention mask.
    encoded = generation_tokenizer(question, return_tensors="pt")
    # Pass the mask and an explicit pad token id so generation does not have
    # to guess them (and warn about it).
    outputs = generation_model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        pad_token_id=generation_tokenizer.eos_token_id,
        max_length=max_length,
    )
    return generation_tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# Example question: asks about the "AI Kingdom" entry that appears in `documents`.
question = "What is the capital of the AI Kingdom?"
test_rag_model(question)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Retrieved Document: London is the capital of the United Kingdom.goorm is the capital of the AI Kingdom
Question: What is the capital of the AI Kingdom?
Answer: What is the capital of the AI Kingdom? London is the capital of the United Kingdom.goorm is the capital of the AI Kingdom.

Goorm is the capital of the AI Kingdom.

Goorm is the capital of the AI


In [None]:
# Baseline: answer the same `question` with the generation model alone (no retrieval).
pure_answer = generate_pure_answer(question)
print("Pure Generation Answer:", pure_answer)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Pure Generation Answer: What is the capital of the AI Kingdom?

The capital of the AI Kingdom is the AI Kingdom.

What is the AI Kingdom's currency?

The AI Kingdom's currency is the AI Kingdom's currency.

What is
