# Hugging Face Enbedding 모델 적용 

In [3]:
pip install faiss-cpu

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Defaulting to user installation because normal site-packages is not writeable
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp39-cp39-macosx_11_0_arm64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp39-cp39-macosx_11_0_arm64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0.post1
Note: you may need to restart the kernel to use updated packages.


In [4]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Hugging Face Embedding 모델 로드
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"  # 변경 가능
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# 샘플 데이터
texts = [
    "이 제품은 훌륭합니다.",
    "배송이 너무 느립니다.",
    "가성비가 뛰어난 제품입니다.",
    "제품 품질이 좋지만 가격이 비쌉니다.",
    "배송이 빠르고 제품이 좋습니다."
]

# 텍스트 데이터를 벡터화하여 벡터 저장소 생성
vector_store = FAISS.from_texts(texts, embeddings)
print(f"'{embedding_model_name}' 임베딩 모델로 벡터화 완료!")

# 검색 테스트
query = "배송 속도는 어떤가요?"
results = vector_store.similarity_search(query, k=2)
print("검색 결과:")
for result in results:
    print(result.page_content)

'sentence-transformers/all-MiniLM-L6-v2' 임베딩 모델로 벡터화 완료!
검색 결과:
배송이 너무 느립니다.
가성비가 뛰어난 제품입니다.


# Hugging Face Text Generation 모델 적용

In [9]:
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://download.pytorch.org/whl/cpu
Note: you may need to restart the kernel to use updated packages.


In [None]:
from transformers import pipeline
import torch

# Hugging Face Text Generation 모델 로드
text_generation_model_name = "gpt2"
text_gen_pipeline = pipeline(
    "text-generation",
    model=text_generation_model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

# 입력 텍스트
input_text = "배송이 느린 경우 고객에게 어떻게 대응하면 좋을까요?"

# 텍스트 생성
generated_text = text_gen_pipeline(
    input_text,
    max_new_tokens=20,  # 생성할 새 토큰 수 설정
    num_return_sequences=1
)
print("생성된 텍스트:")
print(generated_text[0]["generated_text"])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
