# Embedding & Vector Store

In [3]:
from dotenv import load_dotenv
import os
load_dotenv()

True

In [None]:
# import library

import config
from pprint import pprint
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# NOTE(review): newer LangChain releases expose OpenAIEmbeddings from the
# `langchain_openai` package; this import path is deprecated — confirm it
# still resolves with the installed version.
from langchain.embeddings import OpenAIEmbeddings
from langchain_core.documents import Document
# NOTE(review): PineconeVectorStore is normally provided by the
# `langchain_pinecone` package, not by `pinecone` itself — this line likely
# raises ImportError; verify against the installed packages.
from pinecone import Pinecone, ServerlessSpec, PineconeVectorStore

In [None]:
# Notebook-wide settings.

# Pinecone connection details: the API key comes from the environment
# (None when the variable is not set); index and namespace are fixed here.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
INDEX_NAME = "blueribbon"
NAMESPACE = "blueribbon-ns-1"

# Model names taken from the project config module.
MODEL_NAME = config.model_name
EMBEDDING_NAME = config.embedding_name

# Text-splitting parameters taken from the project config module.
CHUNK_SIZE = config.chunk_size
CHUNK_OVERLAP = config.chunk_overlap

# Collection name and persistence directory taken from the project config module.
COLLECTION_NAME = config.collection_name
PERSIST_DIRECTORY = config.persist_directory


In [None]:
# Load the source text file and split it into chunks.
# Uses TextLoader and RecursiveCharacterTextSplitter.from_tiktoken_encoder().

## Data path
path = "data/b_ribbon.txt"

## Load the raw document(s) from disk
loader = TextLoader(path, encoding="utf-8")
load_docs = loader.load()

## Build the splitter; lengths are measured with the tiktoken encoder
## associated with MODEL_NAME.
splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    model_name=MODEL_NAME,
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split the documents that were already loaded above. Calling
# loader.load_and_split(splitter) here would read the file a second time and
# leave load_docs unused.
docs = splitter.split_documents(load_docs)

# Number of chunks produced.
print(len(docs))

## Pinecone client initialization

pc = Pinecone(api_key=PINECONE_API_KEY)
# NOTE(review): the embedding model name is hard-coded here although
# EMBEDDING_NAME is read from config — confirm which one is intended. The
# index dimension below must match whichever model is used.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# Create the Pinecone index if it does not exist yet, then connect to it.
# list_indexes() yields index description objects rather than plain strings,
# so the membership test has to be done against the index names; otherwise
# the check is always true and create_index() fails on an existing index.
if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=1536,  # output size of text-embedding-3-small
        metric="cosine",
        # AWS region identifiers are hyphenated ("us-east-1", not "us-east1").
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Connect to the index.
index = pc.Index(INDEX_NAME)

# Show statistics for the connected index.
index_info = index.describe_index_stats()
print(index_info)



# Turn a single question string into its embedding vector.
query = "Restaurant?"
embedding_query = embedding_model.embed_query(query)

# Report the container type and the length of the returned vector.
vector_type = type(embedding_query)
vector_length = len(embedding_query)
print(vector_type, vector_length)