In [1]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [2]:
import os
import json

In [3]:
# Location of the API keys file, relative to the working directory.
# Despite the .txt extension, load_api_keys parses this file as JSON.
keys_file_path = os.path.join('data', 'api_keys.txt')

# Helper that loads the API keys from a file
def load_api_keys(file_path):
    """Parse the JSON document stored at ``file_path`` and return it.

    Args:
        file_path: Path of a UTF-8 encoded text file whose content is JSON.

    Returns:
        Whatever ``json.load`` produces for the file's content.
    """
    with open(file_path, mode='r', encoding='utf-8') as key_file:
        return json.load(key_file)

# Load the API keys and pick out the two entries used later in the notebook:
# 'openAI_keys' is passed to the OpenAI client and 'pinecone_keys' to the
# Pinecone client.
api_keys = load_api_keys(keys_file_path)
openAI_keys = api_keys['openAI_keys']
pinecone_keys = api_keys['pinecone_keys']

In [4]:
from openai import OpenAI

# How many leading lines of the documents file get embedded, and with which model.
NUM_DOCUMENTS = 5
EMBEDDING_MODEL = "text-embedding-3-small"

client = OpenAI(api_key=openAI_keys)

# Every line of the file is treated as one document. readlines() keeps the
# trailing newline on each entry, and `lines` is reused later as the metadata
# text, so it is left untouched here.
with open('data/documents.txt', 'r', encoding="UTF-8") as f:
    lines = f.readlines()

# Embed the selected lines with one batched request instead of one request
# per line; the endpoint accepts a list of input strings.
embedding_list = []
documents = lines[:NUM_DOCUMENTS]
if documents:  # skip the request for an empty file rather than send an empty input list
    response = client.embeddings.create(
        input=documents,
        model=EMBEDDING_MODEL,
    )
    # Order by the index reported for each item so that embedding_list[i]
    # is the embedding of lines[i].
    embedding_list = [
        item.embedding
        for item in sorted(response.data, key=lambda item: item.index)
    ]

In [7]:
# Build one record per embedding: the position becomes the string id, the
# embedding is the vector, and the source line is kept as "assertion" metadata.
em_vectors = [
    {"id": str(idx), "values": vector, "metadata": {"assertion": lines[idx]}}
    for idx, vector in enumerate(embedding_list)
]

In [8]:
# Create the Pinecone client with the key read from the API keys file.
from pinecone import Pinecone
pc = Pinecone(api_key=pinecone_keys)

# Handle to the index named "rag"; the upsert in the next cell goes through it.
# NOTE(review): presumably this index already exists with a dimension matching
# the embedding model; confirm before running.
index = pc.Index("rag")

In [9]:
# Send all prepared records to the index; the call's return value is the cell output.
index.upsert(vectors=em_vectors)

{'upserted_count': 5}