In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.utils.math import cosine_similarity
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import FAISS
from openai import OpenAI

import warnings 

# Install a process-wide "ignore" filter with no category/module restriction,
# so warnings raised after this point are not shown in the notebook output.
# NOTE(review): this also hides deprecation warnings from the imported
# libraries — confirm that silencing everything is intended.
warnings.filterwarnings(action="ignore")

## Preprocessing 
- API 키 불러오기 
- 데이터 불러오기 
- 데이터 전처리
    - 청크 단위 스플릿

In [None]:
# Load the raw text file (UTF-8) as LangChain documents.
loader = TextLoader("../data/data.txt", encoding="utf-8")
text = loader.load()

# Read the OpenAI API key from the first line of the key file.
# readline() keeps the trailing line terminator, so strip() is required:
# otherwise the key is handed to the OpenAI clients with "\n" appended.
# The `with` statement closes the file on exit; no explicit close() needed.
with open("../openai_api.txt", "r") as f:
    api_key = f.readline().strip()

# Split the loaded documents into chunks on blank lines ("\n\n"), using the
# configured chunk_size / chunk_overlap limits.
text_splitter = CharacterTextSplitter(chunk_size=30,
                                      separator="\n\n",
                                      chunk_overlap=10)
docs = text_splitter.split_documents(text)

## Embedding 
청크로 나눈 텍스트를 OpenAI 임베딩으로 벡터화하고, FAISS 벡터 스토어에 저장합니다.

In [None]:
# Embedding backend: OpenAI embeddings authenticated with `api_key`.
# Alternative noted by the author: HuggingFaceEmbeddings()
embeddings = OpenAIEmbeddings(openai_api_key=api_key)

# Configure an index builder backed by FAISS, then embed the chunks in
# `docs` and build the index from them.
index_creator = VectorstoreIndexCreator(vectorstore_cls=FAISS, embedding=embeddings)
index = index_creator.from_documents(docs)

# Persist the index's vector store locally under the name "faiss-nj".
vector_store = index.vectorstore
vector_store.save_local("faiss-nj")

In [None]:
# Chat model passed as `llm` to the index queries: gpt-3.5-turbo with
# temperature 0.9, authenticated with `api_key`.
chat = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.9,
    api_key=api_key,
)

In [None]:
# Question (Korean), roughly: "Where can I read the Yahoo serialized edition?"
question = "야후 연재본 어디서 보나요??"
index.query(question, llm=chat, verbose=True)

In [None]:
# Question (Korean), roughly: "What did you find regrettable after doing
# You Quiz?" — full-sentence phrasing.
question = "유퀴즈하고 아쉬웠던 점은 무엇인가요?"
index.query(question, llm=chat, verbose=True)

In [None]:
# Question (Korean), roughly: "Regrets when doing You Quiz" — short
# keyword-style phrasing of the same topic.
question = "유퀴즈 할때 아쉬운점"
index.query(question, llm=chat, verbose=True)

In [None]:
# Question (Korean), roughly: "What was regrettable when filming You Quiz" —
# another phrasing of the same topic.
question = "유퀴즈 찍을때 아쉬웠던 점"
index.query(question, llm=chat, verbose=True)

In [None]:
# Question (Korean), roughly: "What was regrettable after doing You Quiz" —
# noun-phrase form without the question ending.
question = "유퀴즈하고 아쉬웠던 점"
index.query(question, llm=chat, verbose=True)

In [None]:
# Question (Korean), roughly: "Regrets after doing You Quiz" — spaced,
# present-tense variant of the same topic.
question = "유퀴즈 하고 아쉬운점"
index.query(question, llm=chat, verbose=True)