In [None]:
import logging
import sys

# Send every log record (DEBUG and above) to stdout so it shows up in the cell output.
# basicConfig already installs one stdout handler on the root logger; attaching a second
# StreamHandler on top of it made each record print twice. force=True replaces whatever
# handlers are already on the root logger, so re-running this cell stays idempotent
# instead of stacking up another handler per run.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True)

In [None]:
import os
# Show only whether the OpenAI key is configured. Leaving the raw lookup as the cell's
# last expression would render the secret value into the notebook's saved output.
openai_key_configured = bool(os.environ.get('OPENAI_API_KEY'))
openai_key_configured

In [None]:
# from llama_index.readers import BeautifulSoupWebReader
# documents = BeautifulSoupWebReader().load_data(['https://whatever.co/ja/'])

# from llama_index.readers import RssReader
# documents = RssReader().load_data(['https://whatever.co/ja/feed/'])

from llama_index import download_loader

# Obtain the WordpressReader loader class by name through download_loader.
WordpressReader = download_loader("WordpressReader")

# Credentials come from the environment; .get() yields None when a variable is unset.
wordpress_username = os.environ.get('WORDPRESS_USERNAME')
wordpress_password = os.environ.get('WORDPRESS_PASSWORD')

# Point the reader at the CMS and load its content into `documents`.
loader = WordpressReader(
    url="https://cms.whatever.co",
    username=wordpress_username,
    password=wordpress_password,
)
documents = loader.load_data()

In [None]:
from llama_index import LangchainEmbedding

from langchain.embeddings import HuggingFaceEmbeddings
# Hugging Face model name used for the embeddings.
EMBEDDING_MODEL_NAME = "oshizo/sbert-jsnli-luke-japanese-base-lite"

# Wrap the langchain HuggingFaceEmbeddings object in llama_index's LangchainEmbedding adapter.
hf_embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
embed_model = LangchainEmbedding(hf_embeddings)

# from langchain.embeddings import TensorflowHubEmbeddings
# embed_model = LangchainEmbedding(TensorflowHubEmbeddings())

In [None]:
from llama_index import GPTSimpleVectorIndex, LLMPredictor
from llama_index.indices.prompt_helper import PromptHelper
# index = GPTSimpleVectorIndex(documents)
# Prompt/chunk sizing settings handed to the index.
prompt_helper = PromptHelper(
    max_input_size=4000,  # maximum number of tokens in the LLM input
    num_output=256,  # number of tokens in the LLM output
    max_chunk_overlap=0,  # maximum number of overlapping tokens between chunks
    chunk_size_limit=500,  # number of tokens per chunk
    separator="。",  # separator (Japanese full stop)
)

# Build the vector index over the loaded documents with the custom prompt helper
# and the embedding model configured earlier in the notebook.
index = GPTSimpleVectorIndex(documents, prompt_helper=prompt_helper, embed_model=embed_model)

from langchain.chat_models import ChatOpenAI
# Chat model used at query time: gpt-3.5-turbo with temperature 0.
chat_llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
llm_predictor = LLMPredictor(llm=chat_llm)

In [None]:
# from llama_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
# Query the index through the chat-model predictor; verbose=True is passed to the call.
# The question is roughly: "What kind of work was there with NHK as the client?"
question = 'NHKがクライアントのしごとどんなのがあった？'
index.query(question, llm_predictor=llm_predictor, verbose=True)