In [2]:
import openai

In [None]:
!pip install gpt-index
!pip install langchain

In [3]:
from gpt_index import SimpleDirectoryReader, GPTListIndex, readers, GPTSimpleVectorIndex, LLMPredictor, PromptHelper
from langchain import OpenAI
import sys
import os
from IPython.display import Markdown, display

def construct_index(directory_path, index_path='index.json'):
    """Build a vector index over the documents in a directory and save it.

    The documents are read with ``SimpleDirectoryReader``, indexed with
    ``GPTSimpleVectorIndex`` and the resulting index is written to
    ``index_path`` via ``save_to_disk``.

    Args:
        directory_path: Directory handed to ``SimpleDirectoryReader``.
        index_path: File the index is saved to. Defaults to ``'index.json'``,
            the location used before this parameter existed.

    Returns:
        The ``GPTSimpleVectorIndex`` that was built.
    """
    # Prompt budget handed to PromptHelper / the LLM.
    max_input_size = 4096     # maximum input size
    num_outputs = 2000        # number of output tokens
    max_chunk_overlap = 20    # maximum chunk overlap
    chunk_size_limit = 600    # chunk size limit

    llm = OpenAI(
        temperature=0.5,
        model_name="text-davinci-003",
        max_tokens=num_outputs,
    )
    llm_predictor = LLMPredictor(llm=llm)
    prompt_helper = PromptHelper(
        max_input_size,
        num_outputs,
        max_chunk_overlap,
        chunk_size_limit=chunk_size_limit,
    )

    documents = SimpleDirectoryReader(directory_path).load_data()

    index = GPTSimpleVectorIndex(
        documents, llm_predictor=llm_predictor, prompt_helper=prompt_helper
    )

    # Persist the index so it can be reloaded later without re-embedding.
    index.save_to_disk(index_path)

    return index

def ask_ai(index_path='index.json'):
    """Run an interactive question/answer loop against a saved index.

    Loads the index from ``index_path``, then repeatedly prompts for a
    question, queries the index with ``response_mode="compact"`` and renders
    the answer as Markdown.

    The loop ends when the user submits an empty line, types ``exit`` or
    ``quit`` (case-insensitive), or when reading input raises ``EOFError`` /
    ``KeyboardInterrupt`` (e.g. the kernel is interrupted at the prompt).

    Args:
        index_path: File the index is loaded from. Defaults to
            ``'index.json'``.
    """
    index = GPTSimpleVectorIndex.load_from_disk(index_path)
    while True:
        try:
            query = input("What do you want to ask? ").strip()
        except (EOFError, KeyboardInterrupt):
            # Leave the loop quietly instead of surfacing a traceback.
            break
        if not query or query.lower() in ("exit", "quit"):
            break
        response = index.query(query, response_mode="compact")
        display(Markdown(f"Response: <b>{response.response}</b>"))
  

In [4]:
from getpass import getpass

# getpass does not echo what is typed, so the key does not end up in the
# cell output that is saved with the notebook. Surrounding whitespace from
# the paste is dropped.
os.environ["OPENAI_API_KEY"] = getpass("Paste your OPENAI API key: ").strip()

In [5]:
# Build the vector index from the documents under this directory;
# construct_index also writes the result to index.json.
construct_index("../context_data/openshift/text")

INFO:gpt_index.token_counter.token_counter:> [build_index_from_documents] Total LLM token usage: 0 tokens
INFO:gpt_index.token_counter.token_counter:> [build_index_from_documents] Total embedding token usage: 4394 tokens


<gpt_index.indices.vector_store.vector_indices.GPTSimpleVectorIndex at 0x1bdfc9144d0>

In [6]:
# Start the interactive question loop; ask_ai loads the index from index.json.
ask_ai()

INFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 629 tokens
INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 6 tokens


Response: <b>
To enable Vector on an OpenShift Container Platform cluster, edit the ClusterLogging custom resource (CR) in the openshift-logging project. Make sure that the OpenShift Container Platform version is 4.11 and the Logging subsystem for Red Hat OpenShift version is 5.4, and that FIPS is disabled.</b>

INFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 638 tokens
INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 7 tokens


Response: <b>
To enable vector in the yaml, add the following to the ClusterLogging custom resource (CR):

spec:
  collection:
    logs:
      type: "vector"
      vector: {}

metadata:
  annotations:
    logging.openshift.io/preview-vector-collector: enabled</b>