In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Execute the helper notebook inside this kernel.
# NOTE(review): names used later without a visible import (os, boto3, widgets,
# Chroma, OpenAIEmbeddings, process_md_files, ...) presumably come from this
# notebook or from the star import below -- confirm.
%run helper_functions.ipynb
# NOTE(review): the star import hides which names `variables` provides
# (presumably DIRECTORY_PATH and DATABRICKS_SAVE_PATH) -- consider importing
# them explicitly.
from variables import *

# Result is passed to extract_document_objects() in a later cell.
# NOTE(review): presumably a mapping built from the markdown files under
# DIRECTORY_PATH -- verify against the helper's definition.
all_file_mapping = process_md_files(DIRECTORY_PATH)

In [None]:
# Define the function to handle the question submission and display the results
def on_question_submit(b):
    """Print the documents that match the question typed into ``text_box``.

    Click handler for the "Source Document" button. Reads the question from
    ``text_box`` and the minimum similarity score from ``slider``, queries
    ``vectorstore`` with a similarity-score-threshold retriever and prints
    every retrieved document, numbered from 1, into ``output``.

    Args:
        b: Object passed in by the button's click callback; not used.

    Returns:
        None. All results are written to the ``output`` widget.
    """
    question = text_box.value
    score_threshold = float(slider.value)

    # Clear the previous output
    output.clear_output()

    # Do all the work inside the output widget so that messages -- and any
    # error raised while retrieving -- are captured in the app's output area
    # instead of being lost in the click callback.
    with output:
        # A blank query cannot be searched meaningfully; ask for input rather
        # than sending it to the retriever.
        if not question.strip():
            print("Please enter a question.")
            return

        retriever = vectorstore.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"score_threshold": score_threshold},
        )

        # Get the retrieved_docs
        retrieved_docs = retriever.get_relevant_documents(question)

        # Check if there are relevant documents or not and display the output
        if len(retrieved_docs) == 0:
            print("No relevant documents found.")
            return

        print("Relevant Docs\n\n")
        for index, doc in enumerate(retrieved_docs, start=1):
            print(f"{index}:", doc.page_content)
            print('\n')
            print('----------------------------------------------')
            print('\n')

# Define the function to handle the "Generate Response" button click and display the result
def generate_response(b):
    """Answer the question typed into ``text_box`` and print it to ``output``.

    Click handler for the "Generate Response" button. Reads the question from
    ``text_box`` and the minimum similarity score from ``slider``. If the
    retriever finds at least one document, a ``RetrievalQA`` chain backed by
    ``ChatOpenAI`` is run and its ``'result'`` entry is printed; otherwise a
    "no relevant documents" message is printed.

    Args:
        b: Object passed in by the button's click callback; not used.

    Returns:
        None. All results are written to the ``output`` widget.
    """
    question = text_box.value
    score_threshold = float(slider.value)

    # Clear the previous output
    output.clear_output()

    # Do all the work inside the output widget so that messages -- and any
    # error raised while retrieving or generating -- are captured in the app's
    # output area instead of being lost in the click callback.
    with output:
        # A blank query cannot be answered meaningfully; ask for input rather
        # than sending it to the retriever and the model.
        if not question.strip():
            print("Please enter a question.")
            return

        retriever = vectorstore.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"score_threshold": score_threshold},
        )

        # Probe the retriever first so the model is only called when
        # something relevant exists.
        # NOTE(review): the chain below receives the same retriever and
        # presumably searches again when invoked -- confirm whether the
        # duplicate lookup matters.
        retrieved_docs = retriever.get_relevant_documents(question)

        # Check if there are relevant documents or not and display the output
        if len(retrieved_docs) == 0:
            print(f"No relevant documents found with the query: {question}")
            return

        # Create the ChatOpenAI model and the RetrievalQA chain
        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
        qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever, return_source_documents=True)
        result = qa_chain({"query": question})
        # Display the result
        print(result['result'])
            

In [None]:
# Read the AWS credentials and region from the environment
# (each value is None when its variable is not set).
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION')

# Open a boto3 session with those settings and take the S3 resource from it
session = boto3.Session(
    region_name=AWS_DEFAULT_REGION,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)
s3 = session.resource('s3')

# Where the persisted data lives in S3: bucket name and object prefix
s3_bucket_name = 'haebichan-output-bucket'
s3_object_prefix = 'chroma_db/'

# Where downloaded files are stored locally; created up front if missing
local_directory = './local_chroma_db/'
os.makedirs(local_directory, exist_ok=True)

In [None]:
# Download the files stored under s3_object_prefix into local_directory,
# keeping their layout relative to the prefix.
my_bucket = s3.Bucket(s3_bucket_name)
# Ask S3 only for keys under the prefix instead of listing the whole bucket
# and substring-matching every key.
for s3_object in my_bucket.objects.filter(Prefix=s3_object_prefix):
    # Key relative to the prefix, e.g. 'chroma_db/index/a.bin' -> 'index/a.bin'
    relative_key = s3_object.key[len(s3_object_prefix):]
    # Skip "folder" placeholder keys (the prefix itself or keys ending in '/'):
    # they have no file name, so there is nothing to download.
    if not relative_key or relative_key.endswith('/'):
        continue
    # Keep sub-directories rather than flattening to the base name, so files
    # with the same name in different folders do not overwrite each other.
    local_file_path = os.path.join(local_directory, *relative_key.split('/'))
    # The target directory must exist before downloading, otherwise the
    # download fails with a file-not-found error.
    os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
    my_bucket.download_file(s3_object.key, local_file_path)


In [None]:
# Get the document_objects
# (in the cells visible here only their count is used, for the
# "Number of Docs in Repository" box).
document_objects = extract_document_objects(DATABRICKS_SAVE_PATH, all_file_mapping)
# Get the vectorstore saved previously
# NOTE(review): the path literal repeats the value of `local_directory`
# (without the trailing slash); the two must be kept in sync by hand.
vectorstore = Chroma(persist_directory="./local_chroma_db", embedding_function=OpenAIEmbeddings())

# --- Input widgets -----------------------------------------------------------

# Free-text field for the user's question
text_box = widgets.Text(
    description='Write your question:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='50%'),
)

# Read-only field showing how many document objects were loaded
doc_display_text_box = widgets.Text(
    description='Number of Docs in Repository:',
    value=str(len(document_objects)),
    placeholder='Type something',
    disabled=True,
    style={'description_width': 'initial'},
)

# Similarity score threshold, adjustable from 0 to 1 in steps of 0.01
slider = widgets.FloatSlider(
    description='Score Threshold:',
    value=0.7,
    min=0.0,
    max=1.0,
    step=0.01,
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='50%'),
)

# --- Buttons -----------------------------------------------------------------

# "Source Document" prints the retrieved documents;
# "Generate Response" prints the generated answer.
submit_button = widgets.Button(description='Source Document')
generate_button = widgets.Button(description='Generate Response')
submit_button.on_click(on_question_submit)
generate_button.on_click(generate_response)

# Both buttons side by side in one row
buttons_box = widgets.HBox([submit_button, generate_button])

# --- Output area -------------------------------------------------------------

# Bordered, fixed-height, scrollable area that both click handlers write to
output = widgets.Output(
    layout=widgets.Layout(
        width='90%',
        height='300px',
        overflow='auto',
        border='1px solid gray',
    )
)

# Header markup: title followed by three explanatory lines
title_html = '<h1><b>GX DocBot (Beta)</b></h1>'
subtitle_html = '<h5>This application is for retrieving relevant docs related to question or generating a response.</h5>'
subtitle_html_2 = '<h5>The app utilizes our public-facing docs + Dev Rel slack Q and A data from slack community channel.</h5>'
subtitle_html_3 = '<h6>Score Threshold determines how relevant docs are to question. If question does not meet threshold, app will not return an answer.</h6>'

# Wrap each piece of markup in an HTML object
title_widget = HTML(title_html)
subtitle_widget = HTML(subtitle_html)
subtitle_widget_2 = HTML(subtitle_html_2)
subtitle_widget_3 = HTML(subtitle_html_3)

# Display the widgets and output, one display() call per element, top to
# bottom: header, a line break, the inputs, the buttons, the result area.
for _element in (
    title_widget,
    subtitle_widget,
    subtitle_widget_2,
    subtitle_widget_3,
    HTML("<br>"),
    text_box,
    slider,
    doc_display_text_box,
    buttons_box,
    output,
):
    display(_element)
