<img src='https://raw.githubusercontent.com/Evogelpohl/linkArtifacts/main/pdf_openai.png'>

<img src='https://raw.githubusercontent.com/Evogelpohl/linkArtifacts/main/pdf_openai_2.png'>



## Set `process_pdf_ocr` to `True` to OCR all the PDFs, and `process_index_rebuild` to `True` to rebuild the Pinecone index

In [None]:
# When True, the Pinecone index is deleted (if present) and re-created before embedding.
process_index_rebuild = True
# When True, the source PDFs are converted to page images, OCR'd, and the text cleaned.
process_pdf_ocr = False

## Install packages

In [None]:
pip install -q pdf2image pytesseract reportlab pinecone-client Pillow gradio langchain

In [None]:
#Lets define some key variable for the PDF OCR'ing process
#Configure paths and variables

from pathlib import Path

# Locations of the external OCR binaries. They are only applied when the
# notebook runs on Windows.
TESSERACT_PATH = r"C:\Program Files\Tesseract-OCR\tesseract.exe" #tesseract must be installed separately
POPPLER_PATH = r"C:\Program Files\poppler-23.01.0\Library\bin" #poppler must be installed separately

# Root working folder for everything this notebook reads and writes.
LLM_DIRECTORY = Path(r"C:\Temp\pdf_to_openai_chat")
LLM_DIRECTORY.mkdir(parents=True, exist_ok=True)

# Store the PDFs you want processed by this solution in this folder, as a *.pdf file only:
SRC_PDFS_DIRECTORY = LLM_DIRECTORY / "src_pdfs"
# Created up front like the other folders, so there is an obvious place to drop
# the PDFs instead of the OCR step quietly finding nothing to process.
SRC_PDFS_DIRECTORY.mkdir(parents=True, exist_ok=True)

# One sub-folder of page images per PDF is written under here.
PAGES_DIRECTORY = LLM_DIRECTORY / "pages"
PAGES_DIRECTORY.mkdir(parents=True, exist_ok=True)

# OCR text (raw and cleaned) is written here.
TEXT_OUTPUT_DIRECTORY = LLM_DIRECTORY / "text_output"
TEXT_OUTPUT_DIRECTORY.mkdir(parents=True, exist_ok=True)

## PDF image OCR process, saving the results to a text file

In [None]:
import platform
import pytesseract
from PIL import Image, ImageOps
from pdf2image import convert_from_path
import os

# On Windows, tell pytesseract where the Tesseract executable is installed.
if platform.system() == "Windows":
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH

def create_page_images_pdf2image(pdf_path, output_directory, dpi=300):
    """
    Convert a PDF file into JPEG images, one for each page.

    Pages are rendered with pdf2image and saved in page order as
    ``page_001.jpg``, ``page_002.jpg``, ... inside ``output_directory``.

    :param pdf_path: Path to the input PDF file.
    :param output_directory: Directory to save the generated JPEG images in.
        May be a ``str`` or a ``pathlib.Path``; it is not created here.
    :param dpi: DPI for the generated images.
    :return: A list of ``pathlib.Path`` objects for the generated images.
    """
    from pathlib import Path

    # Normalise so a plain string directory works with the "/" join below.
    output_directory = Path(output_directory)

    convert_args = {"pdf_path": pdf_path, "dpi": dpi}

    # The explicit poppler location is only supplied on Windows.
    if platform.system() == "Windows":
        convert_args["poppler_path"] = POPPLER_PATH

    pdf_pages = convert_from_path(**convert_args)

    image_file_list = []
    for page_number, page in enumerate(pdf_pages, start=1):
        # Zero-padded page numbers keep the files in page order when sorted by name.
        output_file = output_directory / f"page_{page_number:03}.jpg"
        page.save(output_file, "JPEG")
        image_file_list.append(output_file)

    return image_file_list


def convert_to_bw(image_list):
    """
    Convert a list of images to black and white, overwriting each file.

    Every image is converted to grayscale and then thresholded: pixel values
    below 128 become black (0), everything else becomes white (255).

    :param image_list: List of input image file paths.
    """
    for image_file in image_list:
        # The context manager closes the source file handle; the thresholded
        # copy lives in memory, so it can be saved over the same path afterwards.
        with Image.open(image_file) as image:
            gray_image = ImageOps.grayscale(image)
            bw_image = gray_image.point(lambda x: 0 if x < 128 else 255, '1')
        bw_image.save(image_file)


def ocr_images(image_list, text_output_path):
    """
    Perform OCR on a list of images and append the extracted text to a file.

    Text is written in ``image_list`` order. A hyphen immediately followed by
    a line break is removed, so words hyphenated across lines are re-joined.

    :param image_list: List of input image file paths.
    :param text_output_path: File path to save the extracted text. The file is
        opened in append mode, so existing content is kept.
    """
    # NOTE(review): the file is written with the platform default encoding and
    # the cleaning step reads it back the same way - change both together.
    with open(text_output_path, "a") as output_file:
        for image_file in image_list:
            # Close each page image as soon as it has been OCR'd.
            with Image.open(image_file) as image:
                text = pytesseract.image_to_string(image)
            output_file.write(text.replace("-\n", ""))


# Run the whole OCR pipeline for every PDF in SRC_PDFS_DIRECTORY (only when enabled).
if process_pdf_ocr:
    for pdf_file in SRC_PDFS_DIRECTORY.glob("*.pdf"):
        pdf_name = pdf_file.stem

        # Each PDF gets its own sub-folder of page images.
        pages_subdir = PAGES_DIRECTORY / pdf_name
        pages_subdir.mkdir(parents=True, exist_ok=True)

        image_file_list = create_page_images_pdf2image(pdf_file, pages_subdir, dpi=300)
        print(f'Completed PDF2Image Page Creation for {pdf_file}')

        convert_to_bw(image_file_list)
        print(f'Completed BWConvert for {pdf_file}')

        text_output_file = TEXT_OUTPUT_DIRECTORY / f"{pdf_name}_text.txt"
        # ocr_images() appends to its output file, so remove the text from an
        # earlier run first; otherwise re-running duplicates every page.
        if text_output_file.exists():
            text_output_file.unlink()

        ocr_images(image_file_list, text_output_file)
        print(f'Completed OCRing file {pdf_file}')


In [None]:
import os
import re

# Rebinds TEXT_OUTPUT_DIRECTORY from the Path built in the configuration cell to
# a plain string naming the same folder; the os.path / os.listdir code below uses it.
TEXT_OUTPUT_DIRECTORY = r"C:\Temp\pdf_to_openai_chat\text_output"

def clean_text_file(input_file, output_file):
    """
    Copy a text file, turning every lone line break into a space.

    A newline that has no newline directly before or after it is replaced by
    a single space; runs of two or more newlines are left untouched.

    :param input_file: Path of the text file to read.
    :param output_file: Path of the text file to write (overwritten).
    """
    single_newline = re.compile(r"(?<!\n)\n(?!\n)")

    with open(input_file, "r") as source:
        raw_text = source.read()

    with open(output_file, "w") as target:
        target.write(single_newline.sub(" ", raw_text))

# Write a "<name>_cleaned.txt" next to every raw OCR text file (only when OCR is enabled).
if process_pdf_ocr:
    for filename in os.listdir(TEXT_OUTPUT_DIRECTORY):
        # Skip non-text files, and skip the "_cleaned" outputs of an earlier run
        # so that re-running does not pile up "*_cleaned_cleaned.txt" files.
        if not filename.endswith(".txt") or filename.endswith("_cleaned.txt"):
            continue

        stem = os.path.splitext(filename)[0]
        input_file = os.path.join(TEXT_OUTPUT_DIRECTORY, filename)
        output_file = os.path.join(TEXT_OUTPUT_DIRECTORY, f"{stem}_cleaned.txt")
        clean_text_file(input_file, output_file)


## Load our data (the text produced by the OCR process)

In [None]:
import os
from langchain.document_loaders import TextLoader

# Report the document and character counts of every .txt file in the output folder.
for filename in os.listdir(TEXT_OUTPUT_DIRECTORY):
    # Anything that is not a text file is ignored.
    if not filename.endswith(".txt"):
        continue

    filepath = os.path.join(TEXT_OUTPUT_DIRECTORY, filename)
    print(f"Processing file: {filepath}")

    # TextLoader hands the file back as a list of documents.
    loader = TextLoader(filepath)
    data = loader.load()

    print (f'You have {len(data)} document(s) in your data')
    print (f'There are {len(data[0].page_content)} characters in your document')
        
        # # print each metadata key and their values
        # for key, value in data[0].metadata.items():
        #     print(f'metadata: {key} = {value}')



def data_doc_summerizer(docs):
    """
    Print a short summary of a list of loaded documents.

    Reports the number of documents, a rough word count (each document's text
    split on single spaces), and the first sentence of the first document as a
    preview. For an empty list the counts are printed and the preview is skipped.

    :param docs: List of objects exposing a ``page_content`` string.
    """
    print(f'You have {len(docs)} document(s)')

    num_words = sum(len(doc.page_content.split(' ')) for doc in docs)

    print(f'You have roughly {num_words} words in your docs')
    print()

    # Nothing to preview when no documents were loaded; docs[0] would raise.
    if not docs:
        return

    print(f'Preview: \n{docs[0].page_content.split(". ")[0]}')

# `data` is rebound on every pass of the loading loop, so this summarises only
# the last text file that was loaded.
data_doc_summerizer(data)

In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


chunking_size = 1024
# The Pinecone namespace is named after the chunk size used to build it.
namespace_chunk_name = f'chunk_size_{chunking_size}'

data = []  # list to store the loaded data for all text files
text_chunks = []  # list to store the split text chunks for all text files

# The splitter does not depend on the file being processed, so build it once.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunking_size, chunk_overlap=20)

# Only the .txt entries of the folder are loaded (and counted in the report below).
text_filenames = [name for name in os.listdir(TEXT_OUTPUT_DIRECTORY) if name.endswith(".txt")]

for filename in text_filenames:
    filepath = os.path.join(TEXT_OUTPUT_DIRECTORY, filename)
    print(f"Processing file: {filepath}")

    # load the text file using the TextLoader
    loader = TextLoader(filepath)
    loaded_data = loader.load()

    # keep the whole documents as well as their chunks
    data.extend(loaded_data)
    text_chunks.extend(text_splitter.split_documents(loaded_data))

# print some information about the loaded data and text chunks
print(f'Loaded {len(data)} documents from {len(text_filenames)} text files')
print(f'Got {len(text_chunks)} text chunks in total')

# # example loop to print metadata for each document in the data list
# for document in data:
#     print(f"Metadata for document in {document.metadata['source']}:\n{document.metadata}\n")

# # example loop to print the length of each text chunk
# for i, chunk in enumerate(text_chunks):
#     print(f"Length of text chunk {i}: {len(chunk.page_content)}")


## Create the embeddings of our documents

In [None]:
import os
from getpass import getpass

# The OpenAI (or Azure OpenAI) API key is how LLM usage is authorised and billed.
# Take it from the environment when present; otherwise prompt for it and store
# it in the environment as well.
try:
    OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
except KeyError:
    OPENAI_API_KEY = getpass("Enter your OpenAI API Key: ")
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY


In [None]:
# Pinecone is a service that will take the documents that you split
# And store the embedding vectors (a math construct that tells LLMs where in the model to find similar words)
# Currently, Pinecone is free for use cases like this. Other vector stores exist; FAISS, ChromaDB, etc.

import os
from getpass import getpass

PINECONE_API_ENV = "us-central1-gcp"

# The Pinecone API key comes from the environment; when it is missing, prompt
# for it once and store it there before reading it back.
if "PINECONE_API_KEY" not in os.environ:
    os.environ["PINECONE_API_KEY"] = getpass("Enter your Pinecone API Key: ")
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]


In [None]:
import pinecone

# Initialize pinecone
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_API_ENV
)

index_name = 'aaa-reports-002'

# Remove our Pinecone Index if it exists and create a new one.
if process_index_rebuild:
    # Ask Pinecone whether the index exists instead of catching every error
    # from delete_index: a bad key or a network failure must not be reported
    # as "does not exist" and then followed by a create attempt.
    if index_name in pinecone.list_indexes():
        pinecone.delete_index(name=index_name)
    else:
        print(f"The index {index_name} does not exist.")
    # Create an index (a database) for our embeddings
    pinecone.create_index(name=index_name, dimension=1536, metric="cosine")
    print(f"The index {index_name} was created.")


In [None]:
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone
import os

# Let's create the embeddings (vector math pointers of our docs) using OpenAI's Embeddings Creator model
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Initialize pinecone
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_API_ENV
)

# One metadata dict per chunk, in the same order as text_chunks. "source" is the
# name of the text file the chunk came from, without folder or extension.
# basename/splitext keep file names that contain extra dots intact, and on
# Windows basename also copes with "/" separators.
metadata_list = [
    {"source": os.path.splitext(os.path.basename(chunk.metadata['source']))[0]}
    for chunk in text_chunks
]

# NOTE(review): this upload runs every time the cell runs, including when
# process_index_rebuild is False - confirm re-runs do not duplicate vectors.
# Create the docsearch which both builds the index and provides us an object to use the index.
docsearch = Pinecone.from_texts(
    [t.page_content for t in text_chunks],
    embeddings,
    metadatas=metadata_list,
    index_name=index_name,
    namespace=namespace_chunk_name,
)


## Example: QA + Stuffed

In [100]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate

query = "What was the issue with the SEABEEs uniform color?"

qa_stuff_prompt_template = """You are a military specialist. Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer as a military expert in after-action reports.: """
QA_STUFF_PROMPT = PromptTemplate(
    template=qa_stuff_prompt_template, input_variables=["context", "question"]
)

top_k_results = 5
metadata_filter = {"source": "aar-desertStorm_text_cleaned"}

# One LLM for this example, and the one the chain actually runs on: the
# library's default completion model at temperature 0.
temperature = 0
llm = OpenAI(temperature=temperature, openai_api_key=OPENAI_API_KEY)

docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)

# Search the namespace the chunks were indexed under rather than a literal copy of its name.
docs_to_search = docsearch.similarity_search(
    query=query, k=top_k_results, filter=metadata_filter, namespace=namespace_chunk_name
)

# "stuff" puts all retrieved chunks into a single prompt.
chain = load_qa_chain(llm, chain_type="stuff", prompt=QA_STUFF_PROMPT)
chain({"input_documents": docs_to_search, "question": query}, return_only_outputs=True)


{'output_text': '\nThe issue with the SEABEEs uniform color was that they were the only coalition force in theater wearing the utility green uniform, which caused them to stand out and be stopped at check points and harassed. It also caused incidents of mistaken identity with potentially serious consequences, and caused uniform resupply problems in non-NCF fleet hospital and public works augment units.'}

## Example: Summarize + Map_Reduced

In [None]:
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate

query = "What was the issue with the SEABEEs uniform color?"

summarize_mr_prompt_template = """Write a summary of the following:


{text}

As a military specialist in after-action reports. 
"""
SUMMARIZE_MR_PROMPT = PromptTemplate(template=summarize_mr_prompt_template, input_variables=["text"]
)

top_k_results = 5
metadata_filter = {"source": "aar-desertStorm_text_cleaned"}

# One LLM for this example, and the one the chain actually runs on: the
# library's default completion model at temperature 0.
temperature = 0
llm = OpenAI(temperature=temperature, openai_api_key=OPENAI_API_KEY)

docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)

# The query only selects which chunks are summarised; it is not sent to the chain.
# Search the namespace the chunks were indexed under rather than a literal copy of its name.
docs_to_search = docsearch.similarity_search(
    query=query, k=top_k_results, filter=metadata_filter, namespace=namespace_chunk_name
)

# The same prompt is used for the per-chunk (map) and the combining step.
chain = load_summarize_chain(llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=SUMMARIZE_MR_PROMPT, combine_prompt=SUMMARIZE_MR_PROMPT)
chain({"input_documents": docs_to_search}, return_only_outputs=False)


## Example: QA + Refine

In [None]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate

# Prompt applied to every chunk after the first: improve the running answer.
refine_prompt_template = (
    "The original question is as follows: {question}\n"
    "We have provided an existing answer: {existing_answer}\n"
    "We have the opportunity to refine the existing answer"
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{context_str}\n"
    "------------\n"
    "Given the new context, refine the original answer to better "
    "answer the question. "
    "If the context isn't useful, return the original answer. Reply as a military specialist in after-action report. "
)
refine_prompt = PromptTemplate(
    input_variables=["question", "existing_answer", "context_str"],
    template=refine_prompt_template,
)


# Prompt passed to the chain as its question prompt.
initial_qa_template = (
    "Context information is below. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the question: {question}"
)
initial_qa_prompt = PromptTemplate(
    input_variables=["context_str", "question"], template=initial_qa_template
)

query = "How long was the air war?"

temperature = 0.2
top_k_results = 5
metadata_filter = {"source": "aar-desertStorm_text_cleaned"}

llm = OpenAI(
    model_name="davinci",
    temperature=temperature,
    openai_api_key=OPENAI_API_KEY,
    max_tokens=512
)

docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)

# Search the namespace the chunks were actually indexed under; a hard-coded
# name for a different chunk size finds nothing in a freshly built index.
docs_to_search = docsearch.similarity_search(
    query=query, k=top_k_results, filter=metadata_filter, namespace=namespace_chunk_name
)

chain = load_qa_chain(llm=llm, chain_type="refine", return_refine_steps=True, question_prompt=initial_qa_prompt, refine_prompt=refine_prompt)
chain({"input_documents": docs_to_search, "question": query}, return_only_outputs=False)

## Example of map_reduce with a prompt template

In [None]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

# Define custom prompt templates for the question-answering chain
question_prompt_template = (
    "You are an AI language model trained to analyze military after-action reports. "
    "Your expertise includes military doctrine, identifying issues and recommendations, "
    "understanding changes in future strategies, analyzing tactics, and evaluating logistics. "
    "Given the following excerpt from a document, determine if it contains relevant information to answer the question.\n"
    "{context}\n"
    "Question: {question}"
)
QUESTION_PROMPT = PromptTemplate(template=question_prompt_template, input_variables=["context", "question"])

combine_prompt_template = (
    "As an AI language model with a deep understanding of military after-action reports, "
    "synthesize the following extracted parts of a document to create a comprehensive and informative answer to the question. "
    "If you don't know the answer, just say that you don't know. Don't try to make up an answer.\n\n"
    "QUESTION: {question}\n"
    "=========\n"
    "{summaries}\n"
    "========="
)
COMBINE_PROMPT = PromptTemplate(template=combine_prompt_template, input_variables=["summaries", "question"])

# Set up the metadata filter
metadata_filter = {"source": "aar-desertStorm_text_cleaned"}

# Set up search parameters
test_k = 20
test_query_question = "What were the top 5 problems, issues, or lessons?"

# Search for similar documents in the namespace the chunks were actually
# indexed under; a hard-coded name for a different chunk size finds nothing
# in a freshly built index.
test_docs_to_search_with_score = docsearch.similarity_search_with_score(query=test_query_question, k=test_k, filter=metadata_filter, namespace=namespace_chunk_name)

# Set up the language model
test_temp = 0.2
llm = OpenAI(temperature=test_temp, openai_api_key=OPENAI_API_KEY, max_tokens=2048)

# Load the question-answering chain
chain = load_qa_chain(llm=llm, chain_type="map_reduce", return_map_steps=True, question_prompt=QUESTION_PROMPT, combine_prompt=COMBINE_PROMPT)

# Extract only the Document objects from the list of (document, score) tuples
test_docs_to_search = [doc_score_tuple[0] for doc_score_tuple in test_docs_to_search_with_score]

# Run the chain with the input documents and question
res = chain({"input_documents": test_docs_to_search, "question": test_query_question}, return_only_outputs=False)

# Display the result
res


## Example with Custom Prompt and the Summarization Model and Refine Chain Type


In [None]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain


# Prompt passed to the chain as its question prompt.
prompt_template = """Write a summary of the following:


{text}


"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
# Prompt applied to every later chunk to refine the running summary.
refine_template = (
    "Your job is to produce a final summary\n"
    "We have provided an existing summary up to a certain point: {existing_answer}\n"
    "We have the opportunity to refine the existing summary"
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{text}\n"
    "------------\n"
    "If the context isn't useful, return the original summary."
)
refine_prompt = PromptTemplate(
    input_variables=["existing_answer", "text"],
    template=refine_template,
)

# Set up the language model
test_temp = 0.2
llm = OpenAI(temperature=test_temp, openai_api_key=OPENAI_API_KEY)

# Set up the metadata filter
metadata_filter = {"source": "aar-desertStorm_text_cleaned"}

# Set up search parameters
test_k = 5
test_query_question = "SEABEE uniform issues."

# Search for similar documents. similarity_search returns the documents
# themselves (no scores), so the result is bound directly to the name the chain
# is run on; otherwise the chain would summarise whatever an earlier cell left
# in test_docs_to_search. The namespace is the one the chunks were indexed under.
test_docs_to_search = docsearch.similarity_search(query=test_query_question, k=test_k, filter=metadata_filter, namespace=namespace_chunk_name)

chain = load_summarize_chain(llm=llm, chain_type="refine", return_intermediate_steps=False, question_prompt=PROMPT, refine_prompt=refine_prompt)
chain({"input_documents": test_docs_to_search}, return_only_outputs=True)



## Example of the SUMMARIZE - Refine Chain

In [None]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain

# The LLM is built from `temperature`, so the variable and the model cannot disagree.
temperature = 0.3
llm = OpenAI(temperature=temperature, openai_api_key=OPENAI_API_KEY, max_tokens=512)

summarization_question = "Summarize the issues related to the SEABEE uniform color."

metadata_filter = {"source": "aar-desertStorm_text_cleaned"}

top_k_results = 5

# Apply the metadata filter defined above (it was previously never passed to the
# search) and query the namespace the chunks were actually indexed under.
docs_to_search = docsearch.similarity_search(
    query=summarization_question,
    k=top_k_results,
    filter=metadata_filter,
    include_metadata=True,
    namespace=namespace_chunk_name,
)

# No custom prompts here: the chain uses the library's own refine prompts.
chain = load_summarize_chain(llm=llm, chain_type="refine")

chain(docs_to_search)


## Let's use our connection to the LLM and ask it questions.

In [112]:
import gradio as gr
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate

# Create the LLM shared by both chains below; the UI functions set its
# temperature per request.
llm = OpenAI(temperature=0.3, openai_api_key=OPENAI_API_KEY, max_tokens=512)

# Unique document titles from the metadata of the text chunks, sorted so the
# dropdown order is the same on every run (plain set order is not).
doc_titles = sorted({metadata['source'] for metadata in metadata_list})

# Define custom prompt template: QA-STUFF Prompt
qa_stuff_prompt_template = """You are a military specialist. Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer as a military expert in after-action reports.: """
QA_STUFF_PROMPT = PromptTemplate(
    template=qa_stuff_prompt_template, input_variables=["context", "question"]
)

# Define custom prompt template: SUMMARIZE-MAP_REDUCE Prompt
summarize_mr_prompt_template = """Write a summary of the following:


{text}

As a military specialist in after-action reports. 
"""
SUMMARIZE_MR_PROMPT = PromptTemplate(template=summarize_mr_prompt_template, input_variables=["text"]
)

# Create our CHAIN object for QA_STUFFed Prompts
chain_qa = load_qa_chain(llm=llm, chain_type="stuff", prompt=QA_STUFF_PROMPT)

# Create our CHAIN object for SUMMARIZED_MR Prompts
chain_summarize = load_summarize_chain(llm=llm, chain_type="map_reduce", return_intermediate_steps=False, map_prompt=SUMMARIZE_MR_PROMPT, combine_prompt=SUMMARIZE_MR_PROMPT)

# Create our Function to process QA (mode) prompts
def qa_function(query, doc_title, temperature, k):
    """
    Answer a question from the indexed chunks of one document.

    The ``k`` chunks most similar to ``query`` are retrieved from the vector
    store, restricted to chunks whose "source" metadata equals ``doc_title``,
    and passed with the question to the QA chain.

    :param query: The question to answer.
    :param doc_title: "source" metadata value used to filter the search.
    :param temperature: Assigned to the shared ``llm`` before the chain runs.
    :param k: Number of chunks to retrieve.
    :return: Tuple ``(answer, snippets)``: the chain's answer text, and one
        line per retrieved chunk with its score and first 200 characters.
    """
    metadata_filter = {"source": doc_title}

    # Search the namespace the chunks were actually indexed under; a hard-coded
    # name for a different chunk size finds nothing in a freshly built index.
    qa_docs_to_search_with_scores = docsearch.similarity_search_with_score(
        query=query, k=k, filter=metadata_filter, namespace=namespace_chunk_name
    )

    # Separate the documents from their scores
    qa_docs_to_search = [doc for doc, _score in qa_docs_to_search_with_scores]

    # Send the matching docs & our question to the LLM.
    llm.temperature = temperature
    output = chain_qa({"input_documents": qa_docs_to_search, "question": query}, return_only_outputs=True)

    # Extract the actual answer text from the output dictionary
    answer = output['output_text']

    # Describe each retrieved chunk: its score and a single-line preview.
    snippets = []
    for position, (doc, score) in enumerate(qa_docs_to_search_with_scores, start=1):
        truncated_text = doc.page_content[:200].replace("\n", "")
        snippets.append(f"Doc {position} snippet (score: {score:.2f}): {truncated_text}\n------\n")
    truncated_docs_str = "".join(snippets)

    return answer, truncated_docs_str


# Create our Function to process Summarization (mode) prompts
def summarization_function(query, doc_title, temperature, k):
    """
    Summarize the indexed chunks of one document that best match a query.

    The ``k`` chunks most similar to ``query`` are retrieved from the vector
    store, restricted to chunks whose "source" metadata equals ``doc_title``,
    and passed to the summarization chain. The query only selects the chunks;
    it is not sent to the chain.

    :param query: Text used to find the chunks to summarize.
    :param doc_title: "source" metadata value used to filter the search.
    :param temperature: Assigned to the shared ``llm`` before the chain runs.
    :param k: Number of chunks to retrieve.
    :return: Tuple ``(summary, snippets)``: the chain's output text, and one
        line per retrieved chunk with its score and first 200 characters.
    """
    metadata_filter = {"source": doc_title}

    # Search the namespace the chunks were actually indexed under; a hard-coded
    # name for a different chunk size finds nothing in a freshly built index.
    sum_docs_to_search_with_scores = docsearch.similarity_search_with_score(
        query=query, k=k, filter=metadata_filter, namespace=namespace_chunk_name
    )

    # Separate the documents from their scores
    sum_docs_to_search = [doc for doc, _score in sum_docs_to_search_with_scores]

    # Send the matching docs to the LLM.
    llm.temperature = temperature
    output = chain_summarize({"input_documents": sum_docs_to_search}, return_only_outputs=False)

    # Extract the actual summary text from the output dictionary
    answer = output['output_text']

    # Describe each retrieved chunk: its score and a single-line preview.
    snippets = []
    for position, (doc, score) in enumerate(sum_docs_to_search_with_scores, start=1):
        truncated_text = doc.page_content[:200].replace("\n", "")
        snippets.append(f"Doc {position} snippet (score: {score:.2f}): {truncated_text}\n------\n")
    truncated_docs_str = "".join(snippets)

    return answer, truncated_docs_str


# Function to determine which Function to use: qa or summarization along with the vars needed (order must match the list order provided to gradio)
def process_input(mode, query, doc_title, temperature, k):
    """
    Route a UI request to the handler for the selected interaction mode.

    The parameter order matches the order of the input components given to
    the Gradio interface.

    :param mode: "Question/Answer" or "Summarization".
    :param query: Text entered by the user.
    :param doc_title: Document title to filter the search by.
    :param temperature: Model temperature chosen in the UI.
    :param k: Number of chunks to search.
    :return: The selected handler's ``(answer, snippets)`` tuple, or
        ``("Invalid mode selected", "")`` for any other mode.
    """
    # Reject unknown modes up front so the happy path below stays flat.
    if mode not in ("Question/Answer", "Summarization"):
        return "Invalid mode selected", ""

    handler = qa_function if mode == "Question/Answer" else summarization_function
    return handler(query, doc_title, temperature, k)


# The main function to launch the gradio interface
def main():
    """
    Build the Gradio interface for the report chat and launch it.

    The input components are listed in the same order as the parameters of
    ``process_input``; the two outputs receive its ``(answer, snippets)`` tuple.
    """
    with gr.Interface(
        fn=process_input,
        inputs=[
            # `value=` (as used by the sliders below) is what pre-selects a
            # choice; `default=` is not a parameter of the component.
            gr.Radio(choices=["Question/Answer", "Summarization"], label="Model Interaction Mode", value="Summarization"),
            gr.Textbox(lines=1, label="Question"),
            gr.Dropdown(choices=doc_titles, label="Filter by AAR title", info="Filter by after-action report name"),
            gr.Slider(0, 1, step=0.1, value=0, label="Model Temp.", info="0=More Precise, 1=Greater degree of freedom"),
            gr.Slider(3, 20, step=1, value=5, label="Docs to Search", info="Number of AAR document 'chunks' to search and summarize"),
        ],
        outputs=[
            gr.Textbox(label="Answer", lines=10),
            gr.Textbox(label="Docs presented to the LLM", info="A list of document 'chunks' from the AAR with its question-similarity score."),
        ],
        title="After-Action Report ChatGPT",
        # Each example supplies the first three inputs: mode, question, title.
        examples=[
            ["Summarization", "Echeloning and it's use in Desert Shield and Storm.", "aar-desertStorm_text_cleaned"],
            ["Summarization", "Issues related to SEABEE uniforms.", "aar-desertStorm_text_cleaned"],
            ["Question/Answer", "How long did the air war last?", "aar-desertStorm_text_cleaned"]
        ],
    ) as iface:
        iface.launch()

# Launch the Gradio UI only when this code is executed as the main module.
if __name__ == '__main__':
    main()





Running on local URL:  http://127.0.0.1:7881

To create a public link, set `share=True` in `launch()`.


CREDIT: Many thanks to Gkamradt's work here: https://github.com/gkamradt/langchain-tutorials/blob/main/data_generation/Ask%20A%20Book%20Questions.ipynb


What was the issue with the SEABEEs uniform color?