In [7]:
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
# RetrievalQA
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader

from langchain.callbacks import get_openai_callback
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.schema import HumanMessage, AIMessage
from langchain.text_splitter import RecursiveCharacterTextSplitter

import os

In [8]:
loader = PyPDFLoader('Employee-details-1.pdf')
documents = loader.load()

persist_directory = 'db'

embedding = OpenAIEmbeddings(api_key=os.getenv('OPENAPI_KEY'))

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Create the Chroma vector store
vectordb = Chroma.from_documents(documents=texts, embedding=embedding)

 

llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

qa = RetrievalQA.from_chain_type(
    llm, retriever=vectordb.as_retriever()#, chain_type_kwargs={"prompt": prompt}
)
#query = "Tell me something about each character"

prompts = [
    "What can you make of this document?",
    "Who is David Austin and what is his salary?",
    "Name an employee from China",
    "Name an employee from Europe"
]

for prompt in prompts:
    response = qa.invoke(prompt)
    print(f"Prompt: {prompt}\nResponse: {response}\n")

Prompt: What can you make of this document?
Response: {'query': 'What can you make of this document?', 'result': "This document appears to be a snippet of a database table containing information about employees. Each row represents an employee with details such as their ID, name, job title, department, hire date, salary, commission, manager ID, and department ID. The employees seem to be categorized into different departments like Administration (AD), Information Technology (IT), and Sales (SA). The document provides specific information about each employee's role, salary, and other relevant details within the organization."}

Prompt: Who is David Austin and what is his salary?
Response: {'query': 'Who is David Austin and what is his salary?', 'result': 'David Austin is an employee with the employee_id 105. His salary is 4800.'}

Prompt: Name an employee from China
Response: {'query': 'Name an employee from China', 'result': "I don't have information about any employees from China in t

In [9]:
def run_with_conversation_buffer(chain, query, conversation_buf):
    with get_openai_callback() as cb:
        result = chain.invoke(query)
        conversation_buf.memory.save_context({"input": query}, {"output": str(result)})
        print(f'Spent a total of {cb.total_tokens} tokens')

    return result

In [10]:
conversation_buf = ConversationChain(
    llm=llm,
    memory=ConversationBufferMemory()
)

# Example usage
queries = [
    "Who is David Austin?",
    "What is his salary?",
    "Is there someone else with the same first name?",
    "Is there someone with the same origin as him?"
]
for query in queries:
    response = run_with_conversation_buffer(qa, query, conversation_buf)
    print("Response:", response)

Spent a total of 1758 tokens
Response: {'query': 'Who is David Austin?', 'result': 'David Austin is an employee with employee ID 105 in the database provided. He works in the IT_PRO department, started on 25-JUN-1997, and has a salary of 4800.'}
Spent a total of 719 tokens
Response: {'query': 'What is his salary?', 'result': 'To determine the salary of the individual in question, we need to know the specific employee_id or first and last name of the person you are referring to. Without that information, it is not possible to provide the exact salary.'}
Spent a total of 1303 tokens
Response: {'query': 'Is there someone else with the same first name?', 'result': 'Yes, there are multiple people with the same first name in the provided context. For example, there are two individuals named James in the list.'}
Spent a total of 1166 tokens
Response: {'query': 'Is there someone with the same origin as him?', 'result': 'I don\'t have enough information to determine who "him" refers to in your 

In [14]:
client = OpenAI(api_key=os.getenv('OPENAPI_KEY'))

query="You are an HR servant and need to provide answers in this format: name1,name2,name3, ...,namen from this pdf:"+str(documents)+". Please list the employees"

def get_employees():
    result = qa.invoke(query)

    return result['result']
    
print(get_employees().split(', '))


['Steven King', 'Neena Kochhar', 'Lex De Haan', 'Alexander Hunold', 'Bruce Ernst', 'David Austin', 'Valli Pataballa', 'Diana Lorentz', 'Nancy Greenberg', 'Daniel Faviet', 'John Chen', 'Ismael Sciarra', 'Jose Manuel Urman']


In [None]:
import streamlit as st

# Example OpenAI response in string format
openai_response_string = "Option 1, Option 2, Option 3"

# Split the string into a list using the comma as the separator
openai_response_list = openai_response_string.split(', ')

# Add the select box to the Streamlit app using the list
selected_option = st.selectbox('Select an option:', openai_response_list)

# Display the selected option
st.write('You selected:', selected_option)
