In [None]:
from os import environ
import configs.config as config

# Pull the service credentials out of the project config module so the rest
# of the notebook can refer to them by short module-level names.
ZILLIZ_ENDPOINT, ZILLIZ_USER, ZILLIZ_PASS = (
    config.ZILLIZ_ENDPOINT,  # cluster endpoint
    config.ZILLIZ_USER,      # cluster username
    config.ZILLIZ_PASS,      # password
)
OPENAI_API_KEY = config.OPENAI_API_KEY  # OpenAI API key

# Export the OpenAI key through the process environment
# (presumably so the OpenAI-backed classes used later can pick it up).
environ.update(OPENAI_API_KEY=OPENAI_API_KEY)

In [None]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import CharacterTextSplitter
# Read the Q&A CSV with CSVLoader, using the "Topic" column as the source field.
loader = CSVLoader(file_path="CGU_QA.csv", source_column="Topic")

# Splitter configured for chunks of up to 2048 characters with no overlap.
text_splitter = CharacterTextSplitter(chunk_size=2048, chunk_overlap=0)

# Load the CSV and split the loaded documents in one step; `docs` holds the
# resulting chunks that the later cells embed and store.
docs = text_splitter.split_documents(loader.load())

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
# Embedding model used to vectorize the documents (OpenAI text-embedding).
embedding_model_name = "text-embedding-3-small"
embeddings = OpenAIEmbeddings(model=embedding_model_name)

In [None]:


# SQLAlchemy URL for the Postgres database, built from the project config.
# Single braces are required here: inside an f-string `{{` / `}}` are escapes
# for literal brace characters, so the doubled form never interpolated the
# config values and produced a URL containing the text "{config.POSTGRES_USER}".
# NOTE(review): if the user name or password can contain characters such as
# "@", ":" or "/", they must be URL-encoded before being placed in the URL.
connection_string = (
    f"postgresql+psycopg://{config.POSTGRES_USER}:{config.POSTGRES_PASSWORD}"
    f"@{config.POSTGRES_HOST}:{config.POSTGRES_PORT}/{config.POSTGRES_DB}"
)

In [None]:
from langchain_postgres import PGVector
import json

# Setup database connection and vector store.
# langchain_postgres.PGVector accepts the embedding model as `embeddings`, the
# SQLAlchemy URL as `connection`, and a `collection_name` that namespaces the
# stored documents. It has no `table_name` / `vector_column` parameters, so
# passing them raised a TypeError.
vector_store = PGVector(
    embeddings=embeddings,
    connection=connection_string,
    collection_name="vectors",
    use_jsonb=True,  # keep document metadata as JSONB
)

In [None]:
# Store documents with embeddings and metadata.
# `add_documents` embeds each chunk with the store's embedding model and
# persists the text, vector and metadata together. The previous loop called
# `embeddings.embed(...)` and `vector_store.insert(...)`, neither of which
# exists, and it never stored the page content needed at query time.
# NOTE(review): no explicit ids are passed, so re-running this cell presumably
# inserts the same chunks again — confirm, or pass stable `ids=`.
if docs:  # nothing to embed for an empty CSV
    vector_store.add_documents(docs)

In [None]:
from langchain.chains import QAWithSourcesChain
from langchain.llms import OpenAI

# Initialize the QA chain.
# `QAWithSourcesChain` cannot be constructed from `llm=` / `chain_type=`
# directly. `load_qa_with_sources_chain` is the factory that builds a
# map-reduce combine-documents chain; that chain takes "input_documents" and
# "question" and returns "output_text", which are the keys the query cell uses.
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

chain = load_qa_with_sources_chain(
    OpenAI(temperature=0),
    chain_type="map_reduce",
    return_intermediate_steps=True,
)

In [None]:
# Example query
query = "Do you know CGU?"

# Retrieve the 10 nearest chunks. The result-count keyword is `k`; an unknown
# keyword such as `top_k` is not applied as the limit.
# NOTE: this rebinds `docs` (previously the full set of chunks) to the
# retrieved subset, so re-run the loading cells before re-running ingestion.
docs = vector_store.similarity_search(query, k=10)

# Execute the QA chain.
# `invoke` returns the full result dict (inputs plus every output key).
# `Chain.run` cannot be used here: it rejects mixed positional/keyword
# arguments and chains with more than one output key, and it returns a bare
# string rather than a dict.
res = chain.invoke({
    "input_documents": docs,
    "question": query
})

print("Question：" + query + "\n")
print("Answer：" + res['output_text'])