In [1]:
import os

from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient

# Read the local .env file into the process environment so the os.getenv()
# lookups below can see OPENAI_API_KEY, QDRANT_URL and QDRANT_COLLECTION.
load_dotenv()

# Load the API key from the .env file.
# It is not passed explicitly to any client in this cell; the name is kept at
# module level in case other cells read it.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Fail fast with a readable message instead of handing `None` to the vector
# store as its collection name.
QDRANT_COLLECTION = os.getenv("QDRANT_COLLECTION")
if not QDRANT_COLLECTION:
    raise RuntimeError(
        "QDRANT_COLLECTION is not set; add it to the .env file or the environment."
    )

# Embedding model used by the vector store. Built once: the original cell
# constructed the same object twice with identical arguments.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Qdrant connection; the URL is taken from the environment as-is.
client = QdrantClient(url=os.getenv("QDRANT_URL"))

# LangChain wrapper over the existing Qdrant collection.
vector_store = QdrantVectorStore(
    client=client,
    collection_name=QDRANT_COLLECTION,
    embedding=embeddings,
)

In [2]:
from langchain_openai import ChatOpenAI
import pandas as pd
import csv
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Chat model that writes the synthesized answers (last stage of `rag_chain`).
llm = ChatOpenAI(model="gpt-4o")
# Question sheet; the 'Question 3' column is read further down in this cell.
df = pd.read_csv("data/questions_with_answers.csv")

# Prompt template for the RAG chain. `{question}` and `{context}` are
# placeholders: `rag_chain` fills them with the raw query and the joined text
# of the retrieved documents. The text below is sent to the model as written,
# so any edit to it changes runtime behavior.
message = """
Using provided context and questions, synthesize a first-person response from the author. The context consists of semantically split snippets from their journal, reflecting the author's deepest thoughts. The answers should be crafted to implicitly reflect Piaget's focus on cognitive development and adaptation, Nietzsche's emphasis on personal growth and self-realization, and Jordan Peterson's narrative-focused approach from "Maps of Meaning" without explicitly mentioning these frameworks. The synthesized response should be comprehensive, cohesive, and suitable for a living document intended to provide personalized assistance.
{question}

Don't mention people's names, but use the context's specifics to explain the author's answer.

Context:
{context}
"""

# Retrieve the 10 most relevant documents for each query.
# The result count has to be passed through `search_kwargs`; a bare `k=10`
# keyword is not a retriever setting, so the store's default count would be
# used instead of 10.
retriever = vector_store.as_retriever(search_kwargs={"k": 10})

# Collapse the retrieved documents into one context string
def format_docs(docs):
    """Join the `page_content` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a `page_content` string attribute.

    Returns:
        A single string with the contents in input order, or "" when `docs`
        is empty.
    """
    separator = "\n\n"
    contents = []
    for document in docs:
        contents.append(document.page_content)
    return separator.join(contents)

# Single human-role message built from the template above.
prompt = ChatPromptTemplate.from_messages(
    [
        ("human", message),
    ]
)

# Retrieval branch: fetch documents for the query, then join them into text.
retrieve_and_format = retriever | format_docs

# Full pipeline: the incoming query feeds both prompt variables ("context" via
# retrieval, "question" unchanged), then the filled prompt goes to the model.
rag_chain = (
    {"context": retrieve_and_format, "question": RunnablePassthrough()}
    | prompt
    | llm
)

# TODO: Create a loop that processes question fields in the row
#       (each column name can now be passed via `question_col`).
# Create a function to process each row
def process_row(row, question_col='Question 3'):
    """Answer the question stored in one column of a DataFrame row.

    Args:
        row: Mapping-like row (e.g. the Series that `df.apply(..., axis=1)`
            passes in) indexable by column name.
        question_col: Name of the column holding the question text. Defaults
            to 'Question 3', the column this function originally hard-coded.

    Returns:
        The `content` of the response from `rag_chain.invoke`, or "" when the
        question cell is missing or blank.
    """
    query = row[question_col]
    # Empty CSV cells are read back as NaN; skip them (and whitespace-only
    # text) rather than sending a non-question through retrieval and the LLM.
    if pd.isna(query) or not str(query).strip():
        return ""
    response = rag_chain.invoke(query)
    return response.content

# Run the chain once per row and keep the result in the 'Answer 3' column
df['Answer 3'] = df.apply(process_row, axis=1)

# Flatten every text cell to a single line before export:
#   1. newlines become spaces
#   2. runs of whitespace collapse to one space
df = (
    df
    .replace(r'\n', ' ', regex=True)
    .replace(r'\s+', ' ', regex=True)
)

# Write the completed sheet to a new CSV, quoting every non-numeric field
df.to_csv(
    "data/questions_with_answers-done.csv",
    index=False,
    quoting=csv.QUOTE_NONNUMERIC,
)