In [52]:
import chromadb
import os
from langchain_ollama import OllamaEmbeddings, OllamaLLM
from pathlib import Path

# Name of the Ollama model used for text generation; passed to OllamaLLM in query_ollama
llm_model_name = "gemma3"

In [3]:
# Set up ChromaDB
# The persistent client stores its data in a "chroma_db" folder under the current working directory
chroma_client = chromadb.PersistentClient(path=str(Path.cwd() / "chroma_db"))

In [39]:


# Define a custom embedding function for ChromaDB using Ollama
class ChromaDBEmbeddingFunction:
    """
    Adapter that lets ChromaDB call a LangChain-style embeddings object.

    The wrapped object only needs an ``embed_documents(list_of_texts)`` method;
    every call on this adapter is forwarded to it.
    """

    def __init__(self, langchain_embeddings):
        # Keep a handle on the wrapped embeddings object for later calls
        self.langchain_embeddings = langchain_embeddings

    def __call__(self, input):
        # A bare string is wrapped into a one-element list; anything else is passed through as-is
        texts = [input] if isinstance(input, str) else input
        return self.langchain_embeddings.embed_documents(texts)

# Initialize the embedding function with Ollama embeddings


embedding = ChromaDBEmbeddingFunction(
    OllamaEmbeddings(
        # Point this at your Ollama server; adjust the base URL to match its configuration
        base_url="http://localhost:11434",
        model="llama3",
    )
)


In [78]:
# Collection backing the RAG workflow (fetched if it already exists, created otherwise)
collection_name = "rag_philosophy_cosine"
collection = chroma_client.get_or_create_collection(
    embedding_function=embedding,  # texts are embedded through the custom Ollama wrapper
    metadata={
        "description": "A collection for RAG with Ollama - Philosophy without any gaps",
        "hnsw:space": "cosine",
        "hnsw:search_ef": 200,
    },
    name=collection_name,
)

In [99]:
# Load transcriptions

def read_file(file_path):
    """Return the full text of the file at ``file_path``, decoded as UTF-8."""
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()


def split_episode_title(file_name):
    """
    Split a file stem of the form ``"<episode> - <title>"`` into its two parts.

    Only the first ``' - '`` separates episode from title, so a title that
    itself contains the separator is kept whole. A stem without the separator
    yields the whole stem as the episode and an empty title.

    Args:
        file_name (str): File name without its extension.

    Returns:
        tuple: ``(episode, title)`` as strings.
    """
    episode, _, title = file_name.partition(' - ')
    return episode, title


def load_transcriptions(root_dir):
    """
    Read every file found below ``root_dir``, including sub-directories.

    Each processed file's episode and title are printed as it is read.

    Args:
        root_dir (str): Directory to walk.

    Returns:
        tuple: ``(contents, paths, metadatas)`` - three parallel lists holding
        each file's text, its path, and a ``{'episode', 'title'}`` dict.
        All three are empty when ``root_dir`` does not exist or has no files.
    """
    # The accumulators live outside the walk so results from every directory are kept
    contents, paths, metadatas = [], [], []
    for dir_path, _, files in os.walk(root_dir):
        for file in files:
            path = os.path.join(dir_path, file)
            episode, title = split_episode_title(Path(file).stem)
            paths.append(path)
            contents.append(read_file(path))
            metadatas.append({'episode': episode, 'title': title})
            print(episode, title)
    return contents, paths, metadatas


root = os.path.join(os.getcwd(), "transcriptions")
contents, paths, metadatas = load_transcriptions(root)

Bonus Episode 001 Glenn Adamson on Material Intelligence
Bonus Episode 002 Don’t Think for Yourself, Chapter 1
Filling the Gaps a Brief History of Nothing
HoP 000 Democracy and the History of Philosophy
HoP 001 Everything is Full of Gods 
HoP 002 Infinity and Beyond 
HoP 003 Created In Our Image
HoP 004 The Man With The Golden Thigh
HoP 005 Old Man River
HoP 006 MM McCabe on Heraclitus
HoP 007 The Road Less Traveled
HoP 008 You Can't Get There From Here
HoP 009 The Final Cut
HoP 010  Mind Over Mixture
HoP 011 All You Need Is Love, and Five Other Things
HoP 012 Malcolm Schofield on the Presocratics
HoP 013 Good Humor Men
HoP 014 Making the Weaker Argument the Stronger
HoP 015 Socrates without Plato
HoP 016 Method Man
HoP 017 Raphael Woolf on Socrates
HoP 018 In Dialogue
HoP 019 Know Thyself
HoP 020 Virtue Meets Its Match
HoP 021 We Don't Need No Education
HoP 022 I Know Because The Caged Bird Sings
HoP 023 MM McCabe on Knowledge in Plato
HoP 024 Famous Last Words
HoP 025 Soul and The Ci

In [100]:
# Store the transcripts: file paths serve as ids, episode/title dicts as metadata
collection.add(
    ids=paths,
    metadatas=metadatas,
    documents=contents,
)

In [62]:
# Retrieval helper: look up stored documents matching a query
def query_chromadb(query_text, n_results=1):
    """
    Query the module-level ChromaDB ``collection`` with a single query text.

    Args:
        query_text (str): The input query.
        n_results (int): The number of top results to request.

    Returns:
        tuple: ``(documents, metadatas)`` as found under those keys of the
        query response; each holds one inner list per query text.
    """
    response = collection.query(n_results=n_results, query_texts=[query_text])
    documents = response["documents"]
    metadatas = response["metadatas"]
    return documents, metadatas


# Generation helper: send a prompt to the Ollama LLM
def query_ollama(prompt):
    """
    Run a prompt through the Ollama model named by ``llm_model_name``.

    A new ``OllamaLLM`` instance is created on every call.

    Args:
        prompt (str): The input prompt for Ollama.

    Returns:
        str: The response from Ollama.
    """
    return OllamaLLM(model=llm_model_name).invoke(prompt)


# RAG pipeline: Combine ChromaDB and Ollama for Retrieval-Augmented Generation
def rag_pipeline(query_text, retriever=None, generator=None):
    """
    Perform Retrieval-Augmented Generation (RAG) by combining ChromaDB and Ollama.

    Args:
        query_text (str): The input query.
        retriever (callable, optional): Called as ``retriever(query_text)`` and
            expected to return ``(documents, metadatas)`` with one inner list
            per query. Defaults to ``query_chromadb``.
        generator (callable, optional): Called as ``generator(prompt)`` and
            expected to return the model's answer. Defaults to ``query_ollama``.

    Returns:
        str: The generated response, augmented with the retrieved context.
    """
    # Defaults are resolved at call time so the helpers may be defined in any cell order
    if retriever is None:
        retriever = query_chromadb
    if generator is None:
        generator = query_ollama

    # Step 1: Retrieve relevant documents
    retrieved_docs, _ = retriever(query_text)
    # The outer list is non-empty even with zero hits ([[]]), so the fallback
    # must be decided on the inner list belonging to our single query.
    docs_for_query = retrieved_docs[0] if retrieved_docs else []
    context = " ".join(docs_for_query) if docs_for_query else "No relevant documents found."

    # Step 2: Send the query along with the context to the LLM
    augmented_prompt = f"Context: {context}\n\nQuestion: {query_text}\nAnswer:"
    print("######## Augmented Prompt ########")
    print(augmented_prompt)

    return generator(augmented_prompt)

In [None]:
# NOTE(review): ad-hoc inspection of the collection object. This cell has no execution
# count, so it appears never to have been run - confirm the attribute exists or drop the cell.
collection.embedding

In [101]:
# Ad-hoc retrieval check: request the top three results for a sample query
query = "aurelius?"  # Change the query as needed
results = collection.query(n_results=3, query_texts=[query])

In [102]:
# Display the raw response of the query above
results

{'ids': [['C:\\Users\\joep\\PycharmProjects\\PhiliosophyGAN\\transcriptions\\HoP 153 - A Matter of Taste – Ibn Arabi and Mysticism.txt',
   'C:\\Users\\joep\\PycharmProjects\\PhiliosophyGAN\\transcriptions\\HoP 134 - Balancing Acts - Arabic Ethical Literature.txt',
   'C:\\Users\\joep\\PycharmProjects\\PhiliosophyGAN\\transcriptions\\HoP 212 - Like Father, Like Son - Debating the Trinity.txt']],
 'embeddings': None,
 'documents': [[" Hi, I'm Peter Adamson and you're listening to the History of Philosophy podcast, brought to you with the support of the LMU in Munich, online at www.historyoffilosophy.net. Today's episode, A Matter of Taste, Ibn Arabi and Mysticism. Never let it be said that an obsession with Aristotle prevents you from getting out and meeting people. Take a Verruise. There are not one, but two famous stories about his encounters with contemporaries. We've already heard the one about his audience with the Almohad emir. Here's the other one. Averruise has heard tell of a y