In [None]:
'''
Changes:
- Removed the Swarm framework. It adds nothing here: the LLM is initialised only once and the workflow is simple.
- It may be re-implemented in the future if necessary.

'''



import pandas as pd
import time
from langchain.schema import HumanMessage
from concurrent.futures import ThreadPoolExecutor
import logging
from langchain_ollama import ChatOllama
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import Document
import ollama
from langchain.retrievers.multi_query import MultiQueryRetriever
import re

# Configure logging
# Root logger at INFO level; this module logs its progress via logging.info.
logging.basicConfig(level=logging.INFO)

# Constants
# CSV of reviews read by run_demo_loop (path is relative to the working directory).
DOC_PATH = "../data/BA_AirlineReviews.csv"
# Ollama chat model used for both theme extraction and row classification.
MODEL_NAME = "llama3.1:8b"
# Ollama embedding model pulled and used when building the vector store.
EMBEDDING_MODEL = "nomic-embed-text"
# Chroma collection name passed to Chroma.from_documents.
VECTOR_STORE_NAME = "simple-rag"

def load_data(data_frame):
    """Convert every review row of *data_frame* into a ``Document``.

    Each row must provide the ``ReviewHeader`` and ``ReviewBody`` columns;
    both are combined into a single text used as the document's
    ``page_content``.

    Args:
        data_frame: pandas DataFrame holding the reviews.

    Returns:
        A list with one ``Document`` per row, in row order.
    """
    summaries = []
    for _, review in data_frame.iterrows():
        header = review['ReviewHeader']
        body = review['ReviewBody']
        summaries.append(f"ReviewHeader: {header}, ReviewBody: {body}")

    documents = []
    for summary in summaries:
        documents.append(Document(page_content=summary))
    return documents


def split_documents(documents):
    """Split *documents* into overlapping chunks for embedding.

    Uses a ``RecursiveCharacterTextSplitter`` configured with a chunk size
    of 500 and an overlap of 100.

    Args:
        documents: list of ``Document`` objects to split.

    Returns:
        The chunks produced by the splitter.
    """
    pieces = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    ).split_documents(documents)
    logging.info("Documents split into chunks.")
    return pieces


def create_vector_db(chunks):
    """Embed *chunks* and store them in a Chroma collection.

    The embedding model named by ``EMBEDDING_MODEL`` is pulled through
    Ollama first, then used to embed the chunks into the collection named
    by ``VECTOR_STORE_NAME``.

    Args:
        chunks: document chunks to index.

    Returns:
        The Chroma vector store built from the chunks.
    """
    # Request the embedding model from Ollama before it is used below.
    ollama.pull(EMBEDDING_MODEL)

    embedder = OllamaEmbeddings(model=EMBEDDING_MODEL)
    store = Chroma.from_documents(
        documents=chunks,
        embedding=embedder,
        collection_name=VECTOR_STORE_NAME,
    )
    logging.info("Vector database created.")
    return store


# One numbered theme line, e.g. "1. Theme Name: Explanation".
# Markdown bold around the name is optional ("**Theme Name**:" and
# "**Theme Name:**" are both accepted) because the prompt asks for the plain
# form while the model frequently answers with bold markers.
# Groups: 1 = item number, 2 = theme name, 3 = explanation (rest of the line).
_THEME_LINE_RE = re.compile(
    r'^[ \t]*(\d+)\.[ \t]+\*{0,2}([^:*\n]+?)\*{0,2}[ \t]*:[ \t]*\*{0,2}[ \t]*(.*?)[ \t]*$',
    re.MULTILINE,
)


def _parse_themes(text):
    """Parse numbered theme lines out of an LLM answer.

    Returns ``(entries, names)`` where ``entries`` are normalised
    ``"N. Name: Explanation"`` strings and ``names`` the matching theme
    names. Both lists are aligned; repeated names (case-insensitive) are
    dropped because each name later becomes a DataFrame column.
    """
    entries = []
    names = []
    seen = set()
    for found in _THEME_LINE_RE.finditer(text):
        number = found.group(1)
        name = found.group(2).strip()
        explanation = found.group(3).strip()
        key = name.casefold()
        if not name or key in seen:
            continue
        seen.add(key)
        names.append(name)
        entries.append(f"{number}. {name}: {explanation}")
    return entries, names


def extract_themes_from_feedback(df):
    """Ask the LLM for the overall themes present in the reviews of *df*.

    The reviews are turned into documents, chunked, embedded into a Chroma
    vector store and queried through a retrieval chain. The model's answer
    is then parsed for numbered ``N. Theme Name: Explanation`` lines.

    Args:
        df: pandas DataFrame with ``ReviewHeader`` and ``ReviewBody`` columns.

    Returns:
        A tuple ``(matches, match_column_name)``:
        ``matches`` is a list of ``"N. Name: Explanation"`` strings and
        ``match_column_name`` the list of theme names in the same order.
        Both are empty when the answer contains no parsable theme line.
    """
    documents = load_data(df)
    chunks = split_documents(documents)
    vector_db = create_vector_db(chunks)
    llm = ChatOllama(model=MODEL_NAME)

    # NOTE(review): MultiQueryRetriever uses this prompt to generate the
    # search queries it sends to the vector store, yet the prompt asks for
    # themes rather than query variants. The logged "Generated queries" show
    # theme prose being used as queries. Kept as-is to preserve behaviour;
    # consider a dedicated query-rewriting prompt.
    QUERY_PROMPT = ChatPromptTemplate.from_template("""
    You are an AI language model assistant specializing in detailed text analysis. 
    Identify and retrieve all relevant themes from the text database based on the question:
    Original question: {question}
    Provide a comprehensive list of themes identified from the database.
    Your themes will be passed onto a function for row by row classification.
    Therefore you must *ALWAYS* return the themes in this format:
    1. Theme Name: Explanation
    2. Theme Name: Explanation
    """)

    retriever = MultiQueryRetriever.from_llm(vector_db.as_retriever(), llm, prompt=QUERY_PROMPT)

    template = """Answer the question based ONLY on the following context:
    {context}
    Question: {question}
    """
    prompt = ChatPromptTemplate.from_template(template)

    chain = (
        {"context": retriever,
         "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    question = "What are the overall themes from the feedback?"
    response = chain.invoke(input=question)
    themes = response.strip()

    # Line-based parsing: works with or without Markdown bold around the
    # theme name and does not cut an explanation at its first digit.
    matches, match_column_name = _parse_themes(themes)

    if match_column_name:
        logging.info("Themes extracted successfully.")
    else:
        logging.warning("No themes could be parsed from the model response.")
    print("Extracted Themes:", match_column_name)
    print("Extracted Themes and explanation:", matches)
    return matches, match_column_name


def classify_row_with_theme(row, theme, chat_model):
    """Ask the chat model whether one review mentions *theme*.

    Args:
        row: mapping-like row providing ``ReviewBody`` and ``RowID``.
        theme: theme name inserted into the yes/no question.
        chat_model: object whose ``invoke`` accepts a list of messages and
            returns a response with a ``content`` string.

    Returns:
        ``(row_id, answer)`` where ``answer`` is the model's stripped reply,
        or ``"Cant classify"`` when the model call raised an exception.
    """
    text = row["ReviewBody"]
    prompt = f"Does the following text mention issues related to '{theme}'? *ONLY* respond with 'Yes' or 'No'. Text: '{text}'"

    try:
        answer = chat_model.invoke([HumanMessage(content=prompt)]).content.strip()
    except Exception as e:
        # Best effort: a failing row is logged and tagged, never fatal.
        logging.error(f"Error processing row {row['RowID']}: {e}")
        return row['RowID'], "Cant classify"
    return row['RowID'], answer


def classify_all_rows_with_theme(df, theme, chat_model):
    """Classify every row of *df* against *theme* using a thread pool.

    One ``classify_row_with_theme`` task is submitted per row (at most five
    run concurrently). Results are collected in submission order so they
    line up with the rows, then written to a new column named after the
    theme.

    Args:
        df: pandas DataFrame with ``RowID`` and ``ReviewBody`` columns;
            it is modified in place.
        theme: theme name; also used as the new column's name.
        chat_model: chat model forwarded to ``classify_row_with_theme``.

    Returns:
        The same DataFrame with the theme column added.
    """
    pending = []
    labels = []
    with ThreadPoolExecutor(max_workers=5) as pool:
        for _, row in df.iterrows():
            future = pool.submit(classify_row_with_theme, row, theme, chat_model)
            pending.append((row["RowID"], future))

        # Walk the futures in submission order to keep labels row-aligned.
        for row_id, future in pending:
            try:
                outcome = future.result()
                labels.append(outcome[1])
            except Exception as e:
                logging.error(f"Cannot classify row {row_id}: {e}")
                labels.append("Cant classify")

    df[f'{theme}'] = labels
    return df


def run_demo_loop(sample_size=100):
    """Run the full pipeline: load reviews, extract themes, classify, save.

    Reads the CSV at ``DOC_PATH``, takes a random sample of rows, asks the
    LLM for the overall themes, adds one Yes/No column per theme and writes
    the result to ``../data/classified_feedback.csv``.

    Args:
        sample_size: maximum number of rows to sample from the CSV. When the
            file holds fewer rows, all of them are used.
    """
    print("Starting Ollama Swarm CLI 🐝")

    df = pd.read_csv(DOC_PATH)
    df = df.rename(columns={"Unnamed: 0": "RowID"})
    if "RowID" not in df.columns:
        # The CSV has no unnamed index column; use the row index so that
        # downstream functions can still read row["RowID"].
        df.insert(0, "RowID", df.index)

    if df.empty:
        logging.warning("No rows found in %s; nothing to classify.", DOC_PATH)
        return

    # Clamp the sample size: DataFrame.sample raises ValueError when n is
    # larger than the number of rows (and replace is False).
    df = df.sample(n=min(sample_size, len(df)))

    _, theme_columns = extract_themes_from_feedback(df)
    print(theme_columns)
    if not theme_columns:
        logging.warning("No themes were extracted; no theme columns will be added.")

    chat_model = ChatOllama(model=MODEL_NAME)

    for theme in theme_columns:
        df = classify_all_rows_with_theme(df, theme, chat_model)

    # Write first, then report, so the message is only shown on success.
    df.to_csv("../data/classified_feedback.csv", index=False)
    print("Classification complete. Data saved to CSV.")


# Entry point: run the pipeline only when this module is executed directly.
if __name__ == "__main__":
    run_demo_loop()

INFO:root:Documents split into chunks.


Starting Ollama Swarm CLI 🐝


INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/pull "HTTP/1.1 200 OK"
INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:root:Vector database created.
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:langchain.retrievers.multi_query:Generated queries: ['After analyzing the text database, I have identified the following overall themes from the feedback:', "1. **Satisfaction with Product/Service**: This theme encompasses feedback that expresses customers' satisfaction or dissatisfaction with the quality, features, and performance of a product or service.", '2. **Customer Support Experience**: This theme includes feedback related to interactions with customer support teams, including their helpfulness, responsiveness, and problem-solving abilities.', '3. **Va

Extracted Themes: ['Poor customer service', 'Inconsistent experience', 'Refund issues', 'Aging amenities', 'Disorganization and lack of transparency']
Extracted Themes and explanation: ['1. **Poor customer service**: Several reviews mention a lack of helpfulness and understanding from British Airways staff, particularly in resolving issues with refunds.\n', '2. **Inconsistent experience**: Some reviews highlight inconsistencies between flights, such as differences in food quality, entertainment options, and crew behavior.\n', '3. **Refund issues**: Multiple reviews express frustration with the difficulty or refusal to process refunds for cancelled flights.\n', '4. **Aging amenities**: A few reviews mention outdated or non-functional entertainment systems, USB sockets, and other onboard features.\n', '5. **Disorganization and lack of transparency**: Some reviewers were left in the dark about flight status updates, cancellations, or changes, leading to frustration and delays.\n\nThese th

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127

Classification complete. Data saved to CSV.
