## Multidoc Minions

Examples of running the vanilla Minions protocol on a multi-document dataset (a list of markdown files).


In [1]:
from local_rag_document_search import load_markdown_files

  from .autonotebook import tqdm as notebook_tqdm


chromadb is not installed. Please install it using `pip install chromadb`.


In [2]:
# Directory (relative path) that holds the markdown meeting summaries.
DOC_PATH = "data/meeting_summaries"

# Load the documents
# The helper returns two values, unpacked here as the file texts and the
# file paths. Later cells zip the two together, so they are presumably
# parallel sequences in the same order -- TODO confirm in load_markdown_files.
file_contents, file_paths = load_markdown_files(DOC_PATH)

Loading markdown files from: data/meeting_summaries
Found 9 .md files
--------------------------------------------------
✓ Loaded: 1_q3_marketing_strategy_review.md (1872 chars)
✓ Loaded: 2_1on1_performance_review_mike_rodriguez.md (1710 chars)
✓ Loaded: 3_marketing_sales_alignment_meeting.md (1762 chars)
✓ Loaded: 4_website_redesign_vendor_evaluation.md (1826 chars)
✓ Loaded: 5_1on1_checkin_lisa_wang.md (1560 chars)
✓ Loaded: 6_executive_leadership_team_monthly_update.md (1875 chars)
✓ Loaded: 7_crisis_communication_planning_session.md (2028 chars)
✓ Loaded: 8_marketing_analytics_platform_demo.md (1833 chars)
✓ Loaded: 9_1on1_career_development_emma_davis.md (2196 chars)

Successfully loaded 9 documents


In [3]:
from minions.minions import Minions
from minions.clients.ollama import OllamaClient
from minions.clients.openai import OpenAIClient
from pydantic import BaseModel

# Pydantic model passed to the local Ollama client as `structured_output_schema`.
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose -- pydantic copies a model's docstring into the generated JSON schema
# as its description, which would change what the local client receives.
class StructuredLocalOutput(BaseModel):
    # Free-text field; always a string.
    explanation: str
    # String or None. No default is declared here.
    citation: str | None
    # String or None. No default is declared here.
    answer: str | None

# Model identifiers for the two clients.
LOCAL_MODEL_NAME = "qwen2.5:3b"
REMOTE_MODEL_NAME = "gpt-4o-mini"

# Local client: temperature 0.0, replies capped at 500 tokens, 4096-token
# context window, output constrained to the StructuredLocalOutput schema.
local_client = OllamaClient(
    model_name=LOCAL_MODEL_NAME,
    temperature=0.0,
    max_tokens=500,
    num_ctx=4096,
    use_async=False,  # TODO: consider changing to True
    structured_output_schema=StructuredLocalOutput,
)

# Remote client: temperature 0.0 with a 4096-token reply budget.
remote_client = OpenAIClient(
    model_name=REMOTE_MODEL_NAME,
    temperature=0.0,
    max_tokens=4096,
)

# The protocol takes the local client first, then the remote client.
protocol = Minions(local_client, remote_client)

In [4]:
# NOTE: every line of this cell is commented out, so running it does nothing.
# It sketches joining all files into one string, with each file introduced by
# its filename and followed by a "---DOC_SEPARATOR---" line.
#
# # concat all the files into a single string using a "---" separator and the file paths as titles for each section 
# filenames_and_contents = []
# for filename, content in zip(file_paths, file_contents):
#     filenames_and_contents.append(f"Filename: {filename}")
#     filenames_and_contents.append(content)
#     filenames_and_contents.append("---DOC_SEPARATOR---")

# # join the list into a single string
# filenames_and_contents = "\n".join(filenames_and_contents)

# # print the result
# print(filenames_and_contents[:4000])

In [5]:
from minions.minions import Document
# Wrap every (path, text) pair in a Document; the path is stored as `filename`.
document_list = [
    Document(content=text, filename=path)
    for path, text in zip(file_paths, file_contents)
]

# Preview: first 300 characters of the first document's content.
print(document_list[0].content[:300])

## 1. Q3 Marketing Strategy Review - Meeting Summary

**Date:** October 15, 2024  
**Time:** 2:00 PM - 3:00 PM EST  
**Attendees:** Sarah Chen (VP Marketing), Mike Rodriguez (Digital Marketing Manager), Lisa Wang (Content Lead), James Mitchell (Analytics Manager), Emma Davis (Social Media Manager)




In [6]:

# Print a metadata string listing every document filename.
# Note: this printed string is NOT the value passed as `doc_metadata` below.
print(
    "zoom AI companion meeting summaries with the following filename metadata: "
    + ", ".join(doc.filename for doc in document_list)
)

# Alternative tasks (kept from the original inline comments):
#   "what is the close rate for enterprise leads versus SMB leads?"
#   "how many languages are supported by the new website?"
# Alternative doc_metadata that also lists the filenames:
#   "a list of short zoom AI companion meeting summaries with the following filenames: " + ", ".join([doc.filename for doc in document_list])
output = protocol(
    task="what is Lisa Wang's role in the company?",
    doc_metadata="a list of short zoom AI companion meeting summaries",
    context=document_list,
    max_rounds=5,  # you can adjust rounds as needed for testing
)

zoom AI companion meeting summaries with the following filename metadata: data/meeting_summaries/1_q3_marketing_strategy_review.md, data/meeting_summaries/2_1on1_performance_review_mike_rodriguez.md, data/meeting_summaries/3_marketing_sales_alignment_meeting.md, data/meeting_summaries/4_website_redesign_vendor_evaluation.md, data/meeting_summaries/5_1on1_checkin_lisa_wang.md, data/meeting_summaries/6_executive_leadership_team_monthly_update.md, data/meeting_summaries/7_crisis_communication_planning_session.md, data/meeting_summaries/8_marketing_analytics_platform_demo.md, data/meeting_summaries/9_1on1_career_development_emma_davis.md

Task: what is Lisa Wang's role in the company?
Max rounds: 5
Retrieval: None
Round 1/5
def chunk_by_section(
    doc: str, max_chunk_size: int = 3000, overlap: int = 20
) -> List[str]:
    sections = []
    start = 0
    while start < len(doc):
        end = start + max_chunk_size
        sections.append(doc[start:end])
        start += max_chunk_size - o

KeyboardInterrupt: 

In [6]:
from minions.minions import Document
# Run the protocol on a counting question. Here the metadata string also lists
# the file paths, and the Document list is built inline from the loaded
# paths and contents.
# Alternative task (kept from the original inline comment):
#   "how many languages are supported by the new website?"
output = protocol(
    task="how many 1:1 meetings did I hold in total?",
    doc_metadata=(
        "a list of short zoom AI companion meeting summaries with the following file paths: "
        + ", ".join(file_paths)
    ),
    context=[
        Document(content=text, filename=path)
        for path, text in zip(file_paths, file_contents)
    ],
    max_rounds=5,  # you can adjust rounds as needed for testing
)


Task: how many 1:1 meetings did I hold in total?
Max rounds: 5
Retrieval: None
Round 1/5
def chunk_by_section(
    doc: str, max_chunk_size: int = 3000, overlap: int = 20
) -> List[str]:
    sections = []
    start = 0
    while start < len(doc):
        end = start + max_chunk_size
        sections.append(doc[start:end])
        start += max_chunk_size - overlap
    return sections

Attempt 1/10
```python
def prepare_jobs(
    context: List[Document],
    prev_job_manifests: Optional[List[JobManifest]] = None,
    prev_job_outputs: Optional[List[JobOutput]] = None,
) -> List[JobManifest]:
    job_manifests = []
    
    for document in context:
        chunks = chunk_by_section(document.content, max_chunk_size=500, overlap=20)
        
        for chunk in chunks:
            if "1on1" in document.filename:
                task = "Extract the names of individuals involved in the 1:1 meeting."
                advice = "Look for names mentioned in the context of the meeting."
         

KeyboardInterrupt: 