In [1]:
# Scratch cell: evaluates 1+1 (presumably a kernel sanity check -- confirm;
# no later cell reads its result, so it looks safe to delete).
1+1


2

In [2]:
import os
import xml.etree.ElementTree as ET


In [3]:
def extract_summary_from_xml(xml_path):
    """Return all non-blank element text from an XML file as one string.

    The file at ``xml_path`` is parsed and every element (root included) is
    visited via ``root.iter()``. Each element's ``.text`` is stripped; blank
    results are dropped and the rest are joined with single spaces.
    Only ``.text`` is read -- text stored in an element's ``.tail`` is not
    included (adjust the tag logic here if that is needed).

    Returns an empty string when no element carries non-blank text.
    """
    document_root = ET.parse(xml_path).getroot()

    # Strip first, then filter, so whitespace-only text is discarded.
    stripped_texts = (node.text.strip() for node in document_root.iter() if node.text)
    return " ".join(piece for piece in stripped_texts if piece)


In [4]:
def load_all_summaries(folder_path):
    """Load every ``*.abssumm.xml`` file in ``folder_path``.

    Each matching file is passed to ``extract_summary_from_xml``; files whose
    extracted text is empty are skipped.

    Returns a list of ``{"filename": ..., "text": ...}`` dicts, ordered by
    filename.
    """
    summaries = []
    # os.listdir returns names in arbitrary order; sorting keeps the summary
    # order (and every index derived from it downstream) reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".abssumm.xml"):
            continue
        full_path = os.path.join(folder_path, filename)
        summary_text = extract_summary_from_xml(full_path)
        if summary_text:
            summaries.append({
                "filename": filename,
                "text": summary_text
            })
    return summaries


In [5]:
# Update with the path to your `abstractive` folder.
# os.path.join keeps the path portable: a hard-coded backslash separator
# only resolves on Windows.
ami_folder = os.path.join("ami_public_manual_1.6.2", "abstractive")
summaries = load_all_summaries(ami_folder)

# Quick check
print(f"Loaded {len(summaries)} summaries")
if summaries:  # nothing to preview (and no IndexError) when the folder had no matches
    print(summaries[0]['filename'])
    print(summaries[0]['text'][:500])  # Preview first 500 characters


Loaded 142 summaries
ES2002a.abssumm.xml
The project manager introduced the upcoming project to the team members and then the team members participated in an exercise in which they drew their favorite animal and discussed what they liked about the animal. The project manager talked about the project finances and selling prices. The team then discussed various features to consider in making the remote. The industrial designer will work on the working design of the remote. The user interface designer will work on the technical functions 


In [6]:
!pip install transformers --quiet


In [7]:
from transformers import GPT2TokenizerFast

# Load the pretrained "gpt2" tokenizer. chunk_text() uses it only to count
# tokens per word, so chunk sizes are measured in GPT-2 tokens.
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")


In [8]:
def chunk_text(text, max_tokens=500, count_tokens=None):
    """Split ``text`` into space-joined chunks of roughly ``max_tokens`` tokens.

    The text is split on whitespace and words are packed greedily: a chunk is
    closed as soon as adding the next word would push its token count above
    ``max_tokens``. Tokens are counted one word at a time, so the total is an
    estimate of the tokenized length of the joined chunk.

    Args:
        text: The string to split.
        max_tokens: Token budget per chunk.
        count_tokens: Optional callable mapping a word to its token count.
            Defaults to counting with the notebook-level GPT-2 ``tokenizer``.

    Returns:
        A list of non-empty chunk strings (``[]`` for empty or blank text).
        A single word that exceeds ``max_tokens`` on its own is kept whole as
        its own oversized chunk rather than being split.
    """
    if count_tokens is None:
        count_tokens = lambda word: len(tokenizer.encode(word, add_special_tokens=False))

    chunks = []
    current_chunk = []
    current_tokens = 0

    for word in text.split():
        token_len = count_tokens(word)
        # Flush only a non-empty chunk: without this guard an oversized first
        # word would emit an empty-string chunk.
        if current_chunk and current_tokens + token_len > max_tokens:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_tokens = 0
        current_chunk.append(word)
        current_tokens += token_len

    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks


In [9]:
# Flatten all summaries into one list of chunk records. Each record keeps the
# source filename and the chunk's position within that summary.
chunked_data = [
    {"filename": summary["filename"], "chunk_id": position, "text": piece}
    for summary in summaries
    for position, piece in enumerate(chunk_text(summary["text"], max_tokens=500))
]

print(f"Total chunks created: {len(chunked_data)}")
print(chunked_data[0])


Total chunks created: 160
{'filename': 'ES2002a.abssumm.xml', 'chunk_id': 0, 'text': 'The project manager introduced the upcoming project to the team members and then the team members participated in an exercise in which they drew their favorite animal and discussed what they liked about the animal. The project manager talked about the project finances and selling prices. The team then discussed various features to consider in making the remote. The industrial designer will work on the working design of the remote. The user interface designer will work on the technical functions of the remote. The marketing executive will work on what requirements the remote has to fulfill The remote will sell for 25 Euro. The remote will be sold on an international scale. The production costs cannot exceed 12.50 Euro. Whether the remote will be used exclusively for televisions.'}


In [10]:
pip install sentence-transformers


Note: you may need to restart the kernel to use updated packages.


In [11]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used for both the indexed chunks and the queries.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")  # Fast, small, good quality


def get_local_embedding(text):
    """Embed ``text`` with the locally loaded ``embedding_model``.

    Returns whatever ``embedding_model.encode`` produces for ``text``.
    """
    vector = embedding_model.encode(text)
    return vector

In [12]:
# Embed every chunk, one call per chunk; embeddings[i] belongs to chunked_data[i].
texts = [record['text'] for record in chunked_data]
embeddings = list(map(get_local_embedding, texts))


In [13]:
import faiss
import numpy as np

# Stack the per-chunk embeddings into one float32 matrix (one row per chunk).
embedding_matrix = np.array(embeddings).astype('float32')

dimension = embedding_matrix.shape[1]  # should be 384 for MiniLM
index = faiss.IndexFlatL2(dimension)
# Row order is preserved, so FAISS id i refers to chunked_data[i].
index.add(embedding_matrix)

print(f"FAISS index contains {index.ntotal} vectors")


FAISS index contains 160 vectors


In [14]:
!pip install requests --quiet

In [15]:
import requests

# Read the Together.ai key from the environment so it is never stored in the
# notebook. Set TOGETHER_API_KEY before starting the kernel.
# NOTE: a key that was previously committed in this cell should be revoked.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "")

def generate_answer(prompt, model="mistralai/Mixtral-8x7B-Instruct-v0.1"):
    """Send ``prompt`` to the Together.ai completions endpoint and return the text.

    Args:
        prompt: Full prompt string to complete.
        model: Together.ai model identifier.

    Returns:
        The text of the first completion choice. If the API key is missing,
        the request fails, or the response has an unexpected shape, a
        diagnostic is printed and ``"No output returned."`` is returned.
    """
    fallback = "No output returned."

    if not TOGETHER_API_KEY:
        print("Error: TOGETHER_API_KEY is not set; cannot call Together.ai.")
        return fallback

    url = "https://api.together.xyz/v1/completions"
    headers = {
        "Authorization": f"Bearer {TOGETHER_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": model,
        "prompt": prompt,
        "max_tokens": 512,
        "temperature": 0.3,
        "top_p": 0.9
    }

    try:
        # Without a timeout a stalled connection would hang the cell forever.
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        # Error responses are decoded too, so the API's message can be printed below.
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print("Error calling Together.ai:")
        print(e)
        return fallback

    try:
        return result["choices"][0]["text"]
    except (KeyError, IndexError, TypeError):
        print("❌ Error parsing Together.ai response:")
        print(result)
        return fallback



In [16]:
def search_faiss(query, top_k=5):
    """Return the texts of the chunks nearest to ``query`` in the FAISS index.

    Args:
        query: Query string; embedded with ``get_local_embedding``.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of chunk texts in the order FAISS returns them. It holds fewer
        than ``top_k`` items when the index has fewer vectors.
    """
    query_embedding = get_local_embedding(query)
    _distances, neighbor_ids = index.search(
        np.array([query_embedding]).astype('float32'), top_k
    )
    # FAISS pads missing neighbours with -1; indexing chunked_data[-1] would
    # silently return the last chunk as if it were a match.
    return [chunked_data[i]['text'] for i in neighbor_ids[0] if i >= 0]


In [17]:
def ask_rag_agent(query):
    """Answer ``query`` using retrieved meeting-summary chunks as context.

    The five chunks returned by ``search_faiss`` are joined with blank lines,
    embedded in the prompt template below together with the question, and the
    prompt is passed to ``generate_answer``. Returns that function's result.
    """
    context = "\n\n".join(search_faiss(query, top_k=5))

    prompt = f"""You are a helpful meeting assistant.
Based on the following context from AMI meeting summaries, answer the question.

Context:
{context}

Question:
{query}

Answer:"""

    answer = generate_answer(prompt)
    return answer


In [18]:
# Single-meeting question; the answer is generated from the retrieved chunks.
question = "What were the key decisions made in meeting TS3008a?"
response = ask_rag_agent(question)
print(response)



 In meeting TS3008a, the following key decisions were made:

1. The remote will not include teletext and will be only for TV use.
2. The target group is users under 40.
3. The remote will feature the corporate logo and color.
4. The remote will have an LCD screen and speech recognition.
5. The advanced functions such as audio settings, contrast and channel programming will be accessed through the screen.
6. The remote will have buttons for basic functions and make the advanced functions accessible through the screen.
7. The remote will have buttons for power, volume selection, channel selection, digits from zero to nine, mute, a menu for teletext, a battery indicator, to access double digits, screen settings, channel settings, and audio settings.
8. The remote will have a rechargeable battery which comes with a charger.
9. The remote will allow users to have the possibility to enlarge the buttons.
10. The remote will have a general menu with the most used functions.
11. The remote wil

In [19]:
# Two questions about two different meetings in one query.
question = "Who was the main speaker in ES2002b and What were the usability concerns in TS3010b"
response = ask_rag_agent(question)
print(response)


The main speaker in ES2002b was the User Interface Designer, who presented the major components of the interface design, dividing the interface into two parts: voice commands and buttons. The Marketing Expert was also a key speaker, as she reported on research which shows that users think most remotes are ugly, easily lost and bad for RSI. Audio settings are rarely used, and the power, channel and volume buttons are used most often. The remote should be user-friendly and have a good look and feel.

The usability concerns in TS3010b were that remotes were too difficult to use, users want fancier and more ergonomic designs, shock protection, voice recognition, and LCD screens. The Project Manager also announced a new requirement that the remote is only to control televisions. The group decided to eliminate the LCD screen and voice recognition from the design due to time and cost restraints. They also decided to include a previous channel change button to the standard remote buttons, and

In [26]:
# Ask for action items and their owners for one meeting.
question = "TS3010b give the action items of this meeting, and who is responsible for what and write the name of person wherever possible?"
response = ask_rag_agent(question)
print(response)


Action Items from the Meeting:

1. Gather more information for the next meeting, the functional design meeting. (All Participants)
2. Decide on the inclusion of speech recognition in the design. (All Participants)
3. Determine the target group and features to attract them. (All Participants)
4. Discuss and decide on the buttons for the remote control. (All Participants)
5. Create the design of the remote control. (To be decided in the next meeting)
6. Industrial Designer: Work on the working design and technical function.
7. User Interface Designer: Work on the working design and functional design.
8. Marketing Manager: Look for user requirement specifications such as friendliness, selling price, and profit.
9. Consider the possibility of a touch screen, LCD, and other functions. (All Participants)
10. Fill out the questionnaire. (All Participants)
11. Receive specific instructions for the next meeting by email. (All Participants)
