In [1]:
!pip install sacremoses

Collecting sacremoses
  Downloading sacremoses-0.1.1-py3-none-any.whl.metadata (8.3 kB)
Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.5/897.5 kB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: sacremoses
Successfully installed sacremoses-0.1.1


In [2]:
import os
import glob
import re
import json
import string
import itertools


from sacremoses import MosesTokenizer, MosesDetokenizer

# -------------------HERE------------------

# # Run this for me
INPUT_FILE = "/kaggle/input/d/amerasaad/transcripts/Bdb001.interaction.txt"

# OUTPUT_DIR = "/kaggle/working/preprocessed_data/"

# OUTPUT_FILE = "preprocessed_trunscript"

# Run this for you

BASE_PATH = os.getcwd()
# INPUT_FILE = os.path.join(BASE_PATH, 'transcripts', 'Bdb001.interaction.txt')
OUTPUT_DIR = os.path.join(BASE_PATH, 'preprocessed_data')
OUTPUT_FILE = 'preprocessed_trunscript'
# -----------------------------------------

In [3]:
def read_transcripts(meetings_file):
    transcript = ""

    with open(meetings_file, "r", encoding="utf-8") as f:
        transcript = f.read().splitlines()

    return transcript

In [4]:
transcript = read_transcripts(INPUT_FILE)
# en_automin2023 = read_transcripts(os.path.join(ELITR_AUTOMIN_2023_DIR, AUTOMIN_EN_DIR))

In [5]:
transcript

["\ufeffSpeaker 1: Yeah, we had a long discussion about how easy we want to make it for people to bleep things out.  Morgan wants to make it hard.  I didn't even check yesterday.  ",
 '',
 "Speaker 0: It didn't move yesterday either when I started it.  I don't know if it doesn't like both of them.  ",
 '',
 'Speaker 1: Channel 3?  ',
 '',
 "Speaker 0: You know, I discovered something yesterday on these wireless ones.  You can tell if it's picking up breath noise and stuff.  ",
 '',
 'Speaker 1: Yeah, it has a little indicator on it, on the AF.  ',
 '',
 "Speaker 0: So if you breathe under, breathe, and then you see AF go off, then you know it's picking up your mouth noise.  ",
 '',
 "Speaker 2: Oh, that's good, because we have a lot of breath tests.  In fact, if you listen to just the channels of people not talking, it's like...  ",
 '',
 'Speaker 1: What, did you see Hannibal recently or something?  Exactly.  ',
 '',
 "Speaker 2: Very disconcerting.  Okay.  So I was going to try to ge

In [6]:
import re

def parse_transcript_by_speaker(transcript):
    parsed_transcript = []

    for line in transcript:
        # Match lines starting with "Speaker X:"
        match = re.match(r"^Speaker\s+(\d+):\s*(.*)", line)
        if match:
            speaker_id, utterance = match.group(1), match.group(2).strip()
            parsed_transcript.append({
                "role": f"Speaker {speaker_id}",
                "utterance": [utterance]
            })
        elif len(parsed_transcript) > 0:
            # Continuation of previous speaker
            parsed_transcript[-1]["utterance"].append(line.strip())

    return parsed_transcript


In [7]:
import re
import string
import itertools
from sacremoses import MosesTokenizer, MosesDetokenizer

def remove_asr_errors(tokens):
    # Add ^ and $ to anchor patterns to full token matches
    ASR_STOPWORDS = [
        r"^u+h+m*-?$", r"^m*h+m+-?$", r"^u+m+-?$",
        r"^e+h+m*-?$", r"^e*m+-?$", r"^e+r+m+-?$",
        r"^a+h+$", r"^u+h+n+-?$", r"^h+u+(h|m)+-?$"
    ]
    ASR_STOPWORDS_COMBINATIONS = [
        f"{stop0[:-1]}-{stop1[1:]}"  # strip anchors when combining
        for stop0, stop1 in itertools.combinations(ASR_STOPWORDS, 2)
    ]

    # Remove ASR stopwords
    filtered_tokens = [
        token for token in tokens
        if not any(re.fullmatch(regex, token.lower()) for regex in ASR_STOPWORDS + ASR_STOPWORDS_COMBINATIONS)
    ]

    # Handle dangling hyphens
    filtered_tokens2 = []
    for idx, token in enumerate(filtered_tokens):
        if token == "-" or not token.endswith("-"):
            filtered_tokens2.append(token)
        elif idx == len(filtered_tokens)-1 or not filtered_tokens[idx+1].lower().startswith(token[:-1].lower()):
            filtered_tokens2.append(token[:-1])

    return filtered_tokens2

def remove_tags(text):
    text = re.sub(r"<.*?>", "", text)
    text = re.sub(r"\(?\)", "", text)
    text = re.sub(r"[\[\]()]", "", text)
    return text

def is_punct(s):
    return all(c in string.punctuation + "–" for c in s)

def normalize_text(text):
    text = remove_tags(text)

    tokenizer = MosesTokenizer(lang="en")
    detokenizer = MosesDetokenizer(lang="en")

    tokens = tokenizer.tokenize(text)
    tokens = remove_asr_errors(tokens)

    # Remove punctuation at start
    try:
        first_non_punct_idx = next(idx for idx, token in enumerate(tokens) if not is_punct(token))
        tokens = tokens[first_non_punct_idx:]
    except StopIteration:
        tokens = []

    if tokens:
        # Remove consecutive duplicates (case-insensitive)
        tokens = [
            token for idx, token in enumerate(tokens)
            if idx == 0 or token.lower() != tokens[idx - 1].lower()
        ]

        # Remove consecutive punctuations
        tokens = [
            token for idx, token in enumerate(tokens)
            if idx == len(tokens) - 1 or not is_punct(tokens[idx]) or not is_punct(tokens[idx + 1])
        ]

        # Capitalize first word if it's alphabetic
        if tokens[0].isalpha():
            tokens[0] = tokens[0][0].upper() + tokens[0][1:]

        # End sentence with period if it doesn't already
        if not is_punct(tokens[-1][-1]):
            tokens.append(".")

    return detokenizer.detokenize(tokens)


In [8]:
def preprocess_transcript(transcript):
    roles = []
    utterances = []


    for line in transcript:
        normalized_utterance = [normalize_text(sentence) for sentence in line["utterance"]]
        normalized_utterance = " ".join(sentence for sentence in normalized_utterance if len(sentence) > 0)

        if len(normalized_utterance) > 0:
            roles.append(line["role"])
            utterances.append(normalized_utterance)

    assert len(roles) == len(utterances)
    return {"roles": roles, "utterances": utterances}

In [9]:
def preprocess_transcripts(transcripts):
    preprocessed_transcripts = {}

    for meeting_id, transcript in transcripts.items():
        preprocessed_transcript = parse_transcript_by_speaker(transcript)
        preprocessed_transcripts[meeting_id] = preprocess_transcript(preprocessed_transcript)

    return preprocessed_transcripts

In [10]:
parsed_transcript = parse_transcript_by_speaker(transcript)
input_preprocessed = preprocess_transcript(parsed_transcript)


In [11]:
def save_preprocessed(preprocessed):
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    with open(os.path.join(OUTPUT_DIR, f"{OUTPUT_FILE}.json"), "w") as f:
        json.dump(preprocessed, f, ensure_ascii=False, indent=4)

In [12]:
save_preprocessed(input_preprocessed)


In [13]:
# !zip -r preprocessed_data.zip /content/preprocessed_data


In [14]:
# -------------------------HERE------------------------
# Run this for me
INPUT_DATA_PATH = "/kaggle/working/preprocessed_data/preprocessed_trunscript.json"
OUTPUT_DATA_PATH = "/kaggle/working/output"

# Run this for you

# BASE_PATH = os.getcwd()
# INPUT_DATA_PATH = os.path.join(BASE_PATH, 'preprocessed_data', 'preprocessed_trunscript.json')
# OUTPUT_DATA_PATH = os.path.join(BASE_PATH, 'output')
# -----------------------------------------------------

# MODEL_SHORT_NAME = "bart_large_xsum_samsum"
# MODEL_SHORT_NAME = "MEETING_SUMMARY"
MODEL_SHORT_NAME = "bart-large-cnn-samsum"

# MODEL = f"facebook/bart-large-xsum"
# MODEL = f"knkarthick/{MODEL_SHORT_NAME}"
MODEL = f"philschmid/{MODEL_SHORT_NAME}"

# SUMMARIZER_MODEL = f"models/{MODEL_SHORT_NAME}/checkpoint-5500"
SUMMARIZER_MODEL = MODEL

In [15]:
import torch
from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM
from nltk.tokenize import sent_tokenize
from sklearn.metrics.pairwise import cosine_similarity

import datetime
import json
import os
import re
import numpy as np
import pandas as pd
import networkx as nx
import math
import nltk
from nltk.corpus import stopwords

nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [16]:
if torch.cuda.is_available():
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name())
    torch.cuda.set_device(0)
else:
    print('No GPU available, using the CPU instead.')

There are 1 GPU(s) available.
We will use the GPU: Tesla P100-PCIE-16GB


In [17]:
tokenizer = AutoTokenizer.from_pretrained(MODEL)
device = 0 if torch.cuda.is_available() else None
summarizer = pipeline("summarization", model=SUMMARIZER_MODEL, device=device)

tokenizer_config.json:   0%|          | 0.00/300 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Device set to use cuda:0


In [18]:
def load_preprocessed_transcripts(file_name):
  with open(f"{file_name}", "r") as f:
    preprocessed_transcripts = json.load(f)

  return preprocessed_transcripts

In [19]:
input_preprocessed = load_preprocessed_transcripts(INPUT_DATA_PATH)
# europarl_preprocessed = load_preprocessed_transcripts(os.path.join(PREPROCESSED_DIR, EUROPARL_DATA_PATH))

In [20]:
input_preprocessed

{'roles': ['Speaker 0',
  'Speaker 1',
  'Speaker 0',
  'Speaker 1',
  'Speaker 0',
  'Speaker 2',
  'Speaker 1',
  'Speaker 2',
  'Speaker 1',
  'Speaker 2',
  'Speaker 1',
  'Speaker 2',
  'Speaker 1',
  'Speaker 3',
  'Speaker 1',
  'Speaker 3',
  'Speaker 1',
  'Speaker 2',
  'Speaker 1',
  'Speaker 2',
  'Speaker 3',
  'Speaker 1',
  'Speaker 2',
  'Speaker 3',
  'Speaker 1',
  'Speaker 2',
  'Speaker 1',
  'Speaker 3',
  'Speaker 1',
  'Speaker 3',
  'Speaker 2',
  'Speaker 1',
  'Speaker 2',
  'Speaker 0',
  'Speaker 2',
  'Speaker 1',
  'Speaker 2',
  'Speaker 1',
  'Speaker 2',
  'Speaker 1',
  'Speaker 2',
  'Speaker 1',
  'Speaker 0',
  'Speaker 1',
  'Speaker 3',
  'Speaker 0',
  'Speaker 1',
  'Speaker 3',
  'Speaker 2',
  'Speaker 3',
  'Speaker 2',
  'Speaker 3',
  'Speaker 2',
  'Speaker 1',
  'Speaker 2',
  'Speaker 1',
  'Speaker 2',
  'Speaker 3',
  'Speaker 0',
  'Speaker 1',
  'Speaker 2',
  'Speaker 1',
  'Speaker 2',
  'Speaker 1',
  'Speaker 2',
  'Speaker 1',
 

In [21]:
def segment_transcript(max_input_length, transcript, tokenizer):
  def split_line(line, role, tokenizer):
    splits = []

    sentences = sent_tokenize(line)
    split_idx = len(sentences)//2
    line1 = " ".join(sentences[:split_idx]) + '.\n'
    line2 = role + ": " + " ".join(sentences[split_idx:])

    for line in [line1, line2]:
      if len(tokenizer.encode(line)) >= max_input_length:
        splits += split_line(line, role, tokenizer)
      else:
        splits.append(line)

    return splits

  roles = transcript['roles']
  attendees = sorted(list(set(roles)))
  utterances = transcript['utterances']
  segmented_transcript = [""]

  for role, utterance in zip(roles, utterances):
    line = role + ': ' + utterance + '\n'
    # TODO remove short lines?
    tokenized_line = tokenizer.encode(line)

    if len(tokenized_line)>=max_input_length:
        line_splits = split_line(line, role, tokenizer)
    else:
        line_splits = [line]

    for line_split in line_splits:
        tokenized = tokenizer.encode(segmented_transcript[-1]+line_split)
        if len(tokenized)>=max_input_length:
            segmented_transcript.append(line_split)
        else:
            segmented_transcript[-1] += line_split

  return segmented_transcript, attendees

In [22]:

segmented_transcript_long, attendees_long = segment_transcript(512, input_preprocessed, tokenizer)
segmented_transcript_avg, _ = segment_transcript(768, input_preprocessed, tokenizer)
segmented_transcript_short, _ = segment_transcript(1024, input_preprocessed, tokenizer)

print(segmented_transcript_short[0])

Token indices sequence length is longer than the specified maximum sequence length for this model (1043 > 1024). Running this sequence through the model will result in indexing errors


Speaker 0: It didn't move yesterday either when I started it. I don't know if it doesn't like both of them.
Speaker 1: Channel 3?
Speaker 0: You know, I discovered something yesterday on these wireless ones. You can tell if it's picking up breath noise and stuff.
Speaker 1: Yeah, it has a little indicator on it, on the AF.
Speaker 0: So if you breathe under, breathe, and then you see AF go off, then you know it's picking up your mouth noise.
Speaker 2: Oh, that's good, because we have a lot of breath tests. In fact, if you listen to just the channels of people not talking, it's like...
Speaker 1: What, did you see Hannibal recently or something? Exactly.
Speaker 2: Very disconcerting. Okay. So I was going to try to get out of here in half an hour because I really appreciate people coming. And the main thing that I was going to ask people to help with today is to... give input on what kinds of database format we should use in starting to link up things like word transcripts and annotati

In [23]:
print(len(segmented_transcript_short))
print(len(segmented_transcript_avg))
print(len(segmented_transcript_long))

5
7
10


In [24]:
def summarize(input_text, summarizer):
  summarization = summarizer(input_text)[0]["summary_text"].strip()
  return summarization

def generate_summary(segmented_transcript, summarizer):
  summarized_segments = [summarize(transcript_segment, summarizer) for transcript_segment in segmented_transcript]
  return " ".join(summarized_segments)

In [25]:
# summary_short = generate_summary(segmented_transcript_short, summarizer)
# summary_short

In [26]:
# summary_avg = generate_summary(segmented_transcript_avg, summarizer)
# summary_avg

In [27]:
# -------------HERE-----------------
summary_long = generate_summary(segmented_transcript_long, summarizer)
print(summary_long)

# ----------------------------------

Speaker 1 and Speaker 2 are going to help Speaker 1 create a database format to link up word transcripts and annotations of word transcripts. Speaker 1 has already developed an XML format for this sort of thing. Speaker 2 is going to be standing up and drawing on the board. Speaker 1 describes a timeline with a single timeline, lots of different sections, each of which has IDs attached to it, and then you can refer from other sections to those IDs. The IDs are arbitrary assigned by a program, not by a user. There are also optional things like accuracy, id and stamping. Speaker 1 would use an existing way of doing an alignment for phone level. Speaker 2 would use a binary representation. Speaker 3 would use some file that configures how much information he wants in his XML or something. Speaker 1 doesn't want to do it. Speaker 1 would use pfile for word level data. Speaker 2 would use ICSI format for frame level representation of features. Speaker 3 would use text format. Speaker 0 was 

In [28]:
import nltk

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /usr/share/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [29]:
from nltk.tokenize import sent_tokenize
import datetime

def fix_entities(text):
    entity_map = {
        "Speaker": "SPEAKER",
        "Organization": "ORGANIZATION",
        "Project": "PROJECT",
        "Location": "LOCATION",
        "Annotator": "ANNOTATOR",
        "Url": "URL",
        "Number": "NUMBER",
        "Password": "PASSWORD",
        "Phone": "PHONE",
        "Path": "PATH",
        "Email": "EMAIL",
        "Other": "OTHER"
    }

    for key, val in entity_map.items():
        pattern = re.compile(rf"{key}\s*(\d+)", re.IGNORECASE)
        text = pattern.sub(lambda m: f"{val}{m.group(1)}", text)

    return text


def create_minutes(summary):
    summary = fix_entities(summary.strip())
    if not summary:
        return "- No summary available."
    sentences = sent_tokenize(summary)
    minutes = "\n".join([f"- {sent}" for sent in sentences])
    return minutes


def format_minutes(attendees, minutes):
    tday = datetime.date.today().strftime("%B %d, %Y")  # e.g. March 28, 2025
    att = ", ".join(attendees) if attendees else "N/A"
    return f"""DATE: {tday}
ATTENDEES: {att}


SUMMARY
{minutes}


Minuted by: Team Synapse"""


In [30]:
# ----------------------HERE---------------------
minutes_text = format_minutes(attendees_long, create_minutes(summary_long))

print(minutes_text)

# -----------------------------------------------

DATE: June 26, 2025
ATTENDEES: Speaker 0, Speaker 1, Speaker 2, Speaker 3


SUMMARY
- SPEAKER1 and SPEAKER2 are going to help SPEAKER1 create a database format to link up word transcripts and annotations of word transcripts.
- SPEAKER1 has already developed an XML format for this sort of thing.
- SPEAKER2 is going to be standing up and drawing on the board.
- SPEAKER1 describes a timeline with a single timeline, lots of different sections, each of which has IDs attached to it, and then you can refer from other sections to those IDs.
- The IDs are arbitrary assigned by a program, not by a user.
- There are also optional things like accuracy, id and stamping.
- SPEAKER1 would use an existing way of doing an alignment for phone level.
- SPEAKER2 would use a binary representation.
- SPEAKER3 would use some file that configures how much information he wants in his XML or something.
- SPEAKER1 doesn't want to do it.
- SPEAKER1 would use pfile for word level data.
- SPEAKER2 would use ICSI fo

# Deploy as an API using FastAPI + ngrok

In [31]:
!pip install fastapi uvicorn pyngrok

Collecting fastapi
  Downloading fastapi-0.115.14-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.3-py3-none-any.whl.metadata (6.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.11-py3-none-any.whl.metadata (9.4 kB)
Collecting starlette<0.47.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.46.2-py3-none-any.whl.metadata (6.2 kB)
Downloading fastapi-0.115.14-py3-none-any.whl (95 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.5/95.5 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading uvicorn-0.34.3-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.4/62.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.11-py3-none-any.whl (25 kB)
Downloading starlette-0.46.2-py3-none-any.whl (72 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uvicorn, pyngrok,

In [32]:
from fastapi import FastAPI,File, UploadFile, Form
import uvicorn
from pyngrok import ngrok
import nest_asyncio
from pydantic import BaseModel

In [33]:
nest_asyncio.apply()

In [34]:
!ngrok authtoken 2v6HSyefAGrtn5M23OyJHPLbwZV_3bcw9gHUCCbfA55k4ZuNT

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml                                


In [35]:
app = FastAPI(title="Transcript Processing API")

In [36]:
# Define a request model to accept the transcript text
class TranscriptRequest(BaseModel):
    transcript: str

# Define a response model for both summary and meeting minutes
class SummaryMinutesResponse(BaseModel):
    summary: str
    minutes: str

@app.post("/summaryMinutes", response_model=SummaryMinutesResponse)
def process_transcript(request: TranscriptRequest):
    print(f"Transcript Content: {transcript[:10]}")
    
    transcript_lines = request.transcript.splitlines()
    
    parsed_transcript = parse_transcript_by_speaker(transcript_lines)
    
    preprocessed = preprocess_transcript(parsed_transcript)
    
    segmented_transcript, attendees = segment_transcript(512, preprocessed, tokenizer)
    
    summary_text = generate_summary(segmented_transcript, summarizer)
    
    minutes_text = format_minutes(attendees, create_minutes(summary_text))

    response = SummaryMinutesResponse(summary=summary_text, minutes=minutes_text)

    print("=== API Output ===")
    print(f"Summary:\n{response.summary}\n")
    print(f"Minutes:\n{response.minutes}\n")
    
    # Return the summary and minutes as a JSON response
    return response

In [None]:
public_url = ngrok.connect(8000).public_url
print(f"Public API URL: {public_url}")
uvicorn.run(app, host="0.0.0.0", port=8000)

INFO:     Started server process [31]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


Public API URL: https://52e0-34-83-211-31.ngrok-free.app
Transcript Content: ["\ufeffSpeaker 1: Yeah, we had a long discussion about how easy we want to make it for people to bleep things out.  Morgan wants to make it hard.  I didn't even check yesterday.  ", '', "Speaker 0: It didn't move yesterday either when I started it.  I don't know if it doesn't like both of them.  ", '', 'Speaker 1: Channel 3?  ', '', "Speaker 0: You know, I discovered something yesterday on these wireless ones.  You can tell if it's picking up breath noise and stuff.  ", '', 'Speaker 1: Yeah, it has a little indicator on it, on the AF.  ', '']


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


=== API Output ===
Summary:
Morgan wants to make it easy for people to bleep things out. Speaker 1 has developed an XML format for this sort of thing. Speaker 0 discovered that wireless speakers can tell if they are picking up breath noise and stuff. Speaker 2 is going to draw on the board. Speaker 1 describes a timeline with a single timeline, lots of different sections, each of which has IDs attached to it, and then you can refer from other sections to those IDs. The IDs are arbitrary assigned by a program, not by a user. There are also optional things like accuracy, id and stamping. Speaker 1 would use an existing way of doing an alignment for phone level. Speaker 2 would use a binary representation. Speaker 3 would use some file that configures how much information he wants in his XML or something. Speaker 1 doesn't want to do it. Speaker 1 would use pfile for word level data. Speaker 2 would use ICSI format for frame level representation of features. Speaker 3 would use text forma