In [2]:
import torch
from transformers import pipeline

# 1. Summarization pipeline (multilingual models like 'facebook/mbart-large-50-many-to-many-mmt' for real use)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# 2. Action Item Extraction (using zero-shot classification as a proxy)
action_item_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

def summarize_transcript(transcript):
    summary = summarizer(transcript, max_length=130, min_length=30, do_sample=False)
    return summary[0]['summary_text']

def extract_action_items(transcript):
    # Split transcript into sentences (simple split; for production use NLP sentence segmentation)
    sentences = transcript.split('.')
    action_labels = ["action item", "decision", "follow up", "task", "to do"]
    action_items = []
    for sentence in sentences:
        if sentence.strip():
            result = action_item_classifier(sentence, action_labels)
            # If 'action item' is the top label and score > 0.5, treat as an action item
            if result['labels'][0] == "action item" and result['scores'][0] > 0.5:
                action_items.append(sentence.strip())
    return action_items

# --- Example Usage ---
if __name__ == "__main__":
    transcript = """
The meeting commenced with a review of last quarter’s sales figures, highlighting a 10% growth despite seasonal fluctuations. John presented the updated financial spreadsheet, noting that marketing expenses increased by 15%, primarily due to the new campaign for the product launch. Sarah asked about the ROI of these campaigns, and John agreed to provide a detailed report at the next meeting.

The team then discussed customer feedback from the recent survey. Most respondents appreciated the improvements in the mobile app, but several raised concerns about data privacy and login issues. Priya suggested implementing a two-factor authentication feature, and the engineering team agreed to evaluate its feasibility.

The product roadmap for Q4 was next on the agenda. Alice proposed prioritizing the mobile app update, focusing on new features such as offline access and enhanced notifications. Bob recommended a phased rollout to minimize risks, and everyone concurred. Tim volunteered to draft a timeline for the update and share it with the group by Friday.

Marketing strategies for the upcoming quarter were discussed. Carol presented options for expanding social media outreach, including targeted ads and influencer partnerships. Bob cautioned that the team should monitor engagement metrics closely. Priya will coordinate with Carol to develop sample ad copy by next week.

The group considered international expansion opportunities, especially in the Latin American market. Sarah explained the regulatory requirements for localization and translation. Alice agreed to start researching local vendors for translation services, and John will oversee the legal compliance process.

The team also reviewed project management practices. Tim suggested adopting a new task-tracking tool to improve transparency and accountability. Bob will evaluate available platforms and present his findings at the next meeting.

Carol announced a client meeting scheduled for next Wednesday, focusing on the new app features and customer feedback. Priya will prepare a presentation, and Bob will provide the latest user engagement statistics.

The meeting ended with a review of action items and deadlines. Alice reminded everyone to update their tasks in the shared project tracker, and Sarah confirmed the next meeting will be held on Thursday at 2 PM.
"""
    
    print("Meeting Summary:")
    print(summarize_transcript(transcript))
    
    print("\nAction Items:")
    for item in extract_action_items(transcript):
        print("- " + item)
        

ModuleNotFoundError: No module named 'torch'

In [5]:
import whisper
from transcriptobot_ml import summarize_transcript, extract_action_items

# Step 1: Transcribe audio
model = whisper.load_model("tiny")
result = model.transcribe("meeting_audio.mp3")
transcript = result['text']

# Step 2: Process transcript
print("Meeting Summary:")
print(summarize_transcript(transcript))

print("\nAction Items:")
for item in extract_action_items(transcript):
    print("-", item)

Meeting Summary:
Team training session is set for next Wednesday at 2 p.m. The marketing materials are almost ready. Bob, can you draft the press release and share it with the team?

Action Items:
- Bob, can you draft the press release and share it with the team? I'll have it ready by Monday
- I'll set up a meeting with them this afternoon
- I'll update the project timeline, according


In [11]:
from huggingface_hub import login

login("Dhanalakshmi")


HTTPError: Invalid user token.

In [14]:
import torch
from transformers import pipeline, MBartForConditionalGeneration, MBart50TokenizerFast

# 1. Summarization pipeline
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

# 2. Zero-shot classification pipeline for action item detection
action_item_classifier = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-small-mnli-fever-anli")

# 3. Translation model and tokenizer
translation_model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
translation_tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")

# Helper function to translate summary
def translate_text(text, target_lang_code):
    translation_tokenizer.src_lang = "en_XX"
    encoded = translation_tokenizer(text, return_tensors="pt")
    generated_tokens = translation_model.generate(**encoded, forced_bos_token_id=translation_tokenizer.lang_code_to_id[target_lang_code])
    return translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

# Summarization
def summarize_transcript(transcript):
    summary = summarizer(transcript, max_length=130, min_length=30, do_sample=False)
    return summary[0]['summary_text']

# Action Item Extraction
def extract_action_items(transcript):
    sentences = transcript.split('.')
    action_labels = ["action item", "decision", "follow up", "task", "to do"]
    action_items = []
    for sentence in sentences:
        if sentence.strip():
            result = action_item_classifier(sentence, action_labels)
            if result['labels'][0] == "action item" and result['scores'][0] > 0.5:
                action_items.append(sentence.strip())
    return action_items

# --- Example Usage ---
if __name__ == "__main__":
    transcript = """
The meeting commenced with a review of last quarter’s sales figures, highlighting a 10% growth despite seasonal fluctuations. John presented the updated financial spreadsheet, noting that marketing expenses increased by 15%, primarily due to the new campaign for the product launch. Sarah asked about the ROI of these campaigns, and John agreed to provide a detailed report at the next meeting.

The team then discussed customer feedback from the recent survey. Most respondents appreciated the improvements in the mobile app, but several raised concerns about data privacy and login issues. Priya suggested implementing a two-factor authentication feature, and the engineering team agreed to evaluate its feasibility.

The product roadmap for Q4 was next on the agenda. Alice proposed prioritizing the mobile app update, focusing on new features such as offline access and enhanced notifications. Bob recommended a phased rollout to minimize risks, and everyone concurred. Tim volunteered to draft a timeline for the update and share it with the group by Friday.

Marketing strategies for the upcoming quarter were discussed. Carol presented options for expanding social media outreach, including targeted ads and influencer partnerships. Bob cautioned that the team should monitor engagement metrics closely. Priya will coordinate with Carol to develop sample ad copy by next week.

The group considered international expansion opportunities, especially in the Latin American market. Sarah explained the regulatory requirements for localization and translation. Alice agreed to start researching local vendors for translation services, and John will oversee the legal compliance process.

The team also reviewed project management practices. Tim suggested adopting a new task-tracking tool to improve transparency and accountability. Bob will evaluate available platforms and present his findings at the next meeting.

Carol announced a client meeting scheduled for next Wednesday, focusing on the new app features and customer feedback. Priya will prepare a presentation, and Bob will provide the latest user engagement statistics.

The meeting ended with a review of action items and deadlines. Alice reminded everyone to update their tasks in the shared project tracker, and Sarah confirmed the next meeting will be held on Thursday at 2 PM.
"""

    # Summarize
    english_summary = summarize_transcript(transcript)
    print("📝 Meeting Summary (English):")
    print(english_summary)

    # Translations
    print("\n🌐 Translations:")
    print("🔸 Tamil:    ", translate_text(english_summary, "ta_IN"))
    print("🔸 Hindi:    ", translate_text(english_summary, "hi_IN"))
    print("🔸 French:   ", translate_text(english_summary, "fr_XX"))
    print("🔸 Spanish:  ", translate_text(english_summary, "es_XX"))

    # Action Items
    print("\n✅ Action Items:")
    for item in extract_action_items(transcript):
        print("- " + item)


Device set to use cpu


OSError: MoritzLaurer/DeBERTa-v3-small-mnli-fever-anli is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `hf auth login` or by passing `token=<your_token>`

In [23]:
import torch
from transformers import pipeline

# 1. Summarization pipeline (smaller model)
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")  # lighter than 12-6

# 2. Zero-shot classification for action item detection (small model)
action_item_classifier = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-small-mnli-fever-anli")

# 3. Translation pipelines (lightweight, language-specific)
translator_ta = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ta")
translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
translator_fr = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr")
translator_es = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")

# Summarize transcript
def summarize_transcript(transcript):
    summary = summarizer(transcript, max_length=130, min_length=30, do_sample=False)
    return summary[0]['summary_text']

# Action item extraction
def extract_action_items(transcript):
    sentences = transcript.split('.')
    action_labels = ["action item", "decision", "follow up", "task", "to do"]
    action_items = []
    for sentence in sentences:
        if sentence.strip():
            result = action_item_classifier(sentence, action_labels)
            if result['labels'][0] == "action item" and result['scores'][0] > 0.5:
                action_items.append(sentence.strip())
    return action_items

# --- Example Usage ---
if __name__ == "__main__":
    transcript = """
The meeting commenced with a review of last quarter’s sales figures, highlighting a 10% growth despite seasonal fluctuations. John presented the updated financial spreadsheet, noting that marketing expenses increased by 15%, primarily due to the new campaign for the product launch. Sarah asked about the ROI of these campaigns, and John agreed to provide a detailed report at the next meeting.

The team then discussed customer feedback from the recent survey. Most respondents appreciated the improvements in the mobile app, but several raised concerns about data privacy and login issues. Priya suggested implementing a two-factor authentication feature, and the engineering team agreed to evaluate its feasibility.

The product roadmap for Q4 was next on the agenda. Alice proposed prioritizing the mobile app update, focusing on new features such as offline access and enhanced notifications. Bob recommended a phased rollout to minimize risks, and everyone concurred. Tim volunteered to draft a timeline for the update and share it with the group by Friday.

Marketing strategies for the upcoming quarter were discussed. Carol presented options for expanding social media outreach, including targeted ads and influencer partnerships. Bob cautioned that the team should monitor engagement metrics closely. Priya will coordinate with Carol to develop sample ad copy by next week.

The group considered international expansion opportunities, especially in the Latin American market. Sarah explained the regulatory requirements for localization and translation. Alice agreed to start researching local vendors for translation services, and John will oversee the legal compliance process.

The team also reviewed project management practices. Tim suggested adopting a new task-tracking tool to improve transparency and accountability. Bob will evaluate available platforms and present his findings at the next meeting.

Carol announced a client meeting scheduled for next Wednesday, focusing on the new app features and customer feedback. Priya will prepare a presentation, and Bob will provide the latest user engagement statistics.

The meeting ended with a review of action items and deadlines. Alice reminded everyone to update their tasks in the shared project tracker, and Sarah confirmed the next meeting will be held on Thursday at 2 PM.
"""

    # Summarize
    english_summary = summarize_transcript(transcript)
    print("📝 Meeting Summary (English):")
    print(english_summary)

    # Translations (smaller models)
    print("\n🌐 Translations:")
    print("🔸 Tamil:   ", translator_ta(english_summary)[0]['translation_text'])
    print("🔸 Hindi:   ", translator_hi(english_summary)[0]['translation_text'])
    print("🔸 French:  ", translator_fr(english_summary)[0]['translation_text'])
    print("🔸 Spanish: ", translator_es(english_summary)[0]['translation_text'])

    # Action Items
    print("\n✅ Action Items:")
    for item in extract_action_items(transcript):
        print("- " + item)


Device set to use cpu


model.safetensors:  68%|######8   | 315M/460M [00:00<?, ?B/s]

OSError: MoritzLaurer/DeBERTa-v3-small-mnli-fever-anli is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `hf auth login` or by passing `token=<your_token>`

In [3]:
from transformers import pipeline

# Load a model (this will resume download if partial exists)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

text = "This is a test meeting transcript. It has several points."

summary = summarizer(text, max_length=50, min_length=20, do_sample=False)

print("Summary:", summary[0]['summary_text'])

Device set to use cpu
Your max_length is set to 50, but your input_length is only 14. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=7)


Summary: This is a test meeting transcript. It has several points. It was written by a group of students at the University of New Hampshire.
