In [10]:
import whisper
import pickle
import joblib
from sentence_transformers import SentenceTransformer
from sklearn.decomposition import PCA
from sklearn.svm import SVC
import numpy as np
import nltk
from nltk.tokenize import sent_tokenize
import pandas as pd
import plotly.graph_objects as go



In [4]:
# Step 1 - speech-to-text: load Whisper's "base" model on the CPU and
# transcribe the meeting recording; only the plain transcript text is kept.
AUDIO_PATH = "meeting_3.mp3"

model = whisper.load_model("base", device="cpu")
result = model.transcribe(AUDIO_PATH)
full_text = result["text"]

print("✅ Transcription done.")



✅ Transcription done.


In [5]:

# Step 2 - break the transcript into individual sentences.
# NOTE(review): sent_tokenize relies on NLTK tokenizer data being available
# locally; nothing in this notebook downloads it - confirm the environment.
sentences = sent_tokenize(full_text)

# Step 3 - turn every sentence into an embedding vector.
# The model name is a swappable choice; downstream PCA/SVM artifacts were
# presumably fitted on embeddings from this same model - verify before changing.
EMBED_MODEL_NAME = 'all-MiniLM-L6-v2'
embed_model = SentenceTransformer(EMBED_MODEL_NAME)
embeddings = embed_model.encode(
    sentences,
    batch_size=32,
    show_progress_bar=True,
)

Batches:   0%|          | 0/12 [00:00<?, ?it/s]

In [9]:
# Step 4 - project the sentence embeddings with the previously saved PCA model.
# The PCA is only loaded here (never fitted), so "pca_model.pkl" must already
# exist on disk.
pca = joblib.load("pca_model.pkl")
embeddings_pca = pca.transform(embeddings)  # presumably (num_sentences, 3) - confirm against the saved PCA

# Step 5 - load the trained SVM classifier.
# SECURITY: pickle.load executes arbitrary code from the file; only load
# artifacts produced by a trusted training run.
with open("svm_model.pkl", "rb") as f:
    svm_model = pickle.load(f)

# Step 6 - classify every sentence.
predicted_labels = svm_model.predict(embeddings_pca)

# Step 7 - partition sentences by predicted label in a single pass:
# label 0 goes to the "useful" list, label 1 to the "non-useful" list;
# any other label value is ignored.
use_sent = []
nuse_sent = []
for sentence, label in zip(sentences, predicted_labels):
    if label == 0:
        use_sent.append(sentence)
    elif label == 1:
        nuse_sent.append(sentence)

print(f"Useful sentences count: {len(use_sent)}")
print(f"Non-useful sentences count: {len(nuse_sent)}")

# Dump both groups, one bullet per sentence.
for heading, group in (
    ("\nUseful sentences:", use_sent),
    ("\nNon-useful sentences:", nuse_sent),
):
    print(heading)
    for s in group:
        print(f"- {s}")


Useful sentences count: 336
Non-useful sentences count: 42

Useful sentences:
-  We're ready to record.
- So commit, boost duty.
- Simon, thank you for I saw you already posted on the thing and branded too.
- And so Parker, if you could grab a slot on the scheduled air and Alita will need some slots as well so we can cover that.
- And then Core Max and I recuse the three of us from commit, boost duty, given the other scope of stage architect stuff.
- Now I'm going to try to do last year.
- I did the entire 24 hour live chat when that was a mistake.
- But I'm going to try to be in there for as many chats as possible because I don't think I think there are.
- I know of some cases where the speakers won't be able to make it.
- Or be sure there's someone in there to answer the chat questions.
- Yeah, I'm doing the same thing.
- At you know, this is this is one of those things where like we we knew this.
- This is but we didn't account for it.
- So cover, you know, cover commit chat for A-p

Visualizing

This is a good one!

In [None]:
# df_new = pd.DataFrame({
#     'sentence': sentences,
#     'pca_1': embeddings_pca[:, 0],
#     'pca_2': embeddings_pca[:, 1],
#     'pca_3': embeddings_pca[:, 2],
# })

# # Load trained SVM model
# svm_model = joblib.load("svm_model.pkl")

# # Predict labels
# X_new = df_new[['pca_1', 'pca_2', 'pca_3']].values
# df_new['predicted_label'] = svm_model.predict(X_new)

# # Plot points colored by predicted label
# fig = go.Figure()

# for label in df_new['predicted_label'].unique():
#     subset = df_new[df_new['predicted_label'] == label]
#     fig.add_trace(go.Scatter3d(
#         x=subset['pca_1'], y=subset['pca_2'], z=subset['pca_3'],
#         mode='markers',
#         name=f'Label {label}',
#         marker=dict(size=5),
#     ))

# # Plot the SVM decision boundary (hyperplane)
# # Only for linear kernel SVMs

# coef = svm_model.coef_[0]
# intercept = svm_model.intercept_[0]

# # Create grid to plot the plane
# xx, yy = np.meshgrid(
#     np.linspace(df_new['pca_1'].min(), df_new['pca_1'].max(), 30),
#     np.linspace(df_new['pca_2'].min(), df_new['pca_2'].max(), 30)
# )

# # Calculate corresponding z for the plane: coef_1*x + coef_2*y + coef_3*z + intercept = 0
# # => z = -(coef_1 * x + coef_2 * y + intercept) / coef_3
# zz = -(coef[0] * xx + coef[1] * yy + intercept) / coef[2]

# fig.add_trace(go.Surface(
#     x=xx,
#     y=yy,
#     z=zz,
#     showscale=False,
#     opacity=0.5,
#     name='Decision Boundary',
#     colorscale='RdBu',
#     reversescale=True
# ))

# fig.update_layout(
#     title="3D PCA Embeddings with SVM Decision Boundary",
#     scene=dict(
#         xaxis_title='PCA 1',
#         yaxis_title='PCA 2',
#         zaxis_title='PCA 3',
#     )
# )

# fig.show(renderer="browser")


Opening in existing browser session.


LLM part

In [15]:
# Stitch the sentences classified as useful back into a single
# space-separated string; this is the input to the LLM stage below.
text = " ".join(use_sent)
print(text)

 We're ready to record. So commit, boost duty. Simon, thank you for I saw you already posted on the thing and branded too. And so Parker, if you could grab a slot on the scheduled air and Alita will need some slots as well so we can cover that. And then Core Max and I recuse the three of us from commit, boost duty, given the other scope of stage architect stuff. Now I'm going to try to do last year. I did the entire 24 hour live chat when that was a mistake. But I'm going to try to be in there for as many chats as possible because I don't think I think there are. I know of some cases where the speakers won't be able to make it. Or be sure there's someone in there to answer the chat questions. Yeah, I'm doing the same thing. At you know, this is this is one of those things where like we we knew this. This is but we didn't account for it. So cover, you know, cover commit chat for A-pack hours. That is an agenda item we should add to our architect sync. And we're like right up on the heel

In [16]:
def split_text_into_chunks(text, chunk_size=1000, overlap=200):
    """Split ``text`` into overlapping character windows.

    Each chunk holds at most ``chunk_size`` characters, and consecutive chunks
    share ``overlap`` characters so that context is not lost at the cut.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters repeated between neighbouring chunks;
            must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings in order. An empty ``text`` yields ``[]``.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    # overlap >= chunk_size would make the window stand still or move
    # backwards (an endless loop); a negative overlap would skip text.
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    text_length = len(text)
    chunks = []
    start = 0
    while start < text_length:
        end = start + chunk_size
        chunks.append(text[start:end])
        # Once a window reaches the end of the text, any further window would
        # only repeat characters already contained in this chunk.
        if end >= text_length:
            break
        start += step
    return chunks


In [17]:
# Cut the joined text into 1000-character windows that overlap by 200.
chunks = split_text_into_chunks(text, chunk_size=1000, overlap=200)

# Preview every chunk under a numbered header, followed by a blank line.
for number, piece in enumerate(chunks, start=1):
    print(f"--- Chunk {number} ---", piece, "", sep="\n")


--- Chunk 1 ---
 We're ready to record. So commit, boost duty. Simon, thank you for I saw you already posted on the thing and branded too. And so Parker, if you could grab a slot on the scheduled air and Alita will need some slots as well so we can cover that. And then Core Max and I recuse the three of us from commit, boost duty, given the other scope of stage architect stuff. Now I'm going to try to do last year. I did the entire 24 hour live chat when that was a mistake. But I'm going to try to be in there for as many chats as possible because I don't think I think there are. I know of some cases where the speakers won't be able to make it. Or be sure there's someone in there to answer the chat questions. Yeah, I'm doing the same thing. At you know, this is this is one of those things where like we we knew this. This is but we didn't account for it. So cover, you know, cover commit chat for A-pack hours. That is an agenda item we should add to our architect sync. And we're like righ

In [18]:
import requests

def summarize_chunk(chunk, model="llama3.2:3b", timeout=600):
    """Ask a local Ollama server to summarize one piece of meeting transcript.

    The chunk is wrapped in a fixed "Summarize the following meeting
    transcript" instruction and sent as a single non-streaming request to
    ``http://localhost:11434/api/generate``.

    Args:
        chunk: Text to summarize.
        model: Name of the model the server should use.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.post``; without it a stalled server would
            block the notebook indefinitely.

    Returns:
        The generated text from the ``"response"`` field of the reply. This
        function is best-effort and never raises for transport problems:
        on a connection/HTTP/timeout/JSON error it returns a string starting
        with ``"❌ Error: "``, and when the reply carries no ``"response"``
        field it returns ``"⚠️ No response received."``.
    """
    prompt = f"Summarize the following meeting transcript:\n\n{chunk}"

    try:
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": model,
                "prompt": prompt,
                "stream": False,
            },
            timeout=timeout,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection, timeout and HTTP-status errors;
        # ValueError covers a body that is not valid JSON.
        return f"❌ Error: {str(e)}"

    # Guard against a syntactically valid but unexpected reply (e.g. a list).
    if not isinstance(payload, dict):
        return "⚠️ No response received."
    return payload.get("response", "⚠️ No response received.")


In [19]:
# Summarize the chunks one at a time, echoing each summary as it arrives
# and collecting them in order.
summaries = []

for number, chunk in enumerate(chunks, start=1):
    print(f"--- Summarizing Chunk {number} ---")
    summary = summarize_chunk(chunk)
    summaries.append(summary)
    print(summary)
    print()


--- Summarizing Chunk 1 ---
The transcript appears to be a meeting discussion about recording duties and coverage for a live event. Here's a summary:

* The speakers have committed to participating in a live chat session.
* Simon has already made some postings, and Parker is asked to grab a scheduled airtime slot.
* Alita will also need slots covered.
* Core Max and the speaker are excusing themselves from commit/boost duty for their stage architect work.
* The speaker volunteers to cover the last year's 24-hour live chat experience, which they acknowledge was a mistake but will try again with more coverage.
* It is noted that there may be some absent speakers and the need for coverage in those cases.
* Coverage of commit/chat during A-pack hours (an agenda item) needs to be added to their architect sync.

--- Summarizing Chunk 2 ---
The meeting transcript appears to be a discussion about the architect sync and covering A-pack hours for commit chat. The speaker mentions that they didn'

In [20]:
import json

# One record per summary, labelled "Chunk 1", "Chunk 2", ... in order.
data = [
    {"chunk": f"Chunk {position}", "summary": summary_text}
    for position, summary_text in enumerate(summaries, start=1)
]

# Persist as indented UTF-8 JSON; ensure_ascii=False keeps non-ASCII
# characters readable instead of escaping them.
with open("chunk_summaries.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(data, indent=2, ensure_ascii=False))

print("✅ Summaries saved to 'chunk_summaries.json'")


✅ Summaries saved to 'chunk_summaries.json'


Action Items

In [21]:
def extract_action_items(summaries):
    """Ask the LLM for the to-do items contained in the chunk summaries.

    Args:
        summaries: Iterable of mappings, each with a ``"chunk"`` label and a
            ``"summary"`` text.

    Returns:
        Whatever ``summarize_chunk`` returns for the assembled prompt.
    """
    # Render each record as "<chunk label>: <summary>".
    labelled = []
    for entry in summaries:
        labelled.append(f"{entry['chunk']}: {entry['summary']}")
    context = "\n\n".join(labelled)

    instructions = (
        "From the following meeting summaries, extract a list of clear, actionable tasks "
        "or to-do items discussed. Include who is responsible if mentioned."
    )
    closing = "List the tasks in bullet points or numbered list."
    prompt = "\n\n".join((instructions, context, closing))

    # NOTE(review): summarize_chunk prepends its own "Summarize the following
    # meeting transcript" instruction, so the model receives both instructions.
    return summarize_chunk(prompt)

In [22]:
import json

# Read the per-chunk summary records back from disk.
# NOTE(review): this rebinds `summaries` from the earlier list of strings to
# a list of {"chunk", "summary"} dicts; re-running the save cell afterwards
# would nest these records inside new ones.
with open("chunk_summaries.json", encoding="utf-8") as f:
    summaries = json.loads(f.read())

# Send all summaries to the LLM in one request and show the resulting list.
action_items = extract_action_items(summaries)

print("📌 Action Items / TODOs:")
print(action_items)


📌 Action Items / TODOs:
Here is a list of tasks mentioned in the transcript:

1. **Track individual page performance**: Measure user engagement and success metrics for specific web pages.
2. **Measure contribution to downstream processes**: Track the impact of web pages on business outcomes such as inquiries, sign-ups, etc.
3. **Implement demand generation metrics**: Develop metrics to measure the effectiveness of demand generation efforts.
4. **Prioritize PMM investments**: Balance investing in projects with maintaining business-as-usual operations.
5. **Track multiple views or pages**: Use website analytics to track user behavior and engagement across multiple pages.
6. **Instrument PMM practices**: Set up measurement tools for Product Marketing (PMM) efforts.
7. **Create a sales QBR measurement framework**: Develop a framework to measure the effectiveness of Sales Quality Business Requirements (QBRs).
8. **Implement an asynchronous review process**: Consider implementing an async re

In [None]:
# Save the extracted action items as UTF-8 text, replacing any existing file.
with open("action_items.txt", mode="w", encoding="utf-8") as out_file:
    out_file.write(action_items)