In [1]:
import re

import numpy as np
import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from tabulate import tabulate

# Load the spaCy English pipeline used by the engine below; fetch it on first use.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package is not installed.
    # Catching only that (instead of a bare ``except``) lets unrelated failures
    # and KeyboardInterrupt surface normally.  The Python API is used instead of
    # a ``!python`` shell escape so the model is installed for the interpreter
    # running this kernel and the cell stays valid plain Python.
    from spacy.cli import download as download_spacy_model

    download_spacy_model("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

class EnterpriseNLP_Engine:
    """Extractive meeting-transcript analyzer: keyword rules plus TF-IDF scoring.

    ``analyze`` splits a transcript into sentences with the module-level spaCy
    pipeline ``nlp``, labels every sentence as "Key Decision", "Action Item",
    "Open Question" or "Contextual Information", and scores it.
    ``print_report`` and ``generate_full_script`` print two different views of
    the resulting DataFrame.

    The signal lists created in ``__init__`` are ordinary instance attributes
    that are read on every call, so they may be edited after construction.
    Signals are matched as whole words/phrases (case-insensitively), not as raw
    substrings, so 'ui' does not fire on "build" and 'will' not on "willing".
    """

    # Base score (the ``k`` term of the final score) contributed by each category.
    CATEGORY_SCORES = {
        "Key Decision": 100,
        "Action Item": 80,
        "Open Question": 40,
        "Contextual Information": 0,
    }

    # Weights of the five terms that make up the final sentence score.
    SCORE_WEIGHTS = {
        "category": 0.4,       # rule-based category score (k)
        "tfidf": 0.25,         # normalized sum of the sentence's TF-IDF weights
        "centrality": 0.2,     # cosine similarity to the transcript centroid
        "position": 0.1,       # relative position; later sentences score higher
        "action_bonus": 0.05,  # flat bonus for action items
    }

    # Script section under which each category is listed by generate_full_script.
    SCRIPT_SECTIONS = {
        "Key Decision": "Decisions",
        "Action Item": "Action Items",
        "Open Question": "Open Questions",
    }

    # Sentences with this many characters or fewer are ignored by analyze().
    MIN_SENTENCE_CHARS = 5

    # Entity labels accepted as an owner, and the placeholder used when none is found.
    OWNER_LABELS = ("PERSON", "ORG")
    NO_OWNER = "Not Mentioned"

    # Whole-word pronouns that trigger the fallback to the previous owner.
    PRONOUN_PATTERN = r"\b(?:he|she|they)\b"

    # Negation cue ("not", "never", "...n't") and the maximum number of words
    # allowed between it and a decision signal ("has not yet been decided").
    NEGATION_PATTERN = r"(?:\bnot|\bnever|n['\u2019]t)\b"
    MAX_NEGATION_GAP = 3

    def __init__(self):
        # Deterministic rule signals
        self.decision_signals = ['decided', 'agreed', 'approved', 'finalized', 'confirmed']
        self.blocker_signals = ['not decided', 'unclear', 'pending', 'unresolved', 'revisit', 'needs confirmation']
        self.action_signals = ['will', 'shall', 'responsible', 'assigned to', 'needs to', 'coordinate', 'complete']
        self.question_signals = ['?', 'should we', 'do we', 'unclear', 'open question']
        self.domain_keywords = ['deployment', 'deadline', 'budget', 'qa', 'security', 'risk', 'monitoring', 'ui', 'launch', 'pipeline']

    # ------------------------------------------------------------------
    # Signal matching helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _signal_pattern(signal, allow_plural=False):
        """Return a regex source that matches ``signal`` as a whole word/phrase.

        A word boundary is added only on a side where the signal starts or ends
        with a word character, so punctuation signals such as '?' still match
        when glued to a word ("monitor?").  With ``allow_plural`` a trailing
        "s"/"es" is tolerated ("risk" also matches "risks").
        """
        def is_word_char(ch):
            return ch.isalnum() or ch == "_"

        lead = r"\b" if is_word_char(signal[0]) else ""
        trail = r"\b" if is_word_char(signal[-1]) else ""
        plural = r"(?:e?s)?" if (allow_plural and trail) else ""
        return lead + re.escape(signal) + plural + trail

    def _matched_signals(self, signals, text, allow_plural=False):
        """Return the entries of ``signals`` found in ``text``, in ``signals`` order."""
        return [
            sig for sig in signals
            if sig and re.search(self._signal_pattern(sig, allow_plural), text, re.IGNORECASE)
        ]

    def _has_negated_decision(self, text):
        """True when a decision signal follows a negation within a few words.

        Catches phrasings such as "has not been decided" or "hasn't been
        approved", which do not contain the literal blocker 'not decided'.
        """
        decisions = "|".join(re.escape(sig) for sig in self.decision_signals if sig)
        if not decisions:
            return False
        pattern = self.NEGATION_PATTERN + r"(?:\s+\w+){0,%d}\s+(?:%s)\b" % (
            self.MAX_NEGATION_GAP,
            decisions,
        )
        return re.search(pattern, text, re.IGNORECASE) is not None

    def _is_blocked(self, text):
        """True when ``text`` carries a blocker signal or a negated decision."""
        return bool(self._matched_signals(self.blocker_signals, text)) or self._has_negated_decision(text)

    def _keywords_in(self, text):
        """Return the domain keywords mentioned in ``text`` (plural forms included)."""
        return self._matched_signals(self.domain_keywords, text, allow_plural=True)

    def _classify(self, sent_text):
        """Return the category label for one sentence.

        Precedence: an unblocked decision, then an unblocked non-question
        action, then question/blocker, otherwise contextual information.
        """
        low_sent = sent_text.lower()
        has_blocker = self._is_blocked(low_sent)
        has_decision = bool(self._matched_signals(self.decision_signals, low_sent))
        has_action = bool(self._matched_signals(self.action_signals, low_sent))
        is_question = "?" in sent_text or bool(self._matched_signals(self.question_signals, low_sent))

        # Avoid false "negative decisions": a sentence with a blocker is never a decision.
        if has_decision and not has_blocker:
            return "Key Decision"
        if has_action and not is_question and not has_blocker:
            return "Action Item"
        if is_question or has_blocker:
            return "Open Question"
        return "Contextual Information"

    def _open_question_status(self, sent_text):
        """Return the status line printed for an open question.

        Decided by the same blocker detection used for classification rather
        than by searching for the substring "not" (which also occurs in
        "notify" or "another" and is absent from "unclear"/"pending").
        """
        if self._is_blocked(sent_text.lower()):
            return "Pending confirmation/Blocker detected"
        return "Explicit inquiry"

    def _importance_scores(self, sentences):
        """Return ``(t_norm, s_scores)``, two arrays with one 0-100 value per sentence.

        ``t_norm`` is each sentence's summed TF-IDF weight divided by the
        largest such sum; ``s_scores`` is its cosine similarity to the mean
        TF-IDF vector.  Both are all zeros when the vectorizer cannot build a
        vocabulary (for example every sentence consists of stop words only).
        """
        try:
            tfidf_matrix = TfidfVectorizer(stop_words='english').fit_transform(sentences)
        except ValueError:
            # Empty vocabulary: no lexical evidence, so these terms contribute nothing.
            zeros = np.zeros(len(sentences))
            return zeros, zeros.copy()

        t_scores = np.asarray(tfidf_matrix.sum(axis=1)).flatten()
        peak = t_scores.max()
        t_norm = (t_scores / (peak if peak > 0 else 1)) * 100
        centroid = np.asarray(tfidf_matrix.mean(axis=0))
        s_scores = cosine_similarity(tfidf_matrix, centroid).flatten() * 100
        return t_norm, s_scores

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def extract_owner(self, doc, prev_entities):
        """Heuristic NER and pronoun resolution.

        Args:
            doc: object exposing ``ents`` (items with ``text`` and ``label_``)
                and ``text``; ``analyze`` passes the parsed sentence.
            prev_entities: owners found in earlier sentences, oldest first.

        Returns:
            The first PERSON/ORG entity of ``doc``; otherwise the most recent
            previous owner when the sentence contains the word "he", "she" or
            "they"; otherwise "Not Mentioned".
        """
        current_names = [ent.text for ent in doc.ents if ent.label_ in self.OWNER_LABELS]
        if current_names:
            return current_names[0]

        # Whole-word match: a plain substring test would find "he" inside
        # "the" and attach the previous owner to almost every sentence.
        if prev_entities and re.search(self.PRONOUN_PATTERN, doc.text.lower()):
            return prev_entities[-1]
        return "Not Mentioned"

    def analyze(self, transcript):
        """Classify and score every sentence of ``transcript``.

        Returns:
            A DataFrame with one row per retained sentence and the columns
            SID, Text, Score, Category, Owner, Deadline and Keywords, or
            ``None`` when no sentence is longer than ``MIN_SENTENCE_CHARS``.
        """
        doc = nlp(transcript)
        sentences = [
            sent.text.strip() for sent in doc.sents
            if len(sent.text.strip()) > self.MIN_SENTENCE_CHARS
        ]
        if not sentences:
            return None

        t_norm, s_scores = self._importance_scores(sentences)
        weights = self.SCORE_WEIGHTS
        total = len(sentences)

        analysis_data = []
        entity_history = []

        # nlp.pipe parses all sentences in one pass instead of one call each.
        for i, (sent_text, sent_doc) in enumerate(zip(sentences, nlp.pipe(sentences))):
            # Extract entities
            owner = self.extract_owner(sent_doc, entity_history)
            if owner != "Not Mentioned":
                entity_history.append(owner)
            deadline = [ent.text for ent in sent_doc.ents if ent.label_ == "DATE"]

            # Rule-based classification
            category = self._classify(sent_text)
            k = self.CATEGORY_SCORES[category]

            # Final scoring formula
            p = (i / total) * 100
            a = 100 if category == "Action Item" else 0
            final_score = (
                (weights["category"] * k)
                + (weights["tfidf"] * t_norm[i])
                + (weights["centrality"] * s_scores[i])
                + (weights["position"] * p)
                + (weights["action_bonus"] * a)
            )

            analysis_data.append({
                "SID": f"S-{i:03d}",
                "Text": sent_text,
                "Score": round(final_score, 2),
                "Category": category,
                "Owner": owner,
                "Deadline": deadline[0] if deadline else "None",
                "Keywords": self._keywords_in(sent_text.lower())
            })

        return pd.DataFrame(analysis_data)

    def print_report(self, df):
        """Print decisions, action items, open questions, topic clusters and the top-10 score table.

        ``df`` is the frame returned by ``analyze``; ``None`` or an empty frame
        prints a short notice instead of failing on missing columns.
        """
        if df is None or df.empty:
            print("No analyzed sentences to report.")
            return

        print("\n📌 KEY DECISIONS")
        for _, r in df[df['Category'] == "Key Decision"].iterrows():
            print(f"• {r['Text']} [{r['SID']}]\n  Reason: Verified agreement signal without blocking modifiers.")

        print("\n📌 ACTION ITEMS")
        for _, r in df[df['Category'] == "Action Item"].iterrows():
            print(f"• Task: {r['Text']}\n  Owner: {r['Owner']} | Deadline: {r['Deadline']} | ID: {r['SID']}")

        print("\n📌 OPEN QUESTIONS")
        for _, r in df[df['Category'] == "Open Question"].iterrows():
            reason = self._open_question_status(r['Text'])
            print(f"• {r['Text']} [{r['SID']}]\n  Status: {reason}")

        print("\n📌 TOPIC CLUSTERS")
        for kw in self.domain_keywords:
            ids = df[df['Keywords'].apply(lambda x: kw in x)]['SID'].tolist()
            if ids:
                print(f"Cluster: {kw.upper()} | IDs: {ids}")

        print("\n📌 SENTENCE IMPORTANCE TABLE")
        print(tabulate(df[['SID', 'Score', 'Category', 'Text']].sort_values('Score', ascending=False).head(10),
                       headers='keys', tablefmt='grid'))

        print("\n📌 EXTRACTION LOGIC SUMMARY")
        print("- Hybrid Rule + ML engine (Regex + TF-IDF + Cosine Similarity)")
        print("- No abstract summarization")
        print("- 1:1 traceability with Sentence IDs")
        print("- Hallucination-resistant owner & deadline extraction")

    def generate_full_script(self, df):
        """Generate a clean, fully prepared meeting script with no duplicates.

        Sentences are grouped by domain keyword and, inside each keyword, by
        section (decisions, action items, open questions).  A sentence text is
        listed at most once per keyword and section; the first occurrence wins.
        """
        topic_dict = {}
        # De-duplication keys are tracked separately from the formatted lines:
        # the lines carry IDs/owners, so comparing raw text against them never matches.
        seen = set()
        for _, r in df.iterrows():
            section = self.SCRIPT_SECTIONS.get(r['Category'])
            for kw in r['Keywords']:
                # Register the topic even for contextual rows to keep first-mention order.
                bucket = topic_dict.setdefault(
                    kw, {"Decisions": [], "Action Items": [], "Open Questions": []}
                )
                if section is None:
                    continue
                key = (kw, section, r['Text'])
                if key in seen:
                    continue
                seen.add(key)
                if section == "Action Items":
                    bucket[section].append(
                        f"{r['Text']} [Owner: {r['Owner']}, Deadline: {r['Deadline']}, ID: {r['SID']}]"
                    )
                else:
                    bucket[section].append(f"{r['Text']} [{r['SID']}]")

        print("\n================== FULLY PREPARED MEETING SCRIPT ==================")
        print("\nOpening Statement:\nGood afternoon everyone, let’s begin the weekly product sync meeting.\n")

        for topic, contents in topic_dict.items():
            if any(contents.values()):
                print(f"Topic: {topic.upper()}")
                if contents["Decisions"]:
                    print("  Decisions:")
                    for d in contents["Decisions"]:
                        print(f"    • {d}")
                if contents["Action Items"]:
                    print("  Action Items:")
                    for a in contents["Action Items"]:
                        print(f"    • {a}")
                if contents["Open Questions"]:
                    print("  Open Questions:")
                    for q in contents["Open Questions"]:
                        print(f"    • {q}")
                print("")

        print("Closing Statement:\nThank you all for attending the meeting.")
        print("\n" + "="*70)

# ------------------- CONTINUOUS LOOP MODE -------------------
# Reads transcripts from the prompt until the user types 'exit', printing the
# report and the generated script for each one.
engine = EnterpriseNLP_Engine()

while True:
    try:
        transcript = input("\nPlease provide the <RAW_MEETING_TRANSCRIPT>. Type 'exit' to stop the system:\n")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupt at the prompt: leave the loop cleanly
        # instead of surfacing a traceback.
        print("System shutdown complete.")
        break

    # Compare the trimmed text so ' exit ' or 'Exit\n' also stops the loop.
    command = transcript.strip()
    if command.lower() == 'exit':
        print("System shutdown complete.")
        break
    if len(command) < 15:
        print("Input too short. Please provide a valid transcript.")
        continue

    df = engine.analyze(transcript)
    if df is None or df.empty:
        # analyze() returns None when no sentence survives its length filter;
        # say so rather than silently re-prompting.
        print("No analyzable sentences were found in the transcript.")
        continue

    engine.print_report(df)
    engine.generate_full_script(df)



Please provide the <RAW_MEETING_TRANSCRIPT>. Type 'exit' to stop the system:
Good afternoon everyone, let’s begin the weekly product sync meeting. The main objective today is to review the current release status and finalize next steps. Project Phoenix is currently running one week behind the planned schedule. After reviewing the latest progress report, we agreed to move the deployment date to October 15. Rahul will be responsible for coordinating with the DevOps team to update the deployment pipeline. Ananya mentioned that there are still some unresolved UI issues on the checkout page. She will complete the UI fixes and share an update by this Friday. There was a discussion about whether the existing cloud budget can support increased traffic during launch. This point is still unclear and needs confirmation from the finance team. The security team confirmed that all critical vulnerabilities identified last sprint have been resolved. However, it has not been decided who will monitor

In [None]:
""" Good afternoon everyone, let’s begin the weekly product sync meeting.
The main objective today is to review the current release status and finalize next steps.
Project Phoenix is currently running one week behind the planned schedule.
After reviewing the latest progress report, we agreed to move the deployment date to October 15.
Rahul will be responsible for coordinating with the DevOps team to update the deployment pipeline.
Ananya mentioned that there are still some unresolved UI issues on the checkout page.
She will complete the UI fixes and share an update by this Friday.
There was a discussion about whether the existing cloud budget can support increased traffic during launch.
This point is still unclear and needs confirmation from the finance team.
The security team confirmed that all critical vulnerabilities identified last sprint have been resolved.
However, it has not been decided who will monitor system metrics during the first 24 hours post-deployment.
Should we assign on-call responsibility to the DevOps team for the launch window?
Let’s revisit the monitoring decision once we receive the budget confirmation.
If there are no further questions, we can close today’s meeting.
"""