<a href="https://colab.research.google.com/github/JashVaghasiya/EasyMinutes-MOM/blob/main/Action_words_Rephraser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install transformers nltk torach numpy pandas

In [None]:
pip install spacy download en_core_web_sm

In [None]:
import gc
from google.colab import drive

drive.mount('/content/drive')
gc.collect()

In [None]:
import spacy
import torch
from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
import nltk

# Make sure to download necessary NLTK data and spaCy model
nltk.download('punkt', quiet=True)
# !python -m spacy download en_core_web_sm

# Load spaCy for NLP tasks
nlp = spacy.load("en_core_web_sm")

# Load BERT and T5 models and tokenizers
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Assuming 'folder_path' contains the BERT model and tokenizer
folder_path = '/content/drive/MyDrive/my_model'  # Update this path
bert_tokenizer = BertTokenizer.from_pretrained(folder_path)
bert_model = BertForSequenceClassification.from_pretrained(folder_path).to(device)

t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
t5_model = T5ForConditionalGeneration.from_pretrained("t5-small").to(device)

def split_into_sentences(transcript):
    """Split transcript into sentences using NLTK."""
    return nltk.tokenize.sent_tokenize(transcript)

def rephrase_with_model(sentence, model, tokenizer, device):
    """Rephrase a sentence using the T5 model."""
    input_ids = tokenizer.encode(sentence, return_tensors="pt").to(device)
    outputs = model.generate(input_ids, max_length=100, num_beams=5, early_stopping=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def extract_and_rephrase_action_phrases(sentence, nlp, t5_model, t5_tokenizer, device):
    """Extract action phrases from a sentence and rephrase them for clarity."""
    doc = nlp(sentence)
    action_phrase = ""

    for token in doc:
        # Check for the verb that indicates an action
        if token.pos_ == "VERB":
            # Construct a basic phrase: Verb + Direct Object
            direct_objects = [child for child in token.children if child.dep_ == "dobj"]
            if direct_objects:
                action_phrase = token.text + " " + " ".join([dobj.text for dobj in direct_objects])
            else:
                # If no direct object, use the verb itself
                action_phrase = token.text
            break  # Focus on the first action verb found for simplicity

    # Rephrase the constructed action phrase for better clarity, if any phrase was constructed
    if action_phrase:
        # Correcting the model usage with proper input formatting
        input_ids = t5_tokenizer.encode(action_phrase, return_tensors="pt").to(device)
        outputs = t5_model.generate(input_ids, max_length=50, num_beams=5, early_stopping=True)
        rephrased_phrase = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
        return rephrased_phrase
    else:
        return ""  # Return an empty string if no action verb was identified or no phrase could be constructed

def process_transcript_and_extract_refined_phrases(transcript, bert_model, bert_tokenizer, t5_model, t5_tokenizer, nlp, device):
    sentences = split_into_sentences(transcript)
    refined_phrases = []

    for sentence in sentences:
        inputs = bert_tokenizer.encode_plus(sentence, return_tensors="pt").to(device)
        outputs = bert_model(**inputs)  # Corrected line: inputs are unpacked as a dictionary
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_class = torch.argmax(probabilities, dim=-1).item()

        if predicted_class == 1:  # Action sentence
            refined_phrase = extract_and_rephrase_action_phrases(sentence, nlp, t5_model, t5_tokenizer, device)
            if refined_phrase:
                refined_phrases.append(refined_phrase)

    return refined_phrases

# Example usage
transcript = "Jane: Good morning, everyone. Let’s start with the current status of our software and hardware updates for our next-gen universal remote. Alex, can you kick us off with the software update? Alex: Sure, Jane. The team has successfully integrated the latest voice recognition software. However, we're encountering latency issues when the remote processes multiple commands in quick succession. We’re considering optimizing the command queue management to resolve this.  Sam: On the hardware side, we’ve upgraded the microcontroller to support the new software features Alex mentioned. We're also experimenting with a new battery design to extend the remote’s life. The prototype is promising, but we need to ensure it doesn't significantly increase the production cost. Mia: Design-wise, we’ve incorporated feedback from the last user testing session. The new button layout and the tactile feedback have been well-received in preliminary tests. However, aligning the new design with Sam’s hardware changes is our next challenge. Eric: From a QA perspective, we need to schedule a comprehensive testing phase for both the software updates and the new hardware components. It’s crucial to test the voice recognition in various environments to ensure it performs consistently. Jane: Great updates, team. Addressing the latency in voice command processing is our top priority. Alex, let’s brainstorm with your team on the command queue management. Sam, keep us posted on the battery design cost analysis. Mia, work with Sam to ensure the design and hardware are aligned. Eric, start planning the testing phase, focusing on voice recognition performance. Alex: Will do. I’ll also look into leveraging cloud processing to reduce the load on the remote’s processor, which might help with the latency. Sam: Understood. I’ll coordinate with the suppliers to get an estimate on the new battery costs and report back by next week. Mia: I’ll schedule a meeting with Sam tomorrow to discuss the design and hardware integration. Eric: I’m on it. I’ll prepare a detailed testing plan and reach out to the team for input. Jane: Excellent. Let’s reconvene next week to review progress on these action items. Thank you, everyone, for your hard work. Meeting adjourned."
refined_action_phrases = process_transcript_and_extract_refined_phrases(
    transcript, bert_model, bert_tokenizer, t5_model, t5_tokenizer, nlp, device)
print("Refined Action Phrases:", refined_action_phrases)


In [8]:
def remove_repetitive_words(phrase):
    words = phrase.split()
    # Remove consecutive duplicate words
    filtered_words = [word for i, word in enumerate(words) if i == 0 or word != words[i-1]]
    return ' '.join(filtered_words)

def refine_action_phrases(phrases):
    # First, remove repetitive words within each phrase
    refined_phrases = [remove_repetitive_words(phrase) for phrase in phrases]

    # Then, filter out phrases that are too generic or do not imply a clear action
    # Note: This step might require manual adjustment based on your criteria for "actionable" phrases
    actionable_phrases = [phrase for phrase in refined_phrases if len(phrase.split()) > 1 and not phrase.lower() in ['work', 'start', 'look at']]

    # Remove duplicates while preserving order
    seen = set()
    deduplicated_phrases = [x for x in actionable_phrases if not (x in seen or seen.add(x))]
    return deduplicated_phrases

refined_action_phrases = refine_action_phrases(refined_action_phrases)
print(refined_action_phrases)

['kick us', 'integrated software', 'Microcontroller upgraded microcontroller', 'incorporated feedback', 'aligning design', 'Be besoin', 'addressing latency', 'keep us on us.', 'schedule meeting', 'préparer plan.', 'Vielen Dank für']


In [9]:
def to_present_continuous(phrase):
    doc = nlp(phrase)
    new_phrase = []

    for token in doc:
        # Check if the token is a verb
        if token.pos_ == "VERB":
            # Convert verb to -ing form
            ing_verb = token.lemma_ + "ing"
            new_phrase.append(ing_verb)
        else:
            new_phrase.append(token.text)

    return ' '.join(new_phrase)

present_continuous_phrases = [to_present_continuous(phrase) for phrase in refined_action_phrases]
print(present_continuous_phrases)

['kicking us', 'integrateing software', 'Microcontroller upgradeing microcontroller', 'incorporateing feedback', 'aligning design', 'Be besoin', 'addressing latency', 'keeping us on us .', 'schedule meeting', 'préparer plan .', 'Vielen Dank für']
