<h1 align="center">NLP Assignment: Speech-to-Text</h1>

#

In [7]:
from transformers import pipeline
import re
import json
import speech_recognition as sr

class IntentRecognizer:
    """Rule-based intent classifier and entity extractor for call transcripts.

    Each intent label maps to a list of regex patterns. The intent whose
    patterns match the transcript most often wins; when nothing matches the
    intent is ``"general"``.
    """

    def __init__(self):
        # Intent label -> regex patterns searched in the pre-processed transcript.
        # Declaration order matters: it is the tie-break order in predict().
        self.intent_patterns = {
            "intro": ["my name is", "this is", "from", "hello hi"],
            "purpose": ["calling about", "call regarding"]
        }

    def predict(self, transcription):
        """Classify a transcript and extract the entities for its intent.

        Args:
            transcription: Raw transcript text. ``None`` or an empty string
                (e.g. a failed speech-recognition step) is accepted.

        Returns:
            A ``(predicted_intent, entities)`` tuple where ``entities`` is a
            dict of entity name -> extracted value. Missing or empty input
            yields ``("general", {})``.
        """
        # Nothing to analyse: avoid calling preprocess() on None.
        if not transcription:
            return "general", {}

        cleaned_transcription = preprocess(transcription)

        # Count how many patterns of each intent occur in the transcript.
        # Building the dict in declaration order makes ties deterministic
        # (max() returns the first key with the highest count).
        intent_counts = {}
        for intent, patterns in self.intent_patterns.items():
            hits = sum(
                1 for pattern in patterns
                if re.search(pattern, cleaned_transcription)
            )
            if hits:
                intent_counts[intent] = hits

        if intent_counts:
            predicted_intent = max(intent_counts, key=intent_counts.get)
        else:
            predicted_intent = "general"

        entities = self.extract_entities(predicted_intent, cleaned_transcription)

        return predicted_intent, entities

    def extract_entities(self, intent, transcription):
        """Pull named values out of a transcript for the given intent.

        Args:
            intent: Intent label; only ``"intro"`` and ``"purpose"`` produce
                entities.
            transcription: Transcript text to search. The patterns are
                lower-case, so the text is expected to be lower-cased already.

        Returns:
            Dict with any of the keys ``"Caller Name"``, ``"Customer Name"``,
            ``"Company"`` (intro) or ``"Product"`` (purpose) that were found.
        """
        entities = {}
        if intent == "intro":
            # "this is <name>" keeps priority; "my name is <name>" is the
            # fallback so that every intro trigger phrase can yield a name.
            caller_name = (
                re.search(r"this is (\w+)", transcription)
                or re.search(r"my name is (\w+)", transcription)
            )
            customer_name = re.search(r"hello hi (\w+)", transcription)
            # Greedy: captures every following word made of letters or '&'.
            company = re.search(r"from \b([A-Za-z&]+(?:\s+[A-Za-z&]+)*)\b", transcription)
            if caller_name:
                entities["Caller Name"] = caller_name.group(1)
            if customer_name:
                entities["Customer Name"] = customer_name.group(1)
            if company:
                entities["Company"] = company.group(1)
        elif intent == "purpose":
            product = re.search(r"how would you like a (\w+)", transcription)
            if product:
                entities["Product"] = product.group(1)

        return entities

def preprocess(transcription):
    """Normalise a transcript for pattern matching by lower-casing it."""
    lowered = transcription.lower()
    return lowered



# Default location of the JSON report (kept for backward compatibility).
_DEFAULT_SUMMARY_PATH = "/content/drive/MyDrive/Internship_Assignment_DimensionLessTech/output.json"


# Generate a summary of the call
def generate_summary(transcription, entities, intent, output_path=_DEFAULT_SUMMARY_PATH):
    """Build the JSON call summary and optionally write it to disk.

    The transcript is split on ``"."``; every non-empty sentence is reported
    together with the call-level intent and the full list of entities.

    Args:
        transcription: Transcript text. ``None`` is treated as having no
            sentences and is serialised as JSON ``null``.
        entities: Dict of entity name -> entity value.
        intent: Intent label attached to every sentence.
        output_path: File the JSON is written to. Pass ``None`` to skip
            writing and only return the JSON string.

    Returns:
        The summary as a JSON string indented by 4 spaces, with keys
        ``"task_1_output"`` (the transcript) and ``"task_3_output"`` (the
        per-sentence records).
    """
    # The entity list does not depend on the sentence, so build it once.
    entity_records = [
        {"entity_name": entity_name, "entity_value": entity_value}
        for entity_name, entity_value in entities.items()
    ]

    sentence_info = []
    for sentence in (transcription or "").split("."):
        cleaned_sentence = sentence.strip()
        if not cleaned_sentence:
            continue
        sentence_info.append({
            "sentence": cleaned_sentence,
            "intent": intent,
            "entities": entity_records
        })

    output = {
        "task_1_output": transcription,
        "task_3_output": sentence_info
    }

    json_output = json.dumps(output, indent=4)

    if output_path is not None:
        with open(output_path, "w", encoding="utf-8") as file:
            file.write(json_output)

    return json_output



def convert_audio_to_text(audio_file):
    """Transcribe an audio file using the recognizer's Google backend.

    Args:
        audio_file: Audio source handed to ``sr.AudioFile``.

    Returns:
        The recognised text, or ``None`` when the speech could not be
        understood or the recognition request failed (a message is printed
        in both cases).
    """
    recognizer = sr.Recognizer()

    # Read the whole recording into memory before recognising it.
    with sr.AudioFile(audio_file) as source:
        recorded_audio = recognizer.record(source)

    result = None
    try:
        result = recognizer.recognize_google(recorded_audio)
    except sr.UnknownValueError:
        print("Unable to recognize speech")
    except sr.RequestError as e:
        print("Speech recognition request error: {0}".format(e))

    return result


audio_file_path = "/content/drive/MyDrive/Internship_Assignment_DimensionLessTech/sales_call_telephone_marketers.wav"

# Convert audio to text (ASR step)
transcription = convert_audio_to_text(audio_file_path)

# convert_audio_to_text returns None (after printing the reason) when
# recognition fails; stop here with a clear error instead of letting None
# flow into the intent and summary steps.
if transcription is None:
    raise RuntimeError(
        "Speech recognition produced no transcription for {0}".format(audio_file_path)
    )

# Initialize the NLU pipeline
# NOTE(review): nlp_pipeline is not referenced by the code visible here, and
# device=0 presumably requires a GPU - confirm whether it is still needed.
nlp_pipeline = pipeline("ner", model="dslim/bert-base-NER", device=0)
intent_recognizer = IntentRecognizer()
intent, entities = intent_recognizer.predict(transcription)

# Generate a summary of the call
summary = generate_summary(transcription, entities, intent)

print()
print("Summary:")
print()
print(summary)


Summary:

{
    "task_1_output": "hello hi Nancy this is Mike from AT&T Incorporation",
    "task_3_output": [
        {
            "sentence": "hello hi Nancy this is Mike from AT&T Incorporation",
            "intent": "intro",
            "entities": [
                {
                    "entity_name": "Caller Name",
                    "entity_value": "mike"
                },
                {
                    "entity_name": "Customer Name",
                    "entity_value": "nancy"
                },
                {
                    "entity_name": "Company",
                    "entity_value": "at&t incorporation"
                }
            ]
        }
    ]
}
