In [None]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer

# Directory holding the fine-tuned classifier; the same path is passed to both
# `from_pretrained` calls below. It is a relative path, so it resolves against
# the notebook's working directory.
model_path = "Models/ClassificationModel"

# Load the trained model
# NOTE: `model` is a notebook-global that `predict` (defined in a later cell) reads.
model = DistilBertForSequenceClassification.from_pretrained(model_path)

# Load the tokenizer
# NOTE: `tokenizer` is likewise read by `predict` from the global namespace.
tokenizer = DistilBertTokenizer.from_pretrained(model_path)

print("Model and tokenizer loaded successfully!")

In [None]:
import torch
def predict(text):
    """Classify `text` into one of the four form-field categories.

    Uses the notebook-global `tokenizer` and `model`. The input is tokenized
    to a fixed length of 64 tokens (padded / truncated), run through the model
    without gradient tracking, and the index of the largest logit is mapped
    to a category name.

    Args:
        text: The sentence to classify.

    Returns:
        One of "Name", "Phone Number", "Amount", "Account Number".
    """
    # NOTE(review): assumes this order matches the label ids used when the
    # model was trained -- confirm against the training code.
    labels = ["Name", "Phone Number", "Amount", "Account Number"]

    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=64,
    )

    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = model(**encoded).logits

    best_index = logits.argmax(dim=1).item()
    return labels[best_index]

# Example prediction: run one hand-written sentence through `predict` and
# print the category name it returns.
example_text = "Transfer 0 rupees to nitin"
print("Predicted Category:", predict(example_text))

In [None]:
# Print the repr of whatever `model` is currently bound to in the kernel.
print(model)

In [None]:
%pip install torchinfo

In [None]:
from torchinfo import summary
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification

# Load the stock "distilbert-base-uncased" checkpoint purely to inspect its
# layer structure with torchinfo.
#
# Distinct names are used on purpose: binding these objects to `model` /
# `tokenizer` would silently replace the fine-tuned classifier loaded from
# "Models/ClassificationModel", and `predict` (which reads those globals)
# would start answering with the base checkpoint instead.
base_model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased")
base_tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

# Create dummy input
dummy_inputs = base_tokenizer("This is a test sentence", return_tensors="pt")

# Use torchinfo to print summary (only `input_ids` is fed to the forward pass)
summary(base_model, input_data=(dummy_inputs['input_ids'],), depth=3)

In [None]:
import re
import spacy


In [None]:
# Load spaCy's "en_core_web_sm" pipeline into the notebook-global `nlp`,
# which `extract_names_spacy` calls to tokenize and POS-tag text.
nlp = spacy.load("en_core_web_sm")

In [None]:
# pip install spacy
# python -m spacy download en_core_web_sm



# Common intro patterns (regex) to capture names after phrases.
#
# Each pattern is "<intro phrase> <Name>", with the name in capture group 1:
#   * The intro phrase is matched case-insensitively via a *scoped* inline flag
#     `(?i:...)`, so "My Name Is", "THIS IS", "i'm" ... all work.
#   * The name itself stays case-sensitive: one or more whitespace-separated
#     words that each start with an uppercase ASCII letter. Compiling the whole
#     pattern with re.IGNORECASE (as before) made `[A-Z]` match lowercase too,
#     so "this is nothing" produced the name "nothing" and the capture ran on
#     through the rest of the sentence.
#   * A leading `\b` stops short alternatives such as "im" from matching inside
#     other words (e.g. "him Tanu").
INTRO_PATTERNS = [
    r"\b(?i:this is|this's)\s+([A-Z][\w\-]+(?:\s+[A-Z][\w\-]+)*)",
    r"\b(?i:i am|i'm|im)\s+([A-Z][\w\-]+(?:\s+[A-Z][\w\-]+)*)",
    r"\b(?i:call me|friends call me|my name is)\s+([A-Z][\w\-]+(?:\s+[A-Z][\w\-]+)*)",
    r"\b(?i:introduce myself\s*,?\s*i am)\s+([A-Z][\w\-]+(?:\s+[A-Z][\w\-]+)*)",
]
# No global IGNORECASE here: case-insensitivity is scoped inside each pattern.
compiled_patterns = [re.compile(p) for p in INTRO_PATTERNS]

def extract_names_spacy(text):
    """Extract candidate person names from `text`.

    Two sources are combined, in this order:
      1. Matches of the module-level `compiled_patterns` (intro phrases such
         as "my name is ..."), with trailing punctuation stripped.
      2. Every maximal run of consecutive tokens that spaCy (`nlp`) tags as
         PROPN, joined with single spaces.

    Candidates shorter than two characters or containing a digit are dropped.
    The result is de-duplicated while preserving first-seen order; previously
    the same name could appear more than once when several intro patterns
    matched it.

    Args:
        text: Input sentence(s).

    Returns:
        List of unique candidate name strings, regex hits first.
    """
    candidates = []

    # 1) Rule-based regex patterns (high precision)
    for pattern in compiled_patterns:
        for match in pattern.finditer(text):
            # Basic cleanup: strip trailing punctuation
            candidates.append(re.sub(r'[\.,;:!?\)]*$', '', match.group(1).strip()))

    # 2) POS-based extraction using spaCy: consecutive PROPN tokens
    propn_run = []
    for token in nlp(text):
        if token.pos_ == "PROPN":
            propn_run.append(token.text)
        elif propn_run:
            candidates.append(" ".join(propn_run))
            propn_run = []
    # Flush a run that reaches the end of the text.
    if propn_run:
        candidates.append(" ".join(propn_run))

    # 3) Filter obvious non-names and de-duplicate, keeping first-seen order.
    cleaned = []
    seen = set()
    for candidate in candidates:
        name = candidate.strip()
        # discard single-letter tokens or anything containing digits
        if len(name) < 2 or re.search(r'\d', name):
            continue
        if name in seen:
            continue
        seen.add(name)
        cleaned.append(name)
    return cleaned


In [None]:
# Smoke test: the cell output is the list returned for a simple self-introduction.
extract_names_spacy("My Name is Nitin Mishra")

In [None]:
def extract_with_regex(text):
    """Pull a phone number, an account number and amounts out of `text`.

    Args:
        text: Free-form sentence, e.g. a speech transcript.

    Returns:
        dict with keys:
          "phones":   first phone number found (str) or None if there is none.
          "accounts": first account number found (str) or None if there is none.
          "amounts":  list of amount strings (currency marker included when
                      present); empty list if there are none.

    Notes:
        * Missing phone / account numbers yield None instead of raising
          IndexError.
        * Phone: Indian-style 10 digits starting with 6-9, optional +91 / 0
          prefix, optionally written as two groups of five ("62631 46230",
          which is how the speech recogniser emits them). It must not be part
          of a longer digit run.
        * Amounts: digit groups with optional thousands commas and up to two
          decimals. A plain number that is part of a detected phone or account
          number is not reported as an amount; a number carrying an explicit
          currency marker (₹, Rs, Rs., INR) is always kept.
    """
    import re

    phone_re = re.compile(r'(?<!\d)(?:(?:\+91|0)[\s\-]?)?[6-9]\d{4}[\s\-]?\d{5}(?!\d)')
    # Account numbers (usually 11 to 18 digits)
    account_re = re.compile(r'\b\d{11,18}\b')
    amount_re = re.compile(r'(?P<cur>(?:₹|Rs\.?|INR)\s?)?\d+(?:,\d+)*(?:\.\d{1,2})?')

    phone_matches = list(phone_re.finditer(text))
    account_matches = list(account_re.finditer(text))

    # Character spans already claimed by a phone or account number.
    reserved = [m.span() for m in phone_matches + account_matches]

    def _is_reserved(span):
        start, end = span
        return any(start < r_end and r_start < end for r_start, r_end in reserved)

    amounts = [
        m.group()
        for m in amount_re.finditer(text)
        if m.group('cur') or not _is_reserved(m.span())
    ]

    return {
        "phones": phone_matches[0].group() if phone_matches else None,
        "accounts": account_matches[0].group() if account_matches else None,
        "amounts": amounts
    }


In [None]:
# Demo input containing an account number (twice), a rupee amount and a phone number.
extract_with_regex("account number 123456789012 Please transfer ₹5,000 to account number 123456789012 and call me at 9876543210")

In [None]:
import spacy

# Load English model.
# NOTE: this rebinds the notebook-global `nlp`; an earlier cell already loads
# the same "en_core_web_sm" pipeline into that name.
nlp = spacy.load("en_core_web_sm")

def extract_names(sentence):
    """Return the text of every entity in `sentence` that spaCy labels PERSON.

    Runs the notebook-global `nlp` pipeline on `sentence` and collects
    `ent.text` for each entity whose `label_` equals "PERSON", in document
    order.
    """
    person_names = []
    for entity in nlp(sentence).ents:
        if entity.label_ == "PERSON":
            person_names.append(entity.text)
    return person_names

# Test sentences: a self-introduction, a well-known full name, and a name
# preceded by a title and initials.
sentences = [
    "My name is Akash Kumar.",
    "Barack Obama was the 44th President of the United States.",
    "Please connect me with Dr. A. P. J. Abdul Kalam."
]

# Print each sentence with the PERSON entities found in it, separated by a rule.
for s in sentences:
    print(f"Sentence: {s}")
    print("Extracted Names:", extract_names(s))
    print("-" * 50)


In [None]:
import threading
import speech_recognition as sr
# import ipywidgets as widgets
# from IPython.display import display, clear_output

In [17]:
import threading
import speech_recognition as sr

# Global state shared by the listener functions in this cell:
# `listening` is the run flag polled by the loop in `listen_in_background`;
# `listener_thread` holds the Thread object created by `start_listening`.
listening = False
listener_thread = None

def listen_in_background():
    """Worker loop: capture phrases from the default microphone and print them.

    Runs while the module-level `listening` flag is true. Each phrase is sent
    to Google's recognizer via `recognize_google`; recognised text is printed
    as "You said: ...". A listen timeout (2 s without speech) just re-checks
    the flag; unintelligible audio prints "[Unrecognized Speech]"; an API
    error prints "[API Error] ..." and ends the loop.

    Whenever the loop ends for any reason (stop request, API error, or the
    microphone failing to open) `listening` is reset to False, so a later
    `start_listening()` is not blocked by a stale flag.
    """
    global listening
    try:
        r = sr.Recognizer()
        with sr.Microphone() as mic:
            # Calibrate once, before the loop, rather than on every pass:
            # calibration reads audio from the microphone for `duration`
            # seconds, so doing it per iteration throws away the start of
            # whatever is being said.
            r.adjust_for_ambient_noise(mic, duration=0.5)
            while listening:
                try:
                    audio = r.listen(mic, timeout=2)
                    text = r.recognize_google(audio)
                    print("You said:", text)
                except sr.WaitTimeoutError:
                    # No speech started within the timeout; poll the flag again.
                    continue
                except sr.UnknownValueError:
                    print("[Unrecognized Speech]")
                except sr.RequestError as e:
                    print("[API Error]", e)
                    break
    finally:
        listening = False

def start_listening():
    """Start listening in background thread"""
    global listening, listener_thread

    # Already running: nothing to do.
    if listening:
        return

    listening = True
    worker = threading.Thread(target=listen_in_background)
    listener_thread = worker
    worker.start()
    print("Listening started...")

def stop_listening():
    """Stop listening.

    Clears the module-level `listening` flag and prints a confirmation.
    This function does not join `listener_thread`; it returns immediately
    after clearing the flag.
    """
    global listening
    listening = False
    print("Listening stopped.")

In [18]:
# Kick off the background listener; recognised phrases are printed below as they arrive.
start_listening()

Listening started...


You said: hello
You said: this is nothing
You said: this is Nitin
You said: he is Sumanth
You said: my phone number is 62631 46230
[Unrecognized Speech]
[Unrecognized Speech]
You said: Tanu is in Raipur
You said: Tanu likes pop
You said: Tanu likes popcorn
[Unrecognized Speech]
[Unrecognized Speech]


In [20]:
# Clear the `listening` flag so the background loop ends.
stop_listening()


Listening stopped.


In [None]:
import threading
import speech_recognition as sr
import logging
from typing import Optional, List

class SpeechToText:
    """
    Continuous speech-to-text helper that listens in a background thread.

    Key features:
    - Continuous listening (no phrase_time_limit; no timeout unless
      `listen_timeout` is given).
    - Thread-safe transcripts storage and access.
    - Optional minimal error handling; set raise_on_error=True to let exceptions bubble up.
    - Optional mic_index to select a non-default microphone.

    Args:
        mic_index: Device index passed to `sr.Microphone`; None selects the default.
        recognizer: Recognizer to use; a fresh `sr.Recognizer()` is created when omitted.
        raise_on_error: Re-raise errors inside the listener thread instead of
            just reporting them and ending the loop.
        ambient_adjust_seconds: Duration of the one-off ambient-noise
            calibration before listening starts; 0 or None skips it.
        listen_timeout: Optional `timeout` passed to `Recognizer.listen`. With
            the default None the call blocks until a phrase is captured, so a
            stop request is only noticed after the next phrase. Setting a
            value makes the loop wake up periodically and re-check the stop flag.
    """

    def __init__(
        self,
        mic_index: Optional[int] = None,
        recognizer: Optional[sr.Recognizer] = None,
        raise_on_error: bool = False,
        ambient_adjust_seconds: float = 0.5,
        listen_timeout: Optional[float] = None,
    ):
        self.recognizer = recognizer or sr.Recognizer()
        self.mic_index = mic_index
        self.raise_on_error = raise_on_error
        self.ambient_adjust_seconds = ambient_adjust_seconds
        self.listen_timeout = listen_timeout

        # Used by the unexpected-error handler in _listen_loop; it was
        # referenced there but never created, which raised AttributeError.
        self.logger = logging.getLogger(__name__)

        self._stop_event = threading.Event()
        self._thread: Optional[threading.Thread] = None
        self._lock = threading.Lock()  # guards _transcripts
        self._transcripts: List[str] = []

    def _listen_loop(self):
        """Background loop that listens continuously until stop is requested."""
        print(f"Listener thread starting. mic_index={self.mic_index}")

        # This may raise if microphone not found/openable
        mic = sr.Microphone(device_index=self.mic_index)

        with mic as source:
            # optional ambient noise adaptation
            try:
                if self.ambient_adjust_seconds and self.ambient_adjust_seconds > 0:
                    self.recognizer.adjust_for_ambient_noise(source, duration=self.ambient_adjust_seconds)
            except Exception as e:
                print(f"Warning : Ambient noise adjust failed: {e}")
                if self.raise_on_error:
                    raise

            # Main continuous loop: blocks on recognizer.listen(source)
            while not self._stop_event.is_set():
                try:
                    # Blocks until a phrase is captured, or until
                    # `listen_timeout` elapses without speech when one is set.
                    audio = self.recognizer.listen(source, timeout=self.listen_timeout)

                    # Synchronous recognition call (this blocks until response)
                    text = self.recognizer.recognize_google(audio)

                    with self._lock:
                        self._transcripts.append(text)
                    print(f"Recognized: {text}")

                except sr.WaitTimeoutError:
                    # Only raised when listen_timeout is set: re-check the stop flag.
                    continue

                except sr.UnknownValueError:
                    # speech was unintelligible
                    continue

                except sr.RequestError as e:
                    # issues contacting the recognition service (network, quota, etc.)
                    print(f"Recognition request failed: {e}")
                    if self.raise_on_error:
                        raise
                    # break or continue: break to avoid tight error loop
                    break

                except Exception as e:
                    # Unexpected error - log and optionally raise
                    self.logger.exception("Unexpected listening/recognition error: %s", e)
                    if self.raise_on_error:
                        raise
                    break

        print("Listener thread exiting.")

    def start_listening(self):
        """Start background listening. No-op if already running."""
        if self._thread and self._thread.is_alive():
            print("Already listening.")
            return

        self._stop_event.clear()
        self._thread = threading.Thread(target=self._listen_loop, daemon=True)
        self._thread.start()
        print("Listening started.")

    def stop_listening(self, wait_join_seconds: float = 2.0):
        """Stop listening and join the background thread (waits up to timeout)."""
        print("Stopping listener...")
        self._stop_event.set()
        if self._thread:
            self._thread.join(timeout=wait_join_seconds)
            if self._thread.is_alive():
                print("Listener thread did not exit within timeout.")
            else:
                print("Listener thread stopped.")
        else:
            print("No listener thread to stop.")

    def get_transcripts(self) -> List[str]:
        """Return a copy of collected transcripts."""
        with self._lock:
            return list(self._transcripts)

    def clear_transcripts(self):
        """Remove all collected transcripts."""
        with self._lock:
            self._transcripts.clear()


In [9]:
import threading
import speech_recognition as sr
import logging
import sys
from typing import List

class speech_to_text:
    """Background speech-to-text listener that collects recognised phrases.

    `start_listening()` spawns a thread running `listen_in_background()`,
    which captures phrases from the default microphone, sends them to
    Google's recognizer and appends the recognised text to an internal,
    lock-protected list. `stop_listening()` clears the run flag and waits
    briefly for the thread; `get_transcripts()` / `clear_transcripts()`
    read and reset the collected text.

    The method definitions are no longer wrapped in a class-level
    try/except: that wrapper only guarded the `def` statements themselves
    (never the method calls) and referenced `AssisstantException`, which is
    not defined in this cell.
    """

    def __init__(self):
        self.listening = False          # run flag polled by the worker loop
        self.listener_thread = None     # Thread created by start_listening()
        self._transcripts: List[str] = []  # Store transcripts
        self._lock = threading.Lock()  # Thread lock for thread-safe operations

    def listen_in_background(self):
        """Worker loop; runs until `self.listening` is cleared or an API error occurs.

        Whenever the loop ends (stop request, API error, or failure to open
        the microphone) `self.listening` is reset to False so that a later
        `start_listening()` is not blocked by a stale flag.
        """
        try:
            r = sr.Recognizer()
            with sr.Microphone() as mic:
                # Calibrate once up front; doing it on every pass reads 0.5 s
                # of audio each time and clips the start of the next phrase.
                r.adjust_for_ambient_noise(mic, duration=0.5)
                while self.listening:
                    try:
                        audio = r.listen(mic, timeout=2, phrase_time_limit=15)
                        text = r.recognize_google(audio)
                        print("You said:", text)

                        # Store the transcript thread-safely
                        with self._lock:
                            self._transcripts.append(text)

                    except sr.WaitTimeoutError:
                        # No speech within the timeout; poll the flag again.
                        continue
                    except sr.UnknownValueError:
                        print("[Unrecognized Speech]")
                    except sr.RequestError as e:
                        print("[API Error]", e)
                        break
        finally:
            self.listening = False

    def start_listening(self):
        """Start the background listener thread; no new thread if already listening."""
        logging.info("Start listening in background thread")
        if not self.listening:
            self.listening = True
            self.listener_thread = threading.Thread(target=self.listen_in_background)
            self.listener_thread.start()
        logging.info("Listening started...")

    def stop_listening(self):
        """Stop listening and wait for the background thread to finish"""
        logging.info("Stop listening")
        self.listening = False

        # Wait for thread to finish (similar to original code)
        if self.listener_thread and self.listener_thread.is_alive():
            self.listener_thread.join(timeout=2.0)  # Wait up to 2 seconds
            if self.listener_thread.is_alive():
                logging.warning("Listener thread did not exit within timeout.")
            else:
                logging.info("Listener thread stopped successfully.")
        else:
            logging.info("No active listener thread to stop.")

        logging.info("Listening stopped.")

    def get_transcripts(self) -> List[str]:
        """Return a copy of collected transcripts"""
        with self._lock:
            return list(self._transcripts)  # Return a copy to avoid external modification

    def clear_transcripts(self):
        """Clear all stored transcripts"""
        with self._lock:
            self._transcripts.clear()
        logging.info("Transcripts cleared")

In [10]:
# Create the listener (the constructor takes no options) and start the
# background thread; recognised phrases are printed as they arrive.
stt = speech_to_text()
stt.start_listening()


You said: problem solve karte hain theek hai to Hota kya hai
You said: chal de bhai
[Unrecognized Speech]
[Unrecognized Speech]
You said: bhosdi ke
You said: gandu Sun Le
[Unrecognized Speech]
[Unrecognized Speech]
You said: Pan ke bhoot
[Unrecognized Speech]
You said: bataya
[Unrecognized Speech]


In [11]:
# Speak into your microphone while the previous cell's listener is running,
# then run this cell: stop the listener, print a copy of everything that was
# recognised, and empty the stored list.
stt.stop_listening()
print(stt.get_transcripts())
stt.clear_transcripts()

['problem solve karte hain theek hai to Hota kya hai', 'chal de bhai', 'bhosdi ke', 'gandu Sun Le', 'Pan ke bhoot', 'bataya']


### Adding app.py with complete backend integration (without frontend integration, as a quick backup)

In [None]:
from assisstants.exception.exception import AssisstantException
from assisstants.logging.logger import logging
import sys

from assisstants.loader.model_loader import ModelLoader
from assisstants.processor.text_processor import TextProcessor
from assisstants.Classifier.text_classifier import TextClassifier
from assisstants.extractor.fields_extractor import ExtractFields
from assisstants.voice.voice import speech_to_text

import threading
import speech_recognition as sr

import streamlit as st

if __name__ == "__main__":
    # End-to-end backend flow: capture speech -> take the first transcript ->
    # preprocess -> classify -> extract the field value for that label.
    # Any failure is re-raised wrapped in AssisstantException.
    try:
        logging.info("VOICE ACTIVATED FORM ASSISSTANT started")
        # initializing Speech to text class when app starts
        stt = speech_to_text()

        ## Voice to text conversion
        # starting the listening in background thread when start button is triggered
        stt.start_listening()

        # stop listening after stop button is triggered
        stt.stop_listening()

        # getting the transcripts
        transcripts = stt.get_transcripts()

        # Clear the stored transcripts right after fetching them, so they are
        # also cleared on the early-exit path below (previously the clear came
        # after st.stop() and was skipped there).
        stt.clear_transcripts()

        ## Starting text processing, classification and field extraction

        # Sending transcripts to text processor.
        # The list is empty when nothing was recognised; treat that as "no
        # input" instead of failing with IndexError on transcripts[0].
        text = transcripts[0] if transcripts else ""
        if text == "[Unrecognized Speech]" or text == "[API Error]" or text == "":
            logging.warning("No valid speech input recognized.")
            st.warning("No valid speech input recognized. Please speak again.")
            st.stop()  # Stop Streamlit execution and show the warning
        processed_text = TextProcessor().process_text(text)

        # Loading model and tokenizer
        # @st.cache_resource  ## Uncomment while deploying on streamlit cloud(Caching for fast loading of model and tokenizer)
        def init():
            return ModelLoader.get_model(), ModelLoader.get_tokenizer()

        # calling the init function to load model and tokenizer
        # NOTE(review): `model` and `tokenizer` are not used again in this block;
        # presumably TextClassifier obtains them through ModelLoader -- confirm.
        model, tokenizer = init()

        #initializing text classifier
        classifier = TextClassifier()
        label = classifier.classify(processed_text)

        # Initializing field extractor
        extractor = ExtractFields()

        # Extracting the entity based on label and sending to the respective label in the form
        entity = extractor.extract(label, processed_text)
        # print(f"Extracted {label}: {entity}")

    except Exception as e:
        raise AssisstantException(e, sys)