In [1]:
import speech_recognition as sr
import re
import numpy as np
import pandas as pd
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import recall_score, classification_report, confusion_matrix, accuracy_score
from sklearn.naive_bayes import MultinomialNB

# Clean every message in the dataset: keep letters only, lowercase, drop
# English stopwords, and lemmatize what remains.
def remove_digit(data):
    """Return a cleaned, lemmatized version of every message in ``data['content']``.

    Each message has all non-letter characters replaced by spaces, is
    lowercased and split on whitespace; English stopwords are dropped and
    the remaining tokens are lemmatized and re-joined with single spaces.

    Args:
        data: table-like object (e.g. a pandas DataFrame) whose ``'content'``
            entry is an iterable of raw message strings.

    Returns:
        list[str]: one cleaned string per message, in iteration order.
    """
    # Build these once. The original evaluated stopwords.words('english') for
    # every single token and created a new lemmatizer for every row.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    corpos = []
    # Iterate over the values instead of data['content'][i]: the indexed form
    # is a label lookup and fails (or picks the wrong row) whenever the index
    # is not exactly 0..n-1, e.g. after filtering or shuffling.
    for message in data['content']:
        # Missing content (NaN from a malformed input line) is not a string
        # and would crash re.sub; treat it as an empty message.
        text = message if isinstance(message, str) else ''
        tokens = re.sub('[^a-zA-Z]', ' ', text).lower().split()
        corpos.append(' '.join(
            lemmatizer.lemmatize(token)
            for token in tokens
            if token not in stop_words
        ))
    return corpos

# Train the classifier on the cleaned corpus and return the fitted model.
def detect_model(corpos, data, vectorizer=None):
    """Fit a TF-IDF + multinomial Naive Bayes classifier.

    Args:
        corpos: iterable of cleaned message strings, aligned row-for-row
            with ``data``.
        data: DataFrame with a ``'label'`` column.
        vectorizer: optional ``TfidfVectorizer`` to fit. It is fitted in
            place, so a caller that passes one keeps the learned vocabulary
            and can ``transform`` new messages for prediction. When omitted,
            a ``TfidfVectorizer(max_features=2000)`` is created internally
            and discarded, exactly as before.

    Returns:
        The fitted ``MultinomialNB`` instance.
    """
    if vectorizer is None:
        vectorizer = TfidfVectorizer(max_features=2000)

    # Keep the TF-IDF matrix sparse: MultinomialNB accepts sparse input, and
    # densifying a (rows x 2000) matrix only costs memory.
    features = vectorizer.fit_transform(corpos)

    # One-hot encode the labels and keep the second dummy column as the
    # binary target, i.e. 1 marks rows carrying whichever label ends up in
    # column 1 (presumably the label that sorts second - verify against the
    # dataset's label values).
    targets = pd.get_dummies(data['label']).iloc[:, 1].values

    fraud_detect = MultinomialNB().fit(features, targets)
    print("Model has been trained.")
    return fraud_detect

# Load the labelled messages and train the classifier used by the cells below.
# The input is tab-separated; column names are supplied here.
data = pd.read_csv("fraud_call.file", names=['label', 'content'], sep='\t')
proper_list = remove_digit(data)

# Keep the fitted vectorizer in `cv`: new messages must be transformed with
# the same vocabulary before they can be passed to `model`.
cv = TfidfVectorizer(max_features=2000)
x = cv.fit_transform(proper_list).toarray()

# One-hot encode the labels and keep only the second dummy column as the
# binary target.
y = pd.get_dummies(data['label']).iloc[:, 1].values

model = MultinomialNB()
model.fit(x, y)
print("Model has been trained.")

# Clean a single message with the same steps applied to the training corpus.
def preprocess_message(message):
    """Normalize one raw message for vectorization.

    Non-letter characters are replaced by spaces, the text is lowercased and
    split on whitespace, English stopwords are removed, and the remaining
    tokens are lemmatized and joined with single spaces.

    Args:
        message: raw message text (str).

    Returns:
        str: the cleaned message; empty if no tokens survive filtering.
    """
    # Build the stopword set once per call. The original evaluated
    # stopwords.words('english') for every token and scanned it as a list.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    tokens = re.sub('[^a-zA-Z]', ' ', message).lower().split()
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in tokens
        if token not in stop_words
    )



Model has been trained.


In [3]:
# Initialize the speech recognizer
r = sr.Recognizer()

# Record one utterance from the default microphone, calibrating for
# background noise first.
with sr.Microphone() as source:
    print("Listening...")
    r.adjust_for_ambient_noise(source)
    audio = r.listen(source)

# Transcribe the audio. Only the recognition call sits inside `try`, so the
# handlers below cover exactly the call they describe.
try:
    text = r.recognize_google(audio)
except sr.UnknownValueError:
    print("Could not understand audio input.")
except sr.RequestError as e:
    print("Could not request results from Google Speech Recognition service; {0}".format(e))
else:
    print("You said: ", text)

    # Clean the transcript like the training data, then vectorize it with the
    # fitted vectorizer `cv` so the features line up with what `model` saw.
    processed_text = preprocess_message(text)
    vectorized_text = cv.transform([processed_text]).toarray()
    prediction = model.predict(vectorized_text)

    # predict() returns a one-element array for the single input row; read the
    # scalar explicitly. Target value 1 is the second dummy column of the
    # training labels, which this notebook reports as the genuine class.
    if prediction[0] == 1:
        print("Genuine call detected.")
    else:
        print("Fraud call detected!")


Listening...
You said:  you have won a cash prize of 1 CR please forward the OTP on your mobile
Fraud call detected!
