In [None]:
The expected answer is: {expected_answer}. Your similarity score is: {similarity_score:.2f}.

In [4]:
!pip install gtts SpeechRecognition pyaudio


Collecting gtts
  Downloading gTTS-2.5.1-py3-none-any.whl (29 kB)
Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.4-py2.py3-none-any.whl (32.8 MB)
Collecting pyaudio
  Downloading PyAudio-0.2.14-cp39-cp39-win_amd64.whl (164 kB)
Installing collected packages: SpeechRecognition, pyaudio, gtts
Successfully installed SpeechRecognition-3.10.4 gtts-2.5.1 pyaudio-0.2.14


In [12]:
import joblib
import re
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import random
from gtts import gTTS
import os
import speech_recognition as sr


In [13]:
# Define the clean_text function
def clean_text(text):
    """Normalize ``text`` before it is vectorized.

    The input is lower-cased, every character that is not an ASCII letter or
    digit is replaced by a space, the result is tokenized with NLTK, and
    English stop words are dropped.

    Args:
        text: The string to normalize.

    Returns:
        The surviving tokens joined with single spaces.
    """
    lowered = text.lower()
    alnum_only = re.sub(r"[^a-zA-Z0-9]", " ", lowered)  # punctuation -> spaces
    english_stops = set(stopwords.words('english'))

    kept_tokens = []
    for token in nltk.word_tokenize(alnum_only):
        if token not in english_stops:
            kept_tokens.append(token)
    return " ".join(kept_tokens)

# Function to calculate similarity score between two text strings
def calculate_similarity(user_answer, expected_answer):
    """Return the cosine similarity between two texts.

    Each text is vectorized on its own with the module-level
    ``tfidf_vectorizer`` and the two resulting vectors are compared.

    Args:
        user_answer: Text supplied by the candidate.
        expected_answer: Reference text to compare against.

    Returns:
        The single entry of the 1x1 cosine-similarity matrix.
    """
    vectors = [
        tfidf_vectorizer.transform([document])
        for document in (user_answer, expected_answer)
    ]
    pairwise = cosine_similarity(vectors[0], vectors[1])
    return pairwise[0][0]

# Function to calculate score based on similarity score
def calculate_score(similarity_score):
    """Map a similarity value onto a banded score.

    Args:
        similarity_score: Similarity value to grade.

    Returns:
        100, 90, 80 or 70 for similarities of at least 0.9, 0.8, 0.7 and 0.6
        respectively; 50 for anything lower.
    """
    # (minimum similarity, awarded score), checked from the highest band down.
    bands = ((0.9, 100), (0.8, 90), (0.7, 80), (0.6, 70))
    for threshold, awarded in bands:
        if similarity_score >= threshold:
            return awarded
    return 50  # Base score so a weak answer still encourages improvement

In [None]:
# Function to convert text to speech
def text_to_speech(text, filename="temp_audio.mp3"):
    """Synthesize ``text`` to an MP3 file with gTTS and start playing it.

    The file is handed to the operating system without going through a
    shell, so a ``filename`` containing spaces or shell metacharacters is
    treated purely as a path and never interpreted as a command.

    Args:
        text: English text to speak.
        filename: Path of the MP3 file to write and play.

    Playback is best-effort: if the player cannot be launched, a message is
    printed instead of raising.
    """
    import subprocess
    import sys

    tts = gTTS(text=text, lang='en')
    tts.save(filename)

    try:
        if os.name == "nt":
            # Opens the file with its associated application, like `start`.
            os.startfile(filename)
        elif sys.platform == "darwin":
            subprocess.run(["afplay", filename], check=False)
        else:
            subprocess.run(["mpg321", filename], check=False)
    except OSError as exc:
        # Missing player / no file association: keep the session running.
        print(f"Could not play {filename}: {exc}")

# Function to convert speech to text
def speech_to_text():
    """Record one utterance from the microphone and transcribe it.

    Audio is captured with ``speech_recognition`` and sent to the Google
    recognizer via ``recognize_google``.

    Returns:
        The recognized text, or an empty string when the audio could not be
        understood or the recognition request failed.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as mic:
        print("Listening...")
        captured = recognizer.listen(mic)

    print("Recognizing...")
    try:
        transcript = recognizer.recognize_google(captured)
    except sr.UnknownValueError:
        print("Sorry, I did not understand that.")
        return ""
    except sr.RequestError:
        print("Sorry, I am unable to provide speech recognition services at the moment.")
        return ""

    print(f"You said: {transcript}")
    return transcript

In [7]:
import random
import speech_recognition as sr
import joblib
import re
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from gtts import gTTS
import os

# Load the trained model and vectorizer from the current working directory.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only load artifacts that come from a trusted source.
# NOTE(review): `model` is not referenced anywhere else in the visible
# notebook; confirm it is still needed.
model = joblib.load('interview_bot_model.pkl')
tfidf_vectorizer = joblib.load('tfidf_vectorizer.pkl')

# Load the dataset to get questions and expected answers
# (main() reads the 'questions' and 'expected_answer' columns).
df = pd.read_csv("dataset.csv")

# Define the clean_text function
def clean_text(text):
    """Lower-case ``text``, strip punctuation and remove English stop words.

    Args:
        text: The string to normalize.

    Returns:
        A space-separated string of the NLTK tokens that are not English
        stop words.
    """
    # Anything that is not an ASCII letter or digit becomes a space.
    normalized = re.sub(r"[^a-zA-Z0-9]", " ", text.lower())
    ignored = set(stopwords.words('english'))
    tokens = nltk.word_tokenize(normalized)
    return " ".join(tok for tok in tokens if tok not in ignored)

# Function to calculate similarity score between two text strings
def calculate_similarity(user_answer, expected_answer):
    """Compute the cosine similarity of two texts.

    Args:
        user_answer: Text supplied by the candidate.
        expected_answer: Reference text to compare against.

    Returns:
        The cosine similarity of the two texts after each is transformed by
        the module-level ``tfidf_vectorizer``.
    """
    def _vectorize(document):
        # The vectorizer expects an iterable of documents, hence the list.
        return tfidf_vectorizer.transform([document])

    return cosine_similarity(_vectorize(user_answer), _vectorize(expected_answer))[0][0]

# Function to calculate score based on similarity score
def calculate_score(similarity_score):
    """Convert a similarity value into an integer score from 0 to 100.

    The value is scaled by 100 and rounded to the nearest integer. Plain
    truncation is avoided because binary floating point makes products such
    as ``0.29 * 100`` come out as 28.999..., which would silently lose a
    point. The result is clamped so tiny floating-point overshoots (or
    out-of-range inputs) never leave the 0-100 range.

    Args:
        similarity_score: Similarity value, expected to lie in [0, 1].

    Returns:
        An ``int`` between 0 and 100 inclusive.

    Raises:
        ValueError: If ``similarity_score`` is NaN.
    """
    percent = int(round(float(similarity_score) * 100))
    return max(0, min(100, percent))

# Function to convert text to speech
def text_to_speech(text, filename="output.mp3"):
    """Synthesize ``text`` to an MP3 file with gTTS and start playing it.

    The file is opened without going through a shell, and the player is
    chosen by platform: the associated application on Windows, ``afplay``
    on macOS and ``mpg321`` elsewhere.

    Args:
        text: English text to speak.
        filename: Path of the MP3 file to write and play. Defaults to
            ``"output.mp3"``, the path this function always used before the
            parameter existed.

    Playback is best-effort: if the player cannot be launched, a message is
    printed instead of raising.
    """
    import subprocess
    import sys

    tts = gTTS(text=text, lang='en')
    tts.save(filename)

    try:
        if os.name == "nt":
            # Opens the file with its associated application, like `start`.
            os.startfile(filename)
        elif sys.platform == "darwin":
            subprocess.run(["afplay", filename], check=False)
        else:
            subprocess.run(["mpg321", filename], check=False)
    except OSError as exc:
        # Missing player / no file association: keep the session running.
        print(f"Bot: Could not play {filename}: {exc}")

# Function to recognize speech and convert it to text
def start_recording():
    """Capture one spoken answer from the microphone and transcribe it.

    The recognizer is calibrated against ambient noise before listening, and
    the captured audio is transcribed with ``recognize_google``.

    Returns:
        The recognized text, or ``None`` when the speech could not be
        understood or the recognition request failed.
    """
    recognizer = sr.Recognizer()

    with sr.Microphone() as mic:
        recognizer.adjust_for_ambient_noise(mic)
        print("Bot: Listening...")
        recording = recognizer.listen(mic)

    print("Bot: Recognizing...")
    try:
        transcript = recognizer.recognize_google(recording)
    except sr.UnknownValueError:
        print("Bot: Sorry, I could not understand what you said. Please try again.")
        return None
    except sr.RequestError as error:
        print(f"Bot: Could not request results from Google Speech Recognition service; {error}")
        return None

    print(f"You said: {transcript}")
    return transcript

# Main interaction loop
def main():
    """Run the interactive, voice-driven interview session.

    Loop until the user answers "no" or "exit":

    1. Ask whether to continue.
    2. Pick a random row of ``df`` and read its question aloud.
    3. After the user types "start", record and transcribe the answer.
    4. Compare the cleaned answer with the cleaned expected answer, report
       the similarity and add the derived score to the running total.

    The total score is printed and spoken when the session ends.
    """
    total_score = 0
    question_count = 0

    print("Welcome to the Interview Bot!")
    text_to_speech("Welcome to the Interview Bot!")
    print("Let's start the interview?")
    text_to_speech("Let's start the interview?")

    while True:
        if question_count == 0:
            choice = input("Bot: Let's start the interview? (yes/no): ")
        else:
            choice = input("Bot: Ready for the next question? (yes/no): ")

        # Normalize once so stray whitespace or capitals are not "invalid".
        choice = choice.strip().lower()

        if choice in ("no", "exit"):
            farewell = f"Thank you for using the Interview Bot! Your total score is: {total_score}"
            print(farewell)
            text_to_speech(farewell)
            break

        if choice not in ("yes", "ok", "okay"):
            print("Bot: Invalid input, please type 'yes' or 'no'.")
            continue

        question_count += 1

        # Pick a random question from the dataset
        random_question_idx = random.randint(0, len(df) - 1)
        selected_row = df.iloc[random_question_idx]
        question = selected_row['questions']
        expected_answer = selected_row['expected_answer']

        print("Bot: Here is your question:")
        print(question)
        text_to_speech(question)  # Convert question to speech

        user_start = input("Bot: Type 'start' to begin answering: ")
        if user_start.strip().lower() != "start":
            print("Bot: Recording aborted.")
            continue

        user_answer = start_recording()  # Get user's answer via speech

        if not user_answer:  # Nothing was recognized
            print("Bot: I didn't hear an answer, let's try another question.")
            continue

        # Clean BOTH texts the same way. Cleaning only the user's answer
        # leaves stop words on one side of the comparison and skews the
        # similarity.
        cleaned_user_answer = clean_text(user_answer)
        cleaned_expected_answer = clean_text(expected_answer)

        # Calculate similarity score between user's answer and expected answer
        similarity_score = calculate_similarity(cleaned_user_answer, cleaned_expected_answer)

        # Calculate score based on similarity score
        score = calculate_score(similarity_score)

        # Report the similarity (printed and spoken)
        feedback = f"Your similarity score is: {similarity_score:.2f}."
        print("\nBot:", feedback)
        text_to_speech(feedback)
        print("\n")

        total_score += score

if __name__ == "__main__":
    main()


Welcome to the Interview Bot!
Let's start the interview?
Bot: Let's start the interview? (yes/no): yes
Bot: Here is your question:
What are the differences between C++ and Python?
Bot: Type 'start' to begin answering: start
Bot: Listening...
Bot: Recognizing...
You said: C plus plus is a top down approach Python is an interpreting language

Bot: Your similarity score is: 0.37.


Bot: Ready for the next question? (yes/no): yes
Bot: Here is your question:
What are the certifications you have done?


KeyboardInterrupt: Interrupted by user