In [2]:
import tkinter as tk
from tkinter import messagebox
import threading
import speech_recognition as sr
import numpy as np
import pandas as pd
import re
import string
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier
import seaborn as sns

# Read the resume corpus from disk and reserve an (initially empty)
# 'cleaned_resume' column that is filled in after the cleaner is defined.
resumeDataSet = pd.read_csv(
    "C:/Users/Arpita Patil/OneDrive/Pictures/Desktop/AugmentedResumeDataSet.csv"
).assign(cleaned_resume='')

# Clean resumes
def cleanResume(resumeText):
    """Normalize raw resume text before TF-IDF vectorization.

    Steps, in order: replace URLs with a space, replace the standalone
    tokens ``RT`` and ``cc`` with a space, drop hashtags, blank out
    @-mentions, replace every punctuation character with a space, and
    collapse each run of whitespace into a single space.

    Args:
        resumeText: Raw resume text. Any non-string value (for example the
            NaN that pandas uses for a missing cell) is treated as empty.

    Returns:
        The cleaned text. A single leading or trailing space may remain;
        the result is not stripped.
    """
    if not isinstance(resumeText, str):
        return ''
    resumeText = re.sub(r'http\S+\s*', ' ', resumeText)
    # Word boundaries restrict this to the standalone tokens. Without them
    # the pattern also ate the letters inside ordinary words such as
    # "account", "success" or "ARTS".
    resumeText = re.sub(r'\b(?:RT|cc)\b', ' ', resumeText)
    resumeText = re.sub(r'#\S+', '', resumeText)
    resumeText = re.sub(r'@\S+', '  ', resumeText)
    resumeText = re.sub('[%s]' % re.escape(string.punctuation), ' ', resumeText)
    resumeText = re.sub(r'\s+', ' ', resumeText)
    return resumeText

# Fill the cleaned-text column by running every raw resume through the cleaner.
resumeDataSet['cleaned_resume'] = resumeDataSet['Resume'].apply(cleanResume)

# Model inputs: cleaned text as features, job category as the label.
requiredText = resumeDataSet['cleaned_resume'].values
requiredTarget = resumeDataSet['Category'].values

# Learn the TF-IDF vocabulary on the corpus, then vectorize that same corpus.
# NOTE(review): the vectorizer is fitted on all rows before the train/test
# split below, so the held-out rows contribute to the vocabulary/IDF weights.
word_vectorizer = TfidfVectorizer(sublinear_tf=True, stop_words='english').fit(requiredText)
WordFeatures = word_vectorizer.transform(requiredText)

# Stratified, shuffled hold-out split with 20% of the rows kept for testing.
X_train, X_test, y_train, y_test = train_test_split(
    WordFeatures,
    requiredTarget,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=requiredTarget,
)

# One-vs-rest wrapper around a 100-tree random forest.
clf = OneVsRestClassifier(
    RandomForestClassifier(n_estimators=100, random_state=42)
)
clf.fit(X_train, y_train)

# Score the classifier on the held-out rows.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Display confusion matrix
def show_confusion_matrix():
    """Plot the hold-out confusion matrix as an annotated heatmap.

    Reads the module-level ``conf_matrix``, ``y_test`` and ``y_pred`` and
    displays the figure with ``plt.show()``.
    """
    # confusion_matrix() was called without ``labels``, so its rows/columns
    # follow the sorted labels that occur in either y_test or y_pred. Build
    # the tick labels from that same union: y_test alone would miss a class
    # that was only ever predicted and shift the labels against the matrix.
    class_labels = np.union1d(y_test, y_pred)

    plt.figure(figsize=(10, 6))
    sns.heatmap(
        conf_matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_labels,
        yticklabels=class_labels,
    )
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')
    plt.show()

# Function to find similar resumes
def find_similar_resumes(query, tfidf_matrix, resume_data, top_n=5):
    """Rank resumes by cosine similarity to a free-text query.

    The query is vectorized with the module-level ``word_vectorizer`` and
    compared against every row of ``tfidf_matrix``.

    Args:
        query: Free-text search string.
        tfidf_matrix: Matrix with one row per resume; row ``i`` must
            correspond to ``resume_data.iloc[i]``.
        resume_data: DataFrame providing the ``Name``, ``Email``, ``Phone``
            and ``Resume`` columns.
        top_n: Maximum number of results to return.

    Returns:
        A list of dicts with keys ``Name``, ``Email``, ``Phone``, ``Resume``
        and ``Similarity``, ordered from most to least similar and containing
        only resumes with a positive similarity; or ``None`` when nothing
        matches or ``top_n`` is not positive.
    """
    # A non-positive limit asks for nothing. Guard explicitly because a
    # slice such as [-0:] would otherwise select every row.
    if top_n <= 0:
        return None

    query_vector = word_vectorizer.transform([query])
    similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # Indices of the best-scoring rows, highest similarity first.
    ranked = similarities.argsort()[::-1][:top_n]

    results = []
    for i in ranked:
        score = similarities[i]
        if score <= 0:
            # Scores are in descending order, so everything from here on
            # shares no terms with the query and is not a real match.
            break
        row = resume_data.iloc[i]
        results.append({
            "Name": row["Name"],
            "Email": row["Email"],
            "Phone": row["Phone"],
            "Resume": row["Resume"],
            "Similarity": score,
        })

    return results or None

# Function to handle queries
def handle_query(query):
    """Build the text reply for a user query.

    Args:
        query: The text typed or spoken by the user.

    Returns:
        A farewell message when the query is the word "bye" (ignoring case
        and surrounding whitespace); otherwise a ranked listing of up to
        five similar resumes from ``find_similar_resumes``, or a
        "no match" message when that search returns nothing.
    """
    # Compare case-insensitively so "Bye" or "BYE" ends the conversation
    # instead of being searched for as a resume query.
    if query.strip().lower() == "bye":
        return "Goodbye! Have a great day."

    similar_resumes = find_similar_resumes(query, WordFeatures, resumeDataSet, top_n=5)
    if not similar_resumes:
        return "No matching resumes found. Please refine your query."

    # Assemble the pieces once with join rather than repeated string +=.
    parts = ["The most similar resumes to your query are:\n"]
    parts.extend(
        f"\nRank {i} | Similarity Score: {resume_info['Similarity']:.2f}\n"
        f"Name: {resume_info['Name']}\n"
        f"Email: {resume_info['Email']}\n"
        f"Phone: {resume_info['Phone']}\n"
        f"Resume Preview: {resume_info['Resume'][:500]}...\n"
        for i, resume_info in enumerate(similar_resumes, start=1)
    )
    return "".join(parts)

# Voice input function
def recognize_voice():
    """Capture one spoken query, transcribe it, and display the answer.

    Records audio through ``sr.Microphone()``, transcribes it with
    ``recognize_google``, then appends the recognized query and the
    ``handle_query`` response to ``text_output``. Each failure mode is
    reported in an error dialog and ends the attempt.

    NOTE(review): ``voice_query`` runs this on a worker thread, so the Tk
    calls below are issued off the main thread -- confirm that is acceptable
    for the Tcl/Tk build in use.
    """
    recognizer = sr.Recognizer()

    # Opening the device is inside the try: a missing or busy microphone
    # raises OSError, which previously escaped and killed the thread with no
    # feedback in the GUI.
    try:
        with sr.Microphone() as source:
            print("Listening for voice input...")
            recognizer.adjust_for_ambient_noise(source)
            audio = recognizer.listen(source)
    except OSError:
        messagebox.showerror("Error", "Microphone is unavailable.")
        return

    # The microphone is already released here; only the transcription call
    # is guarded by the recognition-specific handlers.
    try:
        query = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        messagebox.showerror("Error", "Could not understand the audio. Please try again.")
        return
    except sr.RequestError:
        messagebox.showerror("Error", "Voice recognition service is unavailable.")
        return

    text_output.insert(tk.END, f"Voice Query: {query}\n")
    response = handle_query(query)
    text_output.insert(tk.END, f"Response: {response}\n")
# Main application window.
root = tk.Tk()
root.title("Resume Query Tester")

# Query entry box (5 rows) created first, results box (15 rows) second;
# they are packed in that same order, each with 10px padding.
text_input = tk.Text(root, height=5, width=60, font=("Arial", 12))
text_output = tk.Text(root, height=15, width=60, font=("Arial", 12))
for _box in (text_input, text_output):
    _box.pack(padx=10, pady=10)

# Button functions
def submit_query():
    """Answer the typed query, log it to the output box, and clear the input.

    A blank (whitespace-only) entry produces no output; the input box is
    cleared either way.
    """
    typed = text_input.get("1.0", "end-1c")
    query = typed.strip()
    if query:
        response = handle_query(query)
        entry = f"Text Query: {query}\nResponse: {response}\n"
        text_output.insert(tk.END, entry)
    text_input.delete("1.0", tk.END)

def voice_query():
    """Run ``recognize_voice`` on a new daemon thread and return immediately."""
    listener = threading.Thread(target=recognize_voice, daemon=True)
    listener.start()

def display_metrics():
    """Show accuracy and the classification report, then plot the confusion matrix.

    The text metrics appear in an info dialog; the heatmap is drawn by
    ``show_confusion_matrix`` once the dialog call returns.
    """
    report = classification_report(y_test, y_pred)
    metrics = f"Model Accuracy: {accuracy * 100:.2f}%\n\nClassification Report:\n{report}"
    messagebox.showinfo("Model Metrics", metrics)
    show_confusion_matrix()

# Action buttons: created and packed in the order listed, so they stack
# top-to-bottom as Submit / Voice / Metrics.
submit_button, voice_button, metrics_button = (
    tk.Button(root, text=label, font=("Arial", 12), command=callback)
    for label, callback in (
        ("Submit Query", submit_query),
        ("Voice Input", voice_query),
        ("Show Metrics", display_metrics),
    )
)
for _button in (submit_button, voice_button, metrics_button):
    _button.pack(pady=5)

# Enter the Tk event loop.
root.mainloop()

In [3]:
# Sanity check: print the column index of the loaded DataFrame.
print(resumeDataSet.columns)


Index(['Name', 'Email', 'Phone', 'LinkedIn', 'Category', 'Resume',
       'cleaned_resume'],
      dtype='object')
