In [8]:
# All Imports
import os
import re
import fitz
import pytesseract
from PIL import Image
import pandas as pd
import numpy as np
import spacy
import shutil
import threading
import warnings
import sys

from sklearn.preprocessing import LabelEncoder, RobustScaler, OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    confusion_matrix, accuracy_score, precision_score, recall_score,
    f1_score, roc_curve, roc_auc_score, log_loss, classification_report
)

# Plotting Imports
import matplotlib.pyplot as plt
import seaborn as sns

# GUI Imports
import tkinter as tk
from tkinter import ttk, messagebox
from tkinterdnd2 import DND_FILES, TkinterDnD

# Suppress warnings for cleaner output
warnings.filterwarnings('ignore')

print("All libraries imported successfully.")

All libraries imported successfully.


In [9]:
# Model Training
#
# Loads Resume_Dataset.csv, label-encodes the target column, splits the data,
# fits the preprocessing ColumnTransformer on the training split only, and
# grid-searches a RandomForestClassifier. The fitted objects are stored in
# module-level names so that later cells can read them.

# Global variables that all other blocks will use.
# They start as None/empty so that later cells can detect that training has
# not run or did not finish.
best_model = None
preprocessor = None
le_target = None
feature_columns = []

# Define Features, Target, and Categorical Columns
feature_columns = [
    'Programming_Skills',
    'Coding_Skills_Years',
    'Job_Experience_NumCompanies',
    'Previous_Salary',
    'Desired_Salary',
    'Projects',
    'Internships',
    'Education',
    'Gap_Years'
]
target_column = 'Shortlisting_class'

# Separate features into numerical and categorical.
# Together these two lists cover every entry of feature_columns.
numerical_features = [
    'Programming_Skills',
    'Coding_Skills_Years',
    'Job_Experience_NumCompanies',
    'Previous_Salary',
    'Desired_Salary',
    'Projects',
    'Internships',
    'Gap_Years'
]
categorical_features = ["Education"]

# Default encoded value of the positive class; replaced below when a class
# named exactly 'Shortlisted' exists in the fitted LabelEncoder.
positive_label_encoded = 1

# Hold-out split produced by the training code below.
X_test_processed = None
y_test = None

print("Starting Model Training :")
try:
    df = pd.read_csv("Resume_Dataset.csv")
    print(f"Successfully loaded Resume_Dataset.csv. Shape: {df.shape}")

    X = df[feature_columns]
    y = df[target_column]

    # Label Encoding: map the class names in the target column to integers
    le_target = LabelEncoder()
    y_encoded = le_target.fit_transform(y)

    # The guard is an exact match on 'Shortlisted', while the lookup inside is
    # a case-insensitive substring match; the first matching class is used.
    if 'Shortlisted' in le_target.classes_:
        positive_label_encoded = le_target.transform(
            [c for c in le_target.classes_ if 'shortlist' in c.lower()]
        )[0]
        print(f"Positive class 'Shortlisted' is encoded as: {positive_label_encoded}")

    # Preprocessing Pipeline
    # Create the transformer for numerical features
    numeric_transformer = Pipeline(steps=[
        ('scaler', RobustScaler())
    ])

    # Create the transformer for categorical features.
    # handle_unknown='ignore' lets transform() accept Education values that
    # were not present when the encoder was fitted.
    categorical_transformer = Pipeline(steps=[
        ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
    ])

    # Combine transformers using ColumnTransformer.
    # Every column of feature_columns is listed in one of the two transformers,
    # so remainder='passthrough' has no leftover columns to pass through here.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numerical_features),
            ('cat', categorical_transformer, categorical_features)
        ],
        remainder='passthrough'
    )

    # Data Split (BEFORE processing), stratified on the encoded target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )

    # Apply preprocessing
    # Fit the preprocessor on the training data
    X_train_processed = preprocessor.fit_transform(X_train)
    # Only transform the test data
    X_test_processed = preprocessor.transform(X_test)

    # Get feature names after OHE for better model understanding.
    # The list is built as numeric names followed by one-hot names, mirroring
    # the order in which the transformers are declared above.
    ohe_feature_names = preprocessor.named_transformers_['cat'].named_steps['onehot'].get_feature_names_out(categorical_features)
    all_feature_names = numerical_features + list(ohe_feature_names)
    print(f"Total features after processing: {len(all_feature_names)}")

    # Model Training (Grid Search): 2 * 3 * 2 * 2 = 24 parameter combinations,
    # each evaluated with 3-fold cross-validation and scored by macro F1.
    param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [10, 20, None],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 2]
    }

    grid = GridSearchCV(
        RandomForestClassifier(random_state=42, class_weight='balanced'),
        param_grid, 
        cv=3, 
        n_jobs=-1, 
        scoring='f1_macro',
        verbose=1
    )

    grid.fit(X_train_processed, y_train)
    best_model = grid.best_estimator_

    print("\nGrid Search Complete :")
    print(f"Best Parameters: {grid.best_params_}")
    print("Model Trained Successfully! All components are in global memory.")

except FileNotFoundError:
    print("Error: 'Resume_Dataset.csv' not found.")
    print("Please make sure the dataset is in the same folder as this notebook.")
except Exception as e:
    # Any other failure is only printed; names assigned before the failure keep
    # their values and best_model stays None unless the grid search finished.
    print(f"Model training failed: {e}")

Starting Model Training :
Successfully loaded Resume_Dataset.csv. Shape: (10000, 15)
Positive class 'Shortlisted' is encoded as: 1
Total features after processing: 16
Fitting 3 folds for each of 24 candidates, totalling 72 fits

Grid Search Complete :
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Model Trained Successfully! All components are in global memory.


In [10]:
# Feature Importances
#
# Pairs the fitted model's feature_importances_ with the processed feature
# names (numeric names followed by the one-hot encoded names), prints the
# ranking and saves a bar chart of the 15 most important features.
if (
    'best_model' in locals() and best_model is not None
    and 'preprocessor' in locals() and preprocessor is not None
):
    try:
        importances = best_model.feature_importances_
        num_features = numerical_features
        ohe_trasformer = preprocessor.named_transformers_['cat'].named_steps['onehot']
        cat_features_ohe = ohe_trasformer.get_feature_names_out(categorical_features)
        all_processed_features = num_features + list(cat_features_ohe)

        # The names are reconstructed by hand, so verify they line up with the
        # model before labelling anything.
        if len(importances) != len(all_processed_features):
            print("Error: mismatch in feature count.")
            print(f"Importances found: {len(importances)}")
            print(f"Features found: {len(all_processed_features)}")
        else:
            df_importances = pd.DataFrame({
                'Feature': all_processed_features,
                'Importance': importances
            })

            df_importances = df_importances.sort_values(by='Importance', ascending=False)
            print("Model Feature Importances(Highest to lowest):")
            print(df_importances)

            plot_filename = 'feature_importances.png'
            plt.figure(figsize=(10, 8))
            try:
                sns.barplot(x='Importance', y='Feature', data=df_importances.head(15), palette='viridis')

                plt.title('Top 15 Feature Importances')
                plt.xlabel('Importance Score')
                plt.ylabel('Feature')
                plt.tight_layout()

                plt.savefig(plot_filename)
                print(f"\nFeature importance plot saved as '{plot_filename}'.")
            finally:
                # Always release the figure, even when plotting or saving fails.
                plt.close()
    except Exception as e:
        print(f"Failed to compute feature importances: {e}")
else:
    print("Model, or preprocessor not found in memory. Cannot compute feature importances.")
    print("Please ensure that the model training block has been executed successfully.")



Model Feature Importances(Highest to lowest):
                           Feature  Importance
5                         Projects    0.268550
1              Coding_Skills_Years    0.267273
3                  Previous_Salary    0.182025
4                   Desired_Salary    0.091740
6                      Internships    0.068024
0               Programming_Skills    0.061563
2      Job_Experience_NumCompanies    0.042426
7                        Gap_Years    0.011910
8                   Education_B.E.    0.001982
12                Education_M.Tech    0.001294
14                   Education_MCA    0.000998
10  Education_BSc Computer Science    0.000797
15                   Education_MSc    0.000442
13                   Education_MBA    0.000333
11               Education_Diploma    0.000326
9                    Education_BCA    0.000316

Feature importance plot saved as 'feature_importances.png'.


In [12]:
# Performance Metrics
#
# Evaluates best_model on the held-out test split: accuracy, log loss,
# precision/recall/F1/AUC for the positive class, a ROC curve image, a
# classification report and a confusion-matrix image.

if best_model is not None and X_test_processed is not None and y_test is not None:
    print("Generating Performance Metrics :")

    # Calculate predictions and probabilities first :
    y_pred = best_model.predict(X_test_processed)
    y_proba_all = best_model.predict_proba(X_test_processed)

    # Calculate metrics that don't depend on a positive class :
    accuracy = accuracy_score(y_test, y_pred)
    loss = log_loss(y_test, y_proba_all) # Log loss

    print(f"\nMODEL PERFORMANCE (on Test Set)")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Log Loss: {loss:.4f}")

    if positive_label_encoded in best_model.classes_:
        # Positive class (e.g., 'Shortlisted') exists :
        # predict_proba columns follow best_model.classes_, so look the column up
        # instead of assuming it equals the encoded label.
        positive_class_index = np.where(best_model.classes_ == positive_label_encoded)[0][0]
        y_proba_positive = y_proba_all[:, positive_class_index]

        # Get the name of the positive class for printing
        positive_class_name = le_target.classes_[positive_label_encoded]

        # Calculate metrics for the positive class
        precision = precision_score(y_test, y_pred, pos_label=positive_label_encoded, zero_division=0)
        recall = recall_score(y_test, y_pred, pos_label=positive_label_encoded, zero_division=0)
        f1 = f1_score(y_test, y_pred, pos_label=positive_label_encoded, zero_division=0)

        # roc_auc_score has no pos_label argument and treats the larger label as
        # positive. Binarise against the positive class so the score matches the
        # probabilities passed in even when that class is not the largest label.
        y_test_is_positive = (np.asarray(y_test) == positive_label_encoded).astype(int)
        auc = roc_auc_score(y_test_is_positive, y_proba_positive)

        print(f"Precision (for '{positive_class_name}'): {precision:.4f}")
        print(f"Recall (for '{positive_class_name}'):    {recall:.4f}")
        print(f"F1 Score (for '{positive_class_name}'):  {f1:.4f}")
        print(f"AUC:             {auc:.4f}")

        # Generate ROC Curve Plot :
        try:
            fpr, tpr, thresholds = roc_curve(y_test, y_proba_positive, pos_label=positive_label_encoded)

            plt.figure(figsize=(7, 6))
            plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {auc:.4f})')
            plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
            plt.xlim([0.0, 1.0])
            plt.ylim([0.0, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title('Receiver Operating Characteristic (ROC) Curve')
            plt.legend(loc="lower right")
            plt.grid(alpha=0.3)
            plt.savefig("roc_curve.png")
            print("ROC curve plot saved as 'roc_curve.png'")
            plt.close()
        except Exception as e:
            print(f"Failed to generate ROC curve: {e}")

    else:
        # Fallback for multi-class or if positive label is missing :
        print(f"Warning: Positive label {positive_label_encoded} not found. Calculating weighted metrics.")
        precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        print(f"Weighted Precision: {precision:.4f}")
        print(f"Weighted Recall:    {recall:.4f}")
        print(f"Weighted F1 Score:  {f1:.4f}")
        print("AUC/ROC curve is skipped (requires a defined positive class).")

    # Classification Report :
    # Only classes that occur in y_test are named, in sorted encoded order.
    target_names = le_target.inverse_transform(np.unique(y_test))
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=target_names, zero_division=0))

    # Confusion Matrix :
    cm = confusion_matrix(y_test, y_pred, labels=le_target.transform(target_names))
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=target_names,
                yticklabels=target_names)
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.savefig("confusion_matrix.png")
    print("Confusion matrix plot saved as 'confusion_matrix.png'")
    plt.close()

else:
    print("Model, X_test_processed, or y_test not found.")
    print("Please run Above Block (Model Training) first.")

Generating Performance Metrics :

MODEL PERFORMANCE (on Test Set)
Accuracy: 0.9560
Log Loss: 0.1828
Precision (for 'Shortlisted'): 0.9536
Recall (for 'Shortlisted'):    0.9592
F1 Score (for 'Shortlisted'):  0.9564
AUC:             0.9611
ROC curve plot saved as 'roc_curve.png'

Classification Report:
              precision    recall  f1-score   support

    Rejected       0.96      0.95      0.96       994
 Shortlisted       0.95      0.96      0.96      1006

    accuracy                           0.96      2000
   macro avg       0.96      0.96      0.96      2000
weighted avg       0.96      0.96      0.96      2000

Confusion matrix plot saved as 'confusion_matrix.png'


In [5]:
# NLP & Prediction Functions

# Load the small English spaCy pipeline into the module-level name `nlp`.
# spacy.load raises OSError when the model package is not installed; in that
# case the model is downloaded once and the load is retried.
# NOTE(review): `nlp` is not referenced by any function in the visible part of
# this notebook - confirm it is still needed before paying the load cost.
print("Loading NLP model...")
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Downloading 'en_core_web_sm'...")
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")
print("NLP model loaded.")

def extract_text_from_pdf(pdf_path):
    """Return the text of a PDF, falling back to OCR for pages without a text layer.

    Each page's embedded text is used when it is non-blank. Otherwise the page
    is rendered to an image and passed to pytesseract. The text of every page
    is followed by a newline.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The concatenated page text, or "" when the document cannot be opened
        or read. A page whose OCR fails is skipped (and reported on stdout)
        instead of discarding the text already collected from other pages.
    """
    page_texts = []
    try:
        with fitz.open(pdf_path) as doc:
            for page_number, page in enumerate(doc, start=1):
                page_text = page.get_text()
                if not page_text.strip():
                    # OCR for scanned resumes. Failures here (missing Tesseract
                    # binary, unexpected pixel format, ...) affect this page only.
                    try:
                        pix = page.get_pixmap()
                        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                        page_text = pytesseract.image_to_string(img)
                    except Exception as ocr_error:
                        print(f"OCR failed on page {page_number} of {pdf_path}: {ocr_error}")
                        continue
                page_texts.append(page_text + "\n")
    except Exception as e:
        print(f"Error reading PDF {pdf_path}: {e}")
        return ""
    return "".join(page_texts)

def safe_int(val, default=0):
    """Convert ``val`` to an int, returning ``default`` when that is impossible.

    Strings may contain thousands separators (",") and surrounding whitespace.
    The value is parsed as a float first, so "3.9" becomes 3 (truncation).

    Args:
        val: Value to convert (string, number, or anything else).
        default: Value returned when ``val`` cannot be converted.

    Returns:
        The converted integer, or ``default`` for unparsable input, ``None``,
        NaN, or infinite values.
    """
    try:
        if isinstance(val, str):
            val = val.replace(",", "").strip()
        return int(float(val))
    except (ValueError, TypeError, OverflowError):
        # ValueError: unparsable text or NaN; TypeError: None and other
        # non-numeric objects; OverflowError: int() of an infinite float.
        return default

def safe_float(val, default=0.0):
    """Convert ``val`` to a float, returning ``default`` when conversion fails.

    String input has "," separators removed and surrounding whitespace
    stripped before parsing; other input is handed to ``float`` unchanged.
    """
    cleaned = val.replace(",", "").strip() if isinstance(val, str) else val
    try:
        return float(cleaned)
    except (ValueError, TypeError):
        return default

def find_experience(text, keywords):
    best_match = 0
    text_norm = text.lower().replace('\n', ' ')
    for keyword in keywords:
        try:
            for match in re.finditer(keyword, text_norm):
                window_start = max(0, match.start() - 40)
                search_window = text_norm[window_start : match.start()]
                exp_match = re.search(r"(\d+)\+?\s*(?:year|yrs)", search_window)
                if exp_match:
                    years = safe_int(exp_match.group(1))
                    if years > best_match:
                        best_match = years
        except Exception as e:
            print(f"Error in find_experience with keyword '{keyword}': {e}")
    return best_match

def parse_salary(salary_str):
    """Convert a salary phrase into a whole-number annual amount.

    Rules:
      * "<n> lpa" / "<n> lakh" -> n * 100000, rounded to the nearest unit so
        binary float error cannot shave one off (4.35 LPA is 435000, not
        434999).
      * a plain number above 20000 -> that number, truncated to an int.
      * anything else (no number, malformed number, small plain number) -> 0.

    Args:
        salary_str: Text such as "12 LPA", "4.5 lakh" or "12,00,000".
            Empty or None input yields 0.

    Returns:
        The salary as an int, or 0 when no usable amount is present.
    """
    if not salary_str:
        return 0
    salary_str = salary_str.lower().strip().replace(",", "")
    number_match = re.search(r"([\d\.]+)", salary_str)
    if not number_match:
        return 0
    try:
        number = float(number_match.group(1))
    except ValueError:
        # The character class also accepts strings such as "1.2.3" or ".".
        return 0
    if "lpa" in salary_str or "lakh" in salary_str:
        return int(round(number * 100000))
    if number > 20000:
        return int(number)
    return 0

def find_salary(text, keywords):
    """Return the first salary amount that is preceded by one of ``keywords``.

    The text is lowercased with newlines turned into spaces. Each candidate
    amount ("<n> lpa", "<n> lakh", or a number ending in three digits) is
    accepted when any keyword occurs in the 40 characters before it; the
    accepted candidate is converted with ``parse_salary``. Returns 0 when no
    candidate qualifies.
    """
    haystack = text.lower().replace('\n', ' ')
    amount_pattern = re.compile(
        r"([\d\.]+\s*(?:lpa|lakh))|(\b[\d,]+\d{3}\b)", re.IGNORECASE
    )
    for amount in amount_pattern.finditer(haystack):
        lookback = haystack[max(0, amount.start() - 40):amount.start()]
        if any(keyword in lookback for keyword in keywords):
            return parse_salary(amount.group(0))
    return 0

# MAIN NLP FUNCTION :

def extract_features_nlp(text, candidate_name="Unknown"):
    """Extract report fields and model features from raw resume text.

    Args:
        text: Full text of the resume.
        candidate_name: Name stored under "Name" in the result.

    Returns:
        A dict holding the personal-info keys "Name", "Email", "Phone No.",
        "Age" and "Gender" (value "N/A" when not found) followed by one entry
        per name in the module-level ``feature_columns``. Numeric features
        default to 0 and "Education" defaults to "Diploma". Returns an empty
        dict when ``feature_columns`` is empty.
    """
    global feature_columns
    if not feature_columns:
        print("Error: feature_columns is not set. Please run Block 2 first.")
        return {}

    # Initialize all expected features to 0
    features = {key: 0 for key in feature_columns}

    text_lower = text.lower()
    text_with_spaces = text_lower.replace('\n', ' ')

    # Extract Personal Info (for the report) :
    info_dict = {
        "Name": candidate_name,
        "Email": "N/A",
        "Phone No.": "N/A",
        "Age": "N/A",
        "Gender": "N/A"
    }

    # Extract Email
    email_match = re.search(r"[\w\.-]+@[\w\.-]+\.\w+", text_lower)
    if email_match:
        info_dict["Email"] = email_match.group(0)

    # Extract Phone
    phone_match = re.search(r"(\+91[\s-]?)?(\b[\d\s-]{10}\b)", text)
    if phone_match:
        info_dict["Phone No."] = phone_match.group(2).replace(" ", "").replace("-", "")

    # Extract Age. The leading \b keeps words that merely end in "age"
    # ("page 12", "percentage 85") from being read as an age.
    age_match = re.search(r"\bage[:\s]*(\d{2})", text_lower)
    if age_match:
        info_dict["Age"] = safe_int(age_match.group(1))

    # Extract Gender. The fallback needs real regex word boundaries: inside a
    # plain string literal "\b" is a backspace character, so a substring test
    # against it can never succeed. "female" is tested first for readability;
    # the word boundary already stops "male" from matching inside "female".
    gender_match = re.search(r"gender[:\s]*(male|female)", text_lower, re.IGNORECASE)
    if gender_match:
        info_dict["Gender"] = gender_match.group(1).capitalize()
    elif re.search(r"\bfemale\b", text_lower):
        info_dict["Gender"] = "Female"
    elif re.search(r"\bmale\b", text_lower):
        info_dict["Gender"] = "Male"

    # Extract Training Features (for the model) :

    # Programming_Skills: number of listed skills that occur in the text.
    # NOTE: this is plain substring matching, so "java" is also counted when
    # only "javascript" is present.
    prog_skills_list = ['python', 'java', 'c++', 'sql', 'javascript', 'react', 'node.js', 'pandas', 'scikit-learn', 'tensorflow']
    features['Programming_Skills'] = sum(1 for skill in prog_skills_list if skill in text_lower)

    # Coding_Skills_Years
    features['Coding_Skills_Years'] = find_experience(text_lower, ["coding", "programming", "developer", "software engineer"])

    # Job_Experience_NumCompanies: count the comma/"and"-separated names that
    # follow an introductory phrase such as "worked at".
    company_match = re.search(r"(?:worked at|companies like|previous employers)\s*([A-Z][\w\s,]+(?:(?:,| and)\s*[A-Z][\w\s,]+)*)", text)
    if company_match:
        companies_str = company_match.group(1).replace(" and ", ", ")
        features['Job_Experience_NumCompanies'] = len([c for c in companies_str.split(",") if c.strip()])

    # Salaries
    features['Previous_Salary'] = find_salary(text_with_spaces, ["previous", "current compensation", "current salary"])
    features['Desired_Salary'] = find_salary(text_with_spaces, ["expected", "desired", "seeking", "target salary"])

    # Projects
    projects_match = re.search(r"(\d+)\s*(?:project|projects)", text_lower)
    if projects_match:
        features['Projects'] = safe_int(projects_match.group(1))

    # Internships
    internship_match = re.search(r"(\d+)\s*(?:internship|internships)", text_lower)
    if internship_match:
        features['Internships'] = safe_int(internship_match.group(1))

    # Education: the first recognised degree mention wins. Mentions are
    # normalised by lowercasing and dropping dots, so "B.Tech", "btech",
    # "M.Sc", "m.c.a" etc. all resolve through one table.
    degree_labels = {
        "btech": "B.E.",
        "be": "B.E.",
        "mtech": "M.Tech",
        "bsc": "BSc Computer Science",
        "computer science": "BSc Computer Science",
        "msc": "MSc",
        "bca": "BCA",
        "mca": "MCA",
        "mba": "MBA",
        "diploma": "Diploma",
    }
    degree_regex = r"\b(b\.?tech|m\.?tech|b\.?sc|m\.?sc|b\.?ca|m\.?ca|b\.?e\.?|m\.?b\.?a|diploma|computer science)\b"
    features['Education'] = 'Diploma' # Default if nothing found
    for degree_match in re.finditer(degree_regex, text, re.IGNORECASE):
        token = degree_match.group(0)
        # An undotted "be" is almost always the English verb; accept it as the
        # degree only when written in capitals ("BE"). "B.E"/"b.e." still count.
        if token.lower().startswith("be") and not token.startswith("BE"):
            continue
        label = degree_labels.get(token.lower().replace(".", ""))
        if label:
            features['Education'] = label
            break

    # Gap_Years
    gap_match = re.search(r"gap of (\d+)\s*year", text_lower)
    if gap_match:
        features['Gap_Years'] = safe_int(gap_match.group(1))

    # Combine Dictionaries :
    final_features_dict = {}
    final_features_dict.update(info_dict) # Add personal info first

    # Add all the training features
    for key in feature_columns:
        final_features_dict[key] = features.get(key, 0)

    return final_features_dict

def process_and_predict_resume(pdf_path):
    """Extract features from one PDF resume and classify it with the trained model.

    Args:
        pdf_path: Path of the resume PDF.

    Returns:
        The dict produced by ``extract_features_nlp`` with an extra
        "Prediction" entry (" Shortlisted" or " Rejected"). When no text can
        be extracted the dict contains only "Name" and
        "Prediction": "Error - Empty PDF".

    Raises:
        Exception: If the model, preprocessor or label encoder has not been
            created by the training cell.
    """
    global best_model, preprocessor, le_target, feature_columns

    # Compare against None explicitly instead of relying on the truthiness of
    # estimator objects, and make sure the label encoder exists as well.
    if best_model is None or preprocessor is None or le_target is None:
        raise Exception("Model or preprocessor is not trained. Please run Block 2.")

    # splitext removes only the real extension, whatever its letter case.
    name = os.path.splitext(os.path.basename(pdf_path))[0]
    text = extract_text_from_pdf(pdf_path)
    if not text.strip():
        print(f"Warning: No text extracted from {name}.")
        # Use the same "Name" key as successful results so that every row ends
        # up in one column of the exported sheet.
        return {"Name": name, "Prediction": "Error - Empty PDF"}

    features_dict = extract_features_nlp(text, candidate_name=name)

    # One-row frame restricted to the columns the preprocessor was fitted on.
    X_new = pd.DataFrame([features_dict])
    X_new_for_processing = X_new[feature_columns]
    X_new_processed = preprocessor.transform(X_new_for_processing)

    prediction_encoded = best_model.predict(X_new_processed)[0]
    prediction_label = le_target.inverse_transform([prediction_encoded])[0]

    result_icon = " Shortlisted" if 'shortlist' in prediction_label.lower() else " Rejected"

    # Add the prediction to the extracted features
    features_dict["Prediction"] = result_icon
    return features_dict

# Confirms that every helper defined in this cell is now available.
print("All NLP and Prediction functions are defined (v12 - Now extracts all info).")

Loading NLP model...
NLP model loaded.
All NLP and Prediction functions are defined (v12 - Now extracts all info).


In [None]:
# The GUI Application

print("Launching GUI :")

# Paths used by the GUI.
# NOTE(review): these are absolute, Windows-style paths on drive D:. Running
# the notebook on another machine requires editing them - consider deriving
# them from a configurable base directory.
RESUME_FOLDER = r"D:\AICreation\Resumes"
OUTPUT_FILE = r"D:\AICreation\Resume_Results.xlsx"
# Create the resume folder (and missing parents) when this cell runs.
os.makedirs(RESUME_FOLDER, exist_ok=True)

# This list will hold the *file names* (not full paths) of the queued resumes;
# each name refers to a file inside RESUME_FOLDER.
staged_files = [] 

# Thread-Safe Prediction Logic :

def process_resumes_background(root, analyze_btn, progress_label, progress_bar):
    """Analyze every queued resume and write the results to OUTPUT_FILE.

    Intended to run on a worker thread: every widget update and dialog is
    handed to the Tk event loop through ``root.after`` instead of being done
    directly.

    Args:
        root: Tk root window, used only for ``root.after`` scheduling.
        analyze_btn: Button passed on to ``on_analysis_complete``.
        progress_label: Label passed on to ``on_analysis_complete``.
        progress_bar: Progress bar updated after each resume.
    """
    # Work on a snapshot so that files dropped or deleted on the GUI thread
    # during the run cannot change the list while it is being iterated.
    queued_files = list(staged_files)
    total = len(queued_files)
    if total == 0:
        root.after(0, lambda: messagebox.showwarning("No Resumes", "Please drag and drop PDF resumes first!"))
        root.after(0, on_analysis_complete, analyze_btn, progress_label, progress_bar, 0, False)
        return

    results = []

    for i, pdf_name in enumerate(queued_files, start=1):
        pdf_path = os.path.join(RESUME_FOLDER, pdf_name)
        try:
            result_dict = process_and_predict_resume(pdf_path)
        except Exception as e:
            print(f"Error processing {pdf_name}: {e}")
            # "Name" matches the key used by successful results, so failures
            # land in the same column of the exported sheet.
            result_dict = {"Name": os.path.splitext(pdf_name)[0], "Prediction": f"Error: {e}"}

        results.append(result_dict)

        # Advance the bar for failed files too, so it always reaches 100%.
        progress_percent = int((i / total) * 100)
        root.after(0, progress_bar.config, {"value": progress_percent})

    # Analysis Done, now create Excel :
    try:
        df_result = pd.DataFrame(results)
        if "Prediction" in df_result.columns:
            # Move Prediction column to the end in Excel
            cols = [c for c in df_result.columns if c != "Prediction"] + ["Prediction"]
            df_result = df_result[cols]

        df_result.to_excel(OUTPUT_FILE, index=False)

        root.after(0, on_analysis_complete, analyze_btn, progress_label, progress_bar, total, True)

    except Exception as e:
        # Build the message now: the name bound by "except ... as" is removed
        # when this block ends, so a callback that reads it later would fail.
        error_message = f"Failed to save Excel file: {e}"
        root.after(0, lambda: messagebox.showerror("Error", error_message))
        root.after(0, on_analysis_complete, analyze_btn, progress_label, progress_bar, 0, False)


def on_analysis_complete(analyze_btn, progress_label, progress_bar, total_files, success):
    """Finish an analysis run on the main GUI thread.

    On success: reports completion, offers to clear the resume queue and tries
    to open OUTPUT_FILE with the platform's default application. In every case
    the analyze button and the progress bar are reset.

    Args:
        analyze_btn: Button to re-enable.
        progress_label: Label that shows the run status.
        progress_bar: Progress bar to reset to 0.
        total_files: Number of resumes analysed (shown in the dialog).
        success: True when the results file was written.
    """
    import subprocess

    if success:
        progress_label.config(text=" Analysis complete! Opening Excel file...")
        messagebox.showinfo("Complete", f"Analysis complete for {total_files} resumes.")

        # clear_all_resumes writes "Drop PDFs Here" into any tk.Label it is
        # given, which would replace the status text above if the progress
        # label were passed. Pass None so only the queue is cleared.
        # NOTE(review): the drop area's "N file(s) in queue" text is not
        # refreshed here because that widget is not available to this function.
        clear_all_resumes(None)

        # Auto-open the Excel file with the platform's default handler.
        try:
            if hasattr(os, "startfile"):            # Windows
                os.startfile(OUTPUT_FILE)
            elif sys.platform == "darwin":          # macOS
                subprocess.call(("open", OUTPUT_FILE))
            else:                                   # Linux and other Unix desktops
                subprocess.call(("xdg-open", OUTPUT_FILE))
        except Exception as e:
            # Always tell the user where the file is when it cannot be opened.
            messagebox.showinfo("File Saved", f"Results saved at:\n{OUTPUT_FILE}\n\nUnable to auto-open: {e}")

    # Reset the GUI
    analyze_btn.config(text=" Start Analysis", state="normal")
    if not success:
        progress_label.config(text="Analysis failed or was canceled.")
    progress_bar.config(value=0)


def start_analysis_thread(root, analyze_btn, progress_label, progress_bar, drop_area):
    """Button-click handler: lock the button and analyze on a daemon thread.

    ``drop_area`` is accepted but not used by this function.
    """
    analyze_btn.config(text="Analyzing...", state="disabled")
    progress_label.config(text="Starting analysis...")

    # Daemon thread: it will not keep the process alive after the GUI closes.
    worker = threading.Thread(
        target=process_resumes_background,
        args=(root, analyze_btn, progress_label, progress_bar),
        daemon=True,
    )
    worker.start()


# File Drop Handler :
def on_drop(event, drop_area):
    """Handle a drag-and-drop event: copy new PDFs into RESUME_FOLDER and queue them.

    Non-PDF paths are ignored and file names that are already queued are
    skipped. A file that cannot be copied is reported and skipped without
    aborting the rest of the batch.

    Args:
        event: Drop event; ``event.data`` holds the dropped paths as a Tcl list.
        drop_area: Label that received the drop; its text shows the queue size.
    """
    global staged_files
    files = drop_area.tk.splitlist(event.data)
    count = 0
    failed = []
    for file_path in files:
        if not file_path.lower().endswith(".pdf"):
            continue

        file_name = os.path.basename(file_path)
        dest_path = os.path.join(RESUME_FOLDER, file_name)

        if file_name in staged_files:
            print(f"Skipped: {file_name} is already in the queue.")
            continue

        try:
            shutil.copy(file_path, dest_path)
        except shutil.SameFileError:
            # Dropped straight from RESUME_FOLDER: nothing to copy, just queue it.
            pass
        except OSError as e:
            print(f"Could not copy {file_name}: {e}")
            failed.append(file_name)
            continue

        staged_files.append(file_name) # Add to our list
        count += 1

    drop_area.config(text=f"{len(staged_files)} file(s) in queue. Drop more or Analyze.")
    if count > 0:
        messagebox.showinfo("Files Added", f"Added {count} new PDF(s) to the queue.")
    if failed:
        messagebox.showerror("Copy Failed", "Could not add:\n" + "\n".join(failed))

# NEW Delete Functions :
def delete_previous_file(drop_area):
    """Remove the most recently queued resume from the queue and from disk.

    Shows a warning when the queue is empty. Otherwise the last queued name is
    popped, its file in RESUME_FOLDER is deleted if present, and the drop-area
    text is refreshed. Any failure is reported in an error dialog.
    """
    global staged_files
    if len(staged_files) == 0:
        messagebox.showwarning("Empty", "The resume queue is already empty.")
        return

    try:
        file_name = staged_files.pop() # Remove last item
        target_path = os.path.join(RESUME_FOLDER, file_name)
        if os.path.exists(target_path):
            os.remove(target_path)

        print(f"Deleted: {file_name}")
        messagebox.showinfo("File Removed", f"Removed last file: {file_name}")

        remaining = len(staged_files)
        drop_area.config(text=f"{remaining} file(s) in queue. Drop more or Analyze.")
        if remaining == 0:
            # Queue is empty again: restore the initial prompt.
            drop_area.config(text="Drop PDFs Here")

    except Exception as e:
        messagebox.showerror("Error", f"Could not delete {file_name}: {e}")

def clear_all_resumes(drop_area_or_label):
    """Delete every queued resume from RESUME_FOLDER and empty the queue.

    Shows an info dialog when the queue is already empty and asks for
    confirmation before deleting. When the argument is a ``tk.Label`` its text
    is reset to "Drop PDFs Here" after a successful clear.
    """
    global staged_files
    if not staged_files:
        messagebox.showinfo("Empty", "The resume queue is already empty.")
        return

    confirmed = messagebox.askyesno(
        "Confirm Clear All",
        f"Are you sure you want to clear all {len(staged_files)} resumes from the queue?"
    )
    if not confirmed:
        return

    try:
        for queued_name in staged_files:
            queued_path = os.path.join(RESUME_FOLDER, queued_name)
            if os.path.exists(queued_path):
                os.remove(queued_path)

        # Cleared in place, so every reference to the list sees it empty.
        staged_files.clear()
        print("Cleared all resumes from queue and folder.")
        if isinstance(drop_area_or_label, tk.Label):
            drop_area_or_label.config(text="Drop PDFs Here")

    except Exception as e:
        messagebox.showerror("Error", f"An error occurred while clearing resumes: {e}")

# Tkinter GUI :
# Builds the window only when a trained model is available, wires the widgets
# to the handlers defined above and then enters the Tk main loop.
if 'best_model' in locals() and best_model is not None:
    # TkinterDnD.Tk is used as the root so widgets can register as drop targets.
    root = TkinterDnD.Tk()
    root.title("AI Resume Analyzer")
    root.geometry("500x450")
    root.config(bg="#f2f2f2")

    # Short alias for tk.Label used by the static labels below.
    Label = tk.Label
    style = ttk.Style()
    style.configure("TButton", font=("Arial", 12), padding=10)
    style.configure("Small.TButton", font=("Arial", 9), padding=4)

    Label(root, text="Drag & Drop PDF Resumes Below", font=("Arial", 14, "bold"), bg="#f2f2f2").pack(pady=10)

    # Drop target: its text doubles as the queue-size indicator.
    drop_area = tk.Label(root, text="Drop PDFs Here", relief="ridge", borderwidth=3,
                         width=50, height=8, bg="white", fg="gray", font=("Arial", 11))
    drop_area.pack(pady=10, padx=40, fill="x")

    drop_area.drop_target_register(DND_FILES)
    drop_area.dnd_bind('<<Drop>>', lambda e: on_drop(e, drop_area))

    # Frame holding the two delete buttons side by side.
    delete_frame = ttk.Frame(root, style="TLabel")
    delete_frame.pack(fill="x", padx=40, pady=(0, 5))

    btn_delete_selected = ttk.Button(delete_frame, text="Delete Previous File", style="Small.TButton",
                                     command=lambda: delete_previous_file(drop_area))
    btn_delete_selected.pack(side="left", expand=True, fill="x", padx=2)

    btn_delete_all = ttk.Button(delete_frame, text="Delete All Files", style="Small.TButton",
                                     command=lambda: clear_all_resumes(drop_area))
    btn_delete_all.pack(side="left", expand=True, fill="x", padx=2)

    # PROGRESS BAR AND LABEL :
    progress_var = tk.IntVar()
    progress_bar = ttk.Progressbar(root, orient="horizontal", length=350, mode="determinate", variable=progress_var)
    progress_bar.pack(pady=15)

    progress_label = Label(root, text="Ready to analyze.", font=("Arial", 10), bg="#f2f2f2")
    progress_label.pack()

    # The lambda looks analyze_btn up when the button is clicked, by which time
    # the name has been assigned.
    analyze_btn = ttk.Button(root, text=" Start Analysis",
                             command=lambda: start_analysis_thread(root, analyze_btn, progress_label, progress_bar, drop_area)
    )
    analyze_btn.pack(pady=20)

    # POPULATE STAGED LIST ON START :
    # PDFs already present in RESUME_FOLDER become the initial queue.
    try:
        existing_files = [f for f in os.listdir(RESUME_FOLDER) if f.lower().endswith(".pdf")]
        if existing_files:
            staged_files = existing_files
            drop_area.config(text=f"{len(staged_files)} file(s) in queue.")
            print(f"Loaded {len(existing_files)} existing resumes from {RESUME_FOLDER}")
    except Exception as e:
        print(f"Error loading existing files: {e}")


    # THIS IS THE BLOCKING CALL : the cell does not finish until the window closes.
    root.mainloop()

else:
    print("GUI launch skipped because the model was not trained.")
    print("Please run full program successfully and then run this again.")

Launching GUI :
