In [20]:
import os
import joblib
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Define Paths
# DATA_PATH: root of the transcript tree; train_all_speakers() walks it as
# <speaker>/<subdir>/<file> and uses the <speaker> directory name as the label.
DATA_PATH = r"D:\Speaker Verification System\extracted\vox1_dev_txt\txt"
# MODEL_SAVE_PATH: directory where the fitted vectorizer ("vectorizer.pkl") and
# the per-speaker feature matrices ("model_<speaker>.pkl") are written and
# later loaded from.
MODEL_SAVE_PATH = r"D:\Speaker Verification System\models"

# Create the model directory if it does not exist.
# This runs whenever the module is executed or imported; exist_ok=True makes
# it a no-op when the directory is already there.
os.makedirs(MODEL_SAVE_PATH, exist_ok=True)

# Read Text Function (Handles Encoding Issues)
def read_text(file_path):
    """Return the stripped text content of ``file_path``, or ``None``.

    The file is read as raw bytes and decoded as UTF-8. When the bytes are
    not valid UTF-8 they are decoded as Latin-1 instead.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded text with surrounding whitespace removed (possibly an
        empty string), or ``None`` when the path is not an existing regular
        file or reading it fails.
    """
    if os.path.isfile(file_path):
        try:
            with open(file_path, "rb") as handle:
                payload = handle.read()
            try:
                decoded = payload.decode("utf-8")
            except UnicodeDecodeError:
                # Latin-1 assigns a character to every byte value, so this
                # fallback accepts any payload.
                decoded = payload.decode("latin-1")
            return decoded.strip()
        except Exception:
            # Best-effort reader: any failure is reported as "no text".
            return None
    return None

# Train Vectorizer & Speaker Models
def train_all_speakers():
    """Fit the shared TF-IDF vectorizer and store one feature matrix per speaker.

    Walks ``DATA_PATH/<speaker>/<subdir>/<file>``, reads every file with
    ``read_text`` and labels each non-empty text with its speaker directory
    name. A single ``TfidfVectorizer`` is fitted on all collected texts and
    saved as ``vectorizer.pkl`` in ``MODEL_SAVE_PATH``. For every speaker the
    dense TF-IDF rows of that speaker's texts are saved as
    ``model_<speaker>.pkl`` in the same directory.

    When no readable, non-empty text is found, a message is printed and the
    function returns without saving anything.

    Returns:
        None.
    """
    texts = []
    # speaker -> row indices of that speaker's texts inside ``texts``.
    # Collected during the walk so the corpus is neither rescanned per speaker
    # nor vectorized a second time.
    rows_by_speaker = {}

    for speaker in os.listdir(DATA_PATH):
        speaker_path = os.path.join(DATA_PATH, speaker)
        if not os.path.isdir(speaker_path):
            continue
        for subdir in os.listdir(speaker_path):
            subdir_path = os.path.join(speaker_path, subdir)
            if not os.path.isdir(subdir_path):
                continue
            for file_name in os.listdir(subdir_path):
                text = read_text(os.path.join(subdir_path, file_name))
                if text:
                    rows_by_speaker.setdefault(speaker, []).append(len(texts))
                    texts.append(text)

    if not texts:
        print("No valid training data found.")
        return

    vectorizer = TfidfVectorizer()
    X_train = vectorizer.fit_transform(texts)
    joblib.dump(vectorizer, os.path.join(MODEL_SAVE_PATH, "vectorizer.pkl"))

    for speaker, row_ids in rows_by_speaker.items():
        # Row slicing the fitted matrix yields the same features that
        # transforming the speaker's texts again would produce.
        speaker_features = X_train[row_ids].toarray()
        joblib.dump(speaker_features, os.path.join(MODEL_SAVE_PATH, f"model_{speaker}.pkl"))

# Speaker Verification Function
def verify_speaker(test_text, threshold=0.0):
    """Return the enrolled speaker whose stored features best match ``test_text``.

    The text is vectorized with the saved ``vectorizer.pkl``. For every
    ``model_<speaker>.pkl`` file in ``MODEL_SAVE_PATH`` the score is the mean
    dot product between the stored feature rows and the test vector; the
    speaker with the highest score wins.

    Args:
        test_text: Text to attribute to a speaker.
        threshold: The best score must be strictly greater than this value
            for a match to be reported. Defaults to ``0.0``.

    Returns:
        The speaker id taken from the best-scoring model file name, or
        ``None`` when ``test_text`` is empty/``None``, the vectorizer file is
        missing, or no score exceeds ``threshold``.
    """
    if not test_text:
        return None
    vectorizer_path = os.path.join(MODEL_SAVE_PATH, "vectorizer.pkl")
    if not os.path.exists(vectorizer_path):
        return None
    # NOTE: joblib.load unpickles arbitrary objects; MODEL_SAVE_PATH must only
    # contain files written by trusted code.
    vectorizer = joblib.load(vectorizer_path)
    test_features = vectorizer.transform([test_text]).toarray()

    prefix, suffix = "model_", ".pkl"
    best_match, best_score = None, -1
    # Sorted so that ties are resolved the same way regardless of the order in
    # which the filesystem lists the directory.
    for model_file in sorted(os.listdir(MODEL_SAVE_PATH)):
        if not (model_file.startswith(prefix) and model_file.endswith(suffix)):
            continue
        speaker_features = joblib.load(os.path.join(MODEL_SAVE_PATH, model_file))
        score = np.mean(np.dot(speaker_features, test_features.T))
        if score > best_score:
            best_score = score
            # Strip only the leading prefix and trailing suffix; str.replace
            # would also remove these substrings from inside the speaker id.
            best_match = model_file[len(prefix):-len(suffix)]
    return best_match if best_score > threshold else None

# Evaluate Model
def evaluate_model(test_texts, ground_truth, is_target_speaker):
    """Run ``verify_speaker`` on each test text and print results and metrics.

    Args:
        test_texts: Test texts, one per sample. Empty or ``None`` entries are
            kept and scored as ``"Unknown"`` so that every prediction stays
            aligned with its ground-truth label.
        ground_truth: Expected speaker id per sample; ``None`` means the
            sample belongs to no enrolled speaker and is treated as
            ``"Unknown"``.
        is_target_speaker: Accepted for interface compatibility; it is not
            used by this function.

    Raises:
        ValueError: If ``test_texts`` and ground_truth differ in length.

    Returns:
        None. Per-sample results and accuracy, macro precision, macro recall
        and macro F1 (as percentages) are printed.
    """
    test_texts = list(test_texts)
    ground_truth = [g if g is not None else "Unknown" for g in ground_truth]
    if len(test_texts) != len(ground_truth):
        raise ValueError(
            f"test_texts has {len(test_texts)} items but ground_truth has "
            f"{len(ground_truth)}"
        )
    if not test_texts:
        print("No test samples to evaluate.")
        return

    # verify_speaker returns None for empty/None text, so unreadable samples
    # become "Unknown" here instead of being dropped, which would shift every
    # later prediction against its label.
    predictions = [verify_speaker(text) for text in test_texts]
    predictions = [p if p is not None else "Unknown" for p in predictions]

    # Print verification results
    for number, (pred, expected) in enumerate(zip(predictions, ground_truth), start=1):
        if pred == "Unknown":
            print(f"Test Sample {number}: Speaker NOT Verified (Unknown)")
        elif pred == expected:
            print(f"Test Sample {number}: Speaker Verified - Targeted Speaker ({pred}) ✅")
        else:
            print(f"Test Sample {number}: Speaker Verified - Non-Targeted Speaker ({pred}) ❌")

    # Compute and print metrics as percentages.
    # NOTE(review): zero_division=1 scores a class with an undefined metric as
    # perfect, which can inflate the macro averages; kept unchanged so reported
    # numbers stay comparable with earlier runs.
    accuracy = accuracy_score(ground_truth, predictions) * 100
    precision = precision_score(ground_truth, predictions, average='macro', zero_division=1) * 100
    recall = recall_score(ground_truth, predictions, average='macro', zero_division=1) * 100
    f1 = f1_score(ground_truth, predictions, average='macro', zero_division=1) * 100

    print(f"\nMetrics:\nAccuracy: {accuracy:.2f}%\nPrecision: {precision:.2f}%\nRecall: {recall:.2f}%\nF1-Score: {f1:.2f}%")

# Main Execution
if __name__ == "__main__":
    # Train on the whole corpus, then evaluate on three hand-picked samples.
    train_all_speakers()

    # Build the transcript paths from DATA_PATH so they cannot drift from the
    # training root and contain no doubled path separators.
    enrolled_dir = os.path.join(DATA_PATH, "id10001", "1zcIwhmdeo4")
    test_files = [
        os.path.join(enrolled_dir, "00001.txt"),
        os.path.join(enrolled_dir, "00002.txt"),
        # NOTE(review): this is a .wav file. read_text falls back to Latin-1,
        # so its bytes are decoded as text and scored like a transcript;
        # presumably a transcript of a non-enrolled speaker was intended.
        r"D:\Speaker Verification System\Speaker_0017_00017.wav",
    ]
    test_texts = [read_text(file) for file in test_files]
    # None marks a sample that belongs to no enrolled speaker.
    ground_truth = ["id10001", "id10001", None]
    is_target_speaker = [True, True, False]
    evaluate_model(test_texts, ground_truth, is_target_speaker)


Test Sample 1: Speaker Verified - Targeted Speaker (id10001) ✅
Test Sample 2: Speaker Verified - Targeted Speaker (id10001) ✅
Test Sample 3: Speaker Verified - Non-Targeted Speaker (id10132) ❌

Metrics:
Accuracy: 66.67%
Precision: 66.67%
Recall: 66.67%
F1-Score: 33.33%
