In [1]:
import csv
import shutil
import os

from metrics_helpers import process_folder, load_data_time, compute_der
from format_helpers import get_files

# Predictions vs References: Performance evaluation (ASR & Diarization)

### Preparing evaluation folder (Predictions vs References)

In [4]:
# Result folders whose CSV files should be compared against the references
directories = ['../results/Compassion', '../results/OBE1', '../results/OBE2']
# Destination folder; use a different name when comparing several ASR & Diarization models
pred_folder = '../performance_evaluation/predictions'

# Collect the CSV files of every result folder into one flat list
pred_files = []
for directory in directories:
    pred_files.extend(get_files(directory, 'csv'))

# Copy each prediction into the evaluation folder (created on first use)
os.makedirs(pred_folder, exist_ok=True)
for file in pred_files:
    shutil.copy(file, pred_folder)

### ASR and Diarization metrics + Visual Tool to better compare & understand the results

In [2]:
# Tunable parameter to estimate the performance of the ASR system more fairly.
# The diarization step may skip whole sentences, which would unfairly worsen the WER metric.
max_insert_length = 3  # Exclude insertions longer than 3 words from the WER calculation

prediction_folder = "../performance_evaluation/predictions"
reference_folder = "../performance_evaluation/references"

# process_folder is imported from metrics_helpers; its result is saved as a CSV below.
# NOTE(review): the recorded cell output shows it also writes per-file HTML comparisons
# under the predictions folder -- confirm in metrics_helpers.
metric = process_folder(prediction_folder, reference_folder, max_insert_length)
metric.to_csv('../performance_evaluation/WER_metric.csv', index=False)

Combined HTML file saved as ../performance_evaluation/predictions\visual comparison\Diarization_S301final.html
Processed file: S301final.csv
Combined HTML file saved as ../performance_evaluation/predictions\visual comparison\Diarization_S302con.html
Processed file: S302con.csv


### (Optional) Dialogue DER Analysis in CSV instead of HTML

In [6]:
def dialgue_DER(reference_file, prediction_file, output_file):
    """
    Print the diarization error summary for one reference/prediction pair and
    save the dialogue table as CSV.

    The two files are loaded with `load_data_time` and compared with
    `compute_der`, which yields a table and a mapping of error figures.
    The 'DER' entry is printed as a percentage, every other entry as seconds.

    Args:
        reference_file: Path of the reference file passed to `load_data_time`.
        prediction_file: Path of the prediction file passed to `load_data_time`.
        output_file: Path the dialogue table is written to (no index column).
    """
    df_ref, df_pred = load_data_time(reference_file, prediction_file)
    dialogue_df, error_durations = compute_der(df_ref, df_pred)

    # Report the DER as a ratio in percent and all durations in seconds
    print("\nError Durations and DER:")
    for name, amount in error_durations.items():
        shown = f"{amount:.2%}" if name == 'DER' else f"{amount:.3f} seconds"
        print(f"{name}: {shown}")

    # Persist the dialogue table next to the printed summary
    dialogue_df.to_csv(output_file, index=False)

In [7]:
# Example run of dialgue_DER on a single recording (S301final).
reference_file = '../S301final_C.csv'
prediction_file = '../S301final.csv'
output_file = 'dia_S301final.csv'  # relative path: written to the current working directory

dialgue_DER(reference_file, prediction_file, output_file)


Error Durations and DER:
DER: 7.18%
Reference Speech Duration: 342.280 seconds
Missed Duration: 1.060 seconds
False Alarm Duration: 0.260 seconds
Confusion Duration: 23.240 seconds


# (To think about) Conventional way to calculate WER
This approach does not take into account that the diarization model may skip complete sentences due to poor audio quality or overlapping speech, which therefore unfairly worsens the WER attributed to the ASR.

**TODO:** *Further test with different models to try to solve this problem*

In [10]:
from evaluate import load

# Load the "wer" metric once; calculate_wer_for_csv below calls wer_metric.compute.
wer_metric = load("wer")

def _read_content_column(csv_path):
    """Return every 'Content' cell of a CSV file joined into one space-separated string."""
    # 'utf-8-sig' drops a leading UTF-8 BOM and decodes plain UTF-8 files unchanged.
    # With plain 'utf-8' the BOM sticks to the first header name, so a file whose
    # first column is 'Content' would fail with KeyError.
    # newline='' is how the csv module expects its input files to be opened.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as file:
        return ' '.join(row['Content'] for row in csv.DictReader(file))


def calculate_wer_for_csv(hypothesis_file, reference_file):
    """
    Calculate the WER for the entire content of the 'Content' column in a CSV file.

    All 'Content' cells of each file are concatenated (space-separated) into a
    single text, and the two texts are compared as one reference/prediction pair.

    Args:
        hypothesis_file: Path of the CSV holding the predicted transcript.
        reference_file: Path of the CSV holding the reference transcript.

    Returns:
        The value returned by `wer_metric.compute` for the two texts.

    Raises:
        KeyError: If a file has no 'Content' column.
    """
    hyp = _read_content_column(hypothesis_file)
    ref = _read_content_column(reference_file)

    return wer_metric.compute(references=[ref], predictions=[hyp])

In [11]:
# Conventional WER over the whole 'Content' column for recording S301final.
reference_file= "../S301final_C.csv"
hypothesis_file  = "../results/Compassion/S301final.csv"

calculate_wer_for_csv(hypothesis_file, reference_file)

0.03026634382566586

In [12]:
# Conventional WER over the whole 'Content' column for recording S302con.
hypothesis_file = "../results/Compassion/S302con.csv"
reference_file = "../S302con_C.csv"

calculate_wer_for_csv(hypothesis_file, reference_file)

0.23446893787575152

# (Useful) Problems with the Excel format

In [16]:
def convert_csv_semicolon_to_comma(input_file, output_file):
    """
    Converts a CSV file with semicolons as delimiters to a comma-separated CSV.

    Args:
        input_file: Path of the semicolon-delimited, UTF-8 encoded source file.
        output_file: Path of the comma-delimited file to write (overwritten if present).
    """
    # Both files are opened with newline='' as the csv module requires; otherwise
    # line breaks inside quoted fields are translated before the reader sees them
    # and "\r\n" within a cell would be silently rewritten to "\n".
    with open(input_file, 'r', newline='', encoding='utf-8') as infile, \
            open(output_file, 'w', newline='', encoding='utf-8') as outfile:
        reader = csv.reader(infile, delimiter=';')
        writer = csv.writer(outfile, delimiter=',')
        writer.writerows(reader)

def find_problematic_line(input_file):
    """
    Reports the first line of a file that cannot be decoded cleanly as UTF-8.

    The file is read with errors='replace', so undecodable bytes show up as the
    replacement character U+FFFD instead of raising UnicodeDecodeError. The first
    line containing that character is printed with its 1-based line number;
    if there is none, a "no problems" message is printed instead.
    """
    with open(input_file, 'r', encoding='utf-8', errors='replace') as file:
        # Stop at the first line that carries a replacement character
        first_bad = next(
            (
                (number, text)
                for number, text in enumerate(file, start=1)
                if '\ufffd' in text
            ),
            None,
        )

    if first_bad is None:
        print("No problematic lines found.")
    else:
        line_number, line = first_bad
        print(f"Problematic line at line {line_number}: {line}")

def remove_bom_from_file(file_path):
    """
    Strips a leading UTF-8 Byte Order Mark (BOM) from a file, in place.

    The file is read as raw bytes. If it starts with the UTF-8 BOM
    (\\xef\\xbb\\xbf) it is rewritten without those three bytes; otherwise it is
    left untouched. A status message is printed in either case.
    """
    utf8_bom = b'\xef\xbb\xbf'

    with open(file_path, 'rb') as source:
        raw = source.read()

    # Nothing to do when the file does not start with the BOM
    if not raw.startswith(utf8_bom):
        print(f"No BOM found in {file_path}.")
        return

    print(f"BOM found in {file_path}, removing it...")
    with open(file_path, 'wb') as target:
        target.write(raw[len(utf8_bom):])  # everything after the three BOM bytes
    print(f"BOM successfully removed from {file_path}.")