In [None]:
!pip install jiwer

In [17]:
import pandas as pd

def calculate_wer(csv_file_path, ground_truth_path, conf_level=0.7):
    """
    Calculate the Word Error Rate (WER) of filtered OCR output against the ground truth.

    Rows of the OCR CSV whose ``Confidence`` is below ``conf_level`` are
    discarded. The ``Text`` cells of the remaining rows are joined with single
    spaces, in file order, and that string is scored against the ground-truth
    text. Rows with a missing ``Text`` cell contribute nothing to the joined
    string.

    Parameters:
    csv_file_path (str): Path to the CSV file containing OCR results; it must
        provide the columns ``Text`` and ``Confidence``.
    ground_truth_path (str): Path to the ground truth text file (read as UTF-8).
    conf_level (float): Minimum confidence (inclusive) a row needs to be kept.

    Returns:
    float: WER value for the given CSV file.
    """
    # Imported locally so the function does not depend on an earlier notebook
    # cell having put `wer` into the global namespace.
    from jiwer import wer

    # Load OCR results
    ocr_df = pd.read_csv(csv_file_path)

    # Keep only the text of rows that reach the confidence threshold
    kept_text = ocr_df.loc[ocr_df['Confidence'] >= conf_level, 'Text']

    # Missing cells are dropped first: astype(str) would otherwise turn each
    # NaN into the literal word "nan" and add bogus words to the hypothesis.
    ocr_text = " ".join(kept_text.dropna().astype(str))

    # Load ground truth text, ignoring leading/trailing whitespace
    with open(ground_truth_path, "r", encoding="utf-8") as gt_file:
        ground_truth_text = gt_file.read().strip()

    # jiwer's wer takes the reference first, then the hypothesis
    return wer(ground_truth_text, ocr_text)

# Input files: one OCR result CSV per engine plus the shared ground-truth text
easyocr_csv_path = r"C:\Users\AZMI\Desktop\Izzet Ahmet\Kodlar\OCR\easyocr_results.csv"
pytesseract_csv_path = r"C:\Users\AZMI\Desktop\Izzet Ahmet\Kodlar\OCR\pytesseract_results.csv"
ground_truth_path = r"C:\Users\AZMI\Desktop\Izzet Ahmet\Kodlar\OCR\words.txt"

# Destination of the summary table
output_csv_path = r"C:\Users\AZMI\Desktop\Izzet Ahmet\Kodlar\OCR\wer_results.csv"

# Thresholds to sweep: 0.0, 0.1, ..., 1.0
confidence_levels = [round(step * 0.1, 1) for step in range(11)]

# Score both engines at every threshold; each threshold becomes one table row
results = []
for conf in confidence_levels:
    # EasyOCR is evaluated first, then Pytesseract, against the same ground truth
    easyocr_wer, pytesseract_wer = (
        calculate_wer(ocr_csv, ground_truth_path, conf_level=conf)
        for ocr_csv in (easyocr_csv_path, pytesseract_csv_path)
    )
    results.append({
        'Confidence Level': conf,
        'EasyOCR WER': easyocr_wer,
        'Pytesseract WER': pytesseract_wer,
    })

# Write the table without the index column and report where it went
results_df = pd.DataFrame(results)
results_df.to_csv(output_csv_path, index=False)

print(f"WER results saved to: {output_csv_path}")


WER results saved to: C:\Users\AZMI\Desktop\Izzet Ahmet\Kodlar\OCR\wer_results.csv
