In [1]:
# Mount Google Drive inside the Colab runtime; the data-loading cells below
# read their input folders from paths under /content/drive/.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [14]:
import os
from collections import defaultdict

def _read_cer_file(path, max_id=9):
    """Parse one ``cer.txt`` file into an ``{id: value}`` dict.

    A line is used only when it has exactly two whitespace-separated fields,
    the first parses as ``int`` and the second as ``float``. Any other line is
    skipped silently. IDs greater than ``max_id`` are dropped (there is no
    lower bound, so negative IDs are kept). If an ID occurs more than once,
    the last occurrence wins.
    """
    id_values = {}
    with open(path, 'r') as f:
        for line in f:
            # split() with no arguments already ignores leading/trailing whitespace.
            parts = line.split()
            if len(parts) != 2:
                continue
            try:
                id_ = int(parts[0])
                value = float(parts[1])
            except ValueError:
                # Header or otherwise malformed line: ignore it.
                continue
            if id_ <= max_id:
                id_values[id_] = value
    return id_values


def collect_common_cer_by_id(main_folder1, method_name1, main_folder2, method_name2, max_id=9):
    """
    Collect paired per-ID values from matching ``cer.txt`` files of two folder trees.

    For every subfolder name that exists in both ``main_folder1`` and
    ``main_folder2`` (visited in sorted order), ``<subfolder>/cer.txt`` is read
    from each side. A value is kept for an ID only when that ID appears in
    BOTH files of that subfolder, so the two result lists for a given ID always
    have the same length and position ``k`` in one list comes from the same
    subfolder as position ``k`` in the other.

    Parameters
    ----------
    main_folder1, main_folder2 : str
        Directories whose immediate subfolders may contain a ``cer.txt`` file.
    method_name1, method_name2 : str
        Names of the global variables that receive the results for
        ``main_folder1`` and ``main_folder2`` respectively.
    max_id : int, optional
        Largest ID to keep (default 9, the previously hard-coded limit).

    Side effects
    ------------
    Nothing is returned. Two dicts ``{id: [value, ...]}`` with keys in
    ascending order are stored in this module's ``globals()`` under
    ``method_name1`` and ``method_name2``, replacing any existing globals with
    those names.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` / ``open`` (e.g. ``FileNotFoundError``
        when one of the main folders does not exist).
    """
    values1 = defaultdict(list)
    values2 = defaultdict(list)

    # Only subfolders present on both sides can contribute a paired sample.
    common_subfolders = sorted(set(os.listdir(main_folder1)) & set(os.listdir(main_folder2)))

    for folder_name in common_subfolders:
        path1 = os.path.join(main_folder1, folder_name, 'cer.txt')
        path2 = os.path.join(main_folder2, folder_name, 'cer.txt')

        # Skip the subfolder unless both sides actually have the file.
        if not (os.path.isfile(path1) and os.path.isfile(path2)):
            continue

        id_values1 = _read_cer_file(path1, max_id)
        id_values2 = _read_cer_file(path2, max_id)

        # Append to both sides together so the lists stay aligned pair by pair.
        for id_ in sorted(id_values1.keys() & id_values2.keys()):
            values1[id_].append(id_values1[id_])
            values2[id_].append(id_values2[id_])

    # Publish as plain dicts (no defaultdict auto-creation) with sorted keys.
    globals()[method_name1] = dict(sorted(values1.items()))
    globals()[method_name2] = dict(sorted(values2.items()))

In [15]:
# Front-side comparisons. Each row holds the four positional arguments of
# collect_common_cer_by_id: (folder A, global name for A, folder B, global name for B).
# The function stores its results in the named globals, so nothing is assigned here.
front_comparisons = [
    ('/content/drive/MyDrive/TR OCR Base/Front/metricas', 'cer_trocr_baseVSlarge',
     '/content/drive/MyDrive/TR OCR Large/Front/metricas', 'cer_trocr_largeVSbase'),
    ('/content/drive/MyDrive/TR OCR Large/Front/metricas', 'cer_trocr_largeVSgpt',
     '/content/drive/MyDrive/GPT/Front/metricas_gpt', 'cer_gptVSlarge'),
    ('/content/drive/MyDrive/TR OCR Large/Front/metricas', 'cer_trocr_large_gtVSyolo',
     '/content/drive/MyDrive/TR OCR Large/Front/metricas_yolo', 'cer_trocr_large_yoloVSgt'),
]
for comparison_args in front_comparisons:
    collect_common_cer_by_id(*comparison_args)

In [6]:
from scipy.stats import wilcoxon

In [16]:
# Run scipy's Wilcoxon test per ID on each pair of value lists and print the
# statistic, the p-value and the raw samples.
# NOTE(review): a plain dict lookup is used, so an ID in 0..9 that is missing
# from any of the six result dicts raises KeyError.
divider = "________________________________________________________________________"

for i in range(10):
    print(f"\nID: {i}")

    # Base vs Large
    print("Base vs Large")
    base_sample = cer_trocr_baseVSlarge[i]
    large_sample = cer_trocr_largeVSbase[i]
    stat_base, p_base = wilcoxon(base_sample, large_sample)
    print(f'Estatística: {stat_base}')
    print(f'p-valor: {p_base:.4f}')
    print("Valores baseVSlarge:", base_sample)
    print("Valores largeVSbase:", large_sample)
    print(divider)

    # Large vs GPT
    print("GPT")
    large_sample = cer_trocr_largeVSgpt[i]
    gpt_sample = cer_gptVSlarge[i]
    stat_gpt, p_gpt = wilcoxon(large_sample, gpt_sample)
    print(f'Estatística: {stat_gpt}')
    print(f'p-valor: {p_gpt:.4f}')
    print("Valores largeVSgpt:", large_sample)
    print("Valores gptVSlarge:", gpt_sample)
    print(divider)

    # Large: GT vs Yolo
    print("Large: GT vs Yolo")
    gt_sample = cer_trocr_large_gtVSyolo[i]
    yolo_sample = cer_trocr_large_yoloVSgt[i]
    stat_yolo, p_yolo = wilcoxon(gt_sample, yolo_sample)
    print(f'Estatística: {stat_yolo}')
    print(f'p-valor: {p_yolo:.4f}')
    print("Valores gtVSyolo:", gt_sample)
    print("Valores yoloVSgt:", yolo_sample)
    print(divider)




ID: 0
Base vs Large
Estatística: 0.0
p-valor: 0.0041
Valores baseVSlarge: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

In [17]:
# Back-side comparisons. Each row holds the four positional arguments of
# collect_common_cer_by_id: (folder A, global name for A, folder B, global name for B).
# NOTE: these are the same global names the Front cell uses, so running this
# cell overwrites the Front results held in those variables.
back_comparisons = [
    ('/content/drive/MyDrive/TR OCR Base/Back/metricas_back', 'cer_trocr_baseVSlarge',
     '/content/drive/MyDrive/TR OCR Large/Back/metricas_back', 'cer_trocr_largeVSbase'),
    ('/content/drive/MyDrive/TR OCR Large/Back/metricas_back', 'cer_trocr_largeVSgpt',
     '/content/drive/MyDrive/GPT/Back/metricas_gpt_back', 'cer_gptVSlarge'),
    ('/content/drive/MyDrive/TR OCR Large/Back/metricas_back', 'cer_trocr_large_gtVSyolo',
     '/content/drive/MyDrive/TR OCR Large/Back/metricas_yolo_back', 'cer_trocr_large_yoloVSgt'),
]
for comparison_args in back_comparisons:
    collect_common_cer_by_id(*comparison_args)

In [18]:
# Run scipy's Wilcoxon test per ID on each pair of value lists and print the
# statistic, the p-value and the raw samples.
# NOTE(review): only IDs 0..6 are tested here — presumably the Back data has
# seven IDs; confirm. A missing ID raises KeyError (plain dict lookup).
divider = "________________________________________________________________________"

for i in range(7):
    print(f"\nID: {i}")

    # Base vs Large
    print("Base vs Large")
    base_sample = cer_trocr_baseVSlarge[i]
    large_sample = cer_trocr_largeVSbase[i]
    stat_base, p_base = wilcoxon(base_sample, large_sample)
    print(f'Estatística: {stat_base}')
    print(f'p-valor: {p_base:.4f}')
    print("Valores baseVSlarge:", base_sample)
    print("Valores largeVSbase:", large_sample)
    print(divider)

    # Large vs GPT
    print("GPT")
    large_sample = cer_trocr_largeVSgpt[i]
    gpt_sample = cer_gptVSlarge[i]
    stat_gpt, p_gpt = wilcoxon(large_sample, gpt_sample)
    print(f'Estatística: {stat_gpt}')
    print(f'p-valor: {p_gpt:.4f}')
    print("Valores largeVSgpt:", large_sample)
    print("Valores gptVSlarge:", gpt_sample)
    print(divider)

    # Large: GT vs Yolo
    print("Large: GT vs Yolo")
    gt_sample = cer_trocr_large_gtVSyolo[i]
    yolo_sample = cer_trocr_large_yoloVSgt[i]
    stat_yolo, p_yolo = wilcoxon(gt_sample, yolo_sample)
    print(f'Estatística: {stat_yolo}')
    print(f'p-valor: {p_yolo:.4f}')
    print("Valores gtVSyolo:", gt_sample)
    print("Valores yoloVSgt:", yolo_sample)
    print(divider)


ID: 0
Base vs Large
Estatística: 25.5
p-valor: 0.0124
Valores baseVSlarge: [0.0, 0.0, 0.0, 0.0, 0.0, 0.18181818181818182, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.09090909090909091, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.09090909090909091, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 