In [3]:
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import zscore
import numpy as np

In [4]:

# Min-max scaler instance; it is not used by any of the cells visible here.
scaler = MinMaxScaler()

# Folder holding the metric tables read below.
base_folder = Path('./result/ud-pipe')

# Read the three metric tables in a fixed order: complete references,
# simplified references, then the generated simplifications.
nilc_reference_complete, nilc_reference_simplified, nilc_generated = (
    pd.read_csv(base_folder / csv_name)
    for csv_name in (
        'reference-complete.csv',
        "reference-simplified.csv",
        "generated-simplified.csv",
    )
)

In [5]:
# Display the generated-simplification metrics table loaded above
# (one row per document and generating model; long frames are shown truncated).
nilc_generated

Unnamed: 0,id,name,year,model,generated_with,non_svo_ratio,passive_voice_ratio,words_before_main_verb_mean,personal_pronoun_ratio,coreference_pronoun_ratio,demonstrative_pronoun_ratio,logical_operator_ratio,connective_ratio,long_sentence_ratio,foreign_word_ratio
0,5,2025_ufc_inova_5_stripped,2025,portuguese-porttinari-ud-2.15-241121,qwen2.5:14b,0.039301,0.139738,2.725191,0.002392,2.25,1.125000,0.073581,0.100978,0.113537,0.003899
1,7,2025_ufc_inova_7_stripped,2025,portuguese-porttinari-ud-2.15-241121,qwen2.5:14b,0.000000,0.079365,2.000000,0.001319,0.00,0.000000,0.060423,0.072508,0.111111,0.002591
2,2,2025_ufc_inova_2_stripped,2025,portuguese-porttinari-ud-2.15-241121,qwen2.5:14b,0.068966,0.186207,2.704545,0.003534,1.00,1.200000,0.057939,0.086351,0.137931,0.001319
3,1,2025_ufc_inova_1_simplificado_stripped,2025,portuguese-porttinari-ud-2.15-241121,qwen2.5:14b,0.000000,0.105263,2.750000,0.004890,0.00,0.000000,0.031884,0.060870,0.052632,0.000000
4,8,2025_ufc_inova_8_stripped,2025,portuguese-porttinari-ud-2.15-241121,qwen2.5:14b,0.028169,0.183099,2.973684,0.005208,0.00,1.000000,0.061144,0.065089,0.028169,0.003096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138,3,2025_ufc_inova_3_stripped,2025,portuguese-porttinari-ud-2.15-241121,gemma3:4b,0.088889,0.222222,5.789474,0.003130,3.00,1.000000,0.031915,0.065603,0.133333,0.000000
139,6,2025_ufc_inova_6_simplificado_stripped,2025,portuguese-porttinari-ud-2.15-241121,gemma3:4b,0.066265,0.168675,4.549020,0.004420,2.00,1.066667,0.046452,0.073118,0.204819,0.004598
140,1,2025_ufc_inova_1_stripped,2025,portuguese-porttinari-ud-2.15-241121,gemma3:4b,0.056122,0.306122,5.215686,0.006092,2.50,1.597222,0.052505,0.084778,0.494898,0.000688
141,5,2025_ufc_inova_5_simplificado_stripped,2025,portuguese-porttinari-ud-2.15-241121,gemma3:4b,0.065693,0.102190,2.300000,0.007964,0.00,1.200000,0.048083,0.076673,0.102190,0.004444


In [6]:
# Stack the complete and simplified reference tables into one frame.
# Row labels from each source table are kept as-is (no renumbering).
nilc_reference = pd.concat(
    objs=[
        nilc_reference_complete,
        nilc_reference_simplified,
    ],
)

In [7]:
# Label every reference row as "Original" so that, when grouped by
# `generated_with`, the references form their own group beside the model names.
nilc_reference = nilc_reference.assign(generated_with="Original")

In [8]:
# Remove the document named exactly "2025_ufc_inova_6_stripped" from both tables
# (other names, e.g. the "..._6_simplificado_stripped" variant, are not matched).
nilc_reference = nilc_reference.loc[
    nilc_reference["name"].ne("2025_ufc_inova_6_stripped")
]

# For the generated table, additionally drop every row whose `generated_with`
# contains the literal substring "code". `na=True` makes rows with a missing
# `generated_with` count as matches, so they are dropped as well.
nilc_generated = nilc_generated.loc[
    nilc_generated["name"].ne("2025_ufc_inova_6_stripped")
    & ~nilc_generated["generated_with"].str.contains("code", regex=False, na=True)
]

In [9]:
# Combine the reference rows and the generated rows into one table,
# reference rows first; row labels are not renumbered.
merged = pd.concat(
    objs=[
        nilc_reference,
        nilc_generated,
    ],
)

In [10]:
# Metric columns split into two groups by how their value is interpreted.
# NOTE(review): presumably "proportional" means higher is better and
# "inverse proportional" means lower is better -- confirm with the authors.
proportional_metrics = [
    "personal_pronoun_ratio",  # can also be a positive sign according to Unicamp
]

inverse_proportional_metrics = [
    "coreference_pronoun_ratio",
    "demonstrative_pronoun_ratio",
    "non_svo_ratio",
    "passive_voice_ratio",
    "words_before_main_verb_mean",  # use different scaling for this metric?
    "long_sentence_ratio",
    "foreign_word_ratio",
]

# Every metric under analysis: the proportional ones first, then the inverse ones.
complete_metrics = [*proportional_metrics, *inverse_proportional_metrics]

In [11]:
# Display the combined table (reference rows followed by generated rows).
merged

Unnamed: 0,id,name,year,model,generated_with,non_svo_ratio,passive_voice_ratio,words_before_main_verb_mean,personal_pronoun_ratio,coreference_pronoun_ratio,demonstrative_pronoun_ratio,logical_operator_ratio,connective_ratio,long_sentence_ratio,foreign_word_ratio
0,5,2025_ufc_inova_5_stripped,2025,portuguese-porttinari-ud-2.15-241121,Original,0.041534,0.188498,6.732283,0.002124,0.0,1.652174,0.061731,0.086533,0.329073,0.006298
1,7,2025_ufc_inova_7_stripped,2025,portuguese-porttinari-ud-2.15-241121,Original,0.040000,0.186667,4.312500,0.001313,0.0,0.166667,0.056689,0.071807,0.253333,0.003322
2,2,2025_ufc_inova_2_stripped,2025,portuguese-porttinari-ud-2.15-241121,Original,0.067485,0.306748,6.460674,0.001185,5.0,1.666667,0.046055,0.073146,0.398773,0.000000
3,8,2025_ufc_inova_8_stripped,2025,portuguese-porttinari-ud-2.15-241121,Original,0.109589,0.232877,5.600000,0.003857,0.0,1.000000,0.042530,0.051254,0.205479,0.002179
4,3,2025_ufc_inova_3_stripped,2025,portuguese-porttinari-ud-2.15-241121,Original,0.045455,0.204545,5.611111,0.001548,0.0,1.000000,0.026643,0.051510,0.159091,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138,3,2025_ufc_inova_3_stripped,2025,portuguese-porttinari-ud-2.15-241121,gemma3:4b,0.088889,0.222222,5.789474,0.003130,3.0,1.000000,0.031915,0.065603,0.133333,0.000000
139,6,2025_ufc_inova_6_simplificado_stripped,2025,portuguese-porttinari-ud-2.15-241121,gemma3:4b,0.066265,0.168675,4.549020,0.004420,2.0,1.066667,0.046452,0.073118,0.204819,0.004598
140,1,2025_ufc_inova_1_stripped,2025,portuguese-porttinari-ud-2.15-241121,gemma3:4b,0.056122,0.306122,5.215686,0.006092,2.5,1.597222,0.052505,0.084778,0.494898,0.000688
141,5,2025_ufc_inova_5_simplificado_stripped,2025,portuguese-porttinari-ud-2.15-241121,gemma3:4b,0.065693,0.102190,2.300000,0.007964,0.0,1.200000,0.048083,0.076673,0.102190,0.004444


### Teste T

In [28]:
import csv
import math
from typing import Any

import numpy as np
import scipy.stats as st

# Confidence level used for every interval computed in this cell.
confidence = 0.95


def _t_confidence_intervals(
    data: pd.DataFrame,
    metrics: list[str],
    level: float,
    group_col: str = "generated_with",
) -> list[dict[str, Any]]:
    """Compute a Student-t confidence interval of the mean per metric and group.

    For each metric column, rows of ``data`` are grouped by ``group_col`` and the
    interval is centred on the group mean, scaled by the group's standard error
    of the mean, with ``count - 1`` degrees of freedom.

    Args:
        data: Table holding ``group_col`` and every column named in ``metrics``.
        metrics: Names of the metric columns to summarise.
        level: Confidence level of the interval (e.g. ``0.95``).
        group_col: Column whose distinct values define the groups.

    Returns:
        One record per (metric, group) with the keys ``metric``, ``model``,
        ``range_min``, ``mean`` and ``range_max``. For a group with a single
        row the standard error is NaN, so both bounds are NaN.
    """
    records: list[dict[str, Any]] = []
    for metric in metrics:
        summary = data.groupby(group_col)[metric].agg(["mean", "count", st.sem])
        for model, stats_row in summary.iterrows():
            range_min, range_max = st.t.interval(
                confidence=level,
                df=stats_row["count"] - 1,
                loc=stats_row["mean"],
                scale=stats_row["sem"],
            )
            records.append(
                {
                    "metric": metric,
                    "model": model,
                    "range_min": range_min,
                    "mean": stats_row["mean"],
                    "range_max": range_max,
                }
            )
    return records


def _truncated_interval_frame(
    records: list[dict[str, Any]], decimals: int = 4
) -> pd.DataFrame:
    """Build the interval table and truncate its numeric columns toward zero.

    ``np.trunc`` is used instead of ``math.trunc`` because the latter raises
    ``ValueError`` on NaN (and ``OverflowError`` on infinity); NaN bounds are
    produced whenever a group has a single observation. Finite values are
    truncated exactly as before.

    Args:
        records: Output of ``_t_confidence_intervals``.
        decimals: Number of decimal places kept after truncation.

    Returns:
        A frame with ``range_min``, ``mean`` and ``range_max`` truncated
        (not rounded) to ``decimals`` places.
    """
    table = pd.DataFrame(records)
    value_cols = ["range_min", "mean", "range_max"]
    factor = 10 ** decimals
    table[value_cols] = np.trunc(table[value_cols] * factor) / factor
    return table


# Metrics in the "inverse proportional" list.
confidence_intervals: list[dict[str, Any]] = _t_confidence_intervals(
    merged, inverse_proportional_metrics, confidence
)
df_inverse_proportional = _truncated_interval_frame(confidence_intervals)
df_inverse_proportional.to_csv("confidence_intervals_nilc_inverse_proportional.csv",index=False)

# Metrics in the "proportional" list.
confidence_intervals = _t_confidence_intervals(
    merged, proportional_metrics, confidence
)
df_proportional = _truncated_interval_frame(confidence_intervals)
df_proportional.to_csv("confidence_intervals_nilc_proportional.csv",index=False)
