In [6]:
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import zscore

In [7]:
base_folder = Path('./result/readability-indexes')

scaler = MinMaxScaler()

generated_simplified = pd.read_csv(base_folder / 'generated-simplified.csv')
reference_simplified  = pd.read_csv(base_folder / 'reference-simplified.csv')
reference_complete = pd.read_csv(base_folder / 'reference-complete.csv')

In [8]:
reference = pd.concat([reference_complete,reference_simplified])

In [9]:
scale_readability_cols = ['flesch_ease','gulpease'] 
grade_level_readability_cols = ['flesch_kincaid','ari','gunning_fog','coleman_liau']
metrics_cols = scale_readability_cols + grade_level_readability_cols


In [10]:
### Comparando arquivos originais com os gerados por cada modelo
merged = reference.merge(generated_simplified,on='name',how='inner',suffixes=("",'_generated'))

for col in scale_readability_cols:
    merged[col+"_improvement"] = ((merged[col+"_generated"]- merged[col]) / merged[col+"_generated"]) * 100
for col in grade_level_readability_cols:
    merged[col+"_improvement"] = ((merged[col] -merged[col+"_generated"]) / merged[col]) * 100 

improvement_methods = [col+"_improvement" for col in metrics_cols]
agg_methods = {col:"mean" for col in improvement_methods}

average_improvement_per_model = merged.groupby('generated_with_generated').agg(agg_methods)

zscores = average_improvement_per_model[improvement_methods].apply(zscore)

# exclude_invalid_models = ~((zscores.index.str.startswith('gemini')) | (zscores.index.str.startswith('phi3')))
zscores['zscore_mean'] = zscores.sum(axis=1)
best_zscore = zscores.loc[zscores['zscore_mean'].idxmax()]

minmaxes = pd.DataFrame(scaler.fit_transform(average_improvement_per_model[improvement_methods]), columns = improvement_methods, index = average_improvement_per_model.index)
# exclude_invalid_models = ~(minmaxes.index.str.startswith('gemini') |(minmaxes.index.str.startswith("phi3")))
minmaxes['minmax_mean'] = minmaxes.sum(axis=1)
best_minmax = minmaxes.loc[minmaxes['minmax_mean'].idxmax()]

zscores.sort_values(by='zscore_mean',ascending=False)['zscore_mean'].to_csv("z_scores_readability.csv")
minmaxes.sort_values(by='minmax_mean',ascending=False)['minmax_mean'].to_csv("minmax_score_readability.csv")

zscores.sort_values(by='zscore_mean',ascending=False)['zscore_mean'],minmaxes.sort_values(by='minmax_mean',ascending=False)['minmax_mean']

(generated_with_generated
 gemini-2.5-flash-preview-04-17    11.246540
 gemini-2.5-pro-preview-05-06       9.910799
 phi3:latest                        2.963330
 phi4:latest                        1.195588
 qwen2.5:14b                        1.174810
 deepseek-r1:14b                   -0.087657
 qwen2.5-coder:32b                 -1.340955
 gemma3:4b                         -1.948779
 llama3.2:latest                   -3.266148
 granite3-dense:8b                 -3.430959
 granite3-dense:2b                 -4.564604
 cow/gemma2_tools:2b               -5.370179
 granite-code:8b                   -6.481787
 Name: zscore_mean, dtype: float64,
 generated_with_generated
 gemini-2.5-flash-preview-04-17    5.741867
 gemini-2.5-pro-preview-05-06      5.338947
 phi3:latest                       3.422659
 phi4:latest                       2.723968
 qwen2.5:14b                       2.719230
 deepseek-r1:14b                   2.337071
 qwen2.5-coder:32b                 1.965612
 gemma3:4b         