In [7]:
from pathlib import Path

import polars as pl
from statsmodels.api import MixedLM

from utils.metrics_process import read_metrics, get_assistant_data, aggregate_df

In [8]:
# Metrics version passed to read_metrics when loading data.
version = 3.0

In [9]:
# The metrics folder sits next to the directory the notebook is run from
# (one level above the current working directory).
metrics_dir = Path.cwd().parents[0].joinpath("metrics")

## Text Stats

In [10]:
# Read only the "text_stats" metrics for the configured version, then derive
# the assistant frame from it via get_assistant_data.
df = read_metrics(
    metrics_path=metrics_dir,
    metric_types=["text_stats"],
    version=version,
)
assistant_df = get_assistant_data(df)

In [11]:
# Display the frame returned by get_assistant_data (the rendered table is truncated).
assistant_df

role,content,model,group,id,fernandez_huerta,szigriszt_pazos,gutierrez_polini,crawford,flesch_kincaid_grade,total_message_number
str,str,str,enum,str,f64,f64,f64,f64,f64,i64
"""assistant""","""Hola! ¿Cómo estás hoy? ¡Bienve…","""Mistral 7B Instruct v0.3""","""A1""","""20250322-203105.json""",95.97,91.03,46.98,2.7,7.9,1
"""assistant""","""Hola Delia, un placer conocert…","""Mistral 7B Instruct v0.3""","""A1""","""20250322-203105.json""",95.35,93.7,45.53,2.6,8.1,2
"""assistant""","""Hola Carlos, un placer conocer…","""Mistral 7B Instruct v0.3""","""A1""","""20250322-203105.json""",102.27,98.9,47.92,2.0,6.6,3
"""assistant""","""Hola Carlos, encantado de cono…","""Mistral 7B Instruct v0.3""","""A1""","""20250322-203105.json""",88.33,85.36,44.72,3.3,9.7,4
"""assistant""","""Hola Carlos, me parece que est…","""Mistral 7B Instruct v0.3""","""A1""","""20250322-203105.json""",92.5,86.05,45.08,3.4,9.2,5
…,…,…,…,…,…,…,…,…,…,…
"""assistant""","""¡Perfecto! Eso suena como una …","""Qwen 2.5 7B Instruct""","""C1""","""20250326-024438.json""",102.27,99.78,44.16,1.9,6.6,5
"""assistant""","""¡Muy bien hecho! Has corregido…","""Qwen 2.5 7B Instruct""","""C1""","""20250326-024438.json""",95.66,89.54,40.12,2.8,8.0,6
"""assistant""","""¡Felicidades! Has hecho un exc…","""Qwen 2.5 7B Instruct""","""C1""","""20250326-024438.json""",74.91,73.2,33.82,4.4,12.6,7
"""assistant""","""¡Perfecto! Practicar con el ve…","""Qwen 2.5 7B Instruct""","""C1""","""20250326-024438.json""",96.48,92.74,40.28,2.4,7.7,8


In [12]:
# Restrict the analysis to the responses of a single model.
# NOTE: this rebinds assistant_df; the cells below rely on the filtered frame.
assistant_df = assistant_df.filter(pl.col("model").eq("Qwen 2.5 7B Instruct"))

# Linear mixed model: fixed effect for `group`, one random intercept per `id`.
model = MixedLM.from_formula(
    "fernandez_huerta ~ group",
    assistant_df.to_pandas(),
    re_formula="~1",
    groups="id",
)
results = model.fit()

print("\nSummary:")
print(results.summary())

# The summary table rounds p-values to three decimals, so print them again
# with six.
p_values = results.pvalues
print("\nP-values with more precision:")
print(p_values.round(6))


Summary:
            Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: fernandez_huerta
No. Observations: 810     Method:             REML            
No. Groups:       90      Scale:              80.2019         
Min. group size:  9       Log-Likelihood:     -2957.8821      
Max. group size:  9       Converged:          Yes             
Mean group size:  9.0                                         
--------------------------------------------------------------
                 Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
--------------------------------------------------------------
Intercept        99.063    0.817 121.229 0.000  97.462 100.665
group[T.B1]      -5.248    1.156  -4.541 0.000  -7.513  -2.983
group[T.C1]     -10.650    1.156  -9.216 0.000 -12.915  -8.385
id Var           11.121    0.359                              


P-values with more precision:
Intercept      0.000000
group[T.B1]    0.000006
group[T.C1]    0.000000
id Var         0.000543
dtype: float64

Interpretation
- Intercept: Estimated mean fernandez_huerta for group A1 (the reference level)
- group[T.B1]: Estimated difference in mean fernandez_huerta between B1 and A1 (B1 minus A1)
- group[T.C1]: Estimated difference in mean fernandez_huerta between C1 and A1 (C1 minus A1)



In [13]:
# Show the fitted model's p-values rounded to six decimals.
p_values = results.pvalues
print("\nP-values with more precision:", p_values.round(6), sep="\n")


P-values with more precision:
Intercept      0.000000
group[T.B1]    0.000006
group[T.C1]    0.000000
id Var         0.000543
dtype: float64


In [14]:
# Same random-intercept model as before, with the message number added as a
# fixed-effect covariate. Rebinds `model` and `results`.
model = MixedLM.from_formula(
    "fernandez_huerta ~ group + total_message_number",
    assistant_df.to_pandas(),
    re_formula="~1",
    groups="id",
)
results = model.fit()

print(results.summary())

               Mixed Linear Model Regression Results
Model:               MixedLM  Dependent Variable:  fernandez_huerta
No. Observations:    810      Method:              REML            
No. Groups:          90       Scale:               63.8394         
Min. group size:     9        Log-Likelihood:      -2876.5360      
Max. group size:     9        Converged:           Yes             
Mean group size:     9.0                                           
-------------------------------------------------------------------
                      Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
-------------------------------------------------------------------
Intercept            106.469    0.981 108.480 0.000 104.545 108.392
group[T.B1]           -5.248    1.156  -4.541 0.000  -7.513  -2.983
group[T.C1]          -10.650    1.156  -9.216 0.000 -12.915  -8.385
total_message_number  -1.481    0.109 -13.622 0.000  -1.694  -1.268
id Var                12.938    0.402                          

Interpretation
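- Intercept: Estimated fernandez_huerta for group A1 when total_message_number is 0. A message number of 0 never occurs in our data, so this value is an extrapolation. TODO: shift or center total_message_number (e.g. use total_message_number - 1) so that the intercept corresponds to an observed message.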
