# Satisfaction

In [13]:
import pandas as pd
import sqlite3

from helpers.statistical_tests import run_t_test_on_gender

conn = sqlite3.connect("../../giicg.db")
users = pd.read_sql("Select c.*, u.gender from conversations c JOIN users u on c.user_id == u.user_id", conn)
conn.close()
users = users[(users['gender'].isin(['Man (cisgender)', 'Woman (cisgender)']))]

In [14]:
satisfaction_map = {
    'very dissatisfied': 1,
    'mostly dissatisfied': 2,
    'neutral': 3,
    'somewhat satisfied': 4,
    'very satisfied': 5
}

complexity_map = {
    'very simple': 1,
    'relatively simple': 2,
    'neither complex nor simple': 3,
    'relatively complex': 4,
    'very complex': 5
}

# Create a new column with coded values
users['satisfaction_code'] = users['satisfaction'].map(satisfaction_map)
users['complexity_code'] = users['rated_complexity'].map(complexity_map)

users

Unnamed: 0,conversation_id,user_id,mode,text,share_link,llm_version,satisfaction,explain_satisfaction,rated_complexity,gender,satisfaction_code,complexity_code
0,1,6,Share link,,https://chatgpt.com/share/682b3298-bd58-8000-b...,,neutral,,neither complex nor simple,Man (cisgender),3,3
1,2,8,JSON/Text,I am working on the problem of reconstruction ...,,ChatGPT-4o-mini-high,somewhat satisfied,,relatively complex,Man (cisgender),4,4
2,3,11,Share link,,https://chatgpt.com/share/682b387d-16ec-8010-8...,,very satisfied,,very simple,Woman (cisgender),5,1
3,4,14,Share link,,https://chatgpt.com/share/682b48e3-94c0-8005-8...,,somewhat satisfied,,neither complex nor simple,Man (cisgender),4,3
4,5,15,JSON/Text,SET_ALL_TABLES action is currently not fetchin...,,Claude 3.7 Sonnet,somewhat satisfied,,relatively complex,Man (cisgender),4,4
...,...,...,...,...,...,...,...,...,...,...,...,...
85,86,60,Share link,,https://chatgpt.com/share/68358d62-9b20-800d-a...,ChatGPT-4o,very satisfied,,very simple,Woman (cisgender),5,1
86,87,73,Share link,,https://chatgpt.com/share/68385b69-0548-800c-9...,ChatGPT-4o,somewhat satisfied,,neither complex nor simple,Woman (cisgender),4,3
87,88,77,Share link,,https://chatgpt.com/share/683edda1-2be8-800d-9...,ChatGPT-4,somewhat satisfied,,very simple,Man (cisgender),4,1
88,89,79,Share link,,https://chatgpt.com/share/683eecd0-d638-800d-8...,gpt-40,somewhat satisfied,,relatively complex,Woman (cisgender),4,4


## Satisfaction

In [15]:
user_satisfaction = users.groupby(['user_id', 'gender'])['satisfaction_code'].mean().reset_index()
user_satisfaction.rename(columns={'satisfaction_code': 'mean_satisfaction'}, inplace=True)
user_satisfaction

Unnamed: 0,user_id,gender,mean_satisfaction
0,6,Man (cisgender),3.666667
1,8,Man (cisgender),4.0
2,11,Woman (cisgender),5.0
3,14,Man (cisgender),4.0
4,15,Man (cisgender),4.0
5,16,Woman (cisgender),4.5
6,25,Man (cisgender),3.8
7,28,Woman (cisgender),3.75
8,29,Woman (cisgender),4.0
9,31,Man (cisgender),3.333333


In [16]:
gender_means = user_satisfaction.groupby(['gender']).mean().reset_index()
gender_means

Unnamed: 0,gender,user_id,mean_satisfaction
0,Man (cisgender),49.625,4.011458
1,Woman (cisgender),54.307692,4.203846


In [17]:
from helpers.statistical_tests import run_t_test_on_gender
run_t_test_on_gender(user_satisfaction, "mean_satisfaction", True)

mean_satisfaction:
  T-statistic: -1.1695, p-value: 0.2576
  Female: n=13, variance=0.2810
  Male:   n=16, variance=0.0871


In [18]:
overall_mean = user_satisfaction['mean_satisfaction'].mean()
overall_mean

np.float64(4.097701149425288)

In [19]:
import scipy.stats as stats

# Suppose female and male are pandas Series of your group means
F = 0.2810 / 0.0871  # larger/smaller
df1 = 13 - 1
df2 = 16 - 1

# Two-tailed p-value
p_value = 2 * min(stats.f.cdf(F, df1, df2), 1 - stats.f.cdf(F, df1, df2))
print(f"F-statistic: {F:.4f}, p-value: {p_value:.4f}")


F-statistic: 3.2262, p-value: 0.0351


In [20]:
import statsmodels.formula.api as smf

model = smf.mixedlm(
    "satisfaction_code ~ gender",         # fixed effect
    users,                               # prompt-level DataFrame
    groups=users["user_id"]              # random user intercept
)
result = model.fit()
print(result.summary())


                Mixed Linear Model Regression Results
Model:              MixedLM   Dependent Variable:   satisfaction_code
No. Observations:   86        Method:               REML             
No. Groups:         29        Scale:                0.6627           
Min. group size:    1         Log-Likelihood:       -108.1488        
Max. group size:    5         Converged:            Yes              
Mean group size:    3.0                                              
---------------------------------------------------------------------
                            Coef. Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------------------
Intercept                   3.982    0.141 28.244 0.000  3.706  4.259
gender[T.Woman (cisgender)] 0.196    0.197  0.992 0.321 -0.191  0.582
Group Var                   0.047    0.081                           



- **Per-user variance**: Small, but still larger than the (not significant) gender effect.
- **Gender effect**: Very minor and not reliable (the confidence interval includes zero).
- **Most variation** in satisfaction is **unexplained by either gender or user identity** (most variance is “residual”).


## Complexity

In [21]:
complexity = users.groupby(['user_id', 'gender'])['complexity_code'].mean().reset_index()
complexity.rename(columns={'complexity_code': 'mean_complexity'}, inplace=True)
complexity

Unnamed: 0,user_id,gender,mean_complexity
0,6,Man (cisgender),3.333333
1,8,Man (cisgender),4.0
2,11,Woman (cisgender),1.0
3,14,Man (cisgender),3.0
4,15,Man (cisgender),4.0
5,16,Woman (cisgender),3.0
6,25,Man (cisgender),2.6
7,28,Woman (cisgender),2.75
8,29,Woman (cisgender),3.0
9,31,Man (cisgender),3.333333


In [22]:
compl_gender_means = complexity.groupby(['gender']).mean().reset_index()
compl_gender_means

Unnamed: 0,gender,user_id,mean_complexity
0,Man (cisgender),49.625,2.995833
1,Woman (cisgender),54.307692,2.496154


In [23]:
from helpers.statistical_tests import run_t_test_on_gender
run_t_test_on_gender(complexity, "mean_complexity", True)

mean_complexity:
  T-statistic: 1.4082, p-value: 0.1738
  Female: n=13, variance=1.1677
  Male:   n=16, variance=0.5773
