In [6]:
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
from scipy import stats
import numpy as np
import glob


diamond_template = go.layout.Template()
pio.templates.default = diamond_template


## Plot bias results

In [14]:
dimension_dict = {'educated': 'Uneducated',
                  'urban': 'Rural',
                  'calm': 'Temper',
                  'friendly': 'Unfriendly',
                  'open_to_experience': 'Closed-Minded',
                  'conscientiousness': 'Careless'
                  }

df = pd.read_csv('../output/implicit/eval/final.csv')
df = df[df['nones'] == 0]


group_stats = df.groupby(['model_name', 'dimension'])['bias'].agg(['mean', 'std', 'count']).reset_index()

group_stats['se'] = group_stats['std'] / np.sqrt(group_stats['count'])
group_stats['t_score'] = group_stats['count'].apply(lambda n: stats.t.ppf(0.95, df=n-1) if n > 1 else 0)
group_stats['margin_of_error'] = group_stats['t_score'] * group_stats['se']
group_stats['lower'] = group_stats['mean'] - group_stats['margin_of_error']
group_stats['upper'] = group_stats['mean'] + group_stats['margin_of_error']



for model in group_stats.model_name.unique():
    colors = ['#4A68D9', '#60B177', '#F09235', '#F9D949', '#377E7F', '#BFB5D7']

    fig = go.Figure()
    print(model)

    subset = group_stats[group_stats['model_name'] == model]
    k = 0
    for i, row in subset.iterrows():
        fig.add_trace(go.Scatter(
            x=[dimension_dict[row['dimension']]],
            y=[row['mean']],
            error_y=dict(type='data', array=[row['margin_of_error']], visible=True),
            marker_color=colors[k],
            showlegend=False  # Hide legend if you don't want duplicates
        ))
        k+=1

    # Create color-coded tick labels
    dimensions = subset['dimension'].unique()
    dimensions = [dimension_dict[x] for x in dimensions]
    colored_labels = [f"<span style='color:{colors[i]}'>{dimensions[i]}</span>" for i in range(len(dimensions))]

    fig.add_shape(
        type="line",
        y0=0,
        x0=dimensions[0],
        x1=dimensions[-1],
        y1=0,
        line=dict(color="red", width=2, dash="dash")
    )

    # Update layout
    fig.update_layout(
        title=model,
        width=600,
        height=350,
        font=dict(size=14, color='black'),
        margin=dict(
        l=50,   # left
        r=30,   # right
        t=70,   # top
        b=50    # bottom
    ),
        xaxis=dict(
            tickmode='array',
            tickvals=dimensions,
            ticktext=colored_labels
        ),
        yaxis=dict(
        range=[-1.15, 1.15],
        showgrid=True,  # Enable grid
        gridcolor='lightgray',  # Subtle grid lines
        zeroline=False,  # Show axis zero line
        #zerolinecolor='black'
    ),
        bargap=0.3,
    )
    fig.show()
    fig.write_image(f"../img/implicit_{model}.pdf")



Aya 8B


In [16]:
dimension_dict = {'educated': 'Uneducated',
                  'urban': 'Rural',
                  'calm': 'Temper',
                  'friendly': 'Unfriendly',
                  'open_to_experience': 'Closed-Minded',
                  'conscientiousness': 'Careless'
                  }

df = pd.read_csv('../output/implicit_explicit/eval/final.csv')
df = df[df['nones'] == 0]


group_stats = df.groupby(['model_name', 'dimension'])['bias'].agg(['mean', 'std', 'count']).reset_index()

group_stats['se'] = group_stats['std'] / np.sqrt(group_stats['count'])
group_stats['t_score'] = group_stats['count'].apply(lambda n: stats.t.ppf(0.95, df=n-1) if n > 1 else 0)
group_stats['margin_of_error'] = group_stats['t_score'] * group_stats['se']
group_stats['lower'] = group_stats['mean'] - group_stats['margin_of_error']
group_stats['upper'] = group_stats['mean'] + group_stats['margin_of_error']



for model in group_stats.model_name.unique():
    colors = ['#4A68D9', '#60B177', '#F09235', '#F9D949', '#377E7F', '#BFB5D7']

    fig = go.Figure()
    print(model)

    subset = group_stats[group_stats['model_name'] == model]
    k = 0
    for i, row in subset.iterrows():
        fig.add_trace(go.Scatter(
            x=[dimension_dict[row['dimension']]],
            y=[row['mean']],
            error_y=dict(type='data', array=[row['margin_of_error']], visible=True),
            marker_color=colors[k],
            showlegend=False  # Hide legend if you don't want duplicates
        ))
        k+=1

    # Create color-coded tick labels
    dimensions = subset['dimension'].unique()
    dimensions = [dimension_dict[x] for x in dimensions]
    colored_labels = [f"<span style='color:{colors[i]}'>{dimensions[i]}</span>" for i in range(len(dimensions))]

    fig.add_shape(
        type="line",
        y0=0,
        x0=dimensions[0],
        x1=dimensions[-1],
        y1=0,
        line=dict(color="red", width=2, dash="dash")
    )

    # Update layout
    fig.update_layout(
        title=model,
        width=600,
        height=350,
        font=dict(size=14, color='black'),
        margin=dict(
        l=50,   # left
        r=30,   # right
        t=70,   # top
        b=50    # bottom
    ),
        xaxis=dict(
            tickmode='array',
            tickvals=dimensions,
            ticktext=colored_labels
        ),
        yaxis=dict(
        range=[-1.15, 1.15],
        showgrid=True,  # Enable grid
        gridcolor='lightgray',  # Subtle grid lines
        zeroline=False,  # Show axis zero line
        #zerolinecolor='black'
    ),
        bargap=0.3,
    )
    fig.show()
    fig.write_image(f"../img/explicit_{model}.pdf")


Aya 32b


In [12]:
dimension_dict = {'educated': 'Uneducated',
                  'urban': 'Rural',
                  'calm': 'Temper',
                  'friendly': 'Unfriendly',
                  'open_to_experience': 'Closed-Minded',
                  'conscientiousness': 'Careless'
                  }

df = pd.read_csv('../output/implicit_robustness_0.9/eval/final.csv')
df = df[df['nones'] == 0]


group_stats = df.groupby(['model_name', 'dimension'])['bias'].agg(['mean', 'std', 'count']).reset_index()

group_stats['se'] = group_stats['std'] / np.sqrt(group_stats['count'])
group_stats['t_score'] = group_stats['count'].apply(lambda n: stats.t.ppf(0.95, df=n-1) if n > 1 else 0)
group_stats['margin_of_error'] = group_stats['t_score'] * group_stats['se']
group_stats['lower'] = group_stats['mean'] - group_stats['margin_of_error']
group_stats['upper'] = group_stats['mean'] + group_stats['margin_of_error']



for model in group_stats.model_name.unique():
    colors = ['#4A68D9', '#60B177', '#F09235', '#F9D949', '#377E7F', '#BFB5D7']

    fig = go.Figure()
    print(model)

    subset = group_stats[group_stats['model_name'] == model]
    k = 0
    for i, row in subset.iterrows():
        fig.add_trace(go.Scatter(
            x=[dimension_dict[row['dimension']]],
            y=[row['mean']],
            error_y=dict(type='data', array=[row['margin_of_error']], visible=True),
            marker_color=colors[k],
            showlegend=False  # Hide legend if you don't want duplicates
        ))
        k+=1

    # Create color-coded tick labels
    dimensions = subset['dimension'].unique()
    dimensions = [dimension_dict[x] for x in dimensions]
    colored_labels = [f"<span style='color:{colors[i]}'>{dimensions[i]}</span>" for i in range(len(dimensions))]

    fig.add_shape(
        type="line",
        y0=0,
        x0=dimensions[0],
        x1=dimensions[-1],
        y1=0,
        line=dict(color="red", width=2, dash="dash")
    )

    # Update layout
    fig.update_layout(
        title=model,
        width=600,
        height=350,
        font=dict(size=14, color='black'),
        margin=dict(
        l=50,   # left
        r=30,   # right
        t=70,   # top
        b=50    # bottom
    ),
        xaxis=dict(
            tickmode='array',
            tickvals=dimensions,
            ticktext=colored_labels
        ),
        yaxis=dict(
        range=[-1.15, 1.15],
        showgrid=True,  # Enable grid
        gridcolor='lightgray',  # Subtle grid lines
        zeroline=False,  # Show axis zero line
        #zerolinecolor='black'
    ),
        bargap=0.3,
    )
    fig.show()
    fig.write_image(f"../img/robustness_{model}.pdf")


Llama-3.1-8B


Qwen2.5-72B


In [21]:
df['dimension_cut'] = df['dimension'].str[:-2]

In [34]:
df.model_name

0        Qwen2.5-72B
1        Qwen2.5-72B
2        Qwen2.5-72B
3        Qwen2.5-72B
4        Qwen2.5-72B
            ...     
37795         Aya 8B
37796         Aya 8B
37797         Aya 8B
37798         Aya 8B
37799         Aya 8B
Name: model_name, Length: 37800, dtype: object

In [37]:
df2 = pd.read_csv('../output/decision_extracted/Qwen2.5-7B-Instruct.csv')

In [38]:
len(df2)

6300

In [61]:
subset = df[(df['model_name'] == 'Qwen2.5-72B') & (df['nones'] == 0) & (df['dimension_cut'] == 'friendly')]

biases = subset['bias'].tolist()
t_stat, p_value = stats.ttest_1samp(biases, 0)
mean_bias = np.mean(biases)
std_bias = np.std(biases)#, ddof=1)
# Compute the 95% confidence interval
n = len(biases)
se = std_bias / np.sqrt(n)  # Standard error
confidence = 0.95
t_score = stats.t.ppf((1 + confidence) / 2, df=n-1)  # t-score for 95% confidence interval
margin_of_error = t_score * se
confidence_interval = (mean_bias - margin_of_error, mean_bias + margin_of_error)
print(confidence_interval)
print(mean_bias)
print(std_bias)
print(se)
print(t_score)



(np.float64(0.43671987404532103), np.float64(0.5423277450022981))
0.4895238095238095
0.8719899310825195
0.026910193499849286
1.9622280114330741


In [58]:
group_stats[group_stats['model_name'] == 'Qwen2.5-72B']

Unnamed: 0,model_name,dimension_cut,mean,std,count,se,t_score,margin_of_error,lower,upper
24,Qwen2.5-72B,calm,0.387035,0.921987,1049,0.028467,1.646309,0.046865,0.34017,0.4339
25,Qwen2.5-72B,conscientiousness,0.624762,0.748788,1050,0.023108,1.646308,0.038043,0.586719,0.662805
26,Qwen2.5-72B,educated,0.704918,0.652959,1037,0.020277,1.646326,0.033382,0.671536,0.7383
27,Qwen2.5-72B,friendly,0.489524,0.872405,1050,0.026923,1.646308,0.044324,0.4452,0.533847
28,Qwen2.5-72B,open_to_experience,0.266667,0.963259,1050,0.029727,1.646308,0.048939,0.217727,0.315606
29,Qwen2.5-72B,urban,0.613333,0.788993,1050,0.024349,1.646308,0.040086,0.573248,0.653419


In [13]:
import scipy.stats as stats

dimension_dict = {'educated': 'Uneducated',
                  'urban': 'Rural',
                  'calm': 'Temper',
                  'friendly': 'Unfriendly',
                  'open_to_experience': 'Closed-Minded',
                  'conscientiousness': 'Careless'
                  }

df = pd.read_csv('../output/decision_extracted/eval/final.csv')
df['dimension_cut'] = df['dimension'].str[:-2]
df = df[df['nones'] == 0]


group_stats = df.groupby(['model_name', 'dimension_cut'])['bias'].agg(['mean', 'std', 'count']).reset_index()

group_stats['se'] = group_stats['std'] / np.sqrt(group_stats['count'])
group_stats['t_score'] = group_stats['count'].apply(lambda n: stats.t.ppf(0.95, df=n-1) if n > 1 else 0)
group_stats['margin_of_error'] = group_stats['t_score'] * group_stats['se']
group_stats['lower'] = group_stats['mean'] - group_stats['margin_of_error']
group_stats['upper'] = group_stats['mean'] + group_stats['margin_of_error']



for model in group_stats.model_name.unique():
    colors = ['#4A68D9', '#60B177', '#F09235', '#F9D949', '#377E7F', '#BFB5D7']

    fig = go.Figure()
    print(model)

    subset = group_stats[group_stats['model_name'] == model]
    k = 0
    for i, row in subset.iterrows():
        fig.add_trace(go.Scatter(
            x=[dimension_dict[row['dimension_cut']]],
            y=[row['mean']],
            error_y=dict(type='data', array=[row['margin_of_error']], visible=True),
            marker_color=colors[k],
            showlegend=False  # Hide legend if you don't want duplicates
        ))
        k+=1

    # Create color-coded tick labels
    dimensions = subset['dimension_cut'].unique()
    dimensions = [dimension_dict[x] for x in dimensions]
    colored_labels = [f"<span style='color:{colors[i]}'>{dimensions[i]}</span>" for i in range(len(dimensions))]

    fig.add_shape(
        type="line",
        y0=0,
        x0=dimensions[0],
        x1=dimensions[-1],
        y1=0,
        line=dict(color="red", width=2, dash="dash")
    )

    # Update layout
    fig.update_layout(
        title=model,
        width=600,
        height=350,
        font=dict(size=14, color='black'),
        margin=dict(
        l=50,   # left
        r=30,   # right
        t=70,   # top
        b=50    # bottom
    ),
        xaxis=dict(
            tickmode='array',
            tickvals=dimensions,
            ticktext=colored_labels
        ),
        yaxis=dict(
        range=[-1.15, 1.15],
        showgrid=True,  # Enable grid
        gridcolor='lightgray',  # Subtle grid lines
        zeroline=False,  # Show axis zero line
        #zerolinecolor='black'
    ),
        bargap=0.3,
    )
    fig.show()
    fig.write_image(f"../img/decision_{model}.pdf")


Aya 32b


Aya 8B


Gemma-3 12B


Llama-3.1-8B


Qwen2.5-72B


Qwen2.5-7B


## Plot marked personas results

In [12]:
df = pd.read_csv('../output/decision_stories/eval/results.csv')

In [13]:
df['dimension'] = df['Task'].str[:-2]

In [14]:
group = df.groupby(['Target Group','Model', 'dimension'])['Word'].count()

In [15]:
grouped = df.groupby(['Target Group','Model', 'dimension', 'Word']).agg(
    Count=("Word", "count"),
    Total_Value=("Value", "sum")
).reset_index()

In [33]:
df.dimension.unique()

array(['urban', 'open_to_experience', 'friendly', 'educated',
       'conscientiousness', 'calm'], dtype=object)

In [40]:
for model in df['Model'].unique():
    for dimension in ['educated']:
        subset = grouped[(grouped['Model'] == model) & (grouped['dimension'] == dimension)]
        subset = subset[subset['Count'] > 1]
        subset['Total_Value'] = subset['Total_Value'].round(2)

        # Create traces
        fig = go.Figure()

        # Group 1 → bars to the right
        group1 = subset[subset["Target Group"] == 'standard']
        group1 = group1.sort_values(by='Total_Value', ascending=False)
        fig.add_trace(go.Bar(
            y=group1["Word"],
            x=-group1["Total_Value"],
            name="Standard",
            orientation='h',
            marker=dict(
                color='rgba(173, 216, 230, 0.8)',  # pastel blue with opacity
                line=dict(color='rgba(0, 0, 139, 0.6)', width=1.5),
            ),
            text=group1["Total_Value"],
            textposition='auto'
        ))

        # Group 2 → bars to the left (negated values)
        group2 = subset[subset["Target Group"] == 'dialect']
        group2 = group2.sort_values(by='Total_Value')
        fig.add_trace(go.Bar(
            y=group2["Word"],
            x=group2["Total_Value"],  # Negative for left direction
            name="Dialect",
            orientation='h',
            marker=dict(
                color='rgba(255, 182, 193, 0.8)',  # pastel red with opacity
                line=dict(color='rgba(139, 0, 0, 0.6)', width=1.5),
            ),
            text=group2["Total_Value"],  # Still show positive value
            textposition='auto'
        ))

        # Layout tweaks
        fig.update_layout(
            title=model,
            width=400,
            height=550,
            barmode='relative',
            barcornerradius=5,
            xaxis=dict(
                zeroline=True,
                zerolinewidth=2,
                zerolinecolor='gray'
            ),
            legend=dict(
                orientation="h",     # Horizontal legend
                yanchor="bottom",
                y=-0.2,              # Push legend below the plot
                xanchor="center",
                x=0.5
            ),
            margin=dict(l=100, r=10, t=40, b=20),
        )

        fig.show()

In [30]:
df.head()

Unnamed: 0,Target Group,Model,Task,Word,Value,Word+Value,dimension
0,standard,gemma-3-12b-it,urban-3,concise,5.55,"concise (5.55),",urban
1,standard,gemma-3-12b-it,urban-3,technical,5.11,"technical (5.11),",urban
2,standard,gemma-3-12b-it,urban-3,standard,4.86,"standard (4.86),",urban
3,standard,gemma-3-12b-it,urban-3,suitable,4.57,"suitable (4.57),",urban
4,standard,gemma-3-12b-it,urban-3,clear,4.48,"clear (4.48),",urban


In [31]:
for model in df['Model'].unique():
    for task in ['calm-1']:
        subset = df[(df['Model'] == model) & (df['Task'] == task)]
        #subset = subset[subset['Count'] > 1]

        # Create traces
        fig = go.Figure()

        # Group 1 → bars to the right
        group1 = subset[subset["Target Group"] == 'standard']
        group1 = group1.sort_values(by='Value', ascending=False)
        fig.add_trace(go.Bar(
            y=group1["Word"],
            x=-group1["Value"],
            name="Standard",
            orientation='h',
            marker=dict(
                color='rgba(173, 216, 230, 0.8)',  # pastel blue with opacity
                line=dict(color='rgba(0, 0, 139, 0.6)', width=1.5),
            ),
            text=group1["Value"],
            textposition='auto'
        ))

        # Group 2 → bars to the left (negated values)
        group2 = subset[subset["Target Group"] == 'dialect']
        group2 = group2.sort_values(by='Value')
        fig.add_trace(go.Bar(
            y=group2["Word"],
            x=group2["Value"],  # Negative for left direction
            name="Dialect",
            orientation='h',
            marker=dict(
                color='rgba(255, 182, 193, 0.8)',  # pastel red with opacity
                line=dict(color='rgba(139, 0, 0, 0.6)', width=1.5),
            ),
            text=group2["Value"],  # Still show positive value
            textposition='auto'
        ))

        # Layout tweaks
        fig.update_layout(
            title=model,
            width=400,
            height=550,
            barmode='relative',
            barcornerradius=5,
            xaxis=dict(
                zeroline=True,
                zerolinewidth=2,
                zerolinecolor='gray'
            )
        )

        fig.show()