In [None]:
import numpy as np
import pandas as pd
from statsmodels.formula.api import wls
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.api.types import CategoricalDtype

### Functions

In [None]:
def preprocess_profiles(profiles):
    """Cast the analysis columns of ``profiles`` to their working dtypes.

    The frame is modified in place and the same object is returned.

    * ``Saved`` becomes float.
    * ``ScenarioType`` and ``ScenarioTypeStrict`` become plain categoricals.
    * ``AttributeLevel``, ``Barrier`` and ``CrossingSignal`` become *ordered*
      categoricals with an explicit level order; the first level of each is
      the one listed first below.
    """
    profiles['Saved'] = profiles['Saved'].astype(float)
    profiles['ScenarioType'] = profiles['ScenarioType'].astype('category')

    # Column -> explicit level order for the ordered categoricals.
    ordered_levels = {
        'AttributeLevel': ["Rand", "Male", "Female", "Fat", "Fit", "Low", "High",
                           "Old", "Young", "Less", "More", "Pets", "Hoomans"],
        'Barrier': [1, 0],
        'CrossingSignal': [0, 2, 1],
    }
    for column, levels in ordered_levels.items():
        profiles[column] = pd.Categorical(profiles[column], categories=levels, ordered=True)

    profiles['ScenarioTypeStrict'] = profiles['ScenarioTypeStrict'].astype('category')
    return profiles

In [None]:
def calc_weights_actual(Tr, X):
    """Return inverse-frequency weights for the distinct values of ``X``.

    Each distinct value of ``X`` (in the sorted order produced by
    ``np.unique``) gets the weight ``max_count / count``, so the most frequent
    value has weight 1 and rarer values have proportionally larger weights.

    Parameters
    ----------
    Tr : array-like
        Accepted for interface compatibility; it does not influence the result.
    X : array-like
        Values whose frequencies determine the weights.

    Returns
    -------
    numpy.ndarray
        One float weight per distinct value of ``X``; empty when ``X`` is empty.
    """
    # The previous body divided by the undefined name ``d3N`` (NameError on
    # every call); the per-value counts are the intended denominator.
    _, counts = np.unique(np.asarray(X), return_counts=True)
    if counts.size == 0:
        # np.max would raise on an empty array.
        return np.array([], dtype=float)
    return counts.max() / counts

In [None]:
def calcWeightsTheoretical(profiles):
    """Return one weight per row of ``profiles``: the reciprocal of ``CalcTheoreticalInt(row)``.

    ``CalcTheoreticalInt`` is applied row-wise (``axis=1``), so each row must
    expose the columns that function reads.
    """
    row_value = profiles.apply(CalcTheoreticalInt, axis=1)
    return 1 / row_value

def CalcTheoreticalInt(X):
    """Return the theoretical value ``p`` for one profile row.

    ``X`` is any mapping-like row (e.g. a pandas row or a dict) with the keys
    ``"Intervention"`` and ``"Barrier"``; ``"PedPed"`` and ``"CrossingSignal"``
    are only read when ``Barrier == 0``.

    * ``Barrier != 0``: ``p`` is 0.2 without intervention, 0.32 with it.
    * ``Barrier == 0``: ``p`` is a base value (chosen by ``Intervention`` and
      ``PedPed``) multiplied by a factor chosen by ``CrossingSignal``.
    """
    no_intervention = X["Intervention"] == 0

    # Barrier scenarios do not depend on PedPed or CrossingSignal.
    if not (X["Barrier"] == 0):
        return 0.2 if no_intervention else 0.32

    ped_vs_ped = X["PedPed"] == 1
    signal = X["CrossingSignal"]

    # Base value plus the factors used for signal == 1 and for "any other" signal.
    if no_intervention:
        base = 0.48 if ped_vs_ped else 0.32
        factor_signal_1, factor_other = 0.2, 0.32
    elif ped_vs_ped:
        base = 0.48
        factor_signal_1, factor_other = 0.32, 0.2
    else:
        base = 0.2
        factor_signal_1, factor_other = 0.2, 0.32

    # signal == 0 always uses 0.48, whatever the branch above.
    if signal == 0:
        factor = 0.48
    elif signal == 1:
        factor = factor_signal_1
    else:
        factor = factor_other
    return base * factor

In [None]:
def _fit_weighted_effect(data, formula):
    """Fit one weighted least-squares model and return ``(estimate, std_error)``.

    The weights from ``calcWeightsTheoretical(data)`` are stored in
    ``data['BC.weights']`` (``data`` is modified in place) and passed to
    ``wls``. The returned pair is the second entry (``iloc[1]``) of the fitted
    ``params`` and ``bse``.
    """
    data['BC.weights'] = calcWeightsTheoretical(data)
    fit = wls(formula, data=data, weights=data['BC.weights']).fit()
    return fit.params.iloc[1], fit.bse.iloc[1]


def GetMainEffectSizes(profiles, savedata, r):
    """Estimate the nine main effects on ``Saved`` with weighted least squares.

    Parameters
    ----------
    profiles : pandas.DataFrame
        Profile rows. ``AttributeLevel`` must already be categorical (its
        category order is used to pick the two levels of each factor). A
        ``'BC.weights'`` column is added to this frame in place.
    savedata : any
        Unused; kept for interface compatibility.
    r : int
        Number of rows of the result. Rows 0-8 are written, so ``r`` must be
        at least 9.

    Returns
    -------
    numpy.ndarray
        Shape ``(r, 2)``; column 0 is the estimate and column 1 its standard
        error. Row order: intervention, barrier, crossing signal, then the six
        scenario types in ``lev`` order.
    """
    Coeffs = np.full((r, 2), np.nan)
    # Read the declared category order directly: ``.unique().categories``
    # dropped unused categories on pandas < 1.3, which would shift the slices
    # taken below.
    AttLevels = list(profiles['AttributeLevel'].cat.categories)
    lev = ["Gender", "Fitness", "Social Value", "Age", "Utilitarian", "Species"]

    # Intervention: fitted on the full frame (this also writes profiles['BC.weights']).
    Coeffs[0] = _fit_weighted_effect(profiles, 'Saved ~ C(Intervention)')

    # Relationship to vehicle: rows with CrossingSignal == 0 and PedPed == 0.
    profile_Relation = profiles[(profiles['CrossingSignal'] == 0) & (profiles['PedPed'] == 0)].copy()
    Coeffs[1] = _fit_weighted_effect(profile_Relation, 'Saved ~ C(Barrier)')

    # Legality: rows with a crossing signal (1 or 2) and PedPed == 1.
    profile_Legality = profiles[(profiles['CrossingSignal'] != 0) & (profiles['PedPed'] == 1)].copy()
    profile_Legality['CrossingSignal'] = profile_Legality['CrossingSignal'].astype(
        CategoricalDtype(categories=[1, 2]))
    Coeffs[2] = _fit_weighted_effect(profile_Legality, 'Saved ~ C(CrossingSignal)')

    # Six factors: each uses one consecutive pair of attribute levels,
    # skipping the first category (index 0).
    for i, scenario in enumerate(lev):
        Temp = profiles[(profiles['ScenarioType'] == scenario) &
                        (profiles['ScenarioTypeStrict'] == scenario)].copy()
        level_pair = list(AttLevels[(1 + i * 2):(1 + (i + 1) * 2)])
        Temp['AttributeLevel'] = Temp['AttributeLevel'].astype(CategoricalDtype(categories=level_pair))
        Coeffs[i + 3] = _fit_weighted_effect(Temp, 'Saved ~ C(AttributeLevel)')

    return Coeffs

In [None]:
def GetPlotData(Coeffs, isMainFig, r):
    """Turn a coefficient array into a labelled frame ready for plotting.

    Parameters
    ----------
    Coeffs : array-like with two columns
        Estimates and standard errors, one row per effect (nine rows, to match
        the nine labels).
    isMainFig : bool
        If truthy use the short labels ("Intervention", ...); otherwise the
        long "Sparing X -> Sparing Y" labels.
    r : int
        Number of leading rows whose labels become categories.

    Returns
    -------
    pandas.DataFrame
        Columns ``Estimates``, ``se`` and ``Label``; ``Label`` is an ordered
        categorical whose categories run from the largest to the smallest
        estimate among the first ``r`` rows.
    """
    if isMainFig:
        labels = [
            "Intervention",
            "Relation to AV",
            "Law",
            "Gender",
            "Fitness",
            "Social Status",
            "Age",
            "No. Characters",
            "Species"
        ]
    else:
        labels = [
            "Preference for action -> \n Preference for inaction",
            "Sparing Passengers -> \n Sparing Pedestrians",
            "Sparing the Unlawful -> \n Sparing the Lawful",
            "Sparing Males -> \n Sparing Females",
            "Sparing the Large -> \n Sparing the Fit",
            "Sparing Lower Status -> \n Sparing Higher Status",
            "Sparing the Elderly -> \n Sparing the Young",
            "Sparing Fewer Characters -> \n Sparing More Characters",
            "Sparing Pets -> \n Sparing Humans"
        ]

    plotdata = pd.DataFrame(Coeffs, columns=["Estimates", "se"])
    plotdata["Label"] = labels

    # Labels of the first r rows, ordered by ascending estimate.
    head = plotdata.iloc[:r]
    ascending = head["Label"][head["Estimates"].argsort()].tolist()

    # Categories are stored in descending-estimate order.
    plotdata["Label"] = pd.Categorical(plotdata["Label"], categories=ascending[::-1], ordered=True)

    for column in ("Estimates", "se"):
        plotdata[column] = pd.to_numeric(plotdata[column], errors='coerce')

    return plotdata

In [None]:
def GetMainEffectSizes_Util(profiles):
    """Estimate the "Utilitarian" effect separately for character differences 1-4.

    For each ``i`` in 1..4 the rows with ``ScenarioType`` and
    ``ScenarioTypeStrict`` equal to ``"Utilitarian"`` and
    ``DiffNumberOFCharacters == i`` are fitted with weighted least squares
    (``Saved ~ C(AttributeLevel)``), using the attribute levels at positions
    9 and 10 of the ``AttributeLevel`` category order.

    Parameters
    ----------
    profiles : pandas.DataFrame
        Profile rows; not modified (each subset is copied before columns are
        added).

    Returns
    -------
    numpy.ndarray
        Shape ``(4, 2)``; row ``i-1`` holds the estimate and standard error for
        difference ``i``. A difference with no matching rows yields ``0, 0``.
    """
    Coeffs = np.full((4, 2), np.nan)
    AttLevels = list(profiles['AttributeLevel'].astype('category').cat.categories)
    is_utilitarian = ((profiles['ScenarioType'] == "Utilitarian") &
                      (profiles['ScenarioTypeStrict'] == "Utilitarian"))
    for i in range(1, 5):
        Temp = profiles[is_utilitarian & (profiles['DiffNumberOFCharacters'] == i)].copy()
        if len(Temp) == 0:
            # Nothing to fit for this variant.
            Coeffs[i-1, 0] = 0
            Coeffs[i-1, 1] = 0
            continue
        Temp['AttributeLevel'] = pd.Categorical(Temp['AttributeLevel'], categories=AttLevels[9:11])
        # Write and read the SAME column name. The weights used to be stored
        # under 'BC_weights' while the fit read 'BC.weights', which only
        # existed if another function had already added it to ``profiles``.
        Temp['BC.weights'] = calcWeightsTheoretical(Temp)
        model = wls('Saved ~ C(AttributeLevel)', data=Temp, weights=Temp['BC.weights']).fit()
        Coeffs[i-1, 0] = model.params.iloc[1]
        Coeffs[i-1, 1] = model.bse.iloc[1]
    return Coeffs



In [None]:
def GetPlotData_Util(Coeffs):
    """Wrap the four utilitarian coefficients in a frame for plotting.

    Returns a DataFrame with columns ``Estimates`` and ``se`` (from
    ``Coeffs``), ``Variant`` (ordered categorical 1-4) and ``Label``
    (categorical, always ``"No. Characters"``). ``Coeffs`` must have four rows.
    """
    variant_ids = pd.Categorical(range(1, 5), ordered=True)
    shared_label = pd.Categorical(["No. Characters"] * 4)
    return pd.DataFrame(Coeffs, columns=["Estimates", "se"]).assign(
        Variant=variant_ids,
        Label=shared_label,
    )

In [None]:
def PlotAndSave(plotdata_main, isMainFig, filename, plotdata_util, title=None):
    """Draw the effect-size figure, save it as ``<filename>.png`` and show it.

    Parameters
    ----------
    plotdata_main : pandas.DataFrame
        Columns ``Estimates``, ``se`` and ``Label``. Every row except
        ``"No. Characters"`` is drawn as a grey bar with an error bar; the
        ``"No. Characters"`` row is drawn as an error bar plus a black dot.
    isMainFig : any
        Unused; kept for interface compatibility.
    filename : str
        Output path without extension; ``".png"`` is appended.
    plotdata_util : pandas.DataFrame
        Columns ``Estimates``, ``Label`` and ``Variant``. The row with the
        largest absolute estimate is drawn as a bar; every row is drawn as a
        white circle with its ``Variant`` written inside.
    title : str, optional
        Figure title. It is applied before the layout is tightened and the
        file is written, so it appears in the saved PNG as well as on screen.

    Notes
    -----
    Reads the module-level globals ``ESTIMATES`` and ``LABELS`` for the red
    reference markers; they must be defined before this function is called.
    """
    plotdata_main_human = pd.DataFrame({
        'Estimates': ESTIMATES,
        'Label': LABELS
    })

    plotdata_bars = plotdata_main[plotdata_main['Label'] != "No. Characters"].copy()
    plotdata_points = plotdata_main[plotdata_main['Label'] == "No. Characters"].copy()
    plt.figure(figsize=(9, 6))
    sns.barplot(data=plotdata_bars, x='Estimates', y='Label', color='gray', edgecolor='black', order=plotdata_bars['Label'])
    plt.errorbar(plotdata_bars['Estimates'], plotdata_bars['Label'],
                 xerr=plotdata_bars['se'], fmt='none', c='black', capsize=2)

    # Bar for "No. Characters" comes from the utilitarian variant with the
    # largest absolute estimate.
    max_util = plotdata_util.loc[plotdata_util['Estimates'].abs().idxmax()]
    sns.barplot(data=plotdata_util[plotdata_util['Estimates'] == max_util['Estimates']],
                x='Estimates', y='Label', color='gray', edgecolor='black')
    plt.errorbar(plotdata_points['Estimates'], plotdata_points['Label'],
                 xerr=plotdata_points['se'], fmt='none', c='black', capsize=2)
    plt.scatter(plotdata_points['Estimates'], plotdata_points['Label'], color='black', s=200)
    # One white marker per utilitarian variant, labelled with the variant number.
    plt.scatter(plotdata_util['Estimates'], plotdata_util['Label'], color='black', s=200, facecolors='white')
    for i, row in plotdata_util.iterrows():
        plt.text(row['Estimates'], row['Label'], str(row['Variant']),
                 ha='center', va='center', color='black', size=10)

    plt.axvline(x=0, color='black', linewidth=0.4)
    # Red tick marks: reference estimates taken from the ESTIMATES/LABELS globals.
    plt.scatter(plotdata_main_human['Estimates'], plotdata_main_human['Label'], color='red', s=100, marker='|')
    plt.xlim(-1.2, 1.2)
    plt.xlabel("\nΔP")
    plt.yticks(rotation=45)
    plt.ylabel("")
    plt.gca().invert_yaxis()

    # Text at the left/right edge of each row naming the two poles of the effect.
    annotations = {
        "Intervention": ("Action", "Inaction"),
        "Relation to AV": ("Passengers", "Pedestrians"),
        "Law": ("Unlawful", "Lawful"),
        "Gender": ("Males", "Females"),
        "Fitness": ("Large", "Fit"),
        "Social Status": ("Low status", "High status"),
        "Age": ("Old", "Young"),
        "No. Characters": ("Few", "More"),
        "Species": ("Pets", "Humans")
    }

    for label, (left, right) in annotations.items():
        plt.annotate(left, xy=(-1.18, label), ha='left', va='center', color='black', size=12)
        plt.annotate(right, xy=(1.18, label), ha='right', va='center', color='black', size=12)

    # Set the title BEFORE tight_layout/savefig; doing it afterwards left the
    # saved PNG without a title.
    if title:
        plt.title(title)
    plt.tight_layout()
    plt.savefig(f"{filename}.png")
    plt.show()

### Real Code

In [None]:
# Load the shared-responses CSV (path is relative to the working directory)
# and cast its columns with preprocess_profiles.
profiles = pd.read_csv("eng_gigachat_shared_responses_1.5k.csv")
profiles = preprocess_profiles(profiles)

In [None]:
# Reference estimates per language key, one value per entry of LABELS (same order).
# NOTE(review): presumably the human baseline values — PlotAndSave draws them
# as red markers from a frame named ``plotdata_main_human``; confirm the source.
estimates_dict = {
    "eng": [0.061, 0.097, 0.353, 0.119, 0.160, 0.345, 0.497, 0.651, 0.585],
    "rus": [0.068, 0.045, 0.385, 0.037, 0.156, 0.340, 0.449, 0.455, 0.606],
}

# ESTIMATES and LABELS are read as globals inside PlotAndSave, so this cell
# must run before PlotAndSave is called.
ESTIMATES = estimates_dict["eng"]
LABELS = ["Intervention", "Relation to AV", "Law", "Gender", "Fitness", "Social Status", "Age", "No. Characters", "Species"]

In [None]:
# Fit the nine main effects (r=9 rows) and attach the short labels used in the main figure.
Coeffs_main = GetMainEffectSizes(profiles, True, 9)
plotdata_main = GetPlotData(Coeffs_main, True, 9)
# Last expression: display the resulting table.
plotdata_main

Unnamed: 0,Estimates,se,Label
0,-0.095816,0.003097,Intervention
1,-0.650319,0.003932,Relation to AV
2,0.004492,0.006332,Law
3,0.350938,0.007053,Gender
4,-0.297202,0.007266,Fitness
5,-0.317237,0.007263,Social Status
6,0.290208,0.007345,Age
7,-0.393917,0.006881,No. Characters
8,0.376322,0.006997,Species


In [None]:
# Per-variant "Utilitarian" effects (character differences 1-4) and their plotting frame.
Coeffs_util = GetMainEffectSizes_Util(profiles)
plotdata_util = GetPlotData_Util(Coeffs_util)

In [None]:
# Render the figure and write it to "MainChangePr.png".
# NOTE(review): the title mentions "Phi3.1 Ru-Eng" while the CSV loaded above
# is named "eng_gigachat_..." — confirm the title matches the data being plotted.
PlotAndSave(plotdata_main, True, "MainChangePr", plotdata_util, title="Phi3.1 Ru-Eng difference")