In [1]:
import pandas as pd

In [2]:
# Comparison records loaded from the metadata CSV; later cells read the
# left_id, right_id, winner and category columns from this frame.
data_path = "/data_nas/cehou/LLM_safety/PlacePulse2.0/metadata/final_data.csv"
data = pd.read_csv(filepath_or_buffer=data_path)

In [None]:
import numpy as np

# Define the function to calculate P_i and N_i for each image
def calculate_P_N(pi, ni, ei):
    """
    Calculate the positive (P) and negative (N) rates for image i.

    Each rate is the matching count divided by the total number of
    comparisons the image took part in (pi + ei + ni).

    :param pi: Number of times image i was selected
    :param ni: Number of times image i was not selected
    :param ei: Number of times image i was equal to another image in comparison
    :return: P_i, N_i; (0.0, 0.0) when the image has no comparisons at all
    """
    total = pi + ei + ni
    # An image that never appeared in a comparison has no defined rate; report
    # 0.0 for both instead of raising ZeroDivisionError. The ndim check keeps
    # array inputs on the original element-wise division path.
    if np.ndim(total) == 0 and total == 0:
        return 0.0, 0.0
    return pi / total, ni / total

# Define the function to calculate Q_i for each image
def calculate_Q_i(pi, ni, ei, p_k, n_k, nk1, nk2):
    """
    Calculate the Q-score for image i.

    Q_i = (10 / 3) * (P_i + (1 / pi) * sum(p_k) - (1 / ni) * sum(n_k) + 1)

    When pi (or ni) is zero the corresponding averaged term is taken as 0
    instead of dividing by zero, so an image that was never selected (or
    never rejected) still gets a finite score.

    :param pi: Number of times image i was selected
    :param ni: Number of times image i was not selected
    :param ei: Number of times image i was equal to another image in comparison
    :param p_k: P_k1 values for images selected in comparisons (array or list); summed with np.sum
    :param n_k: N_k2 values for images not selected in comparisons (array or list); summed with np.sum
    :param nk1: Number of k1 comparisons (for selected); accepted but not used, the sum is normalised by pi
    :param nk2: Number of k2 comparisons (for not selected); accepted but not used, the sum is normalised by ni
    :return: Q_i
    """
    # Only the positive rate enters the formula; the negative rate is discarded.
    P_i, _ = calculate_P_N(pi, ni, ei)
    # Guard each normalisation separately: either count can be zero on real data.
    selected_term = (1 / pi) * np.sum(p_k) if pi else 0.0
    not_selected_term = (1 / ni) * np.sum(n_k) if ni else 0.0
    Q_i = (10 / 3) * (P_i + selected_term - not_selected_term + 1)
    return Q_i

# Example usage
# Toy counts for one image: selected 5 times, not selected 3 times, equal in 2 comparisons
pi, ni, ei = 5, 3, 2
# Example positive / negative rates of other images
p_k = [0.4, 0.5, 0.6]
n_k = [0.3, 0.2, 0.4]
nk1, nk2 = len(p_k), len(n_k)

# Calculate Q_i for this example; the bare name on the last line shows it as the cell output
Q_i = calculate_Q_i(pi, ni, ei, p_k, n_k, nk1, nk2)
Q_i


In [42]:
select_id = "51413b2ffdc9f049260058ca"
# Keep every comparison in which the selected image appears on either side
data_test = data.loc[data['left_id'].eq(select_id) | data['right_id'].eq(select_id)]

# Group those comparisons by category and pull out the 'safety' ones
data_test_group = data_test.groupby('category')
test_group = data_test_group.get_group('safety')

In [44]:
# Resolve the winning image id of every comparison in a vectorised way.
# Building a new frame with assign() (instead of writing row by row through
# .loc on the get_group() result) avoids a possible SettingWithCopyWarning,
# keeps the cell safe to re-run, and still creates the column when
# test_group is empty.
winner_side = test_group["winner"]
# Start from "equal" so any value other than 'left'/'right' counts as a tie.
winner_id = pd.Series("equal", index=test_group.index, dtype=object)
winner_id = winner_id.mask(winner_side == "left", test_group["left_id"])
winner_id = winner_id.mask(winner_side == "right", test_group["right_id"])
test_group = test_group.assign(winner_id=winner_id)

# pi: comparisons won by select_id, ei: ties, ni: the remaining comparisons
pi = int((test_group["winner_id"] == select_id).sum())
ei = int((test_group["winner_id"] == "equal").sum())
ni = len(test_group) - pi - ei

0 3 1
