In [49]:
#imports
import pandas as pd

In [50]:
# Load ./output.csv and, in the same chain, append one column per variant
# (2c / 3c) holding granular score minus binary score.
df = pd.read_csv('./output.csv').assign(
    diff_2c=lambda scores: scores['score_granular_2c'] - scores['score_binary_2c'],
    diff_3c=lambda scores: scores['score_granular_3c'] - scores['score_binary_3c'],
)

# Define a function to calculate the absolute difference range
def get_diff_abs(diff):
    """Map a score difference onto the label of the range it falls in.

    Parameters
    ----------
    diff : number
        The difference to classify.

    Returns
    -------
    str
        "0" for an exact zero; "1to100", "101to500", "501to1000" or
        "1001to2000" for positive values up to and including the upper
        bound in the label; "greaterThan2000" above 2000; "invalid" for
        anything that is neither zero nor greater than zero (negative
        values, NaN).
    """
    if diff == 0:
        return "0"
    # `not diff > 0` (rather than `diff < 0`) so NaN also lands here.
    if not diff > 0:
        return "invalid"

    # Inclusive upper bounds in ascending order; the first one that fits wins.
    upper_bounds = (
        (100, "1to100"),
        (500, "101to500"),
        (1000, "501to1000"),
        (2000, "1001to2000"),
    )
    for limit, label in upper_bounds:
        if diff <= limit:
            return label
    return "greaterThan2000"

# Label every score difference with the range it falls in (see get_diff_abs).
for source_col, bucket_col in (
    ('diff_2c', 'diff_2c_abs'),
    ('diff_3c', 'diff_3c_abs'),
):
    df[bucket_col] = df[source_col].apply(get_diff_abs)

# Express each difference as a fraction of the matching binary score,
# rounded to three decimals.
for source_col, baseline_col, ratio_col in (
    ('diff_2c', 'score_binary_2c', 'diff_2c_rel'),
    ('diff_3c', 'score_binary_3c', 'diff_3c_rel'),
):
    df[ratio_col] = df[source_col].div(df[baseline_col]).round(3)

# Last expression of the cell: renders the first rows as a table.
df.head()

Unnamed: 0,u_cost_rel,u_consent_rel,u_content_rel,s_cost_rel,s_consent_rel,s_content_rel,score_binary_2c,score_binary_3c,score_granular_2c,score_granular_3c,diff_2c,diff_3c,diff_2c_abs,diff_3c_abs,diff_2c_rel,diff_3c_rel
0,0.0,0.1,0.9,0.0,0.1,0.9,9000,9000,9025,9025,25,25,1to100,1to100,0.003,0.003
1,0.0,0.1,0.9,0.0,0.2,0.8,9000,9000,9000,9000,0,0,0,0,0.0,0.0
2,0.0,0.1,0.9,0.0,0.3,0.7,9000,9000,9000,9000,0,0,0,0,0.0,0.0
3,0.0,0.1,0.9,0.0,0.4,0.6,9000,9000,9000,9000,0,0,0,0,0.0,0.0
4,0.0,0.1,0.9,0.0,0.5,0.5,9000,9000,9000,9000,0,0,0,0,0.0,0.0


In [51]:
# How often does each difference range occur? Reported for 2C first, then 3C.
for heading, bucket_col in (
    ('How many points are gained in 2C negotiations?', 'diff_2c_abs'),
    ('\nHow many points are gained in 3C negotiations?', 'diff_3c_abs'),
):
    print(heading)
    print(df[bucket_col].value_counts())

How many points are gained in 2C negotiations?
0                  1171
1to100              236
101to500            200
501to1000            60
1001to2000           13
greaterThan2000       1
Name: diff_2c_abs, dtype: int64

How many points are gained in 3C negotiations?
0                  1182
1to100              241
101to500            198
501to1000            46
1001to2000           13
greaterThan2000       1
Name: diff_3c_abs, dtype: int64


In [52]:
print("Statistics for increases of creating more granular consent and content options:")

# Same seven summary figures for each relative-difference column,
# every value printed with three decimals.
for heading, ratio_col in (("\nIn 2C", 'diff_2c_rel'), ("\nIn 3C:", 'diff_3c_rel')):
    ratios = df[ratio_col]
    print(heading)
    for label, value in (
        ("Mean", ratios.mean()),
        ("Standard Deviation", ratios.std()),
        ("Minimum", ratios.min()),
        ("25th Percentile", ratios.quantile(0.25)),
        ("Median", ratios.median()),
        ("75th Percentile", ratios.quantile(0.75)),
        ("Maximum", ratios.max()),
    ):
        print(f"{label}: {value:.3f}")

Statistics for increases of creating more granular consent and content options:

In 2C
Mean: 0.028
Standard Deviation: 0.109
Minimum: 0.000
25th Percentile: 0.000
Median: 0.000
75th Percentile: 0.006
Maximum: 2.025

In 3C:
Mean: 0.019
Standard Deviation: 0.083
Minimum: 0.000
25th Percentile: 0.000
Median: 0.000
75th Percentile: 0.003
Maximum: 2.025


In [53]:
# Collapse the three relevancy columns of each party into one
# "cost-consent-content" string so that combinations can be counted.
u_rel_cols = ('u_cost_rel', 'u_consent_rel', 'u_content_rel')
s_rel_cols = ('s_cost_rel', 's_consent_rel', 's_content_rel')

df['u_issue_rel'] = df.apply(
    lambda row: '-'.join(str(row[col]) for col in u_rel_cols),
    axis=1,
)
df['s_issue_rel'] = df.apply(
    lambda row: '-'.join(str(row[col]) for col in s_rel_cols),
    axis=1,
)

# Show the first rows to check the new columns
df.head()

Unnamed: 0,u_cost_rel,u_consent_rel,u_content_rel,s_cost_rel,s_consent_rel,s_content_rel,score_binary_2c,score_binary_3c,score_granular_2c,score_granular_3c,diff_2c,diff_3c,diff_2c_abs,diff_3c_abs,diff_2c_rel,diff_3c_rel,u_issue_rel,s_issue_rel
0,0.0,0.1,0.9,0.0,0.1,0.9,9000,9000,9025,9025,25,25,1to100,1to100,0.003,0.003,0.0-0.1-0.9,0.0-0.1-0.9
1,0.0,0.1,0.9,0.0,0.2,0.8,9000,9000,9000,9000,0,0,0,0,0.0,0.0,0.0-0.1-0.9,0.0-0.2-0.8
2,0.0,0.1,0.9,0.0,0.3,0.7,9000,9000,9000,9000,0,0,0,0,0.0,0.0,0.0-0.1-0.9,0.0-0.3-0.7
3,0.0,0.1,0.9,0.0,0.4,0.6,9000,9000,9000,9000,0,0,0,0,0.0,0.0,0.0-0.1-0.9,0.0-0.4-0.6
4,0.0,0.1,0.9,0.0,0.5,0.5,9000,9000,9000,9000,0,0,0,0,0.0,0.0,0.0-0.1-0.9,0.0-0.5-0.5


In [54]:
# Which relevancy combinations show up most often among the rows with the
# highest gains from granular negotiation (relative difference above 0.2)?
filtered_2c_df = df.loc[df['diff_2c_rel'] > 0.2]
filtered_3c_df = df.loc[df['diff_3c_rel'] > 0.2]

# Top five combinations, first from the user columns, then from the site columns.
for heading, combo_col in (
    ('most common relevancies by user', 'u_issue_rel'),
    ('most common relevancies by site', 's_issue_rel'),
):
    print(heading)
    print('\n2C')
    print(filtered_2c_df[combo_col].value_counts().head(), '\n')
    print('3C')
    print(filtered_3c_df[combo_col].value_counts().head(), '\n')


most common relevancies by user

2C
0.0-0.7-0.3    11
0.0-0.8-0.2    11
0.1-0.7-0.2    11
0.1-0.8-0.1    11
0.0-0.9-0.1     9
Name: u_issue_rel, dtype: int64 

3C
0.0-0.7-0.3    5
0.0-0.8-0.2    5
0.0-0.9-0.1    5
0.1-0.7-0.2    5
0.1-0.8-0.1    5
Name: u_issue_rel, dtype: int64 

most common relevancies by site

2C
0.0-0.7-0.3    5
0.0-0.8-0.2    5
0.0-0.9-0.1    5
0.1-0.7-0.2    5
0.1-0.8-0.1    5
Name: s_issue_rel, dtype: int64 

3C
0.0-0.7-0.3    6
0.0-0.8-0.2    5
0.0-0.9-0.1    5
0.1-0.7-0.2    5
0.1-0.8-0.1    5
Name: s_issue_rel, dtype: int64 



In [55]:
# Which relevancy combinations show up most often among the rows where
# granular negotiation brings no gain (relative difference equal to 0)?
filtered_2c_df = df.loc[df['diff_2c_rel'] == 0]
filtered_3c_df = df.loc[df['diff_3c_rel'] == 0]

# Top five combinations, first from the user columns, then from the site columns.
for heading, combo_col in (
    ('most common relevancies by user', 'u_issue_rel'),
    ('most common relevancies by site', 's_issue_rel'),
):
    print(heading)
    print('\n2C')
    print(filtered_2c_df[combo_col].value_counts().head(), '\n')
    print('3C')
    print(filtered_3c_df[combo_col].value_counts().head(), '\n')


most common relevancies by user

2C
0.0-0.1-0.9    40
0.1-0.1-0.8    40
0.7-0.1-0.2    40
0.6-0.1-0.3    40
0.5-0.1-0.4    40
Name: u_issue_rel, dtype: int64 

3C
0.8-0.1-0.1    38
0.6-0.1-0.3    38
0.5-0.1-0.4    38
0.1-0.1-0.8    38
0.7-0.1-0.2    36
Name: u_issue_rel, dtype: int64 

most common relevancies by site

2C
0.2-0.1-0.7    41
0.4-0.1-0.5    41
0.3-0.1-0.6    41
0.1-0.1-0.8    41
0.6-0.1-0.3    35
Name: s_issue_rel, dtype: int64 

3C
0.8-0.1-0.1    38
0.7-0.1-0.2    37
0.6-0.1-0.3    37
0.0-0.1-0.9    37
0.1-0.1-0.8    36
Name: s_issue_rel, dtype: int64 

