### use the stimuli of study 2

In [328]:
import pandas as pd
dat = pd.read_csv('final_data_2.csv')
df = pd.DataFrame(dat)



In [None]:

filtered_df = df[(df['test_part'].isin(['cs'])) & (df['skew'].isin(['lr', 'rl', 'ns'])) & (df['Prolific_ID'].isin(['5638e8a444e8c8000ee86a35']))]

# Define the skewness calculation function
def calculate_skewness(probabilities, outcomes):
    # Mean (expected value)
    mu = np.sum(outcomes * probabilities)
    
    # Variance
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    
    # Third Central Moment
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    
    # Skewness
    skewness = mu_3 / (sigma_squared**(3/2)) if sigma_squared != 0 else 0  # To handle division by zero
    
    return skewness


selected_columns = ['skew', 'P_A1', 'O_A1', 'P_A2', 'O_A2', 'P_B1', 'O_B1', 'P_B2', 'O_B2']
filtered_df = filtered_df[selected_columns]



final_df = filtered_df.assign(
    EVA=lambda x: x['P_A1'] * x['O_A1'] + x['P_A2'] * x['O_A2'],
    EVB=lambda x: x['P_B1'] * x['O_B1'] + x['P_B2'] * x['O_B2'],
    EVD=lambda x: x['EVA'] - x['EVB'],
    SDA=lambda x: np.sqrt(x['P_A1'] * (x['O_A1'] - x['EVA'])**2 + x['P_A2'] * (x['O_A2'] - x['EVA'])**2),
    SDB=lambda x: np.sqrt(x['P_B1'] * (x['O_B1'] - x['EVB'])**2 + x['P_B2'] * (x['O_B2'] - x['EVB'])**2),
    SDD=lambda x: x['SDA'] - x['SDB'],
    skewness_a=lambda x: x.apply(lambda row: calculate_skewness(
        np.array([row['P_A1'], row['P_A2']]), 
        np.array([row['O_A1'], row['O_A2']])
    ), axis=1),
    skewness_b=lambda x: x.apply(lambda row: calculate_skewness(
        np.array([row['P_B1'], row['P_B2']]), 
        np.array([row['O_B1'], row['O_B2']])
    ), axis=1),
    skewness_diff=lambda x: x['skewness_a'] - x['skewness_b']
)



# Bin the 'EVD' column
final_df['evd_bins'] = pd.cut(
    final_df['EVD'], 
    bins=[-float('inf'), -15, -9, 1, 11, 21], 
    labels=["-21 to -19", "-11 to -9", "-1 to 1", "9 to 11", "19 to 21"],
    right=True,
    include_lowest=True
)

# Bin the 'SDD' column
final_df['sdd_bins'] = pd.cut(
    final_df['SDD'], 
    bins=[-float('inf'), 8, 13, 18], 
    labels=["4 to 6", "9 to 11", "14 to 16"],
    right=True,
    include_lowest=True
)

# Sort the DataFrame by 'evd_bins' and then 'sdd_bins'
final_df = final_df.sort_values(by=['evd_bins', 'sdd_bins'])




# Reset the index to remove the existing one and create a new sequential index
final_df = final_df.reset_index(drop=True)

# Add a new 'index' column starting from 1 to 45
final_df['index'] = range(1, 46)

# Move the 'index' column to the beginning of the DataFrame
final_df = final_df[['index'] + [col for col in final_df.columns if col != 'index']]

final_df_new = final_df


In [235]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# Function to calculate skewness
def calculate_skewness(probabilities, outcomes):
    mu = np.sum(outcomes * probabilities)
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    sigma = np.sqrt(sigma_squared)
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    skewness = mu_3 / (sigma**3) if sigma != 0 else 0
    return skewness

# Function to calculate EV, SD, and skewness
def lottery_stats(probs, outcomes):
    ev = np.sum(probs * outcomes)
    sd = np.sqrt(np.sum(probs * (outcomes - ev) ** 2))
    skw = calculate_skewness(probs, outcomes)
    return ev, sd, skw

# Generate unique probabilities that sum to 1
def generate_unique_probs(n, low=0.02, high=0.4):
    possible_probs = np.arange(low, high + 0.001, 0.01)
    possible_probs = np.round(possible_probs, 2)
    possible_probs = possible_probs[(possible_probs >= low) & (possible_probs <= high)]
    # Remove duplicates due to rounding
    possible_probs = np.unique(possible_probs)
    # Ensure there are enough unique probabilities
    if len(possible_probs) < n:
        raise ValueError("Not enough unique probabilities within the specified bounds.")
    for _ in range(1000):  # Maximum attempts
        probs = np.random.choice(possible_probs, size=n, replace=False)
        probs = probs / probs.sum()
        probs = np.round(probs, 2)
        probs[-1] += 1 - probs.sum()  # Adjust to sum to 1
        # Check for uniqueness after rounding and adjustment
        if np.all(probs >= low) and np.all(probs <= high) and len(np.unique(probs)) == n:
            return probs
    raise ValueError("Unable to generate unique probabilities within bounds after rounding.")

# Optimization function
def optimize_lottery(original_probs, original_outcomes):
    # Original lottery statistics
    original_ev = np.sum(original_probs * original_outcomes)
    original_sd = np.sqrt(np.sum(original_probs * (original_outcomes - original_ev) ** 2))
    original_skew = calculate_skewness(original_probs, original_outcomes)

    # Parameters
    n = 7  # Number of outcomes
    iterations = 100  # Number of iterations to run
    delta = 2  # Minimum difference to ensure uniqueness after rounding
    max_gap = 50  # Maximum allowed gap between consecutive outcomes
    gap_weight = 0.01  # Weight for the gap penalty in the objective function

    # Bounds for outcomes
    bounds = [(2, 200)] * n

    # List to store acceptable results
    acceptable_results = []

    # Run the optimization multiple times
    for iteration in range(iterations):
        # Generate unique probabilities
        try:
            fixed_probs = generate_unique_probs(n)
        except ValueError:
            continue  # Skip if unable to generate probabilities

        # Desired average gap between outcomes
        desired_gap = (bounds[0][1] - bounds[0][0]) / (n - 1)

        # Objective function with gap penalty
        def objective_outcomes(outcomes):
            ev, sd, skw = lottery_stats(fixed_probs, outcomes)
            penalty_stats = (ev - original_ev)**2 + (sd - original_sd)**2 + (skw - original_skew)**2
            penalty_gaps = np.sum((np.diff(outcomes) + desired_gap)**2)  # Adjusted for decreasing order
            penalty = penalty_stats + gap_weight * penalty_gaps
            return penalty

        # Initial guess for outcomes (from high to low)
        initial_outcomes = np.linspace(bounds[0][1], bounds[0][0], n)

        # Constraints to ensure outcomes are decreasing and gaps are within limits
        constraints = [
            {'type': 'ineq', 'fun': lambda x, i=i: x[i] - x[i+1] - delta} for i in range(n - 1)
        ] + [
            {'type': 'ineq', 'fun': lambda x, i=i: max_gap - (x[i] - x[i+1])} for i in range(n - 1)
        ]

        # Optimize outcomes
        result = minimize(
            objective_outcomes,
            initial_outcomes,
            bounds=bounds,
            constraints=constraints,
            method='SLSQP',
            options={'ftol': 1e-9, 'disp': False}
        )

        # Check if the optimization was successful
        if not result.success:
            continue  # Skip this iteration if optimization failed

        # Extract optimized outcomes
        optimized_outcomes = np.round(result.x).astype(int)

        # Adjust outcomes to ensure uniqueness after rounding and maintain gaps
        for i in range(1, n):
            # Ensure the gap does not exceed max_gap
            if optimized_outcomes[i - 1] - optimized_outcomes[i] > max_gap:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - max_gap
            # Ensure outcomes are decreasing by at least delta
            if optimized_outcomes[i] >= optimized_outcomes[i - 1]:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - int(delta)
            # Ensure within bounds
            if optimized_outcomes[i] < bounds[i][0]:
                optimized_outcomes[i] = bounds[i][0]

        # Recalculate the statistics with adjusted outcomes
        final_ev, final_sd, final_skew = lottery_stats(fixed_probs, optimized_outcomes)

        # Check if the differences in EV, SD, and skewness are within acceptable limits
        if (
            abs(final_ev - original_ev) < 1.5 and
            abs(final_sd - original_sd) < 1.5 and
            abs(final_skew - original_skew) < 2  # Skewness difference constraint
        ):
            skewness_difference = abs(final_skew - original_skew)
            # Store the acceptable results
            acceptable_results.append({
                'skewness_difference': skewness_difference,
                'outcomes': optimized_outcomes.copy(),
                'probabilities': fixed_probs.copy(),
                'final_ev': final_ev,
                'final_sd': final_sd,
                'final_skew': final_skew
            })

    # After all iterations, return the list of acceptable results
    return acceptable_results



# Initialize an empty list to store new rows
new_rows = []

# Loop over each row in the DataFrame
for idx, row in final_df_new.iterrows():
    # For A
    probs_A = np.array([row['P_A1'], row['P_A2']])
    probs_A = probs_A / probs_A.sum()
    outcomes_A = np.array([row['O_A1'], row['O_A2']])
    acceptable_results_A = optimize_lottery(probs_A, outcomes_A)

    # For B
    probs_B = np.array([row['P_B1'], row['P_B2']])
    probs_B = probs_B / probs_B.sum()
    outcomes_B = np.array([row['O_B1'], row['O_B2']])
    acceptable_results_B = optimize_lottery(probs_B, outcomes_B)

    # If no acceptable results, set to None
    if not acceptable_results_A:
        acceptable_results_A = [None]
    if not acceptable_results_B:
        acceptable_results_B = [None]

    # For each combination of acceptable_results_A and acceptable_results_B
    for result_A in acceptable_results_A:
        for result_B in acceptable_results_B:
            # Create a copy of the original row
            new_row = row.copy()
            # Add new outcomes and probabilities for A
            if result_A is not None:
                for i in range(len(result_A['outcomes'])):
                    new_row[f'new_OA{i+1}'] = result_A['outcomes'][i]
                    new_row[f'new_PA{i+1}'] = result_A['probabilities'][i]
            else:
                # If result_A is None, set to NaN
                for i in range(7):
                    new_row[f'new_OA{i+1}'] = np.nan
                    new_row[f'new_PA{i+1}'] = np.nan
            # Add new outcomes and probabilities for B
            if result_B is not None:
                for i in range(len(result_B['outcomes'])):
                    new_row[f'new_OB{i+1}'] = result_B['outcomes'][i]
                    new_row[f'new_PB{i+1}'] = result_B['probabilities'][i]
            else:
                # If result_B is None, set to NaN
                for i in range(7):
                    new_row[f'new_OB{i+1}'] = np.nan
                    new_row[f'new_PB{i+1}'] = np.nan
            # Append the new row to the list
            new_rows.append(new_row)

# Create a new DataFrame from new_rows
new_df = pd.DataFrame(new_rows)

# Display the updated DataFrame
print(new_df)

    index skew  P_A1  O_A1  P_A2  O_A2  P_B1  O_B1  P_B2  O_B2  ...  new_OB3  \
0       1   rl  0.86    72  0.14   199  0.16    19  0.84   128  ...    131.0   
0       1   rl  0.86    72  0.14   199  0.16    19  0.84   128  ...    121.0   
0       1   rl  0.86    72  0.14   199  0.16    19  0.84   128  ...    142.0   
0       1   rl  0.86    72  0.14   199  0.16    19  0.84   128  ...    134.0   
0       1   rl  0.86    72  0.14   199  0.16    19  0.84   128  ...    130.0   
..    ...  ...   ...   ...   ...   ...   ...   ...   ...   ...  ...      ...   
44     45   rl  0.12   163  0.88    58  0.17     7  0.83    59  ...     63.0   
44     45   rl  0.12   163  0.88    58  0.17     7  0.83    59  ...     56.0   
44     45   rl  0.12   163  0.88    58  0.17     7  0.83    59  ...     64.0   
44     45   rl  0.12   163  0.88    58  0.17     7  0.83    59  ...     64.0   
44     45   rl  0.12   163  0.88    58  0.17     7  0.83    59  ...     64.0   

    new_PB3  new_OB4  new_PB4  new_OB5 

In [None]:

# Step 1: Drop rows with any NaN values
df_cleaned = new_df.dropna()

# Step 2: Count rows for each unique value in the 'index' column
index_counts = df_cleaned['index'].value_counts()

# Step 3: Sort the counts by the index values in ascending order
index_counts = index_counts.sort_index()




In [406]:


# Adjust O_A1, O_A2, P_A1, P_A2 to ensure O_A1 is always the greater outcome
df_cleaned[['O_A1', 'O_A2', 'P_A1', 'P_A2']] = df_cleaned.apply(
    lambda row: (
        row['O_A1'], row['O_A2'], row['P_A1'], row['P_A2']
    ) if row['O_A1'] >= row['O_A2'] else (
        row['O_A2'], row['O_A1'], row['P_A2'], row['P_A1']
    ),
    axis=1,
    result_type='expand'
)





# Adjust O_A1, O_A2, P_A1, P_A2 to ensure O_A1 is always the greater outcome
df_cleaned[['O_B1', 'O_B2', 'P_B1', 'P_B2']] = df_cleaned.apply(
    lambda row: (
        row['O_B1'], row['O_B2'], row['P_B1'], row['P_B2']
    ) if row['O_B1'] >= row['O_B2'] else (
        row['O_B2'], row['O_B1'], row['P_B2'], row['P_B1']
    ),
    axis=1,
    result_type='expand'
)

df_cleaned




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned[['O_A1', 'O_A2', 'P_A1', 'P_A2']] = df_cleaned.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned[['O_B1', 'O_B2', 'P_B1', 'P_B2']] = df_cleaned.apply(


Unnamed: 0,index,skew,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,...,new_OB3,new_PB3,new_OB4,new_PB4,new_OB5,new_PB5,new_OB6,new_PB6,new_OB7,new_PB7
0,1,rl,0.14,199.0,0.86,72.0,0.84,128.0,0.16,19.0,...,131.0,0.17,114.0,0.04,97.0,0.16,76.0,0.20,48.0,0.14
0,1,rl,0.14,199.0,0.86,72.0,0.84,128.0,0.16,19.0,...,121.0,0.25,101.0,0.03,81.0,0.14,56.0,0.21,24.0,0.02
0,1,rl,0.14,199.0,0.86,72.0,0.84,128.0,0.16,19.0,...,142.0,0.30,119.0,0.20,94.0,0.18,66.0,0.14,35.0,0.10
0,1,rl,0.14,199.0,0.86,72.0,0.84,128.0,0.16,19.0,...,134.0,0.21,114.0,0.12,93.0,0.25,67.0,0.04,39.0,0.15
0,1,rl,0.14,199.0,0.86,72.0,0.84,128.0,0.16,19.0,...,130.0,0.21,111.0,0.16,90.0,0.17,65.0,0.06,38.0,0.14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44,45,rl,0.12,163.0,0.88,58.0,0.83,59.0,0.17,7.0,...,63.0,0.06,55.0,0.28,47.0,0.20,33.0,0.18,7.0,0.09
44,45,rl,0.12,163.0,0.88,58.0,0.83,59.0,0.17,7.0,...,56.0,0.11,50.0,0.03,44.0,0.18,31.0,0.05,15.0,0.18
44,45,rl,0.12,163.0,0.88,58.0,0.83,59.0,0.17,7.0,...,64.0,0.12,57.0,0.13,51.0,0.25,39.0,0.05,23.0,0.28
44,45,rl,0.12,163.0,0.88,58.0,0.83,59.0,0.17,7.0,...,64.0,0.08,57.0,0.18,51.0,0.24,38.0,0.10,20.0,0.22


In [None]:
df_cleaned.to_csv('new_trials_raw.csv', index=False)
df_cleaned_new = df_cleaned

In [None]:

import pandas as pd
import numpy as np

# Define functions
def calculate_expected_value(prob_cols, outcome_cols, df):
    return sum(df[prob_col] * df[outcome_col] for prob_col, outcome_col in zip(prob_cols, outcome_cols))

def calculate_standard_deviation(prob_cols, outcome_cols, expected_value, df):
    variance = sum(df[prob_col] * (df[outcome_col] - expected_value) ** 2 for prob_col, outcome_col in zip(prob_cols, outcome_cols))
    return np.sqrt(variance)



# Define column lists
# For original probabilities and outcomes (Option A)
prob_cols_A = ['P_A1', 'P_A2']
outcome_cols_A = ['O_A1', 'O_A2']

# For original probabilities and outcomes (Option B)
prob_cols_B = ['P_B1', 'P_B2']
outcome_cols_B = ['O_B1', 'O_B2']

# For new probabilities and outcomes (Option A)
new_prob_cols_A = [f'new_PA{i}' for i in range(1, 8)]
new_outcome_cols_A = [f'new_OA{i}' for i in range(1, 8)]

# For new probabilities and outcomes (Option B)
new_prob_cols_B = [f'new_PB{i}' for i in range(1, 8)]
new_outcome_cols_B = [f'new_OB{i}' for i in range(1, 8)]

# Calculate EVA and EVB
df_cleaned_new['EVA'] = df_cleaned_new.apply(lambda row: calculate_expected_value(prob_cols_A, outcome_cols_A, row), axis=1)
df_cleaned_new['EVB'] = df_cleaned_new.apply(lambda row: calculate_expected_value(prob_cols_B, outcome_cols_B, row), axis=1)

# Calculate EVD
df_cleaned_new['EVD'] = df_cleaned_new['EVA'] - df_cleaned_new['EVB']

# Calculate EVA_new and EVB_new
df_cleaned_new['EVA_new'] = df_cleaned_new.apply(lambda row: calculate_expected_value(new_prob_cols_A, new_outcome_cols_A, row), axis=1)
df_cleaned_new['EVB_new'] = df_cleaned_new.apply(lambda row: calculate_expected_value(new_prob_cols_B, new_outcome_cols_B, row), axis=1)

# Calculate sum_prob_A
df_cleaned_new['sum_prob_A'] = df_cleaned_new[new_prob_cols_A].sum(axis=1)

# Calculate EVD_NEW and round to two decimal places
df_cleaned_new['EVD_NEW'] = (df_cleaned_new['EVA_new'] - df_cleaned_new['EVB_new']).round(2)

# Calculate SDA and SDB
df_cleaned_new['SDA'] = df_cleaned_new.apply(
    lambda row: calculate_standard_deviation(prob_cols_A, outcome_cols_A, row['EVA'], row), axis=1
)
df_cleaned_new['SDB'] = df_cleaned_new.apply(
    lambda row: calculate_standard_deviation(prob_cols_B, outcome_cols_B, row['EVB'], row), axis=1
)

# Calculate SDD
df_cleaned_new['SDD'] = df_cleaned_new['SDA'] - df_cleaned_new['SDB']

# Calculate SDA_new and SDB_new
df_cleaned_new['SDA_new'] = df_cleaned_new.apply(
    lambda row: calculate_standard_deviation(new_prob_cols_A, new_outcome_cols_A, row['EVA_new'], row), axis=1
)
df_cleaned_new['SDB_new'] = df_cleaned_new.apply(
    lambda row: calculate_standard_deviation(new_prob_cols_B, new_outcome_cols_B, row['EVB_new'], row), axis=1
)

# Calculate SDD_new
df_cleaned_new['SDD_new'] = df_cleaned_new['SDA_new'] - df_cleaned_new['SDB_new']

# Display the updated DataFrame
print(df_cleaned_new)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_new['EVA'] = df_cleaned_new.apply(lambda row: calculate_expected_value(prob_cols_A, outcome_cols_A, row), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_new['EVB'] = df_cleaned_new.apply(lambda row: calculate_expected_value(prob_cols_B, outcome_cols_B, row), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/in

    index skew  P_A1   O_A1  P_A2  O_A2  P_B1   O_B1  P_B2  O_B2  ...  \
0       1   rl  0.14  199.0  0.86  72.0  0.84  128.0  0.16  19.0  ...   
0       1   rl  0.14  199.0  0.86  72.0  0.84  128.0  0.16  19.0  ...   
0       1   rl  0.14  199.0  0.86  72.0  0.84  128.0  0.16  19.0  ...   
0       1   rl  0.14  199.0  0.86  72.0  0.84  128.0  0.16  19.0  ...   
0       1   rl  0.14  199.0  0.86  72.0  0.84  128.0  0.16  19.0  ...   
..    ...  ...   ...    ...   ...   ...   ...    ...   ...   ...  ...   
44     45   rl  0.12  163.0  0.88  58.0  0.83   59.0  0.17   7.0  ...   
44     45   rl  0.12  163.0  0.88  58.0  0.83   59.0  0.17   7.0  ...   
44     45   rl  0.12  163.0  0.88  58.0  0.83   59.0  0.17   7.0  ...   
44     45   rl  0.12  163.0  0.88  58.0  0.83   59.0  0.17   7.0  ...   
44     45   rl  0.12  163.0  0.88  58.0  0.83   59.0  0.17   7.0  ...   

    new_PB6  new_OB7  new_PB7  EVA_new  EVB_new  sum_prob_A  EVD_NEW  \
0      0.20     48.0     0.14    89.75   110.70    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_new['SDB_new'] = df_cleaned_new.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_new['SDD_new'] = df_cleaned_new['SDA_new'] - df_cleaned_new['SDB_new']


In [508]:
import pandas as pd
import numpy as np
import math



def Pweight(p, alpha=1, gamma=0.6):
    if p >= 1:
        return 1
    elif p <= 0:
        return 0
    else:
        return math.exp(-alpha * ((-math.log(p)) ** gamma))
    
def apply_probability_weighting(probs):
    weighted_probs = []
    cumulative_prob = 0
    for prob in probs:
        # Since cumulative_prob + prob might exceed 1, ensure it does not
        cumulative_prob_next = min(cumulative_prob + prob, 1)
        weighted_prob = Pweight(cumulative_prob_next) - Pweight(cumulative_prob)
        weighted_probs.append(weighted_prob)
        cumulative_prob = cumulative_prob_next
    return weighted_probs



In [509]:



def calculate_EU(weighted_probs, outcomes):
    return sum(w * o for w, o in zip(weighted_probs, outcomes))

def validate_probabilities(probs):
    # Check if all probabilities are within the valid range (0, 1]
    return all(0 < p <= 1 for p in probs)

def apply_probability_weighting_and_calculate_EUA(row):
    # Extract the probabilities and outcomes from the row
    probs = row[['new_PA1', 'new_PA2', 'new_PA3', 'new_PA4', 'new_PA5', 'new_PA6', 'new_PA7']].astype(float).tolist()
    outcomes = row[['new_OA1', 'new_OA2', 'new_OA3', 'new_OA4', 'new_OA5', 'new_OA6', 'new_OA7']].astype(float).tolist()
    
    # Validate probabilities
    if not validate_probabilities(probs):
        # Invalid probabilities detected, return NaN or handle as desired
        return np.nan
    
    # Apply the probability weighting function
    weighted_probs = apply_probability_weighting(probs)
    
    # Calculate the expected utility
    EUA = calculate_EU(weighted_probs, outcomes)
    
    return EUA

def apply_probability_weighting_and_calculate_EUB(row):
    # Extract the probabilities and outcomes from the row
    probs = row[['new_PB1', 'new_PB2', 'new_PB3', 'new_PB4', 'new_PB5', 'new_PB6', 'new_PB7']].astype(float).tolist()
    outcomes = row[['new_OB1', 'new_OB2', 'new_OB3', 'new_OB4', 'new_OB5', 'new_OB6', 'new_OB7']].astype(float).tolist()
    
    # Validate probabilities
    if not validate_probabilities(probs):
        # Invalid probabilities detected, return NaN or handle as desired
        return np.nan
    
    # Apply the probability weighting function
    weighted_probs = apply_probability_weighting(probs)
    
    # Calculate the expected utility
    EUB = calculate_EU(weighted_probs, outcomes)
    
    return EUB

def apply_probability_weighting_and_calculate_EUA_simple(row):
    # Extract the probabilities and outcomes from the row
    probs = row[['P_A1', 'P_A2']].astype(float).tolist()
    outcomes = row[['O_A1', 'O_A2']].astype(float).tolist()
    
    # Validate probabilities
    if not validate_probabilities(probs):
        # Invalid probabilities detected, return NaN or handle as desired
        return np.nan
    
    # Apply the probability weighting function
    weighted_probs = apply_probability_weighting(probs)
    
    # Calculate the expected utility
    EUA_simple = calculate_EU(weighted_probs, outcomes)
    
    return EUA_simple

def apply_probability_weighting_and_calculate_EUB_simple(row):
    # Extract the probabilities and outcomes from the row
    probs = row[['P_B1', 'P_B2']].astype(float).tolist()
    outcomes = row[['O_B1', 'O_B2']].astype(float).tolist()
    
    # Validate probabilities
    if not validate_probabilities(probs):
        # Invalid probabilities detected, return NaN or handle as desired
        return np.nan
    
    # Apply the probability weighting function
    weighted_probs = apply_probability_weighting(probs)
    
    # Calculate the expected utility
    EUB_simple = calculate_EU(weighted_probs, outcomes)
    
    return EUB_simple


In [510]:

# Apply the functions row-wise to compute the new columns
df_cleaned_new['EUA'] = df_cleaned_new.apply(apply_probability_weighting_and_calculate_EUA, axis=1)
df_cleaned_new['EUB'] = df_cleaned_new.apply(apply_probability_weighting_and_calculate_EUB, axis=1)
df_cleaned_new['EUA_simple'] = df_cleaned_new.apply(apply_probability_weighting_and_calculate_EUA_simple, axis=1)
df_cleaned_new['EUB_simple'] = df_cleaned_new.apply(apply_probability_weighting_and_calculate_EUB_simple, axis=1)

# Remove rows with NaN in EUA or EUB due to invalid probabilities, or handle as desired
df_cleaned_new = df_cleaned_new.dropna(subset=['EUA', 'EUB', 'EUA_simple', 'EUB_simple'])

# Compute the differences
df_cleaned_new['EUD_CC'] = df_cleaned_new['EUA'] - df_cleaned_new['EUB']
df_cleaned_new['EUD_SC'] = df_cleaned_new['EUA_simple'] - df_cleaned_new['EUB']
df_cleaned_new['EUD_CS'] = df_cleaned_new['EUA'] - df_cleaned_new['EUB_simple']
df_cleaned_new['EUD_SS'] = df_cleaned_new['EUA_simple'] - df_cleaned_new['EUB_simple']

df_cleaned_new['after_diff_CC'] = df_cleaned_new['EUD_CC'] - df_cleaned_new['EVD']
df_cleaned_new['after_diff_CS'] = df_cleaned_new['EUD_CS'] - df_cleaned_new['EVD']
df_cleaned_new['after_diff_SC'] = df_cleaned_new['EUD_SC'] - df_cleaned_new['EVD']
df_cleaned_new['after_diff_SS'] = df_cleaned_new['EUD_SS'] - df_cleaned_new['EVD']

In [511]:


# Define the function to calculate skewness
def calculate_skewness(probabilities, outcomes):
    # Mean (expected value)
    mu = np.sum(np.array(outcomes) * np.array(probabilities))
    
    # Variance
    sigma_squared = np.sum((np.array(outcomes)**2) * np.array(probabilities)) - mu**2
    
    # Third Central Moment
    mu_3 = np.sum(((np.array(outcomes) - mu)**3) * np.array(probabilities))
    
    # Skewness
    skewness = mu_3 / (sigma_squared**(3/2)) if sigma_squared > 0 else np.nan
    
    return skewness

# Apply the skewness calculation row-wise
df_cleaned_new['skewness_a'] = df_cleaned_new.apply(
    lambda row: calculate_skewness([row['P_A1'], row['P_A2']], [row['O_A1'], row['O_A2']]),
    axis=1
)

df_cleaned_new['skewness_b'] = df_cleaned_new.apply(
    lambda row: calculate_skewness([row['P_B1'], row['P_B2']], [row['O_B1'], row['O_B2']]),
    axis=1
)

# Calculate skewness difference
df_cleaned_new['skewness_diff'] = df_cleaned_new['skewness_a'] - df_cleaned_new['skewness_b']

# Apply the skewness calculation row-wise
df_cleaned_new['new_skewness_A'] = df_cleaned_new.apply(
    lambda row: calculate_skewness([row['new_PA1'], row['new_PA2'], row['new_PA3'], row['new_PA4'], row['new_PA5'], row['new_PA6'], row['new_PA7']], [row['new_OA1'], row['new_OA2'], row['new_OA3'], row['new_OA4'], row['new_OA5'], row['new_OA6'], row['new_OA7']]),
    axis=1
)

df_cleaned_new['new_skewness_B'] = df_cleaned_new.apply(
    lambda row: calculate_skewness([row['new_PB1'], row['new_PB2'], row['new_PB3'], row['new_PB4'], row['new_PB5'], row['new_PB6'], row['new_PB7']], [row['new_OB1'], row['new_OB2'], row['new_OB3'], row['new_OB4'], row['new_OB5'], row['new_OB6'], row['new_OB7']]),
    axis=1
)

# Calculate skewness difference
df_cleaned_new['new_skewness_D'] = df_cleaned_new['new_skewness_A'] - df_cleaned_new['new_skewness_B']
df_cleaned_new




Unnamed: 0,index,skew,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,...,EUD_SC,EUD_CS,EUD_SS,after_diff_CC,after_diff_CS,after_diff_SC,after_diff_SS,new_skewness_A,new_skewness_B,new_skewness_D
0,1,rl,0.14,199.0,0.86,72.0,0.84,128.0,0.16,19.0,...,-5.334408,-9.125736,4.568249,1.751608,11.654264,15.445592,25.348249,0.399024,-0.017762,0.416786
0,1,rl,0.14,199.0,0.86,72.0,0.84,128.0,0.16,19.0,...,-2.377232,-9.125736,4.568249,4.708784,11.654264,18.402768,25.348249,0.399024,-0.284488,0.683512
0,1,rl,0.14,199.0,0.86,72.0,0.84,128.0,0.16,19.0,...,-5.685756,-9.125736,4.568249,1.400259,11.654264,15.094244,25.348249,0.399024,-0.243275,0.642299
0,1,rl,0.14,199.0,0.86,72.0,0.84,128.0,0.16,19.0,...,-4.498333,-9.125736,4.568249,2.587683,11.654264,16.281667,25.348249,0.399024,-0.373615,0.772639
0,1,rl,0.14,199.0,0.86,72.0,0.84,128.0,0.16,19.0,...,-3.937782,-9.125736,4.568249,3.148234,11.654264,16.842218,25.348249,0.399024,-0.394229,0.793253
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44,45,rl,0.12,163.0,0.88,58.0,0.83,59.0,0.17,7.0,...,32.228162,25.236532,36.747955,0.276739,4.796532,11.788162,16.307955,0.442349,-0.242688,0.685037
44,45,rl,0.12,163.0,0.88,58.0,0.83,59.0,0.17,7.0,...,33.938134,25.236532,36.747955,1.986711,4.796532,13.498134,16.307955,0.442349,-0.604211,1.046560
44,45,rl,0.12,163.0,0.88,58.0,0.83,59.0,0.17,7.0,...,30.937014,25.236532,36.747955,-1.014410,4.796532,10.497014,16.307955,0.442349,0.085978,0.356372
44,45,rl,0.12,163.0,0.88,58.0,0.83,59.0,0.17,7.0,...,30.989548,25.236532,36.747955,-0.961876,4.796532,10.549548,16.307955,0.442349,-0.005847,0.448197


In [None]:
import pandas as pd



# Filter rows with skew == 'rl' and apply the cutoff filter
skewness_new_cutoff = 0.7
rl_filtered = df_cleaned_new[
    (df_cleaned_new['skew'] == 'rl') &
    # (df_cleaned_new['after_diff_CC'] > cutoff_value_rl) &
    # (df_cleaned_new['after_diff_CS'] > cutoff_value_rl) &
    # (df_cleaned_new['after_diff_SC'] > cutoff_value_rl) &
    # (df_cleaned_new['after_diff_SS'] > cutoff_value_rl) &
    (df_cleaned_new['new_skewness_A'] > skewness_new_cutoff) &
    (df_cleaned_new['new_skewness_B'] < -skewness_new_cutoff)
]

# Filter rows with skew == 'lr' and apply the cutoff filter
lr_filtered = df_cleaned_new[
    (df_cleaned_new['skew'] == 'lr') &
    # (df_cleaned_new['after_diff_CC'] < -cutoff_value_rl) &
    # (df_cleaned_new['after_diff_CS'] < -cutoff_value_rl) &
    # (df_cleaned_new['after_diff_SC'] < -cutoff_value_rl) &
    # (df_cleaned_new['after_diff_SS'] < -cutoff_value_rl) &
    (df_cleaned_new['new_skewness_A'] < -skewness_new_cutoff) &
    (df_cleaned_new['new_skewness_B'] > skewness_new_cutoff)
]

# Randomly select 1 sample per group for lr_filtered, grouped by 'index'
randomly_selected_lr = lr_filtered.groupby('index').sample(n=1).reset_index(drop=True)

# Randomly select 1 sample per group for rl_filtered, grouped by 'index'
randomly_selected_rl = rl_filtered.groupby('index').sample(n=1).reset_index(drop=True)





In [513]:
# Filter rows with skew == 'lr' and apply the cutoff filter
cutoff_value_rl = 1.5


skewness_new_cutoff_ns = 0.7
ns_filtered = df_cleaned_new[
    (df_cleaned_new['skew'] == 'ns') &
    # (df_cleaned_new['after_diff_CC'] > -cutoff_value_rl) & (df_cleaned_new['after_diff_CC'] < cutoff_value_rl) &
    # (df_cleaned_new['after_diff_CS'] > -cutoff_value_rl) & (df_cleaned_new['after_diff_CS'] < cutoff_value_rl) &
    # (df_cleaned_new['after_diff_SC'] > -cutoff_value_rl) & (df_cleaned_new['after_diff_SC'] < cutoff_value_rl) &
    # (df_cleaned_new['after_diff_SS'] > -cutoff_value_rl) & (df_cleaned_new['after_diff_SS'] < cutoff_value_rl) &
    (df_cleaned_new['new_skewness_A'] > -skewness_new_cutoff_ns) & (df_cleaned_new['new_skewness_A'] < skewness_new_cutoff_ns) &
    (df_cleaned_new['new_skewness_B'] < skewness_new_cutoff_ns) & (df_cleaned_new['new_skewness_B'] > -skewness_new_cutoff_ns)
]

In [514]:
# Define the cutoff value for new probabilities
min_probability = 0.1

# Filter rows with skew == 'ns' and apply the cutoff filter for each 'new_P' column
ns_filtered = ns_filtered[
    (ns_filtered[['new_PA1',  'new_PA7',
                  'new_PB1', 'new_PB7']] > min_probability).all(axis=1)
]



# Randomly select 1 sample per group for ns_filtered, grouped by 'index'
randomly_selected_ns = ns_filtered.groupby('index').sample(n=1).reset_index(drop=True)



In [515]:
import pandas as pd

# Concatenate the three dataframes
merged_df = pd.concat([randomly_selected_rl, randomly_selected_lr, randomly_selected_ns], ignore_index=True)
merged_df.to_csv('study3_trials_old.csv', index=False)
merged_df

Unnamed: 0,index,skew,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,...,EUD_SC,EUD_CS,EUD_SS,after_diff_CC,after_diff_CS,after_diff_SC,after_diff_SS,new_skewness_A,new_skewness_B,new_skewness_D
0,1,rl,0.14,199.0,0.86,72.0,0.84,128.0,0.16,19.0,...,0.039898,-5.25747,4.568249,10.994179,15.52253,20.819898,25.348249,0.725305,-0.764267,1.489572
1,4,rl,0.17,138.0,0.83,59.0,0.86,101.0,0.14,42.0,...,-9.295904,-12.437383,-6.485224,5.061937,7.872617,11.014096,13.824776,0.742843,-0.807256,1.550098
2,7,rl,0.13,115.0,0.87,48.0,0.83,80.0,0.17,58.0,...,-12.088383,-13.020302,-10.822968,5.264284,6.529698,7.461617,8.727032,1.295632,-0.837892,2.133524
3,13,rl,0.19,108.0,0.81,66.0,0.87,87.0,0.13,68.0,...,-5.827514,-7.014654,-5.159978,2.86781,3.535346,4.722486,5.390022,0.850946,-1.147141,1.998086
4,18,rl,0.18,195.0,0.82,89.0,0.88,127.0,0.12,46.0,...,6.872727,2.531138,9.067394,9.53647,11.731138,16.072727,18.267394,0.733957,-1.129097,1.863054
5,21,rl,0.09,134.0,0.91,97.0,0.86,103.0,0.14,87.0,...,5.60367,3.532379,5.196155,4.369894,3.962379,6.03367,5.626155,1.706841,-1.846656,3.553497
6,24,rl,0.14,116.0,0.86,52.0,0.89,66.0,0.11,28.0,...,8.45395,6.12677,9.422796,6.017925,6.98677,9.31395,10.282796,0.982384,-1.345234,2.327618
7,27,rl,0.13,149.0,0.87,75.0,0.84,89.0,0.16,61.0,...,8.668179,7.091373,10.242053,5.417499,6.991373,8.568179,10.142053,1.197093,-0.817097,2.01419
8,30,rl,0.18,167.0,0.82,99.0,0.81,111.0,0.19,59.0,...,21.512695,18.507372,21.96526,7.934807,8.387372,11.392695,11.84526,0.904472,-1.181513,2.085985
9,31,rl,0.19,193.0,0.81,108.0,0.83,125.0,0.17,63.0,...,21.25995,19.264265,23.864413,6.969802,9.574265,11.56995,14.174413,0.873249,-1.349461,2.222711


In [516]:

# Ensure the columns are present in the merged_df DataFrame
columns_to_check = ['index','skew','EVA', 'EVB', 'EVD',
                    'EVA_new', 'EVB_new', 'EVD_NEW',
                    'SDA', 'SDB', 'SDD',
                    'SDA_new', 'SDB_new', 'SDD_new',
                    'skewness_a', 'new_skewness_A', 'skewness_b', 'new_skewness_B',
                    'skewness_diff', 'new_skewness_D']

# Check which columns are missing
missing_columns = [col for col in columns_to_check if col not in merged_df.columns]

# Print missing columns
print(f"Missing columns: {missing_columns}")

# Select only the columns that are present
columns_to_select = [col for col in columns_to_check if col in merged_df.columns]

df_selected = merged_df[columns_to_select]

df_selected_rounded = df_selected.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)

df_selected_rounded.to_csv('study3_trials_old_short.csv', index=False)

df_selected_rounded


Missing columns: []


  df_selected_rounded = df_selected.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)


Unnamed: 0,index,skew,EVA,EVB,EVD,EVA_new,EVB_new,EVD_NEW,SDA,SDB,SDD,SDA_new,SDB_new,SDD_new,skewness_a,new_skewness_A,skewness_b,new_skewness_B,skewness_diff,new_skewness_D
0,1,rl,89.78,110.56,-20.78,89.86,110.5,-20.64,44.07,39.96,4.11,44.32,40.24,4.07,2.08,0.73,-1.85,-0.76,3.93,1.49
1,4,rl,72.43,92.74,-20.31,72.24,92.78,-20.54,29.67,20.47,9.2,30.13,21.16,8.97,1.76,0.74,-2.08,-0.81,3.83,1.55
2,7,rl,56.71,76.26,-19.55,56.58,76.48,-19.9,22.53,8.26,14.27,23.1,9.38,13.72,2.2,1.3,-1.76,-0.84,3.96,2.13
3,13,rl,73.98,84.53,-10.55,74.11,84.39,-10.28,16.48,6.39,10.09,17.24,7.64,9.61,1.58,0.85,-2.2,-1.15,3.78,2.0
4,18,rl,108.08,117.28,-9.2,108.08,117.02,-8.94,40.72,26.32,14.4,41.1,26.82,14.28,1.67,0.73,-2.34,-1.13,4.0,1.86
5,21,rl,100.33,100.76,-0.43,100.37,100.43,-0.06,10.59,5.55,5.04,11.28,6.65,4.62,2.87,1.71,-2.08,-1.85,4.94,3.55
6,24,rl,60.96,61.82,-0.86,61.01,62.02,-1.01,22.21,11.89,10.32,22.6,12.58,10.02,2.08,0.98,-2.49,-1.35,4.57,2.33
7,27,rl,84.62,84.52,0.1,84.46,84.8,-0.34,24.89,10.26,14.62,25.44,11.18,14.26,2.2,1.2,-1.85,-0.82,4.06,2.01
8,30,rl,111.24,101.12,10.12,111.33,101.21,10.12,26.12,20.4,5.73,26.55,20.97,5.58,1.67,0.9,-1.58,-1.18,3.25,2.09
9,31,rl,124.15,114.46,9.69,124.04,114.39,9.65,33.35,23.29,10.06,33.72,23.78,9.93,1.58,0.87,-1.76,-1.35,3.34,2.22


### from now the new stimuli, the new_stimuli_study3.csv was generated in r

In [394]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

dat = pd.read_csv('new_stimuli_study3.csv')
df = pd.DataFrame(dat)

# Function to calculate skewness
def calculate_skewness(probabilities, outcomes):
    mu = np.sum(outcomes * probabilities)
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    sigma = np.sqrt(sigma_squared)
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    skewness = mu_3 / (sigma**3) if sigma != 0 else 0
    return skewness

# Function to calculate EV, SD, and skewness
def lottery_stats(probs, outcomes):
    ev = np.sum(probs * outcomes)
    sd = np.sqrt(np.sum(probs * (outcomes - ev) ** 2))
    skw = calculate_skewness(probs, outcomes)
    return ev, sd, skw

# Generate unique probabilities that sum to 1
def generate_unique_probs(n, low=0.02, high=0.4):
    possible_probs = np.arange(low, high + 0.001, 0.01)
    possible_probs = np.round(possible_probs, 2)
    possible_probs = possible_probs[(possible_probs >= low) & (possible_probs <= high)]
    # Remove duplicates due to rounding
    possible_probs = np.unique(possible_probs)
    # Ensure there are enough unique probabilities
    if len(possible_probs) < n:
        raise ValueError("Not enough unique probabilities within the specified bounds.")
    for _ in range(1000):  # Maximum attempts
        probs = np.random.choice(possible_probs, size=n, replace=False)
        probs = probs / probs.sum()
        probs = np.round(probs, 2)
        probs[-1] += 1 - probs.sum()  # Adjust to sum to 1
        # Check for uniqueness after rounding and adjustment
        if np.all(probs >= low) and np.all(probs <= high) and len(np.unique(probs)) == n:
            return probs
    raise ValueError("Unable to generate unique probabilities within bounds after rounding.")

# Optimization function
def optimize_lottery(original_probs, original_outcomes):
    # Original lottery statistics
    original_ev = np.sum(original_probs * original_outcomes)
    original_sd = np.sqrt(np.sum(original_probs * (original_outcomes - original_ev) ** 2))
    original_skew = calculate_skewness(original_probs, original_outcomes)

    # Parameters
    n = 7  # Number of outcomes
    iterations = 120  # Number of iterations to run
    delta = 2  # Minimum difference to ensure uniqueness after rounding
    max_gap = 50  # Maximum allowed gap between consecutive outcomes
    gap_weight = 0.01  # Weight for the gap penalty in the objective function

    # Bounds for outcomes
    bounds = [(2, 200)] * n

    # List to store acceptable results
    acceptable_results = []

    # Run the optimization multiple times
    for iteration in range(iterations):
        # Generate unique probabilities
        try:
            fixed_probs = generate_unique_probs(n)
        except ValueError:
            continue  # Skip if unable to generate probabilities

        # Desired average gap between outcomes
        desired_gap = (bounds[0][1] - bounds[0][0]) / (n - 1)

        # Objective function with gap penalty
        def objective_outcomes(outcomes):
            ev, sd, skw = lottery_stats(fixed_probs, outcomes)
            penalty_stats = (ev - original_ev)**2 + (sd - original_sd)**2 + (skw - original_skew)**2
            penalty_gaps = np.sum((np.diff(outcomes) + desired_gap)**2)  # Adjusted for decreasing order
            penalty = penalty_stats + gap_weight * penalty_gaps
            return penalty

        # Initial guess for outcomes (from high to low)
        initial_outcomes = np.linspace(bounds[0][1], bounds[0][0], n)

        # Constraints to ensure outcomes are decreasing and gaps are within limits
        constraints = [
            {'type': 'ineq', 'fun': lambda x, i=i: x[i] - x[i+1] - delta} for i in range(n - 1)
        ] + [
            {'type': 'ineq', 'fun': lambda x, i=i: max_gap - (x[i] - x[i+1])} for i in range(n - 1)
        ]

        # Optimize outcomes
        result = minimize(
            objective_outcomes,
            initial_outcomes,
            bounds=bounds,
            constraints=constraints,
            method='SLSQP',
            options={'ftol': 1e-9, 'disp': False}
        )

        # Check if the optimization was successful
        if not result.success:
            continue  # Skip this iteration if optimization failed

        # Extract optimized outcomes
        optimized_outcomes = np.round(result.x).astype(int)

        # Adjust outcomes to ensure uniqueness after rounding and maintain gaps
        for i in range(1, n):
            # Ensure the gap does not exceed max_gap
            if optimized_outcomes[i - 1] - optimized_outcomes[i] > max_gap:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - max_gap
            # Ensure outcomes are decreasing by at least delta
            if optimized_outcomes[i] >= optimized_outcomes[i - 1]:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - int(delta)
            # Ensure within bounds
            if optimized_outcomes[i] < bounds[i][0]:
                optimized_outcomes[i] = bounds[i][0]

        # Recalculate the statistics with adjusted outcomes
        final_ev, final_sd, final_skew = lottery_stats(fixed_probs, optimized_outcomes)

        # Check if the differences in EV and SD are less than 1
        if abs(final_ev - original_ev) < 1.5 and abs(final_sd - original_sd) < 1.5:
            skewness_difference = abs(final_skew - original_skew)
            # Store the acceptable results
            acceptable_results.append({
                'skewness_difference': skewness_difference,
                'outcomes': optimized_outcomes.copy(),
                'probabilities': fixed_probs.copy(),
                'final_ev': final_ev,
                'final_sd': final_sd,
                'final_skew': final_skew,
                'original_ev': original_ev,
                'original_sd': original_sd,
                'original_skew': original_skew
            })

    # After all iterations, select the best result based on minimal skewness difference
    if acceptable_results:
        # Sort the acceptable results by skewness_difference
        acceptable_results.sort(key=lambda x: x['skewness_difference'])
        best_result = acceptable_results[0]
        # Extract the best results
        best_outcomes = best_result['outcomes']
        best_probs = best_result['probabilities']
        final_ev = best_result['final_ev']
        final_sd = best_result['final_sd']
        final_skew = best_result['final_skew']
        # Return the best results and statistics
        return best_outcomes, best_probs, {
            'original_ev': best_result['original_ev'],
            'original_sd': best_result['original_sd'],
            'original_skew': best_result['original_skew'],
            'new_ev': final_ev,
            'new_sd': final_sd,
            'new_skew': final_skew
        }
    else:
        return None, None, None

# Function to process each row of the DataFrame
def process_row(row):
    result_dict = {}
    # For Lottery A
    probs_A = np.array([row['P_A1'], row['P_A2']])
    probs_A = probs_A / probs_A.sum()
    outcomes_A = np.array([row['O_A1'], row['O_A2']])

    # Original statistics for Lottery A
    original_ev_A, original_sd_A, original_skew_A = lottery_stats(probs_A, outcomes_A)
    result_dict['original_EVA'] = original_ev_A
    result_dict['original_SDA'] = original_sd_A
    result_dict['original_skewness_A'] = original_skew_A

    # Optimize Lottery A
    best_outcomes_A, best_probs_A, stats_A = optimize_lottery(probs_A, outcomes_A)

    if best_outcomes_A is not None:
        # Store new outcomes and probabilities
        for i in range(len(best_outcomes_A)):
            result_dict[f'new_OA{i+1}'] = int(best_outcomes_A[i])  # Ensure integer outcomes
            result_dict[f'new_PA{i+1}'] = best_probs_A[i]
        # Store new statistics
        result_dict['new_EVA'] = stats_A['new_ev']
        result_dict['new_SDA'] = stats_A['new_sd']
        result_dict['new_skewness_A'] = stats_A['new_skew']
    else:
        for i in range(7):
            result_dict[f'new_OA{i+1}'] = np.nan
            result_dict[f'new_PA{i+1}'] = np.nan
        result_dict['new_EVA'] = np.nan
        result_dict['new_SDA'] = np.nan
        result_dict['new_skewness_A'] = np.nan

    # For Lottery B
    probs_B = np.array([row['P_B1'], row['P_B2']])
    probs_B = probs_B / probs_B.sum()
    outcomes_B = np.array([row['O_B1'], row['O_B2']])

    # Original statistics for Lottery B
    original_ev_B, original_sd_B, original_skew_B = lottery_stats(probs_B, outcomes_B)
    result_dict['original_EVB'] = original_ev_B
    result_dict['original_SDB'] = original_sd_B
    result_dict['original_skewness_B'] = original_skew_B

    # Optimize Lottery B
    best_outcomes_B, best_probs_B, stats_B = optimize_lottery(probs_B, outcomes_B)

    if best_outcomes_B is not None:
        # Store new outcomes and probabilities
        for i in range(len(best_outcomes_B)):
            result_dict[f'new_OB{i+1}'] = int(best_outcomes_B[i])  # Ensure integer outcomes
            result_dict[f'new_PB{i+1}'] = best_probs_B[i]
        # Store new statistics
        result_dict['new_EVB'] = stats_B['new_ev']
        result_dict['new_SDB'] = stats_B['new_sd']
        result_dict['new_skewness_B'] = stats_B['new_skew']
    else:
        for i in range(7):
            result_dict[f'new_OB{i+1}'] = np.nan
            result_dict[f'new_PB{i+1}'] = np.nan
        result_dict['new_EVB'] = np.nan
        result_dict['new_SDB'] = np.nan
        result_dict['new_skewness_B'] = np.nan

    # Differences between Lotteries A and B (Original)
    result_dict['original_EVD'] = result_dict['original_EVA'] - result_dict['original_EVB']
    result_dict['original_SDD'] = result_dict['original_SDA'] - result_dict['original_SDB']
    result_dict['original_skewness_D'] = result_dict['original_skewness_A'] - result_dict['original_skewness_B']

    # Differences between Lotteries A and B (New)
    if best_outcomes_A is not None and best_outcomes_B is not None:
        result_dict['new_EVD'] = result_dict['new_EVA'] - result_dict['new_EVB']
        result_dict['new_SDD'] = result_dict['new_SDA'] - result_dict['new_SDB']
        result_dict['new_skewness_D'] = result_dict['new_skewness_A'] - result_dict['new_skewness_B']
    else:
        result_dict['new_EVD'] = np.nan
        result_dict['new_SDD'] = np.nan
        result_dict['new_skewness_D'] = np.nan

    return pd.Series(result_dict)

# Apply the process_row function to each row
new_columns = df.apply(process_row, axis=1)

# Concatenate the new columns to the original DataFrame
df = pd.concat([df, new_columns], axis=1)

# Convert outcome columns to nullable integer type
outcome_columns = [f'new_OA{i+1}' for i in range(7)] + [f'new_OB{i+1}' for i in range(7)]
df[outcome_columns] = df[outcome_columns].astype('Int64')

# Display the updated DataFrame
print(df)

     Unnamed: 0  P_A1  O_A1  P_A2  O_A2  P_B1  O_B1  P_B2  O_B2     EVA  ...  \
0             1  0.08   192  0.92    88  0.95   111  0.05    30   96.32  ...   
1             2  0.08   121  0.92    97  0.96    90  0.04    78   98.92  ...   
2             3  0.08   147  0.92    86  0.94   102  0.06    71   90.88  ...   
3             4  0.03   165  0.97    67  0.91    82  0.09    61   69.94  ...   
4             5  0.11   153  0.89    81  0.88    76  0.12    23   88.92  ...   
..          ...   ...   ...   ...   ...   ...   ...   ...   ...     ...  ...   
341         342  0.45   105  0.55     5  0.49    91  0.51    11   50.00  ...   
342         343  0.60   180  0.40   149  0.66   163  0.34   150  167.60  ...   
343         344  0.46   187  0.54    49  0.49   156  0.51    30  112.48  ...   
344         345  0.49   135  0.51    44  0.53   123  0.47    51   88.59  ...   
345         346  0.65    86  0.35    13  0.59   105  0.41    45   60.45  ...   

     new_PB7  new_EVB    new_SDB  new_s

In [499]:
df_nona = df.dropna()

# Apply the functions row-wise to compute the new columns
df_nona['EUA'] = df_nona.apply(apply_probability_weighting_and_calculate_EUA, axis=1)
df_nona['EUB'] = df_nona.apply(apply_probability_weighting_and_calculate_EUB, axis=1)
df_nona['EUA_simple'] = df_nona.apply(apply_probability_weighting_and_calculate_EUA_simple, axis=1)
df_nona['EUB_simple'] = df_nona.apply(apply_probability_weighting_and_calculate_EUB_simple, axis=1)

# Remove rows with NaN in EUA or EUB due to invalid probabilities, or handle as desired
df_nona = df_nona.dropna(subset=['EUA', 'EUB', 'EUA_simple', 'EUB_simple'])

# Compute the differences
df_nona['EUD_CC'] = df_nona['EUA'] - df_nona['EUB']
df_nona['EUD_SC'] = df_nona['EUA_simple'] - df_nona['EUB']
df_nona['EUD_CS'] = df_nona['EUA'] - df_nona['EUB_simple']
df_nona['EUD_SS'] = df_nona['EUA_simple'] - df_nona['EUB_simple']

df_nona['after_diff_CC'] = df_nona['EUD_CC'] - df_nona['EVD']
df_nona['after_diff_CS'] = df_nona['EUD_CS'] - df_nona['EVD']
df_nona['after_diff_SC'] = df_nona['EUD_SC'] - df_nona['EVD']
df_nona['after_diff_SS'] = df_nona['EUD_SS'] - df_nona['EVD']

df_nona['Skewness_cs_diff_a'] = df_nona['original_skewness_A'] - df_nona['new_skewness_A']
df_nona['Skewness_cs_diff_b'] = df_nona['original_skewness_B'] - df_nona['new_skewness_B']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_nona['EUA'] = df_nona.apply(apply_probability_weighting_and_calculate_EUA, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_nona['EUB'] = df_nona.apply(apply_probability_weighting_and_calculate_EUB, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_nona['EUA_simple'] = df_nona.a

In [524]:
import pandas as pd

# Assuming df_cleaned_new is your DataFrame

# Filter rows with skew == 'rl' and apply the cutoff filter
cutoff_value_rl = 5
rl_filtered = df_nona[
    (df_nona['skew'] == 'rl') &
    (df_nona['after_diff_CC'] > cutoff_value_rl) &
    (df_nona['after_diff_CS'] > cutoff_value_rl) &
    (df_nona['after_diff_SC'] > cutoff_value_rl) &
    (df_nona['after_diff_SS'] > cutoff_value_rl) &
    (df_nona['new_skewness_A'] > 1.5) &
    (df_nona['new_skewness_B'] < -1.5)
]

# Filter rows with skew == 'lr' and apply the cutoff filter
lr_filtered = df_nona[
    (df_nona['skew'] == 'lr') &
    (df_nona['after_diff_CC'] < -cutoff_value_rl) &
    (df_nona['after_diff_CS'] < -cutoff_value_rl) &
    (df_nona['after_diff_SC'] < -cutoff_value_rl) &
    (df_nona['after_diff_SS'] < -cutoff_value_rl) &
    (df_nona['new_skewness_A'] < -1.5) &
    (df_nona['new_skewness_B'] > 1.5)
]

# Filter rows with skew == 'lr' and apply the cutoff filter
ns_filtered = df_nona[
    (df_nona['skew'] == 'ns') &
    (df_nona['after_diff_CC'] < 1) & (df_nona['after_diff_CC'] > -1) &
    (df_nona['after_diff_CS'] < 1) & (df_nona['after_diff_CC'] > -1) &
    (df_nona['after_diff_SC'] < 1) & (df_nona['after_diff_CC'] > -1) &
    (df_nona['after_diff_SS'] < 1) & (df_nona['after_diff_CC'] > -1) &
    (df_nona['new_skewness_A'] > -0.7) &
    (df_nona['new_skewness_A'] < 0.7) &
    (df_nona['new_skewness_B'] > -0.7) &
    (df_nona['new_skewness_B'] < 0.7) 
]

randomly_selected_lr = lr_filtered.groupby(['evd_bins', 'sdd_bins']).sample(n=1).reset_index(drop=True)
randomly_selected_rl = rl_filtered.groupby(['evd_bins', 'sdd_bins']).sample(n=1).reset_index(drop=True)
randomly_selected_ns = ns_filtered.groupby(['evd_bins', 'sdd_bins']).sample(n=1).reset_index(drop=True)

# Concatenate the three dataframes
merged_df = pd.concat([randomly_selected_rl, randomly_selected_lr, randomly_selected_ns], ignore_index=True)
merged_df.to_csv('study3_trials_new.csv', index=False)

In [525]:

# Ensure the columns are present in the merged_df DataFrame
columns_to_check = ['skew','EVA', 'EVB', 'EVD',
                    'new_EVA', 'new_EVB', 'new_EVD',
                    'SDA', 'SDB', 'SDD',
                    'new_SDA', 'new_SDB', 'new_SDD',
                    'original_skewness_A', 'new_skewness_A', 'original_skewness_B', 'new_skewness_B',
                    'original_skewness_D', 'new_skewness_D',
                    'after_diff_CC', 'after_diff_CS', 'after_diff_SC', 'after_diff_SS'
                    ]

# Check which columns are missing
missing_columns = [col for col in columns_to_check if col not in merged_df.columns]

# Print missing columns
print(f"Missing columns: {missing_columns}")

# Select only the columns that are present
columns_to_select = [col for col in columns_to_check if col in merged_df.columns]

df_selected = merged_df[columns_to_select]

df_selected_rounded = df_selected.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)

df_selected_rounded.to_csv('study3_trials_new_short.csv', index=False)

df_selected_rounded


Missing columns: []


  df_selected_rounded = df_selected.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)


Unnamed: 0,skew,EVA,EVB,EVD,new_EVA,new_EVB,new_EVD,SDA,SDB,SDD,...,original_skewness_A,new_skewness_A,original_skewness_B,new_skewness_B,original_skewness_D,new_skewness_D,after_diff_CC,after_diff_CS,after_diff_SC,after_diff_SS
0,rl,40.0,39.75,0.25,40.04,39.58,0.46,21.79,7.15,14.64,...,4.13,1.76,-2.87,-2.69,6.99,4.44,8.7,8.22,13.1,12.63
1,rl,97.28,96.85,0.43,97.32,97.14,0.18,15.02,9.37,5.65,...,2.49,1.95,-4.13,-3.02,6.62,4.97,7.02,7.17,8.69,8.84
2,rl,41.1,41.77,-0.67,41.18,41.58,-0.4,17.87,6.99,10.88,...,4.13,2.19,-5.51,-4.19,9.64,6.38,6.36,6.91,10.75,11.3
3,rl,108.74,117.95,-9.21,108.95,118.28,-9.33,18.76,4.58,14.18,...,3.71,1.94,-4.13,-3.84,7.83,5.79,6.59,6.99,9.36,9.76
4,rl,150.68,161.5,-10.82,150.58,161.35,-10.77,12.48,7.5,4.98,...,3.1,2.22,-2.67,-2.52,5.76,4.74,6.14,5.68,8.03,7.57
5,rl,90.88,100.14,-9.26,90.83,100.33,-9.5,16.55,7.36,9.19,...,3.1,1.9,-3.71,-3.04,6.8,4.94,7.47,7.54,9.18,9.25
6,rl,82.95,102.4,-19.45,82.94,102.56,-19.62,21.58,6.73,14.85,...,4.13,1.93,-2.2,-2.12,6.33,4.05,7.37,7.3,12.15,12.08
7,rl,54.6,74.2,-19.6,54.44,74.29,-19.85,11.45,6.94,4.51,...,2.87,2.22,-2.08,-2.06,4.94,4.28,5.48,5.41,6.52,6.45
8,rl,108.95,129.76,-20.81,108.91,129.64,-20.73,17.22,8.16,9.05,...,4.13,2.06,-3.37,-2.76,7.5,4.82,7.85,7.58,11.52,11.25
9,rl,76.72,55.75,20.97,76.8,55.9,20.9,22.79,8.41,14.38,...,3.1,1.71,-2.2,-2.15,5.3,3.86,7.67,7.55,11.44,11.32
