### use the stimuli of study 2

In [1]:
import pandas as pd
import numpy as np
dat = pd.read_csv('final_data_2.csv')
df = pd.DataFrame(dat)



In [2]:



filtered_df = df[(df['test_part'].isin(['cs'])) & (df['skew'].isin(['lr', 'rl', 'ns'])) & (df['Prolific_ID'].isin(['5638e8a444e8c8000ee86a35']))]

# Define the skewness calculation function
def calculate_skewness(probabilities, outcomes):
    # Mean (expected value)
    mu = np.sum(outcomes * probabilities)
    
    # Variance
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    
    # Third Central Moment
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    
    # Skewness
    skewness = mu_3 / (sigma_squared**(3/2)) if sigma_squared != 0 else 0  # To handle division by zero
    
    return skewness


selected_columns = ['skew', 'P_A1', 'O_A1', 'P_A2', 'O_A2', 'P_B1', 'O_B1', 'P_B2', 'O_B2']
filtered_df = filtered_df[selected_columns]



final_df = filtered_df.assign(
    EVA=lambda x: x['P_A1'] * x['O_A1'] + x['P_A2'] * x['O_A2'],
    EVB=lambda x: x['P_B1'] * x['O_B1'] + x['P_B2'] * x['O_B2'],
    EVD=lambda x: x['EVA'] - x['EVB'],
    SDA=lambda x: np.sqrt(x['P_A1'] * (x['O_A1'] - x['EVA'])**2 + x['P_A2'] * (x['O_A2'] - x['EVA'])**2),
    SDB=lambda x: np.sqrt(x['P_B1'] * (x['O_B1'] - x['EVB'])**2 + x['P_B2'] * (x['O_B2'] - x['EVB'])**2),
    SDD=lambda x: x['SDA'] - x['SDB'],
    skewness_a=lambda x: x.apply(lambda row: calculate_skewness(
        np.array([row['P_A1'], row['P_A2']]), 
        np.array([row['O_A1'], row['O_A2']])
    ), axis=1),
    skewness_b=lambda x: x.apply(lambda row: calculate_skewness(
        np.array([row['P_B1'], row['P_B2']]), 
        np.array([row['O_B1'], row['O_B2']])
    ), axis=1),
    skewness_diff=lambda x: x['skewness_a'] - x['skewness_b']
)



# Bin the 'EVD' column
final_df['evd_bins'] = pd.cut(
    final_df['EVD'], 
    bins=[-float('inf'), -15, -9, 1, 11, 21], 
    labels=["-21 to -19", "-11 to -9", "-1 to 1", "9 to 11", "19 to 21"],
    right=True,
    include_lowest=True
)

# Bin the 'SDD' column
final_df['sdd_bins'] = pd.cut(
    final_df['SDD'], 
    bins=[-float('inf'), 8, 13, 18], 
    labels=["4 to 6", "9 to 11", "14 to 16"],
    right=True,
    include_lowest=True
)

# Sort the DataFrame by 'evd_bins' and then 'sdd_bins'
final_df = final_df.sort_values(by=['evd_bins', 'sdd_bins'])




# Reset the index to remove the existing one and create a new sequential index
final_df = final_df.reset_index(drop=True)

# Add a new 'index' column starting from 1 to 45
final_df['index'] = range(1, 46)

# Move the 'index' column to the beginning of the DataFrame
final_df = final_df[['index'] + [col for col in final_df.columns if col != 'index']]

final_df_new = final_df


In [3]:

from scipy.optimize import minimize
import random
random.seed(42)  # Set the random seed


# Function to calculate skewness
def calculate_skewness(probabilities, outcomes):
    mu = np.sum(outcomes * probabilities)
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    sigma = np.sqrt(sigma_squared)
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    skewness = mu_3 / (sigma**3) if sigma != 0 else 0
    return skewness

# Function to calculate EV, SD, and skewness
def lottery_stats(probs, outcomes):
    ev = np.sum(probs * outcomes)
    sd = np.sqrt(np.sum(probs * (outcomes - ev) ** 2))
    skw = calculate_skewness(probs, outcomes)
    return ev, sd, skw

# Generate unique probabilities that sum to 1
def generate_unique_probs(n, low=0.02, high=0.4):
    possible_probs = np.arange(low, high + 0.001, 0.01)
    possible_probs = np.round(possible_probs, 2)
    possible_probs = possible_probs[(possible_probs >= low) & (possible_probs <= high)]
    # Remove duplicates due to rounding
    possible_probs = np.unique(possible_probs)
    # Ensure there are enough unique probabilities
    if len(possible_probs) < n:
        raise ValueError("Not enough unique probabilities within the specified bounds.")
    for _ in range(1000):  # Maximum attempts
        probs = np.random.choice(possible_probs, size=n, replace=False)
        probs = probs / probs.sum()
        probs = np.round(probs, 2)
        probs[-1] += 1 - probs.sum()  # Adjust to sum to 1
        # Check for uniqueness after rounding and adjustment
        if np.all(probs >= low) and np.all(probs <= high) and len(np.unique(probs)) == n:
            return probs
    raise ValueError("Unable to generate unique probabilities within bounds after rounding.")

# Optimization function
def optimize_lottery(original_probs, original_outcomes):
    # Original lottery statistics
    original_ev = np.sum(original_probs * original_outcomes)
    original_sd = np.sqrt(np.sum(original_probs * (original_outcomes - original_ev) ** 2))
    original_skew = calculate_skewness(original_probs, original_outcomes)

    # Parameters
    n = 7  # Number of outcomes
    iterations = 400  # Number of iterations to run
    delta = 2  # Minimum difference to ensure uniqueness after rounding
    max_gap = 50  # Maximum allowed gap between consecutive outcomes
    gap_weight = 0.01  # Weight for the gap penalty in the objective function

    # Bounds for outcomes
    bounds = [(2, 200)] * n

    # List to store acceptable results
    acceptable_results = []

    # Run the optimization multiple times
    for iteration in range(iterations):
        # Generate unique probabilities
        try:
            fixed_probs = generate_unique_probs(n)
        except ValueError:
            continue  # Skip if unable to generate probabilities

        # Desired average gap between outcomes
        desired_gap = (bounds[0][1] - bounds[0][0]) / (n - 1)

        # Objective function with gap penalty
        def objective_outcomes(outcomes):
            ev, sd, skw = lottery_stats(fixed_probs, outcomes)
            penalty_stats = (ev - original_ev)**2 + (sd - original_sd)**2 + (skw - original_skew)**2
            penalty_gaps = np.sum((np.diff(outcomes) + desired_gap)**2)  # Adjusted for decreasing order
            penalty = penalty_stats + gap_weight * penalty_gaps
            return penalty

        # Initial guess for outcomes (from high to low)
        initial_outcomes = np.linspace(bounds[0][1], bounds[0][0], n)

        # Constraints to ensure outcomes are decreasing and gaps are within limits
        constraints = [
            {'type': 'ineq', 'fun': lambda x, i=i: x[i] - x[i+1] - delta} for i in range(n - 1)
        ] + [
            {'type': 'ineq', 'fun': lambda x, i=i: max_gap - (x[i] - x[i+1])} for i in range(n - 1)
        ]

        # Optimize outcomes
        result = minimize(
            objective_outcomes,
            initial_outcomes,
            bounds=bounds,
            constraints=constraints,
            method='SLSQP',
            options={'ftol': 1e-9, 'disp': False}
        )

        # Check if the optimization was successful
        if not result.success:
            continue  # Skip this iteration if optimization failed

        # Extract optimized outcomes
        optimized_outcomes = np.round(result.x).astype(int)

        # Adjust outcomes to ensure uniqueness after rounding and maintain gaps
        for i in range(1, n):
            # Ensure the gap does not exceed max_gap
            if optimized_outcomes[i - 1] - optimized_outcomes[i] > max_gap:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - max_gap
            # Ensure outcomes are decreasing by at least delta
            if optimized_outcomes[i] >= optimized_outcomes[i - 1]:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - int(delta)
            # Ensure within bounds
            if optimized_outcomes[i] < bounds[i][0]:
                optimized_outcomes[i] = bounds[i][0]

        # Recalculate the statistics with adjusted outcomes
        final_ev, final_sd, final_skew = lottery_stats(fixed_probs, optimized_outcomes)

        # Check if the differences in EV and SD are less than 1
        if abs(final_ev - original_ev) < 1 and abs(final_sd - original_sd) < 1:
            skewness_difference = abs(final_skew - original_skew)
            # Store the acceptable results
            acceptable_results.append({
                'skewness_difference': skewness_difference,
                'outcomes': optimized_outcomes.copy(),
                'probabilities': fixed_probs.copy(),
                'final_ev': final_ev,
                'final_sd': final_sd,
                'final_skew': final_skew,
                'original_ev': original_ev,
                'original_sd': original_sd,
                'original_skew': original_skew
            })

    # After all iterations, select the best result based on minimal skewness difference
    if acceptable_results:
        # Sort the acceptable results by skewness_difference
        acceptable_results.sort(key=lambda x: x['skewness_difference'])
        best_result = acceptable_results[0]
        # Extract the best results
        best_outcomes = best_result['outcomes']
        best_probs = best_result['probabilities']
        final_ev = best_result['final_ev']
        final_sd = best_result['final_sd']
        final_skew = best_result['final_skew']
        # Return the best results and statistics
        return best_outcomes, best_probs, {
            'original_ev': best_result['original_ev'],
            'original_sd': best_result['original_sd'],
            'original_skew': best_result['original_skew'],
            'new_ev': final_ev,
            'new_sd': final_sd,
            'new_skew': final_skew
        }
    else:
        return None, None, None

# Function to process each row of the DataFrame
def process_row(row):
    result_dict = {}
    # For Lottery A
    probs_A = np.array([row['P_A1'], row['P_A2']])
    probs_A = probs_A / probs_A.sum()
    outcomes_A = np.array([row['O_A1'], row['O_A2']])

    # Original statistics for Lottery A
    original_ev_A, original_sd_A, original_skew_A = lottery_stats(probs_A, outcomes_A)
    result_dict['simple_EVA'] = original_ev_A
    result_dict['simple_SDA'] = original_sd_A
    result_dict['simple_skewness_A'] = original_skew_A

    # Optimize Lottery A
    best_outcomes_A, best_probs_A, stats_A = optimize_lottery(probs_A, outcomes_A)

    if best_outcomes_A is not None:
        # Store new outcomes and probabilities
        for i in range(len(best_outcomes_A)):
            result_dict[f'complex_OA{i+1}'] = int(best_outcomes_A[i])  # Ensure integer outcomes
            result_dict[f'complex_PA{i+1}'] = best_probs_A[i]
        # Store new statistics
        result_dict['complex_EVA'] = stats_A['new_ev']
        result_dict['complex_SDA'] = stats_A['new_sd']
        result_dict['complex_skewness_A'] = stats_A['new_skew']
    else:
        for i in range(7):
            result_dict[f'complex_OA{i+1}'] = np.nan
            result_dict[f'complex_PA{i+1}'] = np.nan
        result_dict['complex_EVA'] = np.nan
        result_dict['complex_SDA'] = np.nan
        result_dict['complex_skewness_A'] = np.nan

    # For Lottery B
    probs_B = np.array([row['P_B1'], row['P_B2']])
    probs_B = probs_B / probs_B.sum()
    outcomes_B = np.array([row['O_B1'], row['O_B2']])

    # Original statistics for Lottery B
    original_ev_B, original_sd_B, original_skew_B = lottery_stats(probs_B, outcomes_B)
    result_dict['simple_EVB'] = original_ev_B
    result_dict['simple_SDB'] = original_sd_B
    result_dict['simple_skewness_B'] = original_skew_B

    # Optimize Lottery B
    best_outcomes_B, best_probs_B, stats_B = optimize_lottery(probs_B, outcomes_B)

    if best_outcomes_B is not None:
        # Store new outcomes and probabilities
        for i in range(len(best_outcomes_B)):
            result_dict[f'complex_OB{i+1}'] = int(best_outcomes_B[i])  # Ensure integer outcomes
            result_dict[f'complex_PB{i+1}'] = best_probs_B[i]
        # Store new statistics
        result_dict['complex_EVB'] = stats_B['new_ev']
        result_dict['complex_SDB'] = stats_B['new_sd']
        result_dict['complex_skewness_B'] = stats_B['new_skew']
    else:
        for i in range(7):
            result_dict[f'complex_OB{i+1}'] = np.nan
            result_dict[f'complex_PB{i+1}'] = np.nan
        result_dict['complex_EVB'] = np.nan
        result_dict['complex_SDB'] = np.nan
        result_dict['complex_skewness_B'] = np.nan

    # Differences between Lotteries A and B (Original)
    result_dict['simple_EVD'] = result_dict['simple_EVA'] - result_dict['simple_EVB']
    result_dict['simple_SDD'] = result_dict['simple_SDA'] - result_dict['simple_SDB']
    result_dict['simple_skewness_D'] = result_dict['simple_skewness_A'] - result_dict['simple_skewness_B']

    # Differences between Lotteries A and B (New)
    if best_outcomes_A is not None and best_outcomes_B is not None:
        result_dict['complex_EVD'] = result_dict['complex_EVA'] - result_dict['complex_EVB']
        result_dict['complex_SDD'] = result_dict['complex_SDA'] - result_dict['complex_SDB']
        result_dict['complex_skewness_D'] = result_dict['complex_skewness_A'] - result_dict['complex_skewness_B']
    else:
        result_dict['complex_EVD'] = np.nan
        result_dict['complex_SDD'] = np.nan
        result_dict['complex_skewness_D'] = np.nan

    return pd.Series(result_dict)

# Apply the process_row function to each row
new_columns = final_df_new.apply(process_row, axis=1)

# Concatenate the new columns to the original DataFrame
final_df_new = pd.concat([final_df_new, new_columns], axis=1)

# Convert outcome columns to nullable integer type
outcome_columns = [f'complex_OA{i+1}' for i in range(7)] + [f'complex_OB{i+1}' for i in range(7)]
final_df_new[outcome_columns] = final_df_new[outcome_columns].astype('Int64')



In [7]:

# Ensure the columns are present in the merged_df DataFrame
columns_to_check = ['skew','EVA', 'EVB', 'EVD',
                    'complex_EVA', 'complex_EVB', 'complex_EVD',
                    'SDA', 'SDB', 'SDD',
                    'complex_SDA', 'complex_SDB', 'complex_SDD',
                    'simple_skewness_A', 'complex_skewness_A', 'simple_skewness_B', 'complex_skewness_B',
                    'simple_skewness_D', 'complex_skewness_D'
                    ]

# Check which columns are missing
missing_columns = [col for col in columns_to_check if col not in final_df_new.columns]

# Print missing columns
print(f"Missing columns: {missing_columns}")

# Select only the columns that are present
columns_to_select = [col for col in columns_to_check if col in final_df_new.columns]

df_selected = final_df_new[columns_to_select]

df_selected_rounded = df_selected.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)




df_selected_rounded.sort_values(by='skew', ascending=True)


Missing columns: []


  df_selected_rounded = df_selected.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)


Unnamed: 0,skew,EVA,EVB,EVD,complex_EVA,complex_EVB,complex_EVD,SDA,SDB,SDD,complex_SDA,complex_SDB,complex_SDD,simple_skewness_A,complex_skewness_A,simple_skewness_B,complex_skewness_B,simple_skewness_D,complex_skewness_D
16,lr,82.34,91.96,-9.62,82.32,91.77,-9.45,33.42,18.85,14.58,33.84,19.68,14.16,-1.67,-1.06,2.34,1.94,-4.0,-3.0
31,lr,41.4,31.55,9.85,41.71,31.48,10.23,15.69,5.63,10.06,16.31,6.59,9.72,-1.58,-1.16,1.76,1.76,-3.34,-2.92
21,lr,71.72,71.97,-0.25,71.67,72.04,-0.37,18.04,8.45,9.6,18.82,9.3,9.52,-2.08,-1.59,2.49,2.51,-4.57,-4.09
14,lr,117.66,127.23,-9.57,117.8,127.09,-9.29,33.74,23.88,9.86,34.18,24.51,9.68,-1.58,-1.13,2.2,1.48,-3.78,-2.61
43,lr,68.0,47.42,20.58,68.11,47.65,20.46,24.37,9.77,14.61,24.95,10.75,14.19,-2.34,-1.27,1.76,1.74,-4.1,-3.01
36,lr,83.35,63.36,19.99,83.28,63.39,19.89,16.9,12.25,4.66,17.68,13.2,4.48,-1.76,-1.69,3.37,2.65,-5.13,-4.34
11,lr,51.33,60.88,-9.55,51.32,60.81,-9.49,19.16,14.11,5.05,19.78,14.71,5.07,-1.76,-1.3,4.69,3.35,-6.45,-4.65
8,lr,93.48,113.01,-19.53,93.35,113.11,-19.76,34.98,19.91,15.07,35.2,20.54,14.66,-2.2,-1.16,1.76,1.37,-3.96,-2.53
25,lr,89.33,89.08,0.25,89.11,89.12,-0.01,19.84,4.77,15.08,20.45,5.69,14.76,-2.2,-1.54,1.85,1.71,-4.06,-3.25
41,lr,64.82,45.36,19.46,64.74,45.4,19.34,19.59,9.1,10.49,20.14,9.96,10.18,-1.67,-1.38,2.34,2.27,-4.0,-3.64


In [8]:


import math



def Pweight(p, alpha=1, gamma=0.6):
    if p >= 1:
        return 1
    elif p <= 0:
        return 0
    else:
        return math.exp(-alpha * ((-math.log(p)) ** gamma))
    
def apply_probability_weighting(probs):
    weighted_probs = []
    cumulative_prob = 0
    for prob in probs:
        # Since cumulative_prob + prob might exceed 1, ensure it does not
        cumulative_prob_next = min(cumulative_prob + prob, 1)
        weighted_prob = Pweight(cumulative_prob_next) - Pweight(cumulative_prob)
        weighted_probs.append(weighted_prob)
        cumulative_prob = cumulative_prob_next
    return weighted_probs



def calculate_EU(weighted_probs, outcomes):
    return sum(w * o for w, o in zip(weighted_probs, outcomes))

def validate_probabilities(probs):
    # Check if all probabilities are within the valid range (0, 1]
    return all(0 < p <= 1 for p in probs)

def apply_probability_weighting_and_calculate_EUA(row):
    # Extract the probabilities and outcomes from the row
    probs = row[['complex_PA1', 'complex_PA2', 'complex_PA3', 'complex_PA4', 'complex_PA5', 'complex_PA6', 'complex_PA7']].astype(float).tolist()
    outcomes = row[['complex_OA1', 'complex_OA2', 'complex_OA3', 'complex_OA4', 'complex_OA5', 'complex_OA6', 'complex_OA7']].astype(float).tolist()
    
    # Validate probabilities
    if not validate_probabilities(probs):
        # Invalid probabilities detected, return NaN or handle as desired
        return np.nan
    
    # Apply the probability weighting function
    weighted_probs = apply_probability_weighting(probs)
    
    # Calculate the expected utility
    EUA = calculate_EU(weighted_probs, outcomes)
    
    return EUA

def apply_probability_weighting_and_calculate_EUB(row):
    # Extract the probabilities and outcomes from the row
    probs = row[['complex_PB1', 'complex_PB2', 'complex_PB3', 'complex_PB4', 'complex_PB5', 'complex_PB6', 'complex_PB7']].astype(float).tolist()
    outcomes = row[['complex_OB1', 'complex_OB2', 'complex_OB3', 'complex_OB4', 'complex_OB5', 'complex_OB6', 'complex_OB7']].astype(float).tolist()
    
    # Validate probabilities
    if not validate_probabilities(probs):
        # Invalid probabilities detected, return NaN or handle as desired
        return np.nan
    
    # Apply the probability weighting function
    weighted_probs = apply_probability_weighting(probs)
    
    # Calculate the expected utility
    EUB = calculate_EU(weighted_probs, outcomes)
    
    return EUB

def apply_probability_weighting_and_calculate_EUA_simple(row):
    # Extract the probabilities and outcomes from the row
    probs = row[['P_A1', 'P_A2']].astype(float).tolist()
    outcomes = row[['O_A1', 'O_A2']].astype(float).tolist()
    
    # Validate probabilities
    if not validate_probabilities(probs):
        # Invalid probabilities detected, return NaN or handle as desired
        return np.nan
    
    # Apply the probability weighting function
    weighted_probs = apply_probability_weighting(probs)
    
    # Calculate the expected utility
    EUA_simple = calculate_EU(weighted_probs, outcomes)
    
    return EUA_simple

def apply_probability_weighting_and_calculate_EUB_simple(row):
    # Extract the probabilities and outcomes from the row
    probs = row[['P_B1', 'P_B2']].astype(float).tolist()
    outcomes = row[['O_B1', 'O_B2']].astype(float).tolist()
    
    # Validate probabilities
    if not validate_probabilities(probs):
        # Invalid probabilities detected, return NaN or handle as desired
        return np.nan
    
    # Apply the probability weighting function
    weighted_probs = apply_probability_weighting(probs)
    
    # Calculate the expected utility
    EUB_simple = calculate_EU(weighted_probs, outcomes)
    
    return EUB_simple


In [9]:
df_nona = final_df_new.dropna()

# Apply the functions row-wise to compute the new columns
df_nona['EUA'] = df_nona.apply(apply_probability_weighting_and_calculate_EUA, axis=1)
df_nona['EUB'] = df_nona.apply(apply_probability_weighting_and_calculate_EUB, axis=1)
df_nona['EUA_simple'] = df_nona.apply(apply_probability_weighting_and_calculate_EUA_simple, axis=1)
df_nona['EUB_simple'] = df_nona.apply(apply_probability_weighting_and_calculate_EUB_simple, axis=1)

# Remove rows with NaN in EUA or EUB due to invalid probabilities, or handle as desired
df_nona = df_nona.dropna(subset=['EUA', 'EUB', 'EUA_simple', 'EUB_simple'])

# Compute the differences
df_nona['EUD_CC'] = df_nona['EUA'] - df_nona['EUB']
df_nona['EUD_SC'] = df_nona['EUA_simple'] - df_nona['EUB']
df_nona['EUD_CS'] = df_nona['EUA'] - df_nona['EUB_simple']
df_nona['EUD_SS'] = df_nona['EUA_simple'] - df_nona['EUB_simple']

df_nona['after_diff_CC'] = df_nona['EUD_CC'] - df_nona['EVD']
df_nona['after_diff_CS'] = df_nona['EUD_CS'] - df_nona['EVD']
df_nona['after_diff_SC'] = df_nona['EUD_SC'] - df_nona['EVD']
df_nona['after_diff_SS'] = df_nona['EUD_SS'] - df_nona['EVD']




In [10]:

df_nona_sorted = df_nona.sort_values(by='skew', ascending=True)
df_nona_sorted.to_csv('study3_trials_old.csv', index=False)

In [11]:

# Ensure the columns are present in the merged_df DataFrame
columns_to_check = ['skew','EVA', 'EVB', 'EVD',
                    'complex_EVA', 'complex_EVB', 'complex_EVD',
                    'SDA', 'SDB', 'SDD',
                    'complex_SDA', 'complex_SDB', 'complex_SDD',
                    'simple_skewness_A', 'complex_skewness_A', 'simple_skewness_B', 'complex_skewness_B',
                    'simple_skewness_D', 'complex_skewness_D',
                    'after_diff_CC', 'after_diff_CS', 'after_diff_SC', 'after_diff_SS'
                    ]

# Check which columns are missing
missing_columns = [col for col in columns_to_check if col not in df_nona.columns]

# Print missing columns
print(f"Missing columns: {missing_columns}")

# Select only the columns that are present
columns_to_select = [col for col in columns_to_check if col in df_nona.columns]

df_selected = df_nona[columns_to_select]

df_selected_rounded = df_selected.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)




df_selected_rounded_sorted = df_selected_rounded.sort_values(by='skew', ascending=True)

df_selected_rounded_sorted.to_csv('study3_trials_old_short.csv', index=False)


Missing columns: []


  df_selected_rounded = df_selected.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)


In [2]:
import pandas as pd
import numpy as np


dat = pd.read_csv('study3_trials_old.csv')
df = pd.DataFrame(dat)

In [16]:
df

def print_original_and_new_outcomes(data, row_index):
    # Extract the row data for the specified index
    row_data = data.iloc[row_index]

    # Extract original and new outcomes and probabilities for options A and B
    outcomes_A_orig = [row_data['O_A1'], row_data['O_A2']]
    probabilities_A_orig = [row_data['P_A1'], row_data['P_A2']]
    outcomes_B_orig = [row_data['O_B1'], row_data['O_B2']]
    probabilities_B_orig = [row_data['P_B1'], row_data['P_B2']]

    outcomes_A_new = [row_data[f'complex_OA{i}'] for i in range(1, 8)]
    probabilities_A_new = [row_data[f'complex_PA{i}'] for i in range(1, 8)]
    outcomes_B_new = [row_data[f'complex_OB{i}'] for i in range(1, 8)]
    probabilities_B_new = [row_data[f'complex_PB{i}'] for i in range(1, 8)]
    skew = row_data['skew']

    # Define a helper function to print outcomes and probabilities in a formatted way
    def print_outcomes_and_probabilities(outcomes, probabilities, label):
        print(f"{label}:")
        for outcome, probability in zip(outcomes, probabilities):
            print(f"  Outcome: {outcome}, Probability: {probability:.2f}")
        print()

    # Print original and new outcomes and probabilities
    print(f"Outcomes and Probabilities for Row {row_index} {skew}")
    print("====================================")

    print_outcomes_and_probabilities(outcomes_A_orig, probabilities_A_orig, "simple A")
    print_outcomes_and_probabilities(outcomes_B_orig, probabilities_B_orig, "simple B")
    print_outcomes_and_probabilities(outcomes_A_new, probabilities_A_new, "complex A")
    print_outcomes_and_probabilities(outcomes_B_new, probabilities_B_new, "complex B")



print_original_and_new_outcomes(df, 1)



Outcomes and Probabilities for Row 1 lr
simple A:
  Outcome: 49, Probability: 0.81
  Outcome: 9, Probability: 0.19

simple B:
  Outcome: 44, Probability: 0.17
  Outcome: 29, Probability: 0.83

complex A:
  Outcome: 59, Probability: 0.25
  Outcome: 48, Probability: 0.28
  Outcome: 43, Probability: 0.02
  Outcome: 39, Probability: 0.26
  Outcome: 30, Probability: 0.04
  Outcome: 19, Probability: 0.06
  Outcome: 2, Probability: 0.09

complex B:
  Outcome: 62, Probability: 0.02
  Outcome: 37, Probability: 0.26
  Outcome: 34, Probability: 0.17
  Outcome: 32, Probability: 0.05
  Outcome: 30, Probability: 0.12
  Outcome: 28, Probability: 0.13
  Outcome: 24, Probability: 0.25



new stimuli

In [33]:
import numpy as np
import pandas as pd
import random

# Define the skewness calculation function
def calculate_skewness(probabilities, outcomes):
    probabilities = np.array(probabilities)
    outcomes = np.array(outcomes)
    mu = np.sum(outcomes * probabilities)
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    skewness = mu_3 / (sigma_squared**(3/2)) if sigma_squared != 0 else 0
    return skewness

# Generate a set of lottery pairs with given constraints
def generate_lottery_pairs():
    ev_levels = [-20, -10, 0, 10, 20]
    sd_levels = [5, 10, 15]
    skew_levels = ["right_vs_left", "left_vs_right", "no_skew_vs_no_skew"]

    data = []

    for ev in ev_levels:
        for sd in sd_levels:
            for skew in skew_levels:
                while True:
                    # Generate outcomes and probabilities for Option A
                    O_A1, O_A2 = sorted(random.sample(range(2, 200), 2))
                    P_A1 = round(random.uniform(0.02, 0.98), 2)
                    P_A2 = round(1 - P_A1, 2)

                    # Ensure probabilities are not 0.5
                    if P_A1 == 0.5 or P_A2 == 0.5:
                        continue

                    # Generate outcomes and probabilities for Option B
                    O_B1, O_B2 = sorted(random.sample(range(2, 200), 2))
                    P_B1 = round(random.uniform(0.02, 0.98), 2)
                    P_B2 = round(1 - P_B1, 2)

                    # Ensure probabilities are not 0.5
                    if P_B1 == 0.5 or P_B2 == 0.5:
                        continue
                    
                    # Ensure P_A1 != P_B1 and P_A1 != P_B2
                    if P_A1 == P_B1 or P_A1 == P_B2:
                        continue

                    # Ensure O_A1 != O_B1 and O_A1 != O_B2
                    if O_A1 == O_B1 or O_A1 == O_B2:
                        continue

                    # Calculate expected values and differences
                    EV_A = P_A1 * O_A1 + P_A2 * O_A2
                    EV_B = P_B1 * O_B1 + P_B2 * O_B2
                    EV_diff = round(EV_A - EV_B, 2)

                    # Calculate standard deviations and differences
                    SD_A = np.sqrt(P_A1 * (O_A1 - EV_A)**2 + P_A2 * (O_A2 - EV_A)**2)
                    SD_B = np.sqrt(P_B1 * (O_B1 - EV_B)**2 + P_B2 * (O_B2 - EV_B)**2)
                    SD_diff = round(SD_A - SD_B, 2)

                    # Calculate skewness
                    skew_A = calculate_skewness([P_A1, P_A2], [O_A1, O_A2])
                    skew_B = calculate_skewness([P_B1, P_B2], [O_B1, O_B2])
                    skew_diff = skew_A - skew_B

                    # Define skewness levels
                    if skew == "right_vs_left" and (2 < skew_A < 5 and -5 < skew_B < -2) and (abs(skew_A) - abs(skew_B) < 1):
                        skew_valid = True
                    elif skew == "left_vs_right" and (-5 < skew_A < -2 and 2 < skew_B < 5) and (abs(skew_A) - abs(skew_B) < 1):
                        skew_valid = True
                    elif skew == "no_skew_vs_no_skew" and (-0.5 <= skew_A <= 0.5 and -0.5 <= skew_B <= 0.5 and abs(skew_A) - abs(skew_B) < 0.5):
                        skew_valid = True
                    else:
                        skew_valid = False

                    # Check EV_diff and SD_diff ranges
                    if skew_valid and \
                       abs(EV_diff - ev) <= 1 and \
                       abs(SD_diff - sd) <= 1:
                        data.append({
                            "P_A1": P_A1, "O_A1": O_A1, "P_A2": P_A2, "O_A2": O_A2,
                            "P_B1": P_B1, "O_B1": O_B1, "P_B2": P_B2, "O_B2": O_B2,
                            "EV_A": EV_A, "EV_B": EV_B, "EV_diff": EV_diff,
                            "SD_A": SD_A, "SD_B": SD_B, "SD_diff": SD_diff,
                            "skew_A": skew_A, "skew_B": skew_B, 'skew_diff': skew_A - skew_B,
                            "skew_level": skew,
                            "ev_level": ev,
                            "sd_level": sd
                        })
                        break

    return pd.DataFrame(data)

# Generate 45 lottery pairs
lottery_pairs = generate_lottery_pairs()
lottery_pairs

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,EV_diff,SD_A,SD_B,SD_diff,skew_A,skew_B,skew_diff,skew_level,ev_level,sd_level
0,0.9,88,0.1,194,0.12,47,0.88,129,98.6,119.16,-20.56,31.8,26.646846,5.15,2.666667,-2.338738,5.005405,right_vs_left,-20,5
1,0.1,29,0.9,71,0.93,84,0.07,115,66.8,86.17,-19.37,12.6,7.909558,4.69,-2.666667,3.370606,-6.037272,left_vs_right,-20,5
2,0.4,53,0.6,160,0.51,91,0.49,185,117.2,137.06,-19.86,52.41908,46.990599,5.43,-0.408248,0.040008,-0.448256,no_skew_vs_no_skew,-20,5
3,0.89,120,0.11,176,0.14,129,0.86,149,126.16,146.2,-20.04,17.521826,6.939741,10.58,2.492891,-2.075006,4.567896,right_vs_left,-20,10
4,0.06,10,0.94,108,0.96,120,0.04,183,102.12,122.52,-20.4,23.27371,12.345428,10.93,-3.705468,4.694855,-8.400324,left_vs_right,-20,10
5,0.38,36,0.62,56,0.61,68,0.39,69,48.4,68.39,-19.99,9.707729,0.48775,9.22,-0.494451,0.451051,-0.945502,no_skew_vs_no_skew,-20,10
6,0.88,56,0.12,139,0.04,26,0.96,89,65.96,86.48,-20.52,26.971808,12.345428,14.63,2.338738,-4.694855,7.033594,right_vs_left,-20,15
7,0.09,73,0.91,138,0.94,152,0.06,163,132.15,152.66,-20.51,18.601814,2.612355,15.99,-2.865312,3.705468,-6.57078,left_vs_right,-20,15
8,0.52,79,0.48,173,0.58,117,0.42,181,124.12,143.88,-19.76,46.962385,31.587744,15.37,0.080064,0.324176,-0.244112,no_skew_vs_no_skew,-20,15
9,0.9,106,0.1,162,0.09,83,0.91,125,111.6,121.22,-9.62,16.8,12.019634,4.78,2.666667,-2.865312,5.531979,right_vs_left,-10,5


In [38]:

lottery_pairs_sorted = lottery_pairs.sort_values(by=['skew_level', 'ev_level', 'sd_level'], ascending=[True, True, True])
lottery_pairs_sorted

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,EV_diff,SD_A,SD_B,SD_diff,skew_A,skew_B,skew_diff,skew_level,ev_level,sd_level
1,0.1,29,0.9,71,0.93,84,0.07,115,66.8,86.17,-19.37,12.6,7.909558,4.69,-2.666667,3.370606,-6.037272,left_vs_right,-20,5
4,0.06,10,0.94,108,0.96,120,0.04,183,102.12,122.52,-20.4,23.27371,12.345428,10.93,-3.705468,4.694855,-8.400324,left_vs_right,-20,10
7,0.09,73,0.91,138,0.94,152,0.06,163,132.15,152.66,-20.51,18.601814,2.612355,15.99,-2.865312,3.705468,-6.57078,left_vs_right,-20,15
10,0.1,17,0.9,69,0.86,69,0.14,98,63.8,73.06,-9.26,15.6,10.062624,5.54,-2.666667,2.075006,-4.741672,left_vs_right,-10,5
13,0.13,90,0.87,159,0.88,155,0.12,193,150.03,159.56,-9.53,23.204937,12.348538,10.86,-2.200394,2.338738,-4.539132,left_vs_right,-10,10
16,0.13,83,0.87,142,0.96,144,0.04,173,134.33,145.16,-10.83,19.841903,5.682816,14.16,-2.200394,4.694855,-6.895249,left_vs_right,-10,15
19,0.1,135,0.9,164,0.93,160,0.07,174,161.1,160.98,0.12,8.7,3.572058,5.13,-2.666667,3.370606,-6.037272,left_vs_right,0,5
22,0.1,2,0.9,106,0.94,91,0.06,183,95.6,96.52,-0.92,31.2,21.848789,9.35,-2.666667,3.705468,-6.372135,left_vs_right,0,10
25,0.13,38,0.87,114,0.91,101,0.09,136,104.12,104.15,-0.03,25.559061,10.016362,15.54,-2.200394,2.865312,-5.065706,left_vs_right,0,15
28,0.1,52,0.9,71,0.96,59,0.04,64,69.1,59.2,9.9,5.7,0.979796,4.72,-2.666667,4.694855,-7.361522,left_vs_right,10,5


In [45]:
import numpy as np
import pandas as pd
import random

# Define the skewness calculation function
def calculate_skewness(probabilities, outcomes):
    probabilities = np.array(probabilities)
    outcomes = np.array(outcomes)
    mu = np.sum(outcomes * probabilities)
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    skewness = mu_3 / (sigma_squared**(3/2)) if sigma_squared != 0 else 0
    return skewness

# Generate a set of lottery pairs with given constraints
def generate_lottery_pairs():
    ev_levels = [-20, -10, 0, 10, 20]
    sd_levels = [5, 10, 15]
    skew_levels = ["right_vs_left", "left_vs_right", "no_skew_vs_no_skew"]

    data = []

    for ev in ev_levels:
        for sd in sd_levels:
            for skew in skew_levels:
                max_attempts = 1000000
                for _ in range(max_attempts):
                    # Generate outcomes and probabilities for Option A
                    O_A1, O_A2 = sorted(random.sample(range(2, 200), 2))
                    P_A1 = round(random.uniform(0.02, 0.98), 2)
                    P_A2 = round(1 - P_A1, 2)

                    # Generate outcomes and probabilities for Option B
                    O_B1, O_B2 = sorted(random.sample(range(2, 200), 2))
                    P_B1 = round(random.uniform(0.02, 0.98), 2)
                    P_B2 = round(1 - P_B1, 2)

                    # Calculate expected values and differences
                    EV_A = P_A1 * O_A1 + P_A2 * O_A2
                    EV_B = P_B1 * O_B1 + P_B2 * O_B2
                    EV_diff = round(EV_A - EV_B, 2)

                    # Calculate standard deviations and differences
                    SD_A = np.sqrt(P_A1 * (O_A1 - EV_A)**2 + P_A2 * (O_A2 - EV_A)**2)
                    SD_B = np.sqrt(P_B1 * (O_B1 - EV_B)**2 + P_B2 * (O_B2 - EV_B)**2)
                    SD_diff = round(SD_A - SD_B, 2)

                    # Calculate skewness
                    skew_A = calculate_skewness([P_A1, P_A2], [O_A1, O_A2])
                    skew_B = calculate_skewness([P_B1, P_B2], [O_B1, O_B2])
                    skew_diff = skew_A - skew_B

                    # Define skewness levels with corrected absolute difference
                    skew_valid = False
                    skew_abs_diff = abs(abs(skew_A) - abs(skew_B))
                    if skew == "right_vs_left":
                        if (2 < skew_A and skew_B < -2) and skew_abs_diff < 1:
                            skew_valid = True
                    elif skew == "left_vs_right":
                        if (skew_A < -2 and 2 < skew_B) and skew_abs_diff < 1:
                            skew_valid = True
                    elif skew == "no_skew_vs_no_skew":
                        if (-0.5 <= skew_A <= 0.5 and -0.5 <= skew_B <= 0.5) and skew_abs_diff < 0.5:
                            skew_valid = True

                    # Check EV_diff and SD_diff ranges with relaxed thresholds
                    if skew_valid and \
                       abs(EV_diff - ev) <= 1 and \
                       abs(SD_diff - sd) <= 1:
                        data.append({
                            "P_A1": P_A1, "O_A1": O_A1, "P_A2": P_A2, "O_A2": O_A2,
                            "P_B1": P_B1, "O_B1": O_B1, "P_B2": P_B2, "O_B2": O_B2,
                            "EV_A": EV_A, "EV_B": EV_B, "EV_diff": EV_diff,
                            "SD_A": SD_A, "SD_B": SD_B, "SD_diff": SD_diff,
                            "skew_A": skew_A, "skew_B": skew_B, 'skew_diff': skew_diff,
                            "skew_level": skew,
                            "ev_level": ev,
                            "sd_level": sd
                        })
                        break
                else:
                    print(f"No valid lottery pair found for ev={ev}, sd={sd}, skew={skew}")

    return pd.DataFrame(data)

# Generate 45 lottery pairs
lottery_pairs = generate_lottery_pairs()

lottery_pairs_sorted = lottery_pairs.sort_values(by=['skew_level', 'ev_level', 'sd_level'], ascending=[True, True, True])
lottery_pairs_sorted

No valid lottery pair found for ev=-20, sd=15, skew=left_vs_right
No valid lottery pair found for ev=-10, sd=15, skew=left_vs_right


Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,EV_diff,SD_A,SD_B,SD_diff,skew_A,skew_B,skew_diff,skew_level,ev_level,sd_level
1,0.06,79,0.94,130,0.91,144,0.09,168,126.94,146.16,-19.22,12.111829,6.868362,5.24,-3.705468,2.865312,-6.57078,left_vs_right,-20,5
4,0.09,73,0.91,108,0.92,125,0.08,128,104.85,125.24,-20.39,10.016362,0.81388,9.2,-2.865312,3.096281,-5.961593,left_vs_right,-20,10
9,0.03,18,0.97,62,0.96,71,0.04,79,60.68,71.32,-10.64,7.505838,1.567673,5.94,-5.510378,4.694855,-10.205233,left_vs_right,-10,5
12,0.04,33,0.96,120,0.97,125,0.03,161,116.52,126.08,-9.56,17.048449,6.14114,10.91,-4.694855,5.510378,-10.205233,left_vs_right,-10,10
17,0.13,93,0.87,146,0.88,134,0.12,173,139.11,138.68,0.43,17.824082,12.6735,5.15,-2.200394,2.338738,-4.539132,left_vs_right,0,5
20,0.07,71,0.93,148,0.94,140,0.06,180,142.61,142.4,0.21,19.64632,9.499474,10.15,-3.370606,3.705468,-7.076074,left_vs_right,0,10
23,0.14,26,0.86,133,0.86,109,0.14,172,118.02,117.82,0.2,37.127612,21.860183,15.27,-2.075006,2.075006,-4.150011,left_vs_right,0,15
26,0.09,10,0.91,97,0.94,74,0.06,155,89.17,78.86,10.31,24.897813,19.236434,5.66,-2.865312,3.705468,-6.57078,left_vs_right,10,5
29,0.1,88,0.9,169,0.91,147,0.09,194,160.9,151.23,9.67,24.3,13.450543,10.85,-2.666667,2.865312,-5.531979,left_vs_right,10,10
32,0.05,36,0.95,145,0.95,127,0.05,170,139.55,129.15,10.4,23.755999,9.371633,14.38,-4.129483,4.129483,-8.258966,left_vs_right,10,15


## now generate multiple stimuli for each level

In [None]:
import numpy as np
import pandas as pd
import random

# Define the skewness calculation function
def calculate_skewness(probabilities, outcomes):
    probabilities = np.array(probabilities)
    outcomes = np.array(outcomes)
    mu = np.sum(outcomes * probabilities)
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    skewness = mu_3 / (sigma_squared**(3/2)) if sigma_squared > 0 else 0
    return skewness

# Generate a set of lottery pairs with given constraints
def generate_lottery_pairs():
    ev_levels = [-20, -10, 0, 10, 20]
    sd_levels = [5, 10, 15]
    skew_levels = ["right_vs_left", "left_vs_right"]

    data = []

    for skew in skew_levels:
        for ev in ev_levels:
            for sd in sd_levels:
                max_trials = 30 if skew in ["right_vs_left", "left_vs_right"]
                trials_found = 0
                while trials_found < max_trials:
                    O_A1, O_A2 = sorted(random.sample(range(2, 200), 2))
                    P_A1 = round(random.uniform(0.02, 0.98), 2)
                    P_A2 = round(1 - P_A1, 2)

                    O_B1, O_B2 = sorted(random.sample(range(2, 200), 2))
                    P_B1 = round(random.uniform(0.02, 0.98), 2)
                    P_B2 = round(1 - P_B1, 2)

                    EV_A = P_A1 * O_A1 + P_A2 * O_A2
                    EV_B = P_B1 * O_B1 + P_B2 * O_B2
                    EV_diff = round(EV_A - EV_B, 2)

                    SD_A = np.sqrt(P_A1 * (O_A1 - EV_A)**2 + P_A2 * (O_A2 - EV_A)**2)
                    SD_B = np.sqrt(P_B1 * (O_B1 - EV_B)**2 + P_B2 * (O_B2 - EV_B)**2)
                    SD_diff = round(SD_A - SD_B, 2)

                    skew_A = calculate_skewness([P_A1, P_A2], [O_A1, O_A2])
                    skew_B = calculate_skewness([P_B1, P_B2], [O_B1, O_B2])
                    skew_diff = skew_A - skew_B

                    skew_valid = False
                    skew_abs_diff = abs(abs(skew_A) - abs(skew_B))

                    if skew == "right_vs_left":
                        if (skew_A > 1.5 and skew_A < 3 and skew_B < -1.5 and skew_B > -3) and skew_abs_diff < 0.5:
                            skew_valid = True
                    elif skew == "left_vs_right":
                        if (skew_A < -1.5 and skew_A > -3 and skew_B > 1.5 and skew_B < 3) and skew_abs_diff < 0.5:
                            skew_valid = True
                    elif skew == "no_skew_vs_no_skew":
                        if (-0.25 <= skew_A <= 0.25 and -0.25 <= skew_B <= 0.25) and skew_abs_diff < 0.5:
                            skew_valid = True

                    if skew_valid and \
                       abs(EV_diff - ev) <= 1 and \
                       abs(SD_diff - sd) <= 1:
                        data.append({
                            "P_A1": P_A1, "O_A1": O_A1, "P_A2": P_A2, "O_A2": O_A2,
                            "P_B1": P_B1, "O_B1": O_B1, "P_B2": P_B2, "O_B2": O_B2,
                            "EV_A": EV_A, "EV_B": EV_B, "EV_diff": EV_diff,
                            "SD_A": SD_A, "SD_B": SD_B, "SD_diff": SD_diff,
                            "skew_A": skew_A, "skew_B": skew_B, 'skew_diff': skew_diff,
                            "skew_level": skew,
                            "ev_level": ev,
                            "sd_level": sd
                        })
                        trials_found += 1

    return pd.DataFrame(data)

# Generate the lottery pairs
lottery_pairs = generate_lottery_pairs()

# Sort and reset index
lottery_pairs_sorted = lottery_pairs.sort_values(by=['skew_level', 'ev_level', 'sd_level'], ascending=[True, True, True])
lottery_pairs_sorted.reset_index(drop=True, inplace=True)

# Display the sorted dataframe
lottery_pairs_sorted

KeyboardInterrupt: 

In [112]:
import numpy as np
import pandas as pd
import random

# Define the skewness calculation function
def calculate_skewness(probabilities, outcomes):
    probabilities = np.array(probabilities)
    outcomes = np.array(outcomes)
    mu = np.sum(outcomes * probabilities)
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    if sigma_squared <= 0:
        return 0
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    return mu_3 / (sigma_squared**(3/2))

# Precompute thresholds for skewness to avoid recalculating in loops
def is_skew_valid(skew_A, skew_B, skew, skew_abs_diff):
    if skew == "right_vs_left":
        return 1.5 < skew_A < 3 and -3 < skew_B < -1.5 and skew_abs_diff < 0.5
    elif skew == "left_vs_right":
        return -3 < skew_A < -1.5 and 1.5 < skew_B < 3 and skew_abs_diff < 0.5
    elif skew == "no_skew_vs_no_skew":
        return -0.25 <= skew_A <= 0.25 and -0.25 <= skew_B <= 0.25 and skew_abs_diff < 0.5
    return False

# Generate lottery pairs
def generate_lottery_pairs():
    ev_levels = [-20, -10, 0, 10, 20]
    sd_levels = [5, 10, 15]
    skew_levels = ["right_vs_left", "left_vs_right", "no_skew_vs_no_skew"]

    data = []
    for skew in skew_levels:
        for ev in ev_levels:
            for sd in sd_levels:
                max_trials = 30 if skew in ["right_vs_left", "left_vs_right"] else 5
                trials_found = 0
                while trials_found < max_trials:
                    outcomes_A = sorted(random.sample(range(2, 200), 2))
                    probabilities_A = [round(random.uniform(0.02, 0.98), 2)]
                    probabilities_A.append(round(1 - probabilities_A[0], 2))

                    outcomes_B = sorted(random.sample(range(2, 200), 2))
                    probabilities_B = [round(random.uniform(0.02, 0.98), 2)]
                    probabilities_B.append(round(1 - probabilities_B[0], 2))

                    EV_A = np.dot(probabilities_A, outcomes_A)
                    EV_B = np.dot(probabilities_B, outcomes_B)
                    EV_diff = round(EV_A - EV_B, 2)

                    SD_A = np.sqrt(np.dot(probabilities_A, (np.array(outcomes_A) - EV_A)**2))
                    SD_B = np.sqrt(np.dot(probabilities_B, (np.array(outcomes_B) - EV_B)**2))
                    SD_diff = round(SD_A - SD_B, 2)

                    skew_A = calculate_skewness(probabilities_A, outcomes_A)
                    skew_B = calculate_skewness(probabilities_B, outcomes_B)
                    skew_abs_diff = abs(abs(skew_A) - abs(skew_B))

                    if is_skew_valid(skew_A, skew_B, skew, skew_abs_diff) and \
                       abs(EV_diff - ev) <= 1 and abs(SD_diff - sd) <= 1:
                        data.append({
                            "P_A1": probabilities_A[0], "O_A1": outcomes_A[0],
                            "P_A2": probabilities_A[1], "O_A2": outcomes_A[1],
                            "P_B1": probabilities_B[0], "O_B1": outcomes_B[0],
                            "P_B2": probabilities_B[1], "O_B2": outcomes_B[1],
                            "EV_A": EV_A, "EV_B": EV_B, "EV_diff": EV_diff,
                            "SD_A": SD_A, "SD_B": SD_B, "SD_diff": SD_diff,
                            "skew_A": skew_A, "skew_B": skew_B, "skew_diff": skew_A - skew_B,
                            "skew_level": skew, "ev_level": ev, "sd_level": sd
                        })
                        trials_found += 1

    return pd.DataFrame(data)

# Generate the lottery pairs
lottery_pairs = generate_lottery_pairs()

# Sort and reset index
lottery_pairs_sorted = lottery_pairs.sort_values(by=['skew_level', 'ev_level', 'sd_level'])
lottery_pairs_sorted.reset_index(drop=True, inplace=True)

# Display the sorted dataframe
lottery_pairs_sorted

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,EV_diff,SD_A,SD_B,SD_diff,skew_A,skew_B,skew_diff,skew_level,ev_level,sd_level
0,0.16,84,0.84,136,0.83,141,0.17,180,127.68,147.63,-19.95,19.063515,14.649679,4.41,-1.854852,1.757035,-3.611887,left_vs_right,-20,5
1,0.11,4,0.89,81,0.87,84,0.13,142,72.53,91.54,-19.01,24.092511,19.505599,4.59,-2.492891,2.200394,-4.693285,left_vs_right,-20,5
2,0.14,17,0.86,124,0.81,114,0.19,197,109.02,129.77,-20.75,37.127612,32.560975,4.57,-2.075006,1.580419,-3.655425,left_vs_right,-20,5
3,0.11,100,0.89,133,0.86,148,0.14,162,129.37,149.96,-20.59,10.325362,4.857818,5.47,-2.492891,2.075006,-4.567896,left_vs_right,-20,5
4,0.13,42,0.87,91,0.85,99,0.15,132,84.63,103.95,-19.32,16.478868,11.783357,4.70,-2.200394,1.960392,-4.160786,left_vs_right,-20,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
970,0.84,83,0.16,178,0.18,37,0.82,87,98.20,78.00,20.20,34.827575,19.209373,15.62,1.854852,-1.665853,3.520706,right_vs_left,20,15
971,0.82,92,0.18,160,0.17,62,0.83,90,104.24,85.24,19.00,26.124747,10.517718,15.61,1.665853,-1.757035,3.422889,right_vs_left,20,15
972,0.87,90,0.13,148,0.10,64,0.90,80,97.54,78.40,19.14,19.505599,4.800000,14.71,2.200394,-2.666667,4.867060,right_vs_left,20,15
973,0.87,85,0.13,174,0.12,36,0.88,83,96.57,77.36,19.21,29.931006,15.273192,14.66,2.200394,-2.338738,4.539132,right_vs_left,20,15


In [113]:



from scipy.optimize import minimize
import random
random.seed(42)  # Set the random seed


# Function to calculate skewness
def calculate_skewness(probabilities, outcomes):
    mu = np.sum(outcomes * probabilities)
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    sigma = np.sqrt(sigma_squared)
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    skewness = mu_3 / (sigma**3) if sigma != 0 else 0
    return skewness

# Function to calculate EV, SD, and skewness
def lottery_stats(probs, outcomes):
    ev = np.sum(probs * outcomes)
    sd = np.sqrt(np.sum(probs * (outcomes - ev) ** 2))
    skw = calculate_skewness(probs, outcomes)
    return ev, sd, skw

# Generate unique probabilities that sum to 1
def generate_unique_probs(n, low=0.02, high=0.4):
    possible_probs = np.arange(low, high + 0.001, 0.01)
    possible_probs = np.round(possible_probs, 2)
    possible_probs = possible_probs[(possible_probs >= low) & (possible_probs <= high)]
    # Remove duplicates due to rounding
    possible_probs = np.unique(possible_probs)
    # Ensure there are enough unique probabilities
    if len(possible_probs) < n:
        raise ValueError("Not enough unique probabilities within the specified bounds.")
    for _ in range(1000):  # Maximum attempts
        probs = np.random.choice(possible_probs, size=n, replace=False)
        probs = probs / probs.sum()
        probs = np.round(probs, 2)
        probs[-1] += 1 - probs.sum()  # Adjust to sum to 1
        # Check for uniqueness after rounding and adjustment
        if np.all(probs >= low) and np.all(probs <= high) and len(np.unique(probs)) == n:
            return probs
    raise ValueError("Unable to generate unique probabilities within bounds after rounding.")

# Optimization function
def optimize_lottery(original_probs, original_outcomes):
    # Original lottery statistics
    original_ev = np.sum(original_probs * original_outcomes)
    original_sd = np.sqrt(np.sum(original_probs * (original_outcomes - original_ev) ** 2))
    original_skew = calculate_skewness(original_probs, original_outcomes)

    # Parameters
    n = 7  # Number of outcomes
    iterations = 1000  # Number of iterations to run
    delta = 2  # Minimum difference to ensure uniqueness after rounding
    max_gap = 100  # Maximum allowed gap between consecutive outcomes
    gap_weight = 0.01  # Weight for the gap penalty in the objective function

    # Bounds for outcomes
    bounds = [(2, 200)] * n


    # Run the optimization multiple times
    for iteration in range(iterations):
        # Generate unique probabilities
        try:
            fixed_probs = generate_unique_probs(n)
        except ValueError:
            continue  # Skip if unable to generate probabilities

        # Desired average gap between outcomes
        desired_gap = (bounds[0][1] - bounds[0][0]) / (n - 1)

        # Objective function with gap penalty
        def objective_outcomes(outcomes):
            ev, sd, skw = lottery_stats(fixed_probs, outcomes)
            penalty_stats = (ev - original_ev)**2 + (sd - original_sd)**2 + (skw - original_skew)**2
            penalty_gaps = np.sum((np.diff(outcomes) + desired_gap)**2)  # Adjusted for decreasing order
            penalty = penalty_stats + gap_weight * penalty_gaps
            return penalty

        # Initial guess for outcomes (from high to low)
        initial_outcomes = np.linspace(bounds[0][1], bounds[0][0], n)

        # Constraints to ensure outcomes are decreasing and gaps are within limits
        constraints = [
            {'type': 'ineq', 'fun': lambda x, i=i: x[i] - x[i+1] - delta} for i in range(n - 1)
        ] + [
            {'type': 'ineq', 'fun': lambda x, i=i: max_gap - (x[i] - x[i+1])} for i in range(n - 1)
        ]

        # Optimize outcomes
        result = minimize(
            objective_outcomes,
            initial_outcomes,
            bounds=bounds,
            constraints=constraints,
            method='SLSQP',
            options={'ftol': 1e-9, 'disp': False}
        )

        # Check if the optimization was successful
        if not result.success:
            continue  # Skip this iteration if optimization failed

        # Extract optimized outcomes
        optimized_outcomes = np.round(result.x).astype(int)

        # Adjust outcomes to ensure uniqueness after rounding and maintain gaps
        for i in range(1, n):
            # Ensure the gap does not exceed max_gap
            if optimized_outcomes[i - 1] - optimized_outcomes[i] > max_gap:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - max_gap
            # Ensure outcomes are decreasing by at least delta
            if optimized_outcomes[i] >= optimized_outcomes[i - 1]:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - int(delta)
            # Ensure within bounds
            if optimized_outcomes[i] < bounds[i][0]:
                optimized_outcomes[i] = bounds[i][0]

        # Recalculate the statistics with adjusted outcomes
        final_ev, final_sd, final_skew = lottery_stats(fixed_probs, optimized_outcomes)

        # Check if the differences in EV and SD are less than 1
        if abs(final_ev - original_ev) < 1 and abs(final_sd - original_sd) < 1 and abs(final_skew - original_skew) < 0.25:
            best_outcomes = optimized_outcomes.copy()
            best_probs = fixed_probs.copy()
            return best_outcomes, best_probs, {
            'original_ev': original_ev,
            'original_sd': original_sd,
            'original_skew': original_skew,
            'new_ev': final_ev,
            'new_sd': final_sd,
            'new_skew': final_skew
        }
    else:
        return None, None, None

# Function to process each row of the DataFrame
def process_row(row):
    result_dict = {}
    # For Lottery A
    probs_A = np.array([row['P_A1'], row['P_A2']])
    probs_A = probs_A / probs_A.sum()
    outcomes_A = np.array([row['O_A1'], row['O_A2']])

    # Original statistics for Lottery A
    original_ev_A, original_sd_A, original_skew_A = lottery_stats(probs_A, outcomes_A)
    result_dict['simple_EVA'] = original_ev_A
    result_dict['simple_SDA'] = original_sd_A
    result_dict['simple_skewness_A'] = original_skew_A

    # Optimize Lottery A
    best_outcomes_A, best_probs_A, stats_A = optimize_lottery(probs_A, outcomes_A)

    if best_outcomes_A is not None:
        # Store new outcomes and probabilities
        for i in range(len(best_outcomes_A)):
            result_dict[f'complex_OA{i+1}'] = int(best_outcomes_A[i])  # Ensure integer outcomes
            result_dict[f'complex_PA{i+1}'] = best_probs_A[i]
        # Store new statistics
        result_dict['complex_EVA'] = stats_A['new_ev']
        result_dict['complex_SDA'] = stats_A['new_sd']
        result_dict['complex_skewness_A'] = stats_A['new_skew']
    else:
        for i in range(7):
            result_dict[f'complex_OA{i+1}'] = np.nan
            result_dict[f'complex_PA{i+1}'] = np.nan
        result_dict['complex_EVA'] = np.nan
        result_dict['complex_SDA'] = np.nan
        result_dict['complex_skewness_A'] = np.nan

    # For Lottery B
    probs_B = np.array([row['P_B1'], row['P_B2']])
    probs_B = probs_B / probs_B.sum()
    outcomes_B = np.array([row['O_B1'], row['O_B2']])

    # Original statistics for Lottery B
    original_ev_B, original_sd_B, original_skew_B = lottery_stats(probs_B, outcomes_B)
    result_dict['simple_EVB'] = original_ev_B
    result_dict['simple_SDB'] = original_sd_B
    result_dict['simple_skewness_B'] = original_skew_B

    # Optimize Lottery B
    best_outcomes_B, best_probs_B, stats_B = optimize_lottery(probs_B, outcomes_B)

    if best_outcomes_B is not None:
        # Store new outcomes and probabilities
        for i in range(len(best_outcomes_B)):
            result_dict[f'complex_OB{i+1}'] = int(best_outcomes_B[i])  # Ensure integer outcomes
            result_dict[f'complex_PB{i+1}'] = best_probs_B[i]
        # Store new statistics
        result_dict['complex_EVB'] = stats_B['new_ev']
        result_dict['complex_SDB'] = stats_B['new_sd']
        result_dict['complex_skewness_B'] = stats_B['new_skew']
    else:
        for i in range(7):
            result_dict[f'complex_OB{i+1}'] = np.nan
            result_dict[f'complex_PB{i+1}'] = np.nan
        result_dict['complex_EVB'] = np.nan
        result_dict['complex_SDB'] = np.nan
        result_dict['complex_skewness_B'] = np.nan

    # Differences between Lotteries A and B (Original)
    result_dict['simple_EVD'] = result_dict['simple_EVA'] - result_dict['simple_EVB']
    result_dict['simple_SDD'] = result_dict['simple_SDA'] - result_dict['simple_SDB']
    result_dict['simple_skewness_D'] = result_dict['simple_skewness_A'] - result_dict['simple_skewness_B']

    # Differences between Lotteries A and B (New)
    if best_outcomes_A is not None and best_outcomes_B is not None:
        result_dict['complex_EVD'] = result_dict['complex_EVA'] - result_dict['complex_EVB']
        result_dict['complex_SDD'] = result_dict['complex_SDA'] - result_dict['complex_SDB']
        result_dict['complex_skewness_D'] = result_dict['complex_skewness_A'] - result_dict['complex_skewness_B']
    else:
        result_dict['complex_EVD'] = np.nan
        result_dict['complex_SDD'] = np.nan
        result_dict['complex_skewness_D'] = np.nan

    return pd.Series(result_dict)

# Apply the process_row function to each row
new_columns = lottery_pairs_sorted.apply(process_row, axis=1)

# Concatenate the new columns to the original DataFrame
lottery_pairs_new = pd.concat([lottery_pairs_sorted, new_columns], axis=1)

# Convert outcome columns to nullable integer type
outcome_columns = [f'complex_OA{i+1}' for i in range(7)] + [f'complex_OB{i+1}' for i in range(7)]
lottery_pairs_new[outcome_columns] = lottery_pairs_new[outcome_columns].astype('Int64')





In [114]:
lottery_pairs_new

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.16,84,0.84,136,0.83,141,0.17,180,127.68,147.63,...,0.25,147.58,15.335697,1.552261,-19.95,4.413836,-3.611887,,,
1,0.11,4,0.89,81,0.87,84,0.13,142,72.53,91.54,...,,,,,-19.01,4.586912,-4.693285,,,
2,0.14,17,0.86,124,0.81,114,0.19,197,109.02,129.77,...,,,,,-20.75,4.566637,-3.655425,,,
3,0.11,100,0.89,133,0.86,148,0.14,162,129.37,149.96,...,0.23,150.00,5.824088,1.941352,-20.59,5.467544,-4.567896,-20.48,5.457295,-4.197162
4,0.13,42,0.87,91,0.85,99,0.15,132,84.63,103.95,...,0.26,103.87,12.583843,1.749609,-19.32,4.695511,-4.160786,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
970,0.84,83,0.16,178,0.18,37,0.82,87,98.20,78.00,...,0.05,78.15,19.882341,-1.533776,20.20,15.618203,3.520706,,,
971,0.82,92,0.18,160,0.17,62,0.83,90,104.24,85.24,...,0.05,85.30,11.407454,-1.719762,19.00,15.607029,3.422889,19.24,15.156966,3.351221
972,0.87,90,0.13,148,0.10,64,0.90,80,97.54,78.40,...,0.03,78.01,5.793954,-2.630595,19.14,14.705599,4.867060,,,
973,0.87,85,0.13,174,0.12,36,0.88,83,96.57,77.36,...,,,,,19.21,14.657813,4.539132,,,


In [115]:

# Ensure the columns are present in the merged_df DataFrame
columns_to_check = ['skew_level','ev_level', 'sd_level','EV_A', 'EV_B', 'EV_diff',
                    'complex_EVA', 'complex_EVB', 'complex_EVD',
                    'SD_A', 'SD_B', 'SD_diff',
                    'complex_SDA', 'complex_SDB', 'complex_SDD',
                    'simple_skewness_A', 'complex_skewness_A', 'simple_skewness_B', 'complex_skewness_B',
                    'simple_skewness_D', 'complex_skewness_D'
                    ]

# Check which columns are missing
missing_columns = [col for col in columns_to_check if col not in lottery_pairs_new.columns]

# Print missing columns
print(f"Missing columns: {missing_columns}")

# Select only the columns that are present
columns_to_select = [col for col in columns_to_check if col in lottery_pairs_new.columns]

df_selected = lottery_pairs_new[columns_to_select]


df_selected_rounded = df_selected.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)




df_selected_before = df_selected_rounded.sort_values(by='skew_level', ascending=True).dropna()


Missing columns: []


  df_selected_rounded = df_selected.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)


In [116]:

# Filter rows based on conditions for A and B
filtered_df = df_selected_before[
    (abs(df_selected_before['simple_skewness_A'] - df_selected_before['complex_skewness_A']) < 0.25) &
    (abs(df_selected_before['simple_skewness_B'] - df_selected_before['complex_skewness_B']) < 0.25)
]

# Display the filtered dataframe
filtered_df

Unnamed: 0,skew_level,ev_level,sd_level,EV_A,EV_B,EV_diff,complex_EVA,complex_EVB,complex_EVD,SD_A,...,SD_diff,complex_SDA,complex_SDB,complex_SDD,simple_skewness_A,complex_skewness_A,simple_skewness_B,complex_skewness_B,simple_skewness_D,complex_skewness_D
303,left_vs_right,10,10,172.45,161.74,10.71,172.07,161.86,10.21,17.65,...,9.39,17.97,9.23,8.74,-1.58,-1.51,1.76,1.61,-3.34,-3.11
302,left_vs_right,10,10,144.60,134.69,9.91,144.63,134.64,9.99,14.66,...,10.29,15.48,5.37,10.11,-1.85,-1.63,2.20,2.18,-4.06,-3.82
298,left_vs_right,10,5,179.44,170.24,9.20,179.54,170.30,9.24,9.42,...,4.28,10.33,6.13,4.20,-1.58,-1.61,1.85,1.86,-3.44,-3.47
297,left_vs_right,10,5,126.84,117.10,9.74,126.88,117.33,9.55,18.03,...,5.89,18.87,12.86,6.01,-1.76,-1.77,1.96,1.76,-3.72,-3.53
293,left_vs_right,10,5,86.08,76.83,9.25,86.00,76.85,9.15,26.68,...,4.32,27.11,22.92,4.20,-1.58,-1.41,1.58,1.47,-3.16,-2.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
663,right_vs_left,-10,10,46.48,56.45,-9.97,46.31,56.16,-9.85,16.53,...,10.89,17.21,6.53,10.68,1.76,1.57,-1.76,-1.62,3.51,3.20
665,right_vs_left,-10,10,108.04,117.60,-9.56,107.94,117.16,-9.22,16.13,...,9.63,16.80,7.44,9.36,1.85,1.66,-2.34,-2.10,4.19,3.77
681,right_vs_left,-10,15,30.04,40.95,-10.91,30.27,41.01,-10.74,25.30,...,15.65,25.62,10.59,15.03,1.85,1.69,-1.96,-1.74,3.82,3.44
674,right_vs_left,-10,10,114.60,124.18,-9.58,114.43,124.13,-9.70,13.88,...,9.17,14.83,5.70,9.13,2.08,2.09,-2.20,-2.28,4.28,4.37


In [None]:
# Count rows for each combination of levels
level_counts = filtered_df.groupby(['skew_level', 'ev_level', 'sd_level']).size().reset_index(name='count')



# Display the filtered dataframe
level_counts

Unnamed: 0,skew_level,ev_level,sd_level,count
0,left_vs_right,-20,5,6
1,left_vs_right,-20,10,5
2,left_vs_right,-20,15,1
3,left_vs_right,-10,5,7
4,left_vs_right,-10,10,3
5,left_vs_right,-10,15,3
6,left_vs_right,0,5,11
7,left_vs_right,0,10,2
8,left_vs_right,0,15,2
9,left_vs_right,10,5,9


In [119]:
lottery_pairs_new.dropna().to_csv('study3_trials_raw.csv', index=False)

In [216]:
random_trials = (
    filtered_df
    .groupby(['skew_level', 'ev_level', 'sd_level'], group_keys=False)
    .apply(lambda x: x.sample(n=1))
)

# Group by skew_level in the random_trials dataframe and calculate the mean
mean_values_random_trials = (
    random_trials
    .groupby('skew_level')[['EV_A', 'EV_B', 'SD_A', 'SD_B']]
    .mean()
    .reset_index()
)

mean_values_random_trials

  .apply(lambda x: x.sample(n=1))


Unnamed: 0,skew_level,EV_A,EV_B,SD_A,SD_B
0,left_vs_right,114.932,115.194667,18.738667,8.788
1,no_skew_vs_no_skew,89.411333,89.276,35.485333,25.430667
2,right_vs_left,87.637333,87.708,17.044667,7.000667


add some missing trials

In [3]:
import numpy as np
import pandas as pd
import random

# Create the dataframe
data = {
    "skew_level": [
        "left_vs_right", "left_vs_right", "left_vs_right", "left_vs_right", "left_vs_right", "left_vs_right",
        "left_vs_right", "left_vs_right", "left_vs_right", "left_vs_right", "left_vs_right", "right_vs_left",
        "right_vs_left", "right_vs_left", "right_vs_left", "right_vs_left", "right_vs_left", "right_vs_left",
        "right_vs_left", "right_vs_left", "right_vs_left", "right_vs_left", "right_vs_left", "right_vs_left"
    ],
    "ev_level": [
        -20, -20, -10, 0, 0, 0, 10, 10, 20, 20, 20, -20, -20, -10, -10, 0, 0, 0,
        10, 10, 10, 20, 20, 20
    ],
    "sd_level": [
        5, 10, 15, 5, 10, 15, 5, 10, 5, 10, 15, 5, 10, 5, 10, 5, 10, 15, 5, 10,
        15, 5, 10, 15
    ],
    "count": [
        4, 2, 2, 1, 3, 1, 3, 1, 2, 3, 1, 5, 1, 1, 3, 6, 1, 1, 1, 1, 1, 3, 3, 2
    ]
}

excluded_levels = pd.DataFrame(data)



# Define the skewness calculation function
def calculate_skewness(probabilities, outcomes):
    probabilities = np.array(probabilities)
    outcomes = np.array(outcomes)
    mu = np.sum(outcomes * probabilities)
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    skewness = mu_3 / (sigma_squared**(3/2)) if sigma_squared > 0 else 0
    return skewness

# Generate a set of lottery pairs with given constraints
def generate_lottery_pairs(excluded_levels):
    ev_levels = [-20, -10, 0, 10, 20]
    sd_levels = [5, 10, 15]
    skew_levels = ["right_vs_left", "left_vs_right"]

    data = []

    for skew in skew_levels:
        for ev in ev_levels:
            for sd in sd_levels:
                # Skip the levels present in the excluded_levels dataframe
                if not excluded_levels[
                    (excluded_levels["skew_level"] == skew) &
                    (excluded_levels["ev_level"] == ev) &
                    (excluded_levels["sd_level"] == sd)
                ].empty:
                    continue

                trials_found = 0
                max_trials = 30  # Adjust as needed

                while trials_found < max_trials:
                    O_A1, O_A2 = sorted(random.sample(range(2, 200), 2))
                    P_A1 = round(random.uniform(0.02, 0.98), 2)
                    P_A2 = round(1 - P_A1, 2)

                    O_B1, O_B2 = sorted(random.sample(range(2, 200), 2))
                    P_B1 = round(random.uniform(0.02, 0.98), 2)
                    P_B2 = round(1 - P_B1, 2)

                    # Check the new probability difference constraint
                    max_diff = abs(max(P_A1, P_A2) - max(P_B1, P_B2))
                    if max_diff <= 0.02:
                        continue

                    EV_A = P_A1 * O_A1 + P_A2 * O_A2
                    EV_B = P_B1 * O_B1 + P_B2 * O_B2
                    EV_diff = round(EV_A - EV_B, 2)

                    SD_A = np.sqrt(P_A1 * (O_A1 - EV_A)**2 + P_A2 * (O_A2 - EV_A)**2)
                    SD_B = np.sqrt(P_B1 * (O_B1 - EV_B)**2 + P_B2 * (O_B2 - EV_B)**2)
                    SD_diff = round(SD_A - SD_B, 2)

                    skew_A = calculate_skewness([P_A1, P_A2], [O_A1, O_A2])
                    skew_B = calculate_skewness([P_B1, P_B2], [O_B1, O_B2])
                    skew_diff = skew_A - skew_B

                    skew_valid = False
                    skew_abs_diff = abs(abs(skew_A) - abs(skew_B))

                    if skew == "right_vs_left":
                        if (skew_A > 1.5 and skew_A < 3 and skew_B < -1.5 and skew_B > -3) and skew_abs_diff < 0.5:
                            skew_valid = True
                    elif skew == "left_vs_right":
                        if (skew_A < -1.5 and skew_A > -3 and skew_B > 1.5 and skew_B < 3) and skew_abs_diff < 0.5:
                            skew_valid = True
                    elif skew == "no_skew_vs_no_skew":
                        if (-0.25 <= skew_A <= 0.25 and -0.25 <= skew_B <= 0.25) and skew_abs_diff < 0.5:
                            skew_valid = True

                    if skew_valid and \
                       abs(EV_diff - ev) <= 1 and \
                       abs(SD_diff - sd) <= 1:
                        data.append({
                            "P_A1": P_A1, "O_A1": O_A1, "P_A2": P_A2, "O_A2": O_A2,
                            "P_B1": P_B1, "O_B1": O_B1, "P_B2": P_B2, "O_B2": O_B2,
                            "EV_A": EV_A, "EV_B": EV_B, "EV_diff": EV_diff,
                            "SD_A": SD_A, "SD_B": SD_B, "SD_diff": SD_diff,
                            "skew_A": skew_A, "skew_B": skew_B, 'skew_diff': skew_diff,
                            "skew_level": skew,
                            "ev_level": ev,
                            "sd_level": sd
                        })
                        trials_found += 1

    return pd.DataFrame(data)

# Generate the lottery pairs excluding the specified levels and with the new constraint
lottery_pairs = generate_lottery_pairs(excluded_levels)

# Sort and reset index
lottery_pairs_sorted = lottery_pairs.sort_values(by=['skew_level', 'ev_level', 'sd_level'], ascending=[True, True, True])
lottery_pairs_sorted.reset_index(drop=True, inplace=True)

# Display the sorted dataframe
lottery_pairs_sorted

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,EV_diff,SD_A,SD_B,SD_diff,skew_A,skew_B,skew_diff,skew_level,ev_level,sd_level
0,0.13,33,0.87,133,0.85,133,0.15,185,120.00,140.80,-20.80,33.630343,18.567714,15.06,-2.200394,1.960392,-4.160786,left_vs_right,-20,15
1,0.12,85,0.88,143,0.90,155,0.10,168,136.04,156.30,-20.26,18.847769,3.900000,14.95,-2.338738,2.666667,-5.005405,left_vs_right,-20,15
2,0.15,70,0.85,146,0.83,150,0.17,181,134.60,155.27,-20.67,27.137428,11.644617,15.49,-1.960392,1.757035,-3.717427,left_vs_right,-20,15
3,0.17,24,0.83,77,0.85,86,0.15,102,67.99,88.40,-20.41,19.908538,5.713143,14.20,-1.757035,1.960392,-3.717427,left_vs_right,-20,15
4,0.11,2,0.89,91,0.87,97,0.13,136,81.21,102.07,-20.86,27.847188,13.115834,14.73,-2.492891,2.200394,-4.693285,left_vs_right,-20,15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,0.83,122,0.17,169,0.13,135,0.87,141,129.99,140.22,-10.23,17.654742,2.017821,15.64,1.757035,-2.200394,3.957429,right_vs_left,-10,15
176,0.82,74,0.18,152,0.15,61,0.85,105,88.04,98.40,-10.36,29.966621,15.711143,14.26,1.665853,-1.960392,3.626246,right_vs_left,-10,15
177,0.89,70,0.11,166,0.13,50,0.87,96,80.56,90.02,-9.46,30.037417,15.469958,14.57,2.492891,-2.200394,4.693285,right_vs_left,-10,15
178,0.83,102,0.17,150,0.15,116,0.85,122,110.16,121.10,-10.94,18.030374,2.142429,15.89,1.757035,-1.960392,3.717427,right_vs_left,-10,15


In [11]:

# Count rows for each combination of levels
level_counts = lottery_pairs_sorted.groupby(['skew_level', 'ev_level', 'sd_level']).size().reset_index(name='count')



# Display the filtered dataframe
level_counts



Unnamed: 0,skew_level,ev_level,sd_level,count
0,left_vs_right,-20,15,30
1,left_vs_right,-10,5,30
2,left_vs_right,-10,10,30
3,left_vs_right,10,15,30
4,right_vs_left,-20,15,30
5,right_vs_left,-10,15,30


In [4]:



from scipy.optimize import minimize
import random
random.seed(42)  # Set the random seed


# Function to calculate skewness
def calculate_skewness(probabilities, outcomes):
    mu = np.sum(outcomes * probabilities)
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    sigma = np.sqrt(sigma_squared)
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    skewness = mu_3 / (sigma**3) if sigma != 0 else 0
    return skewness

# Function to calculate EV, SD, and skewness
def lottery_stats(probs, outcomes):
    ev = np.sum(probs * outcomes)
    sd = np.sqrt(np.sum(probs * (outcomes - ev) ** 2))
    skw = calculate_skewness(probs, outcomes)
    return ev, sd, skw

# Generate unique probabilities that sum to 1
def generate_unique_probs(n, low=0.02, high=0.4):
    possible_probs = np.arange(low, high + 0.001, 0.01)
    possible_probs = np.round(possible_probs, 2)
    possible_probs = possible_probs[(possible_probs >= low) & (possible_probs <= high)]
    # Remove duplicates due to rounding
    possible_probs = np.unique(possible_probs)
    # Ensure there are enough unique probabilities
    if len(possible_probs) < n:
        raise ValueError("Not enough unique probabilities within the specified bounds.")
    for _ in range(1000):  # Maximum attempts
        probs = np.random.choice(possible_probs, size=n, replace=False)
        probs = probs / probs.sum()
        probs = np.round(probs, 2)
        probs[-1] += 1 - probs.sum()  # Adjust to sum to 1
        # Check for uniqueness after rounding and adjustment
        if np.all(probs >= low) and np.all(probs <= high) and len(np.unique(probs)) == n:
            return probs
    raise ValueError("Unable to generate unique probabilities within bounds after rounding.")

# Optimization function
def optimize_lottery(original_probs, original_outcomes):
    # Original lottery statistics
    original_ev = np.sum(original_probs * original_outcomes)
    original_sd = np.sqrt(np.sum(original_probs * (original_outcomes - original_ev) ** 2))
    original_skew = calculate_skewness(original_probs, original_outcomes)

    # Parameters
    n = 7  # Number of outcomes
    iterations = 1000  # Number of iterations to run
    delta = 2  # Minimum difference to ensure uniqueness after rounding
    max_gap = 100  # Maximum allowed gap between consecutive outcomes
    gap_weight = 0.01  # Weight for the gap penalty in the objective function

    # Bounds for outcomes
    bounds = [(2, 200)] * n


    # Run the optimization multiple times
    for iteration in range(iterations):
        # Generate unique probabilities
        try:
            fixed_probs = generate_unique_probs(n)
        except ValueError:
            continue  # Skip if unable to generate probabilities

        # Desired average gap between outcomes
        desired_gap = (bounds[0][1] - bounds[0][0]) / (n - 1)

        # Objective function with gap penalty
        def objective_outcomes(outcomes):
            ev, sd, skw = lottery_stats(fixed_probs, outcomes)
            penalty_stats = (ev - original_ev)**2 + (sd - original_sd)**2 + (skw - original_skew)**2
            penalty_gaps = np.sum((np.diff(outcomes) + desired_gap)**2)  # Adjusted for decreasing order
            penalty = penalty_stats + gap_weight * penalty_gaps
            return penalty

        # Initial guess for outcomes (from high to low)
        initial_outcomes = np.linspace(bounds[0][1], bounds[0][0], n)

        # Constraints to ensure outcomes are decreasing and gaps are within limits
        constraints = [
            {'type': 'ineq', 'fun': lambda x, i=i: x[i] - x[i+1] - delta} for i in range(n - 1)
        ] + [
            {'type': 'ineq', 'fun': lambda x, i=i: max_gap - (x[i] - x[i+1])} for i in range(n - 1)
        ]

        # Optimize outcomes
        result = minimize(
            objective_outcomes,
            initial_outcomes,
            bounds=bounds,
            constraints=constraints,
            method='SLSQP',
            options={'ftol': 1e-9, 'disp': False}
        )

        # Check if the optimization was successful
        if not result.success:
            continue  # Skip this iteration if optimization failed

        # Extract optimized outcomes
        optimized_outcomes = np.round(result.x).astype(int)

        # Adjust outcomes to ensure uniqueness after rounding and maintain gaps
        for i in range(1, n):
            # Ensure the gap does not exceed max_gap
            if optimized_outcomes[i - 1] - optimized_outcomes[i] > max_gap:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - max_gap
            # Ensure outcomes are decreasing by at least delta
            if optimized_outcomes[i] >= optimized_outcomes[i - 1]:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - int(delta)
            # Ensure within bounds
            if optimized_outcomes[i] < bounds[i][0]:
                optimized_outcomes[i] = bounds[i][0]

        # Recalculate the statistics with adjusted outcomes
        final_ev, final_sd, final_skew = lottery_stats(fixed_probs, optimized_outcomes)

        # Check if the differences in EV and SD are less than 1
        if abs(final_ev - original_ev) < 1 and abs(final_sd - original_sd) < 1 and abs(final_skew - original_skew) < 0.2:
            best_outcomes = optimized_outcomes.copy()
            best_probs = fixed_probs.copy()
            return best_outcomes, best_probs, {
            'original_ev': original_ev,
            'original_sd': original_sd,
            'original_skew': original_skew,
            'new_ev': final_ev,
            'new_sd': final_sd,
            'new_skew': final_skew
        }
    else:
        return None, None, None

# Function to process each row of the DataFrame
def process_row(row):
    result_dict = {}
    # For Lottery A
    probs_A = np.array([row['P_A1'], row['P_A2']])
    probs_A = probs_A / probs_A.sum()
    outcomes_A = np.array([row['O_A1'], row['O_A2']])

    # Original statistics for Lottery A
    original_ev_A, original_sd_A, original_skew_A = lottery_stats(probs_A, outcomes_A)
    result_dict['simple_EVA'] = original_ev_A
    result_dict['simple_SDA'] = original_sd_A
    result_dict['simple_skewness_A'] = original_skew_A

    # Optimize Lottery A
    best_outcomes_A, best_probs_A, stats_A = optimize_lottery(probs_A, outcomes_A)

    if best_outcomes_A is not None:
        # Store new outcomes and probabilities
        for i in range(len(best_outcomes_A)):
            result_dict[f'complex_OA{i+1}'] = int(best_outcomes_A[i])  # Ensure integer outcomes
            result_dict[f'complex_PA{i+1}'] = best_probs_A[i]
        # Store new statistics
        result_dict['complex_EVA'] = stats_A['new_ev']
        result_dict['complex_SDA'] = stats_A['new_sd']
        result_dict['complex_skewness_A'] = stats_A['new_skew']
    else:
        for i in range(7):
            result_dict[f'complex_OA{i+1}'] = np.nan
            result_dict[f'complex_PA{i+1}'] = np.nan
        result_dict['complex_EVA'] = np.nan
        result_dict['complex_SDA'] = np.nan
        result_dict['complex_skewness_A'] = np.nan

    # For Lottery B
    probs_B = np.array([row['P_B1'], row['P_B2']])
    probs_B = probs_B / probs_B.sum()
    outcomes_B = np.array([row['O_B1'], row['O_B2']])

    # Original statistics for Lottery B
    original_ev_B, original_sd_B, original_skew_B = lottery_stats(probs_B, outcomes_B)
    result_dict['simple_EVB'] = original_ev_B
    result_dict['simple_SDB'] = original_sd_B
    result_dict['simple_skewness_B'] = original_skew_B

    # Optimize Lottery B
    best_outcomes_B, best_probs_B, stats_B = optimize_lottery(probs_B, outcomes_B)

    if best_outcomes_B is not None:
        # Store new outcomes and probabilities
        for i in range(len(best_outcomes_B)):
            result_dict[f'complex_OB{i+1}'] = int(best_outcomes_B[i])  # Ensure integer outcomes
            result_dict[f'complex_PB{i+1}'] = best_probs_B[i]
        # Store new statistics
        result_dict['complex_EVB'] = stats_B['new_ev']
        result_dict['complex_SDB'] = stats_B['new_sd']
        result_dict['complex_skewness_B'] = stats_B['new_skew']
    else:
        for i in range(7):
            result_dict[f'complex_OB{i+1}'] = np.nan
            result_dict[f'complex_PB{i+1}'] = np.nan
        result_dict['complex_EVB'] = np.nan
        result_dict['complex_SDB'] = np.nan
        result_dict['complex_skewness_B'] = np.nan

    # Differences between Lotteries A and B (Original)
    result_dict['simple_EVD'] = result_dict['simple_EVA'] - result_dict['simple_EVB']
    result_dict['simple_SDD'] = result_dict['simple_SDA'] - result_dict['simple_SDB']
    result_dict['simple_skewness_D'] = result_dict['simple_skewness_A'] - result_dict['simple_skewness_B']

    # Differences between Lotteries A and B (New)
    if best_outcomes_A is not None and best_outcomes_B is not None:
        result_dict['complex_EVD'] = result_dict['complex_EVA'] - result_dict['complex_EVB']
        result_dict['complex_SDD'] = result_dict['complex_SDA'] - result_dict['complex_SDB']
        result_dict['complex_skewness_D'] = result_dict['complex_skewness_A'] - result_dict['complex_skewness_B']
    else:
        result_dict['complex_EVD'] = np.nan
        result_dict['complex_SDD'] = np.nan
        result_dict['complex_skewness_D'] = np.nan

    return pd.Series(result_dict)

# Apply the process_row function to each row
new_columns = lottery_pairs_sorted.apply(process_row, axis=1)

# Concatenate the new columns to the original DataFrame
lottery_pairs_new = pd.concat([lottery_pairs_sorted, new_columns], axis=1)

# Convert outcome columns to nullable integer type
outcome_columns = [f'complex_OA{i+1}' for i in range(7)] + [f'complex_OB{i+1}' for i in range(7)]
lottery_pairs_new[outcome_columns] = lottery_pairs_new[outcome_columns].astype('Int64')





In [20]:
lottery_pairs_new

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.13,33,0.87,133,0.85,133,0.15,185,120.00,140.80,...,,,,,-20.80,15.062630,-4.160786,,,
1,0.12,85,0.88,143,0.90,155,0.10,168,136.04,156.30,...,,,,,-20.26,14.947769,-5.005405,,,
2,0.15,70,0.85,146,0.83,150,0.17,181,134.60,155.27,...,0.29,155.29,12.389750,1.627392,-20.67,15.492811,-3.717427,,,
3,0.17,24,0.83,77,0.85,86,0.15,102,67.99,88.40,...,0.17,88.18,6.594513,1.808186,-20.41,14.195396,-3.717427,-20.14,13.993794,-3.393066
4,0.11,2,0.89,91,0.87,97,0.13,136,81.21,102.07,...,,,,,-20.86,14.731354,-4.693285,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,0.83,122,0.17,169,0.13,135,0.87,141,129.99,140.22,...,,,,,-10.23,15.636921,3.957429,,,
176,0.82,74,0.18,152,0.15,61,0.85,105,88.04,98.40,...,0.03,98.49,16.385661,-1.855157,-10.36,14.255479,3.626246,,,
177,0.89,70,0.11,166,0.13,50,0.87,96,80.56,90.02,...,,,,,-9.46,14.567459,4.693285,,,
178,0.83,102,0.17,150,0.15,116,0.85,122,110.16,121.10,...,,,,,-10.94,15.887946,3.717427,,,


In [21]:
df_new = lottery_pairs_new.dropna()


# Filter rows based on conditions for A and B
filtered_df = df_new[
    (abs(df_new['simple_skewness_A'] - df_new['complex_skewness_A']) < 0.25) &
    (abs(df_new['simple_skewness_B'] - df_new['complex_skewness_B']) < 0.25) &
    (abs(np.maximum(df_new['P_A1'], df_new['P_A2']) -
         np.maximum(df_new['P_B1'], df_new['P_B2'])) > 0.015)
]

filtered_df = filtered_df[filtered_df["P_B1"] != filtered_df["P_A1"]]
filtered_df = filtered_df[filtered_df["P_B1"] != filtered_df["P_A2"]]
filtered_df = filtered_df[filtered_df["O_B1"] != filtered_df["O_A1"]]
filtered_df = filtered_df[filtered_df["O_B1"] != filtered_df["O_A2"]]






In [23]:
filtered_df.to_csv('study3_trials_new.csv', index=False)

In [16]:
# Count rows for each combination of levels
level_counts = filtered_df.groupby(['skew_level', 'ev_level', 'sd_level']).size().reset_index(name='count')



# Display the filtered dataframe
level_counts

Unnamed: 0,skew_level,ev_level,sd_level,count
0,left_vs_right,-20,15,2
1,left_vs_right,-10,5,9
2,left_vs_right,-10,10,2
3,right_vs_left,-20,15,1
4,right_vs_left,-10,15,5


In [14]:
random_trials = (
    filtered_df
    .groupby(['skew_level', 'ev_level', 'sd_level'], group_keys=False)
    .apply(lambda x: x.sample(n=1))
)
random_trials

  .apply(lambda x: x.sample(n=1))


Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
3,0.17,24,0.83,77,0.85,86,0.15,102,67.99,88.4,...,0.17,88.18,6.594513,1.808186,-20.41,14.195396,-3.717427,-20.14,13.993794,-3.393066
52,0.19,95,0.81,128,0.85,129,0.15,150,121.73,132.15,...,0.2,131.82,8.426601,1.912868,-10.42,5.44743,-3.540812,-10.17,5.352641,-3.402701
76,0.18,70,0.82,122,0.84,118,0.16,143,112.64,122.0,...,0.25,122.11,9.915538,1.670003,-9.36,10.812596,-3.520706,-9.73,10.8757,-3.1719
142,0.81,33,0.19,111,0.14,31,0.86,74,47.82,67.98,...,0.06,67.8,15.679923,-1.918405,-20.16,15.679028,3.655425,-19.78,15.205018,3.34224
172,0.81,72,0.19,135,0.15,70,0.85,98,83.97,93.8,...,0.05,93.67,10.754585,-1.932643,-9.83,14.716957,3.540812,-9.61,14.523002,3.316078
