### Now generate no_skew_vs_no_skew pairs in a good SD range, as was done for rl and lr

In [378]:
import numpy as np
import pandas as pd
import random

# Define the skewness calculation function
def calculate_skewness(probabilities, outcomes):
    """Return the skewness (third standardized moment) of a discrete lottery.

    Args:
        probabilities: Probability of each outcome (array-like).
        outcomes: Payoff of each outcome, aligned with ``probabilities``.

    Returns:
        The third central moment divided by the variance raised to the
        power 3/2, or 0 when the variance is not positive.
    """
    p = np.array(probabilities)
    x = np.array(outcomes)

    mean = np.sum(x * p)
    # Variance via E[X^2] - E[X]^2
    variance = np.sum((x**2) * p) - mean**2
    if variance <= 0:
        # Degenerate lottery: the ratio below would be undefined
        return 0

    third_central_moment = np.sum(((x - mean)**3) * p)
    return third_central_moment / variance**1.5

# Check whether the skewness values of a lottery pair satisfy the constraints of the requested skew level
def is_skew_valid(skew_A, skew_B, skew, skew_abs_diff):
    """Tell whether two skewness values fit the requested skew level.

    Args:
        skew_A: Skewness of lottery A.
        skew_B: Skewness of lottery B.
        skew: One of "right_vs_left", "left_vs_right" or
            "no_skew_vs_no_skew"; any other value is never valid.
        skew_abs_diff: Difference between the two skewness magnitudes; it
            must be below 0.5 for every level.

    Returns:
        A truthy value when all constraints of the level hold, otherwise a
        falsy one.
    """
    if skew == "no_skew_vs_no_skew":
        # Both lotteries close to symmetric (bounds inclusive)
        a_in_range = -0.25 <= skew_A <= 0.25
        b_in_range = -0.25 <= skew_B <= 0.25
    elif skew == "right_vs_left":
        # A clearly right-skewed, B clearly left-skewed (bounds exclusive)
        a_in_range = 1.5 < skew_A < 3
        b_in_range = -3 < skew_B < -1.5
    elif skew == "left_vs_right":
        # Mirror image of the previous case
        a_in_range = -3 < skew_A < -1.5
        b_in_range = 1.5 < skew_B < 3
    else:
        return False

    return a_in_range and b_in_range and skew_abs_diff < 0.5

# Generate lottery pairs
def generate_lottery_pairs():
    """Generate pairs of two-outcome lotteries by rejection sampling.

    For every combination of skew level, target EV difference and target SD
    difference, random lottery pairs (A, B) are drawn with the ``random``
    module until the requested number of pairs satisfying all constraints
    has been accepted. Only the ``no_skew_vs_no_skew`` level requests pairs
    (3 per cell); the two skewed levels request 0 and contribute no rows.

    Returns:
        pandas.DataFrame: One row per accepted pair, holding the
        probabilities and outcomes of both lotteries, their EV / SD /
        skewness, the A-minus-B differences and the design-cell labels.

    Note:
        The sampling loop has no attempt limit: it runs until each cell has
        its quota of accepted pairs.
    """
    ev_levels = [-20, -10, 0, 10, 20]
    sd_levels = [5, 10, 15]
    skew_levels = ["right_vs_left", "left_vs_right", "no_skew_vs_no_skew"]

    data = []
    for skew in skew_levels:
        # The quota depends only on the skew level, so compute it once here
        max_trials = 0 if skew in ["right_vs_left", "left_vs_right"] else 3
        for ev in ev_levels:
            for sd in sd_levels:
                trials_found = 0
                while trials_found < max_trials:
                    # Draw order (A outcomes, A probability, B outcomes,
                    # B probability) is kept fixed so that a seeded run
                    # consumes the random stream in the same sequence.
                    outcomes_A = sorted(random.sample(range(2, 200), 2))
                    probabilities_A = [round(random.uniform(0.02, 0.98), 2)]
                    probabilities_A.append(round(1 - probabilities_A[0], 2))

                    outcomes_B = sorted(random.sample(range(2, 200), 2))
                    probabilities_B = [round(random.uniform(0.02, 0.98), 2)]
                    probabilities_B.append(round(1 - probabilities_B[0], 2))

                    EV_A = np.dot(probabilities_A, outcomes_A)
                    EV_B = np.dot(probabilities_B, outcomes_B)
                    EV_diff = round(EV_A - EV_B, 2)

                    SD_A = np.sqrt(np.dot(probabilities_A, (np.array(outcomes_A) - EV_A)**2))
                    SD_B = np.sqrt(np.dot(probabilities_B, (np.array(outcomes_B) - EV_B)**2))
                    SD_diff = round(SD_A - SD_B, 2)

                    skew_A = calculate_skewness(probabilities_A, outcomes_A)
                    skew_B = calculate_skewness(probabilities_B, outcomes_B)
                    skew_abs_diff = abs(abs(skew_A) - abs(skew_B))

                    # The absolute SD ranges apply to the no-skew level only.
                    # Previously this clause was AND-ed with the level name,
                    # which made acceptance impossible for any other level
                    # (an endless loop as soon as that level's quota is > 0).
                    sd_range_ok = (
                        skew != "no_skew_vs_no_skew"
                        or (10 <= SD_A <= 25 and 3 <= SD_B <= 20)
                    )

                    # Apply constraints for skewness, EV difference, SD
                    # difference, SD ranges and the dominant probabilities
                    if is_skew_valid(skew_A, skew_B, skew, skew_abs_diff) and \
                       abs(EV_diff - ev) <= 1 and abs(SD_diff - sd) <= 1 and \
                       sd_range_ok and \
                       (abs(max(probabilities_A) - max(probabilities_B)) > 0.02) and \
                       (max(probabilities_A) > 0.5):
                        data.append({
                            "P_A1": probabilities_A[0], "O_A1": outcomes_A[0],
                            "P_A2": probabilities_A[1], "O_A2": outcomes_A[1],
                            "P_B1": probabilities_B[0], "O_B1": outcomes_B[0],
                            "P_B2": probabilities_B[1], "O_B2": outcomes_B[1],
                            "EV_A": EV_A, "EV_B": EV_B, "EV_diff": EV_diff,
                            "SD_A": SD_A, "SD_B": SD_B, "SD_diff": SD_diff,
                            "skew_A": skew_A, "skew_B": skew_B, "skew_diff": skew_A - skew_B,
                            "skew_level": skew, "ev_level": ev, "sd_level": sd
                        })
                        trials_found += 1

    return pd.DataFrame(data)

# Build the candidate lottery pairs
lottery_pairs = generate_lottery_pairs()

# Order the pairs by design cell and renumber the rows from 0
lottery_pairs_sorted = (
    lottery_pairs
    .sort_values(by=['skew_level', 'ev_level', 'sd_level'])
    .reset_index(drop=True)
)

# Show the sorted dataframe as the cell output
lottery_pairs_sorted

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,EV_diff,SD_A,SD_B,SD_diff,skew_A,skew_B,skew_diff,skew_level,ev_level,sd_level
0,0.54,141,0.46,173,0.48,164,0.52,185,155.72,174.92,-19.2,15.948718,10.491597,5.46,0.160514,-0.080064,0.240579,no_skew_vs_no_skew,-20,5
1,0.49,50,0.51,100,0.47,75,0.53,115,75.5,96.2,-20.7,24.994999,19.963968,5.03,-0.040008,-0.120217,0.080209,no_skew_vs_no_skew,-20,5
2,0.45,78,0.55,108,0.49,104,0.51,124,94.5,114.2,-19.7,14.924812,9.998,4.93,-0.201008,-0.040008,-0.161,no_skew_vs_no_skew,-20,5
3,0.49,31,0.51,63,0.47,60,0.53,74,47.32,67.42,-20.1,15.9968,6.987389,9.01,-0.040008,-0.120217,0.080209,no_skew_vs_no_skew,-20,10
4,0.45,11,0.55,57,0.48,42,0.52,68,36.3,55.52,-19.22,22.884711,12.989596,9.9,-0.201008,-0.080064,-0.120943,no_skew_vs_no_skew,-20,10
5,0.45,46,0.55,88,0.53,79,0.47,101,69.1,89.34,-20.24,20.894736,10.980182,9.91,-0.201008,0.120217,-0.321224,no_skew_vs_no_skew,-20,10
6,0.49,128,0.51,174,0.44,162,0.56,179,151.46,171.52,-20.06,22.9954,8.438578,14.56,-0.040008,-0.241747,0.201739,no_skew_vs_no_skew,-20,15
7,0.46,72,0.54,114,0.52,109,0.48,119,94.68,113.8,-19.12,20.932692,4.995998,15.94,-0.160514,0.080064,-0.240579,no_skew_vs_no_skew,-20,15
8,0.45,46,0.55,87,0.51,82,0.49,94,68.55,87.88,-19.33,20.397242,5.9988,14.4,-0.201008,0.040008,-0.241016,no_skew_vs_no_skew,-20,15
9,0.48,102,0.52,140,0.56,119,0.44,146,121.76,130.88,-9.12,18.984794,13.402448,5.58,-0.080064,0.241747,-0.321811,no_skew_vs_no_skew,-10,5


In [379]:



from scipy.optimize import minimize
import random
# Seed both generators: random.seed() does not affect NumPy, and
# generate_unique_probs draws with np.random.choice from NumPy's global state.
random.seed(42)  # Set the random seed
np.random.seed(42)


# Function to calculate skewness
def calculate_skewness(probabilities, outcomes):
    """Return the skewness (third standardized moment) of a discrete lottery.

    Args:
        probabilities: Probability of each outcome (array-like).
        outcomes: Payoff of each outcome, aligned with ``probabilities``.

    Returns:
        The third central moment divided by the cubed standard deviation,
        or 0 when the variance is not positive.
    """
    # Accept plain lists as well as arrays
    probabilities = np.asarray(probabilities)
    outcomes = np.asarray(outcomes)

    mu = np.sum(outcomes * probabilities)
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    # Floating-point cancellation can leave a tiny negative variance for a
    # (near-)degenerate lottery; guard before the square root so the result
    # is 0 rather than NaN.
    if sigma_squared <= 0:
        return 0
    sigma = np.sqrt(sigma_squared)
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    return mu_3 / (sigma**3)

# Function to calculate EV, SD, and skewness
def lottery_stats(probs, outcomes):
    """Return ``(expected value, standard deviation, skewness)`` of a lottery.

    Args:
        probs: Array of outcome probabilities.
        outcomes: Array of payoffs aligned with ``probs``.
    """
    mean = np.sum(probs * outcomes)
    squared_deviations = (outcomes - mean) ** 2
    spread = np.sqrt(np.sum(probs * squared_deviations))
    return mean, spread, calculate_skewness(probs, outcomes)

# Generate unique probabilities that sum to 1
def generate_unique_probs(n, low=0.02, high=0.4, max_attempts=1000):
    """Draw ``n`` distinct two-decimal probabilities that sum to 1.

    Candidates are sampled without replacement from the 0.01-spaced grid
    between ``low`` and ``high`` using NumPy's global random generator,
    normalised, rounded to two decimals, and the rounding residual is put on
    the last entry. A draw is accepted only when every value lies within
    ``[low, high]`` and all values are distinct.

    Args:
        n: Number of probabilities to generate.
        low: Smallest allowed probability.
        high: Largest allowed probability.
        max_attempts: Number of random draws to try before giving up.

    Returns:
        numpy.ndarray of ``n`` probabilities.

    Raises:
        ValueError: If the grid holds fewer than ``n`` values, or no valid
            draw is found within ``max_attempts`` attempts.
    """
    possible_probs = np.round(np.arange(low, high + 0.001, 0.01), 2)
    # Rounding may push grid points outside the bounds or create duplicates
    in_bounds = (possible_probs >= low) & (possible_probs <= high)
    possible_probs = np.unique(possible_probs[in_bounds])
    # Ensure there are enough unique probabilities
    if len(possible_probs) < n:
        raise ValueError("Not enough unique probabilities within the specified bounds.")

    for _ in range(max_attempts):
        probs = np.random.choice(possible_probs, size=n, replace=False)
        probs = np.round(probs / probs.sum(), 2)
        # Put the rounding residual on the last entry and round it again:
        # without the second rounding, float noise (e.g. 0.15000000000000002)
        # could pass the uniqueness test next to an exact 0.15 or fail the
        # bounds test right at ``low`` / ``high``.
        probs[-1] = np.round(probs[-1] + (1 - probs.sum()), 2)
        if np.all(probs >= low) and np.all(probs <= high) and len(np.unique(probs)) == n:
            return probs
    raise ValueError("Unable to generate unique probabilities within bounds after rounding.")

# Optimization function
def optimize_lottery(original_probs, original_outcomes):
    """Search for a 7-outcome lottery that mimics a given lottery's moments.

    Repeatedly draws a random probability vector with
    ``generate_unique_probs`` and, holding it fixed, optimises the outcomes
    with SLSQP so that EV, SD and skewness match the original lottery while
    a small penalty keeps the gaps between outcomes near an even spacing.
    The optimised outcomes are rounded to integers and repaired to be
    decreasing; the first candidate whose EV and SD are within 1 and whose
    skewness is within 0.1 of the original is returned.

    Args:
        original_probs: Array of probabilities of the original lottery.
        original_outcomes: Array of outcomes of the original lottery.

    Returns:
        ``(outcomes, probs, stats)`` on success, where ``outcomes`` is an
        integer array in decreasing order, ``probs`` the matching
        probabilities and ``stats`` a dict with the ``original_*`` and
        ``new_*`` EV / SD / skew values; ``(None, None, None)`` when no
        candidate qualifies within the iteration budget.
    """
    # Original lottery statistics
    original_ev, original_sd, original_skew = lottery_stats(original_probs, original_outcomes)

    # Parameters
    n = 7  # Number of outcomes
    iterations = 1000  # Number of iterations to run
    delta = 2  # Minimum difference to ensure uniqueness after rounding
    max_gap = 100  # Maximum allowed gap between consecutive outcomes
    gap_weight = 0.01  # Weight for the gap penalty in the objective function

    # Bounds for outcomes (identical for every outcome)
    bounds = [(2, 200)] * n
    lower, upper = bounds[0]

    # The quantities below do not depend on the sampled probabilities, so
    # they are built once instead of on every iteration.
    desired_gap = (upper - lower) / (n - 1)  # Even spacing across the range
    initial_outcomes = np.linspace(upper, lower, n)  # Start from high to low

    # Outcomes must decrease by at least `delta` and by at most `max_gap`
    constraints = [
        {'type': 'ineq', 'fun': lambda x, i=i: x[i] - x[i+1] - delta} for i in range(n - 1)
    ] + [
        {'type': 'ineq', 'fun': lambda x, i=i: max_gap - (x[i] - x[i+1])} for i in range(n - 1)
    ]

    for _ in range(iterations):
        # Generate unique probabilities; skip the iteration if none is found
        try:
            fixed_probs = generate_unique_probs(n)
        except ValueError:
            continue

        # Objective: squared moment errors plus a weighted gap penalty
        def objective_outcomes(outcomes):
            ev, sd, skw = lottery_stats(fixed_probs, outcomes)
            penalty_stats = (ev - original_ev)**2 + (sd - original_sd)**2 + (skw - original_skew)**2
            # np.diff is negative for decreasing outcomes, hence the "+"
            penalty_gaps = np.sum((np.diff(outcomes) + desired_gap)**2)
            return penalty_stats + gap_weight * penalty_gaps

        result = minimize(
            objective_outcomes,
            initial_outcomes.copy(),
            bounds=bounds,
            constraints=constraints,
            method='SLSQP',
            options={'ftol': 1e-9, 'disp': False}
        )

        # Skip this iteration if the optimisation failed
        if not result.success:
            continue

        optimized_outcomes = np.round(result.x).astype(int)

        # Rounding can break the constraints; repair from the top down
        for i in range(1, n):
            # Ensure the gap does not exceed max_gap
            if optimized_outcomes[i - 1] - optimized_outcomes[i] > max_gap:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - max_gap
            # Ensure outcomes are decreasing by at least delta
            if optimized_outcomes[i] >= optimized_outcomes[i - 1]:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - int(delta)
            # Ensure within bounds
            if optimized_outcomes[i] < lower:
                optimized_outcomes[i] = lower

        # Clipping to the lower bound can re-create ties; such a candidate
        # no longer has distinct, decreasing outcomes, so try another draw.
        if np.any(np.diff(optimized_outcomes) >= 0):
            continue

        # Recalculate the statistics with the adjusted outcomes
        final_ev, final_sd, final_skew = lottery_stats(fixed_probs, optimized_outcomes)

        # Accept when EV and SD are within 1 and skewness within 0.1
        if abs(final_ev - original_ev) < 1 and abs(final_sd - original_sd) < 1 and abs(final_skew - original_skew) < 0.1:
            return optimized_outcomes.copy(), fixed_probs.copy(), {
                'original_ev': original_ev,
                'original_sd': original_sd,
                'original_skew': original_skew,
                'new_ev': final_ev,
                'new_sd': final_sd,
                'new_skew': final_skew
            }

    # Iteration budget exhausted without an acceptable candidate
    return None, None, None

# Function to process each row of the DataFrame
def _lottery_columns(row, label):
    """Build the simple and complex columns for one lottery of a row.

    Args:
        row: Row holding ``P_<label>1``, ``P_<label>2``, ``O_<label>1`` and
            ``O_<label>2``.
        label: Lottery label, ``'A'`` or ``'B'``.

    Returns:
        ``(columns, found)``: the new columns in insertion order, and
        whether ``optimize_lottery`` produced a complex lottery.
    """
    probs = np.array([row[f'P_{label}1'], row[f'P_{label}2']])
    probs = probs / probs.sum()  # Renormalise in case of rounding in the inputs
    outcomes = np.array([row[f'O_{label}1'], row[f'O_{label}2']])

    columns = {}

    # Statistics of the original two-outcome lottery
    simple_ev, simple_sd, simple_skew = lottery_stats(probs, outcomes)
    columns[f'simple_EV{label}'] = simple_ev
    columns[f'simple_SD{label}'] = simple_sd
    columns[f'simple_skewness_{label}'] = simple_skew

    best_outcomes, best_probs, stats = optimize_lottery(probs, outcomes)
    found = best_outcomes is not None

    if found:
        # Store new outcomes (as plain integers) and probabilities
        for k, (outcome, prob) in enumerate(zip(best_outcomes, best_probs), start=1):
            columns[f'complex_O{label}{k}'] = int(outcome)
            columns[f'complex_P{label}{k}'] = prob
        columns[f'complex_EV{label}'] = stats['new_ev']
        columns[f'complex_SD{label}'] = stats['new_sd']
        columns[f'complex_skewness_{label}'] = stats['new_skew']
    else:
        # Placeholders for the 7 outcomes optimize_lottery would have produced
        for k in range(1, 8):
            columns[f'complex_O{label}{k}'] = np.nan
            columns[f'complex_P{label}{k}'] = np.nan
        columns[f'complex_EV{label}'] = np.nan
        columns[f'complex_SD{label}'] = np.nan
        columns[f'complex_skewness_{label}'] = np.nan

    return columns, found


# Function to process each row of the DataFrame
def process_row(row):
    """Convert both lotteries of a row and collect all derived columns.

    Lottery A is processed before lottery B. The result holds, in order:
    the simple and complex columns of A, those of B, then the A-minus-B
    differences of the simple and of the complex statistics. Complex
    columns are NaN for a lottery that could not be converted, and the
    complex differences are NaN unless both lotteries were converted.

    Args:
        row: One row of the lottery-pair DataFrame.

    Returns:
        pandas.Series with the new columns.
    """
    columns_A, found_A = _lottery_columns(row, 'A')
    columns_B, found_B = _lottery_columns(row, 'B')
    result_dict = {**columns_A, **columns_B}

    # Differences between Lotteries A and B (Original)
    result_dict['simple_EVD'] = result_dict['simple_EVA'] - result_dict['simple_EVB']
    result_dict['simple_SDD'] = result_dict['simple_SDA'] - result_dict['simple_SDB']
    result_dict['simple_skewness_D'] = result_dict['simple_skewness_A'] - result_dict['simple_skewness_B']

    # Differences between Lotteries A and B (New)
    if found_A and found_B:
        result_dict['complex_EVD'] = result_dict['complex_EVA'] - result_dict['complex_EVB']
        result_dict['complex_SDD'] = result_dict['complex_SDA'] - result_dict['complex_SDB']
        result_dict['complex_skewness_D'] = result_dict['complex_skewness_A'] - result_dict['complex_skewness_B']
    else:
        result_dict['complex_EVD'] = np.nan
        result_dict['complex_SDD'] = np.nan
        result_dict['complex_skewness_D'] = np.nan

    return pd.Series(result_dict)

# Convert every lottery pair, one row at a time
new_columns = lottery_pairs_sorted.apply(process_row, axis=1)

# Put the generated columns to the right of the original ones
lottery_pairs_new = pd.concat([lottery_pairs_sorted, new_columns], axis=1)

# Complex outcomes are whole numbers that may be missing, so store them
# in the nullable integer dtype
outcome_columns = [f'complex_O{label}{k}' for label in 'AB' for k in range(1, 8)]
lottery_pairs_new[outcome_columns] = lottery_pairs_new[outcome_columns].astype('Int64')





In [380]:
# Save only the rows without any missing value (process_row leaves NaN in
# the complex columns of lotteries that could not be converted)
lottery_pairs_new.dropna().to_csv('study3_trials_raw_ns.csv', index=False)

In [381]:
# Read the file written in the previous cell back into a dataframe
lottery_pairs_new_ns = pd.read_csv('study3_trials_raw_ns.csv')

In [382]:


# Keep only the rows without any missing value
df_selected = lottery_pairs_new.dropna()

# Round every Python int/float cell to 2 decimals; other cells pass through.
# DataFrame.applymap is deprecated in favour of DataFrame.map, so use `map`
# when this pandas version provides it and fall back to `applymap` otherwise.
_cellwise = df_selected.map if hasattr(df_selected, 'map') else df_selected.applymap
df_selected_rounded = _cellwise(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)

df_selected_before = df_selected_rounded.sort_values(by='skew_level', ascending=True).dropna()

# Filter rows based on conditions for A and B:
#  - simple and complex skewness of each lottery differ by less than 0.25
#  - both complex lotteries have |skewness| below 0.3
#  - the larger probabilities of A and B differ by more than 0.015
filtered_df = df_selected_before[
    (abs(df_selected_before['simple_skewness_A'] - df_selected_before['complex_skewness_A']) < 0.25) &
    (abs(df_selected_before['simple_skewness_B'] - df_selected_before['complex_skewness_B']) < 0.25) &
    (abs(df_selected_before['complex_skewness_A']) < 0.3) &
    (abs(df_selected_before['complex_skewness_B']) < 0.3) &
    (abs(np.maximum(df_selected_before['P_A1'], df_selected_before['P_A2']) -
         np.maximum(df_selected_before['P_B1'], df_selected_before['P_B2'])) > 0.015)
]

# Drop 50/50 lotteries
filtered_df = filtered_df[filtered_df["P_A1"] != 0.5]
filtered_df = filtered_df[filtered_df["P_B1"] != 0.5]
# Drop pairs in which B's first probability or outcome repeats one of A's
filtered_df = filtered_df[filtered_df["P_B1"] != filtered_df["P_A1"]]
filtered_df = filtered_df[filtered_df["P_B1"] != filtered_df["P_A2"]]
filtered_df = filtered_df[filtered_df["O_B1"] != filtered_df["O_A1"]]
filtered_df = filtered_df[filtered_df["O_B1"] != filtered_df["O_A2"]]
# Keep pairs whose A-minus-B skewness difference stays below 0.5 in
# magnitude, for the simple and for the complex lotteries
filtered_df = filtered_df[abs(filtered_df["simple_skewness_D"]) < 0.5]
filtered_df = filtered_df[abs(filtered_df["complex_skewness_D"]) < 0.5]
filtered_df

  df_selected_rounded = df_selected.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)


Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.54,141,0.46,173,0.48,164,0.52,185,155.72,174.92,...,0.22,175.05,11.14,-0.01,-19.2,5.46,0.24,-19.45,5.73,0.2
24,0.44,146,0.56,187,0.51,164,0.49,176,168.96,169.88,...,0.19,169.46,6.99,0.14,-0.92,14.35,-0.28,-0.57,14.03,-0.31
25,0.47,52,0.53,97,0.55,69,0.45,85,75.85,76.2,...,0.06,76.42,8.92,0.15,-0.35,14.5,-0.32,-0.59,14.11,-0.19
26,0.46,14,0.54,62,0.52,32,0.48,50,39.92,40.64,...,0.18,40.29,9.77,0.12,-0.72,14.93,-0.24,-0.45,14.46,-0.33
27,0.46,143,0.54,179,0.44,137,0.56,163,162.44,151.56,...,0.13,151.63,13.86,-0.18,10.88,5.04,0.08,10.97,4.87,0.1
28,0.54,32,0.46,62,0.56,27,0.44,47,45.8,35.8,...,0.2,35.73,10.72,0.17,10.0,5.02,-0.08,9.87,5.07,-0.08
30,0.49,136,0.51,171,0.45,137,0.55,151,153.85,144.7,...,0.05,144.66,7.94,-0.29,9.15,10.53,0.16,9.15,9.99,0.23
31,0.48,66,0.52,109,0.54,67,0.46,90,88.36,77.58,...,0.11,77.49,12.21,0.16,10.78,10.02,-0.24,10.82,9.81,-0.15
34,0.47,74,0.53,117,0.51,80,0.49,94,96.79,86.86,...,0.24,86.84,7.89,0.1,9.93,14.46,-0.16,10.07,14.22,-0.31
35,0.45,109,0.55,147,0.48,116,0.52,124,129.9,120.16,...,0.22,120.22,4.89,-0.03,9.74,14.91,-0.12,9.79,14.69,-0.09


In [383]:


# Draw one trial at random from every (skew, EV, SD) design cell.
# GroupBy.sample performs the per-group draw directly, replacing
# groupby(...).apply(lambda x: x.sample(n=1)).
# NOTE(review): the stray warning excerpt after this cell points at the old
# apply call - presumably the pandas deprecation about apply operating on
# the grouping columns; confirm against the pandas version in use.
random_trials = (
    filtered_df
    .groupby(['skew_level', 'ev_level', 'sd_level'], group_keys=False)
    .sample(n=1)
)


random_trials

  .apply(lambda x: x.sample(n=1))


Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.54,141,0.46,173,0.48,164,0.52,185,155.72,174.92,...,0.22,175.05,11.14,-0.01,-19.2,5.46,0.24,-19.45,5.73,0.2
4,0.45,11,0.55,57,0.48,42,0.52,68,36.3,55.52,...,0.18,55.37,13.76,-0.06,-19.22,9.9,-0.12,-18.76,9.23,-0.05
6,0.49,128,0.51,174,0.44,162,0.56,179,151.46,171.52,...,0.18,171.34,9.18,-0.22,-20.06,14.56,0.2,-19.74,14.65,0.13
9,0.48,102,0.52,140,0.56,119,0.44,146,121.76,130.88,...,0.15,130.82,14.24,0.2,-9.12,5.58,-0.32,-8.89,5.48,-0.28
13,0.52,19,0.48,55,0.55,38,0.45,56,36.28,46.1,...,0.08,46.01,9.75,0.13,-9.82,9.03,-0.12,-9.65,9.09,0.02
16,0.44,119,0.56,166,0.54,148,0.46,163,145.32,154.9,...,0.25,154.79,8.15,0.08,-9.58,15.85,-0.4,-9.41,15.99,-0.3
20,0.48,67,0.52,100,0.46,72,0.54,95,84.16,84.42,...,0.18,84.53,12.23,-0.19,-0.26,5.02,0.08,-0.33,4.98,0.16
22,0.45,93,0.55,130,0.49,105,0.51,120,113.35,112.65,...,0.13,112.61,8.45,-0.13,0.7,10.91,-0.16,0.6,10.72,-0.17
24,0.44,146,0.56,187,0.51,164,0.49,176,168.96,169.88,...,0.19,169.46,6.99,0.14,-0.92,14.35,-0.28,-0.57,14.03,-0.31
27,0.46,143,0.54,179,0.44,137,0.56,163,162.44,151.56,...,0.13,151.63,13.86,-0.18,10.88,5.04,0.08,10.97,4.87,0.1


In [384]:
# Display the sampled trials again
random_trials

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.54,141,0.46,173,0.48,164,0.52,185,155.72,174.92,...,0.22,175.05,11.14,-0.01,-19.2,5.46,0.24,-19.45,5.73,0.2
4,0.45,11,0.55,57,0.48,42,0.52,68,36.3,55.52,...,0.18,55.37,13.76,-0.06,-19.22,9.9,-0.12,-18.76,9.23,-0.05
6,0.49,128,0.51,174,0.44,162,0.56,179,151.46,171.52,...,0.18,171.34,9.18,-0.22,-20.06,14.56,0.2,-19.74,14.65,0.13
9,0.48,102,0.52,140,0.56,119,0.44,146,121.76,130.88,...,0.15,130.82,14.24,0.2,-9.12,5.58,-0.32,-8.89,5.48,-0.28
13,0.52,19,0.48,55,0.55,38,0.45,56,36.28,46.1,...,0.08,46.01,9.75,0.13,-9.82,9.03,-0.12,-9.65,9.09,0.02
16,0.44,119,0.56,166,0.54,148,0.46,163,145.32,154.9,...,0.25,154.79,8.15,0.08,-9.58,15.85,-0.4,-9.41,15.99,-0.3
20,0.48,67,0.52,100,0.46,72,0.54,95,84.16,84.42,...,0.18,84.53,12.23,-0.19,-0.26,5.02,0.08,-0.33,4.98,0.16
22,0.45,93,0.55,130,0.49,105,0.51,120,113.35,112.65,...,0.13,112.61,8.45,-0.13,0.7,10.91,-0.16,0.6,10.72,-0.17
24,0.44,146,0.56,187,0.51,164,0.49,176,168.96,169.88,...,0.19,169.46,6.99,0.14,-0.92,14.35,-0.28,-0.57,14.03,-0.31
27,0.46,143,0.54,179,0.44,137,0.56,163,162.44,151.56,...,0.13,151.63,13.86,-0.18,10.88,5.04,0.08,10.97,4.87,0.1


In [385]:
# Load the trial set stored in 'study3_trials_final_no_ns.csv' and display it
# NOTE(review): presumably the final left/right-skew trials without the
# no-skew condition - confirm against the cell that writes this file
df_lr_rl = pd.read_csv('study3_trials_final_no_ns.csv')
df_lr_rl

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.82,131,0.18,96,0.15,163,0.85,142,124.7,145.15,...,0.28,144.92,8.294191,1.760486,-20.45,5.948061,-3.626246,-20.16,5.903781,-3.332272
1,0.84,96,0.16,43,0.14,132,0.86,103,87.52,107.06,...,0.25,107.17,10.849935,2.044513,-19.54,9.367497,-3.929858,-19.59,9.207073,-3.659274
2,0.85,100,0.15,40,0.12,127,0.88,109,91.0,111.16,...,0.28,111.32,6.837953,2.290897,-20.16,15.574978,-4.29913,-20.38,15.301523,-4.070624
3,0.85,157,0.15,126,0.17,173,0.83,159,152.35,161.38,...,0.24,161.45,6.179604,1.695312,-9.03,5.810355,-3.717427,-8.88,5.764646,-3.520548
4,0.85,92,0.15,40,0.2,111,0.8,90,84.2,94.2,...,0.11,94.14,9.3156,1.30269,-10.0,10.167714,-3.460392,-9.91,10.075045,-3.131868
5,0.85,114,0.15,49,0.2,131,0.8,110,104.25,114.2,...,0.24,114.24,9.300667,1.50481,-9.95,14.809642,-3.460392,-10.12,14.340941,-3.376895
6,0.82,167,0.18,127,0.16,184,0.84,155,159.8,159.64,...,0.3,159.47,11.448541,1.704134,0.16,4.735923,-3.520706,0.2,4.457467,-3.220631
7,0.83,104,0.17,57,0.14,115,0.86,93,96.01,96.08,...,0.2,95.92,8.375775,1.826096,-0.07,10.021027,-3.832041,0.06,9.92722,-3.366588
8,0.85,190,0.15,138,0.12,193,0.88,180,182.2,181.56,...,0.25,181.21,4.585401,2.225802,0.64,14.343214,-4.29913,0.74,14.548012,-3.961913
9,0.88,136,0.12,101,0.1,144,0.9,120,131.8,122.4,...,0.36,122.73,8.101673,2.669114,9.4,4.173654,-5.005405,8.98,4.223338,-4.779732


In [388]:


# Stack the loaded trials and the sampled trials into one table (rows are
# renumbered from 0) and write it to disk without the index column
merged_df_version3 = pd.concat([df_lr_rl, random_trials], ignore_index=True)
merged_df_version3.to_csv('study3_trials_final_version3.csv', index=False)

In [387]:

# Average EV and SD of both lotteries per skew level, over the sampled trials
mean_values_random_trials = random_trials.groupby('skew_level', as_index=False)[
    ['EV_A', 'EV_B', 'SD_A', 'SD_B']
].mean()

mean_values_random_trials

Unnamed: 0,skew_level,EV_A,EV_B,SD_A,SD_B
0,no_skew_vs_no_skew,116.323333,116.212,19.175333,9.064


In [363]:
import numpy as np
import pandas as pd
# Load the two raw trial files
# (pd.read_csv already returns a DataFrame; the pd.DataFrame(...) calls
# below just wrap the same data again)
dat_0 = pd.read_csv('study3_trials_raw.csv')
df_2 = pd.DataFrame(dat_0)
dat_1 = pd.read_csv('study3_trials_new.csv')
dat_new = pd.DataFrame(dat_1)

# Stack both files into one table, renumbering the rows from 0
df_na = pd.concat([df_2, dat_new], ignore_index=True)
df_na

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.11,100,0.89,133,0.86,148,0.14,162,129.37,149.96,...,0.23,150.00,5.824088,1.941352,-20.59,5.467544,-4.567896,-20.48,5.457295,-4.197162
1,0.15,113,0.85,140,0.81,154,0.19,166,135.95,156.28,...,0.22,156.54,5.673482,1.638248,-20.33,4.933318,-3.540812,-20.57,4.955210,-3.348683
2,0.13,128,0.87,160,0.84,174,0.16,187,155.84,176.08,...,0.19,175.93,5.734553,1.954150,-20.24,5.995831,-4.055246,-20.33,5.976126,-3.918112
3,0.17,90,0.83,124,0.80,135,0.20,152,118.22,138.40,...,0.11,138.36,7.698727,1.501406,-20.18,5.971515,-3.257035,-20.16,5.762328,-3.146273
4,0.19,44,0.81,83,0.82,92,0.18,117,75.59,96.50,...,0.21,96.34,10.533964,1.533275,-20.91,5.695049,-3.246273,-20.79,5.468770,-2.931367
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
257,0.87,70,0.13,145,0.15,64,0.85,94,79.75,89.50,...,0.03,89.71,11.651863,-1.844871,-9.75,14.510615,4.160786,-9.74,13.929558,3.906443
258,0.85,17,0.15,74,0.13,23,0.87,37,25.55,35.18,...,0.03,35.16,5.696876,-2.085050,-9.63,15.644823,4.160786,-9.54,15.187941,3.961510
259,0.82,20,0.18,82,0.15,19,0.85,44,31.16,40.25,...,0.03,40.22,9.703175,-1.842827,-9.09,14.892837,3.626246,-8.96,14.461932,3.369006
260,0.81,72,0.19,135,0.15,70,0.85,98,83.97,93.80,...,0.05,93.67,10.754585,-1.932643,-9.83,14.716957,3.540812,-9.61,14.523002,3.316078


In [364]:

# Select the usable trials with one combined mask:
#  - simple and complex skewness of each lottery differ by less than 0.25
#  - the larger probabilities of A and B differ by more than 0.015
#  - B's first probability / outcome does not repeat either of A's
#  - the no-skew condition is excluded
filtered_df = df_na[
    ((df_na['simple_skewness_A'] - df_na['complex_skewness_A']).abs() < 0.25)
    & ((df_na['simple_skewness_B'] - df_na['complex_skewness_B']).abs() < 0.25)
    & ((np.maximum(df_na['P_A1'], df_na['P_A2'])
        - np.maximum(df_na['P_B1'], df_na['P_B2'])).abs() > 0.015)
    & (df_na["P_B1"] != df_na["P_A1"])
    & (df_na["P_B1"] != df_na["P_A2"])
    & (df_na["O_B1"] != df_na["O_A1"])
    & (df_na["O_B1"] != df_na["O_A2"])
    & (df_na["skew_level"] != 'no_skew_vs_no_skew')
]



In [365]:
# Number of remaining trials in every (skew, EV, SD) design cell
level_counts = (
    filtered_df
    .groupby(['skew_level', 'ev_level', 'sd_level'])
    .size()
    .rename('count')
    .reset_index()
)

# Show the per-cell counts
level_counts

Unnamed: 0,skew_level,ev_level,sd_level,count
0,left_vs_right,-20,5,5
1,left_vs_right,-20,10,4
2,left_vs_right,-20,15,2
3,left_vs_right,-10,5,12
4,left_vs_right,-10,10,2
5,left_vs_right,-10,15,3
6,left_vs_right,0,5,4
7,left_vs_right,0,10,3
8,left_vs_right,0,15,1
9,left_vs_right,10,5,8


In [366]:

# Draw one trial at random from every (skew, EV, SD) design cell.
# GroupBy.sample performs the per-group draw directly, replacing
# groupby(...).apply(lambda x: x.sample(n=1)).
# NOTE(review): the stray warning excerpt after this cell points at the old
# apply call - presumably the pandas deprecation about apply operating on
# the grouping columns; confirm against the pandas version in use.
random_trials = (
    filtered_df
    .groupby(['skew_level', 'ev_level', 'sd_level'], group_keys=False)
    .sample(n=1)
)

random_trials


  .apply(lambda x: x.sample(n=1))


Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
5,0.18,96,0.82,131,0.85,142,0.15,163,124.7,145.15,...,0.28,144.92,8.294191,1.760486,-20.45,5.948061,-3.626246,-20.16,5.903781,-3.332272
11,0.16,43,0.84,96,0.86,103,0.14,132,87.52,107.06,...,0.25,107.17,10.849935,2.044513,-19.54,9.367497,-3.929858,-19.59,9.207073,-3.659274
244,0.15,40,0.85,100,0.88,109,0.12,127,91.0,111.16,...,0.28,111.32,6.837953,2.290897,-20.16,15.574978,-4.29913,-20.38,15.301523,-4.070624
14,0.15,126,0.85,157,0.83,159,0.17,173,152.35,161.38,...,0.24,161.45,6.179604,1.695312,-9.03,5.810355,-3.717427,-8.88,5.764646,-3.520548
255,0.15,40,0.85,92,0.8,90,0.2,111,84.2,94.2,...,0.11,94.14,9.3156,1.30269,-10.0,10.167714,-3.460392,-9.91,10.075045,-3.131868
26,0.15,49,0.85,114,0.8,110,0.2,131,104.25,114.2,...,0.24,114.24,9.300667,1.50481,-9.95,14.809642,-3.460392,-10.12,14.340941,-3.376895
38,0.18,127,0.82,167,0.84,155,0.16,184,159.8,159.64,...,0.3,159.47,11.448541,1.704134,0.16,4.735923,-3.520706,0.2,4.457467,-3.220631
40,0.17,57,0.83,104,0.86,93,0.14,115,96.01,96.08,...,0.2,95.92,8.375775,1.826096,-0.07,10.021027,-3.832041,0.06,9.92722,-3.366588
44,0.15,138,0.85,190,0.88,180,0.12,193,182.2,181.56,...,0.25,181.21,4.585401,2.225802,0.64,14.343214,-4.29913,0.74,14.548012,-3.961913
51,0.12,101,0.88,136,0.9,120,0.1,144,131.8,122.4,...,0.36,122.73,8.101673,2.669114,9.4,4.173654,-5.005405,8.98,4.223338,-4.779732


In [367]:

# Average EV and SD of both lotteries per skew level, over the sampled trials
mean_values_random_trials = random_trials.groupby('skew_level', as_index=False)[
    ['EV_A', 'EV_B', 'SD_A', 'SD_B']
].mean()

mean_values_random_trials

Unnamed: 0,skew_level,EV_A,EV_B,SD_A,SD_B
0,left_vs_right,125.762,125.779333,17.677962,7.673979
1,right_vs_left,72.715333,72.704667,17.813756,7.744212


In [368]:



# Function to ensure greater outcomes are in O_*1 and associated probabilities are swapped accordingly
def swap_values(row):
    """Return a copy of ``row`` with each lottery's larger outcome in slot 1.

    For lottery A and lottery B independently: if ``O_*1`` is smaller than
    ``O_*2`` the two outcomes are exchanged together with their probabilities
    ``P_*1`` / ``P_*2``, so every probability stays attached to its outcome.
    Equal outcomes are left as they are.

    Args:
        row: Record supporting ``row[key]`` access and ``.copy()`` (a
            DataFrame row handed over by ``DataFrame.apply(..., axis=1)``,
            or a ``dict``) with the keys ``O_A1, O_A2, P_A1, P_A2, O_B1,
            O_B2, P_B1, P_B2``.

    Returns:
        A new record of the same type; the input is not modified.
    """
    # pandas does not support functions that mutate the object handed to
    # ``apply``, so edit a copy rather than ``row`` itself.
    row = row.copy()
    for option in ("A", "B"):
        first_outcome, second_outcome = f"O_{option}1", f"O_{option}2"
        if row[first_outcome] < row[second_outcome]:
            first_prob, second_prob = f"P_{option}1", f"P_{option}2"
            row[first_outcome], row[second_outcome] = row[second_outcome], row[first_outcome]
            row[first_prob], row[second_prob] = row[second_prob], row[first_prob]
    return row

# Run swap_values on every row (axis=1 hands the function one row at a time)
# and rebind random_trials to the frame assembled from the returned rows.
random_trials = random_trials.apply(swap_values, axis=1)
# Bare expression: echoes the frame as the cell output.
random_trials




Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
5,0.82,131,0.18,96,0.15,163,0.85,142,124.7,145.15,...,0.28,144.92,8.294191,1.760486,-20.45,5.948061,-3.626246,-20.16,5.903781,-3.332272
11,0.84,96,0.16,43,0.14,132,0.86,103,87.52,107.06,...,0.25,107.17,10.849935,2.044513,-19.54,9.367497,-3.929858,-19.59,9.207073,-3.659274
244,0.85,100,0.15,40,0.12,127,0.88,109,91.0,111.16,...,0.28,111.32,6.837953,2.290897,-20.16,15.574978,-4.29913,-20.38,15.301523,-4.070624
14,0.85,157,0.15,126,0.17,173,0.83,159,152.35,161.38,...,0.24,161.45,6.179604,1.695312,-9.03,5.810355,-3.717427,-8.88,5.764646,-3.520548
255,0.85,92,0.15,40,0.2,111,0.8,90,84.2,94.2,...,0.11,94.14,9.3156,1.30269,-10.0,10.167714,-3.460392,-9.91,10.075045,-3.131868
26,0.85,114,0.15,49,0.2,131,0.8,110,104.25,114.2,...,0.24,114.24,9.300667,1.50481,-9.95,14.809642,-3.460392,-10.12,14.340941,-3.376895
38,0.82,167,0.18,127,0.16,184,0.84,155,159.8,159.64,...,0.3,159.47,11.448541,1.704134,0.16,4.735923,-3.520706,0.2,4.457467,-3.220631
40,0.83,104,0.17,57,0.14,115,0.86,93,96.01,96.08,...,0.2,95.92,8.375775,1.826096,-0.07,10.021027,-3.832041,0.06,9.92722,-3.366588
44,0.85,190,0.15,138,0.12,193,0.88,180,182.2,181.56,...,0.25,181.21,4.585401,2.225802,0.64,14.343214,-4.29913,0.74,14.548012,-3.961913
51,0.88,136,0.12,101,0.1,144,0.9,120,131.8,122.4,...,0.36,122.73,8.101673,2.669114,9.4,4.173654,-5.005405,8.98,4.223338,-4.779732


In [370]:

# Order the trials by skew, EV and SD level and renumber the rows from 0.
# sort_values returns a new frame, so random_trials itself is left as it was.
merged_df = (
    random_trials
    .sort_values(by=['skew_level', 'ev_level', 'sd_level'])
    .reset_index(drop=True)
)

merged_df



Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.82,131,0.18,96,0.15,163,0.85,142,124.7,145.15,...,0.28,144.92,8.294191,1.760486,-20.45,5.948061,-3.626246,-20.16,5.903781,-3.332272
1,0.84,96,0.16,43,0.14,132,0.86,103,87.52,107.06,...,0.25,107.17,10.849935,2.044513,-19.54,9.367497,-3.929858,-19.59,9.207073,-3.659274
2,0.85,100,0.15,40,0.12,127,0.88,109,91.0,111.16,...,0.28,111.32,6.837953,2.290897,-20.16,15.574978,-4.29913,-20.38,15.301523,-4.070624
3,0.85,157,0.15,126,0.17,173,0.83,159,152.35,161.38,...,0.24,161.45,6.179604,1.695312,-9.03,5.810355,-3.717427,-8.88,5.764646,-3.520548
4,0.85,92,0.15,40,0.2,111,0.8,90,84.2,94.2,...,0.11,94.14,9.3156,1.30269,-10.0,10.167714,-3.460392,-9.91,10.075045,-3.131868
5,0.85,114,0.15,49,0.2,131,0.8,110,104.25,114.2,...,0.24,114.24,9.300667,1.50481,-9.95,14.809642,-3.460392,-10.12,14.340941,-3.376895
6,0.82,167,0.18,127,0.16,184,0.84,155,159.8,159.64,...,0.3,159.47,11.448541,1.704134,0.16,4.735923,-3.520706,0.2,4.457467,-3.220631
7,0.83,104,0.17,57,0.14,115,0.86,93,96.01,96.08,...,0.2,95.92,8.375775,1.826096,-0.07,10.021027,-3.832041,0.06,9.92722,-3.366588
8,0.85,190,0.15,138,0.12,193,0.88,180,182.2,181.56,...,0.25,181.21,4.585401,2.225802,0.64,14.343214,-4.29913,0.74,14.548012,-3.961913
9,0.88,136,0.12,101,0.1,144,0.9,120,131.8,122.4,...,0.36,122.73,8.101673,2.669114,9.4,4.173654,-5.005405,8.98,4.223338,-4.779732


In [371]:
# Write the sorted trials to disk; index=False keeps the row index out of the file.
merged_df.to_csv('study3_trials_final_no_ns.csv', index=False)


In [372]:
# Load 'study3_trials_final_ns.csv' from the working directory.
dat_ns = pd.read_csv('study3_trials_final_ns.csv')
# Wrap the loaded table in a DataFrame bound to df_ns, the name the later cells use.
df_ns = pd.DataFrame(dat_ns)
# Bare expression: echoes the frame as the cell output.
df_ns

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.53,104,0.47,135,0.54,129,0.46,148,118.57,137.74,...,0.11,137.86,10.34,-0.07,-19.17,6.0,-0.04,-19.4,5.82,0.26
1,0.45,116,0.55,159,0.47,147,0.53,169,139.65,158.66,...,0.22,158.69,11.92,-0.19,-19.01,10.41,-0.08,-18.88,10.05,-0.12
2,0.53,43,0.47,87,0.49,78,0.51,90,63.68,84.12,...,0.2,84.14,6.86,0.08,-20.44,15.96,0.16,-20.56,15.79,0.01
3,0.53,73,0.47,107,0.55,87,0.45,113,88.98,98.7,...,0.24,98.85,13.61,0.19,-9.72,4.03,-0.08,-9.74,4.17,-0.24
4,0.56,90,0.44,125,0.48,109,0.52,123,105.4,116.28,...,0.21,116.23,7.59,-0.02,-10.88,10.38,0.32,-10.72,10.56,0.21
5,0.46,60,0.54,100,0.45,86,0.55,97,81.6,92.05,...,0.18,92.06,6.45,-0.44,-10.45,14.46,0.04,-10.51,14.07,0.08
6,0.55,117,0.45,149,0.52,120,0.48,142,131.4,130.56,...,0.2,130.63,11.91,0.05,0.84,4.93,0.12,0.8,4.51,0.13
7,0.53,28,0.47,78,0.51,38,0.49,66,51.5,51.72,...,0.15,51.55,14.78,0.07,-0.22,10.96,0.08,0.0,10.6,0.28
8,0.48,144,0.52,183,0.45,160,0.55,168,164.28,164.4,...,0.16,164.12,4.95,-0.13,-0.12,15.5,0.12,-0.15,15.15,-0.01
9,0.46,145,0.54,176,0.56,142,0.44,163,161.74,151.24,...,0.2,151.31,11.17,0.14,10.5,5.03,-0.4,10.32,5.08,-0.46


In [377]:
# Row filter on df_ns. A row is kept only when
#   * simple and complex skewness differ by less than 0.25 for lottery A,
#   * the same holds for lottery B, and
#   * the larger probability of A and the larger probability of B differ by more than 0.015.
larger_prob_A = np.maximum(df_ns['P_A1'], df_ns['P_A2'])
larger_prob_B = np.maximum(df_ns['P_B1'], df_ns['P_B2'])
rows_to_keep = (
    (df_ns['simple_skewness_A'] - df_ns['complex_skewness_A']).abs().lt(0.25)
    & (df_ns['simple_skewness_B'] - df_ns['complex_skewness_B']).abs().lt(0.25)
    & (larger_prob_A - larger_prob_B).abs().gt(0.015)
)
filtered_df_ns = df_ns[rows_to_keep]

# Number of surviving rows in every (skew, EV, SD) level combination
level_counts = (
    filtered_df_ns
    .groupby(['skew_level', 'ev_level', 'sd_level'])
    .size()
    .reset_index(name='count')
)

# Show the per-combination counts
level_counts


Unnamed: 0,skew_level,ev_level,sd_level,count
0,no_skew_vs_no_skew,-20,10,1
1,no_skew_vs_no_skew,-20,15,1
2,no_skew_vs_no_skew,-10,5,1
3,no_skew_vs_no_skew,-10,10,1
4,no_skew_vs_no_skew,0,5,1
5,no_skew_vs_no_skew,0,10,1
6,no_skew_vs_no_skew,0,15,1
7,no_skew_vs_no_skew,10,5,1
8,no_skew_vs_no_skew,10,15,1
9,no_skew_vs_no_skew,20,10,1


In [375]:
# Put the larger outcome first in every filtered row, then order the rows by
# skew, EV and SD level and renumber them from 0.
df_ns_sorted = (
    filtered_df_ns
    .apply(swap_values, axis=1)
    .sort_values(by=['skew_level', 'ev_level', 'sd_level'])
    .reset_index(drop=True)
)
df_ns_sorted


Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.55,159,0.45,116,0.53,169,0.47,147,139.65,158.66,...,0.22,158.69,11.92,-0.19,-19.01,10.41,-0.08,-18.88,10.05,-0.12
1,0.47,87,0.53,43,0.51,90,0.49,78,63.68,84.12,...,0.2,84.14,6.86,0.08,-20.44,15.96,0.16,-20.56,15.79,0.01
2,0.47,107,0.53,73,0.45,113,0.55,87,88.98,98.7,...,0.24,98.85,13.61,0.19,-9.72,4.03,-0.08,-9.74,4.17,-0.24
3,0.44,125,0.56,90,0.52,123,0.48,109,105.4,116.28,...,0.21,116.23,7.59,-0.02,-10.88,10.38,0.32,-10.72,10.56,0.21
4,0.45,149,0.55,117,0.48,142,0.52,120,131.4,130.56,...,0.2,130.63,11.91,0.05,0.84,4.93,0.12,0.8,4.51,0.13
5,0.47,78,0.53,28,0.49,66,0.51,38,51.5,51.72,...,0.15,51.55,14.78,0.07,-0.22,10.96,0.08,0.0,10.6,0.28
6,0.52,183,0.48,144,0.55,168,0.45,160,164.28,164.4,...,0.16,164.12,4.95,-0.13,-0.12,15.5,0.12,-0.15,15.15,-0.01
7,0.54,176,0.46,145,0.44,163,0.56,142,161.74,151.24,...,0.2,151.31,11.17,0.14,10.5,5.03,-0.4,10.32,5.08,-0.46
8,0.54,176,0.46,130,0.49,152,0.51,138,154.84,144.86,...,0.18,144.69,7.75,0.15,9.98,15.93,-0.2,10.23,15.94,-0.31
9,0.44,86,0.56,53,0.46,56,0.54,42,67.52,48.44,...,0.21,48.36,7.77,0.08,19.08,9.4,0.08,19.01,9.45,0.3


In [294]:
# Write merged_df_version2_final to disk without the row index.
# NOTE(review): merged_df_version2_final is not created in the nearby cells and this
# cell carries an older execution count (In [294]) — confirm where the frame comes from.
merged_df_version2_final.to_csv('study3_trials_final_version2.csv', index=False)

In [436]:
# Load 'study3_trials_final_version3.csv' from the working directory.
dat = pd.read_csv('study3_trials_final_version3.csv')
# Wrap the loaded table in a DataFrame bound to df.
df = pd.DataFrame(dat)


# Plain assignment, not a copy: final_df_sorted_test and df name the same object,
# so a column added through one name is visible through the other.
final_df_sorted_test = df

In [437]:
# Describe every trial by the tuple of its eight simple-lottery fields so that
# two identical trials produce equal values in the 'combined' column.
# NOTE(review): 'combined' is left on the frame afterwards and nothing visible here
# drops it, so it travels along into later cells and exports — confirm that is wanted.
trial_fields = ['P_A1', 'O_A1', 'P_A2', 'O_A2', 'P_B1', 'O_B1', 'P_B2', 'O_B2']
final_df_sorted_test['combined'] = final_df_sorted_test[trial_fields].apply(tuple, axis=1)

# keep=False flags every member of a duplicate group, not only the repeats
duplicate_mask = final_df_sorted_test['combined'].duplicated(keep=False)
duplicates_exist = duplicate_mask.any()

if not duplicates_exist:
    print("No duplicate rows found in the dataset.")
else:
    print("There are duplicate rows in the dataset.")
    # Show the offending rows
    duplicate_rows = final_df_sorted_test[duplicate_mask]
    print("Duplicate rows:")
    print(duplicate_rows)



No duplicate rows found in the dataset.


In [438]:
# Function to ensure greater outcomes are in O_*1 and associated probabilities are swapped accordingly
def swap_values(row):
    """Return a copy of ``row`` with each lottery's larger outcome in slot 1.

    For lottery A and lottery B independently: if ``O_*1`` is smaller than
    ``O_*2`` the two outcomes are exchanged together with their probabilities
    ``P_*1`` / ``P_*2``, so every probability stays attached to its outcome.
    Equal outcomes are left as they are.

    Args:
        row: Record supporting ``row[key]`` access and ``.copy()`` (a
            DataFrame row handed over by ``DataFrame.apply(..., axis=1)``,
            or a ``dict``) with the keys ``O_A1, O_A2, P_A1, P_A2, O_B1,
            O_B2, P_B1, P_B2``.

    Returns:
        A new record of the same type; the input is not modified.
    """
    # pandas does not support functions that mutate the object handed to
    # ``apply``, so edit a copy rather than ``row`` itself.
    row = row.copy()
    for option in ("A", "B"):
        first_outcome, second_outcome = f"O_{option}1", f"O_{option}2"
        if row[first_outcome] < row[second_outcome]:
            first_prob, second_prob = f"P_{option}1", f"P_{option}2"
            row[first_outcome], row[second_outcome] = row[second_outcome], row[first_outcome]
            row[first_prob], row[second_prob] = row[second_prob], row[first_prob]
    return row

# Run swap_values on every row (axis=1 hands the function one row at a time)
# and rebind final_df_sorted_test to the frame assembled from the returned rows.
final_df_sorted_test = final_df_sorted_test.apply(swap_values, axis=1)

In [439]:


# Average EV and SD of both lotteries within each skew_level of final_df_sorted_test;
# reset_index turns skew_level back into an ordinary column.
grouped_by_skew = final_df_sorted_test.groupby('skew_level')
mean_values_random_trials = grouped_by_skew[['EV_A', 'EV_B', 'SD_A', 'SD_B']].mean().reset_index()

mean_values_random_trials

Unnamed: 0,skew_level,EV_A,EV_B,SD_A,SD_B
0,left_vs_right,125.762,125.779333,17.677962,7.673979
1,no_skew_vs_no_skew,116.323333,116.212,19.175333,9.064
2,right_vs_left,72.715333,72.704667,17.813756,7.744212


In [451]:
# One sub-frame per skew condition, selected by exact match on skew_level
skew_condition = final_df_sorted_test["skew_level"]
filtered_final_df_sorted_test_lr = final_df_sorted_test.loc[skew_condition == "left_vs_right"]
filtered_final_df_sorted_test_ns = final_df_sorted_test.loc[skew_condition == "no_skew_vs_no_skew"]
filtered_final_df_sorted_test_rl = final_df_sorted_test.loc[skew_condition == "right_vs_left"]

# Function to adjust outcomes and additional columns
def adjust_outcomes_lr(row, threshold=80, shift=77):
    """Lower every outcome of a trial by ``shift`` if all of them exceed ``threshold``.

    The outcomes considered are the four simple ones (``O_A1, O_A2, O_B1,
    O_B2``) and the fourteen complex ones (``complex_OA1..7`` and
    ``complex_OB1..7``). The shift is all-or-nothing: a single outcome at or
    below ``threshold`` leaves the whole trial unchanged. Because the same
    constant is removed from every outcome, each lottery's expected value
    drops by ``shift`` while its standard deviation and skewness are
    unaffected.

    Args:
        row: Record supporting ``row[key]`` access and ``.copy()`` (a
            DataFrame row from ``DataFrame.apply(..., axis=1)``, or a
            ``dict``) containing the eighteen outcome keys listed above.
        threshold: Every outcome must be strictly greater than this value
            for the shift to happen. Defaults to 80.
        shift: Amount subtracted from each outcome. Defaults to 77, which
            together with the default threshold keeps shifted outcomes above 3.

    Returns:
        A new record of the same type; the input is not modified.
    """
    outcome_columns = (
        ["O_A1", "O_A2", "O_B1", "O_B2"]
        + [f"complex_OA{i}" for i in range(1, 8)]
        + [f"complex_OB{i}" for i in range(1, 8)]
    )
    # pandas does not support functions that mutate the object handed to
    # ``apply``, so edit a copy rather than ``row`` itself.
    row = row.copy()
    if all(row[col] > threshold for col in outcome_columns):
        for col in outcome_columns:
            row[col] -= shift
    return row

# Function to adjust outcomes and additional columns
def adjust_outcomes_ns(row, threshold=40, shift=37):
    """Lower every outcome of a trial by ``shift`` if all of them exceed ``threshold``.

    The outcomes considered are the four simple ones (``O_A1, O_A2, O_B1,
    O_B2``) and the fourteen complex ones (``complex_OA1..7`` and
    ``complex_OB1..7``). The shift is all-or-nothing: a single outcome at or
    below ``threshold`` leaves the whole trial unchanged. Because the same
    constant is removed from every outcome, each lottery's expected value
    drops by ``shift`` while its standard deviation and skewness are
    unaffected.

    Args:
        row: Record supporting ``row[key]`` access and ``.copy()`` (a
            DataFrame row from ``DataFrame.apply(..., axis=1)``, or a
            ``dict``) containing the eighteen outcome keys listed above.
        threshold: Every outcome must be strictly greater than this value
            for the shift to happen. Defaults to 40.
        shift: Amount subtracted from each outcome. Defaults to 37, which
            together with the default threshold keeps shifted outcomes above 3.

    Returns:
        A new record of the same type; the input is not modified.
    """
    outcome_columns = (
        ["O_A1", "O_A2", "O_B1", "O_B2"]
        + [f"complex_OA{i}" for i in range(1, 8)]
        + [f"complex_OB{i}" for i in range(1, 8)]
    )
    # pandas does not support functions that mutate the object handed to
    # ``apply``, so edit a copy rather than ``row`` itself.
    row = row.copy()
    if all(row[col] > threshold for col in outcome_columns):
        for col in outcome_columns:
            row[col] -= shift
    return row

# Function to adjust outcomes and additional columns
def adjust_outcomes_rl(row, threshold=150, shift=30):
    """Raise every outcome of a trial by ``shift`` if all of them are below ``threshold``.

    The outcomes considered are the four simple ones (``O_A1, O_A2, O_B1,
    O_B2``) and the fourteen complex ones (``complex_OA1..7`` and
    ``complex_OB1..7``). The shift is all-or-nothing: a single outcome at or
    above ``threshold`` leaves the whole trial unchanged. Because the same
    constant is added to every outcome, each lottery's expected value rises
    by ``shift`` while its standard deviation and skewness are unaffected.

    Args:
        row: Record supporting ``row[key]`` access and ``.copy()`` (a
            DataFrame row from ``DataFrame.apply(..., axis=1)``, or a
            ``dict``) containing the eighteen outcome keys listed above.
        threshold: Every outcome must be strictly smaller than this value
            for the shift to happen. Defaults to 150.
        shift: Amount added to each outcome. Defaults to 30, which together
            with the default threshold keeps shifted outcomes below 180.

    Returns:
        A new record of the same type; the input is not modified.
    """
    outcome_columns = (
        ["O_A1", "O_A2", "O_B1", "O_B2"]
        + [f"complex_OA{i}" for i in range(1, 8)]
        + [f"complex_OB{i}" for i in range(1, 8)]
    )
    # pandas does not support functions that mutate the object handed to
    # ``apply``, so edit a copy rather than ``row`` itself.
    row = row.copy()
    if all(row[col] < threshold for col in outcome_columns):
        for col in outcome_columns:
            row[col] += shift
    return row

# Shift outcomes row by row, each skew condition with its own rule; apply
# returns new frames, which replace the per-condition splits.
filtered_final_df_sorted_test_lr = filtered_final_df_sorted_test_lr.apply(
    adjust_outcomes_lr, axis="columns"
)
filtered_final_df_sorted_test_ns = filtered_final_df_sorted_test_ns.apply(
    adjust_outcomes_ns, axis="columns"
)
filtered_final_df_sorted_test_rl = filtered_final_df_sorted_test_rl.apply(
    adjust_outcomes_rl, axis="columns"
)

# Stack the three adjusted frames (lr, ns, rl order) and renumber the rows from 0
adjusted_frames = [
    filtered_final_df_sorted_test_lr,
    filtered_final_df_sorted_test_ns,
    filtered_final_df_sorted_test_rl,
]
result_df = pd.concat(adjusted_frames, ignore_index=True)


In [452]:

                                                        



# Define the skewness calculation function
def calculate_skewness(probabilities, outcomes):
    """Return the skewness of a discrete lottery.

    The mean is ``sum(p * x)``, the variance is ``sum(p * x**2) - mean**2``
    and the result is the probability-weighted third central moment divided
    by ``variance ** 1.5``.

    Args:
        probabilities: Sequence of outcome probabilities.
        outcomes: Sequence of outcomes, aligned with ``probabilities``.

    Returns:
        The skewness, or ``0`` when the computed variance is zero or negative
        (for example when all outcomes are equal).
    """
    p = np.asarray(probabilities)
    x = np.asarray(outcomes)

    mean = (x * p).sum()
    variance = (x**2 * p).sum() - mean**2
    # No spread means no defined skewness; report 0 instead of dividing by zero.
    if variance <= 0:
        return 0

    third_central_moment = ((x - mean) ** 3 * p).sum()
    return third_central_moment / variance**1.5

# Function to calculate EV, SD, and skewness for a single option
def calculate_metrics(p1, o1, p2, o2):
    """Return ``(ev, sd, skew)`` for a two-outcome lottery.

    Args:
        p1: Probability of the first outcome.
        o1: First outcome.
        p2: Probability of the second outcome.
        o2: Second outcome.

    Returns:
        A 3-tuple: the expected value, the standard deviation (``0`` when the
        computed variance is not positive) and the skewness as reported by
        ``calculate_skewness``.
    """
    probs = [p1, p2]
    payoffs = [o1, o2]
    p = np.array(probs)
    x = np.array(payoffs)

    # Expected value
    ev = (p * x).sum()

    # Standard deviation from E[X^2] - EV^2, guarded against a non-positive variance
    variance = (x**2 * p).sum() - ev**2
    if variance > 0:
        sd = np.sqrt(variance)
    else:
        sd = 0

    # Skewness is delegated to the shared helper
    return ev, sd, calculate_skewness(probs, payoffs)



# Recompute EV, SD and skewness of both simple lotteries from the current outcomes.
# apply(axis=1) yields one (ev, sd, skew) tuple per row; zip(*...) regroups those
# tuples into three column-length sequences.
metrics_A = result_df.apply(
    lambda row: calculate_metrics(row["P_A1"], row["O_A1"], row["P_A2"], row["O_A2"]),
    axis=1,
)
result_df["EV_A"], result_df["SD_A"], result_df["skew_A"] = zip(*metrics_A)

metrics_B = result_df.apply(
    lambda row: calculate_metrics(row["P_B1"], row["O_B1"], row["P_B2"], row["O_B2"]),
    axis=1,
)
result_df["EV_B"], result_df["SD_B"], result_df["skew_B"] = zip(*metrics_B)

# A-minus-B differences
result_df["EV_diff"] = result_df["EV_A"].sub(result_df["EV_B"])
result_df["SD_diff"] = result_df["SD_A"].sub(result_df["SD_B"])
result_df["skew_diff"] = result_df["skew_A"].sub(result_df["skew_B"])

# Mirror the fresh values into the simple_* columns.
# NOTE(review): only the B-side simple_* columns and simple_skewness_D are refreshed
# here; any other derived column already on the frame (e.g. complex_EV*, simple_EVD,
# simple_SDD) keeps its earlier value — confirm that is intended.
result_df["simple_EVB"] = result_df["EV_B"]
result_df["simple_SDB"] = result_df["SD_B"]
result_df["simple_skewness_B"] = result_df["skew_B"]
result_df["simple_skewness_D"] = result_df["skew_diff"]


# Average EV and SD of both lotteries within each skew_level of result_df
grouped_by_skew = result_df.groupby('skew_level')
mean_values_random_trials = grouped_by_skew[['EV_A', 'EV_B', 'SD_A', 'SD_B']].mean().reset_index()

mean_values_random_trials

Unnamed: 0,skew_level,EV_A,EV_B,SD_A,SD_B
0,left_vs_right,89.828667,89.846,17.677962,7.673979
1,no_skew_vs_no_skew,89.19,89.078667,19.175326,9.063782
2,right_vs_left,90.715333,90.704667,17.813756,7.744212


In [453]:
result_df

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D,combined
0,0.82,54,0.18,19,0.15,86,0.85,65,47.7,68.15,...,144.92,8.294191,1.760486,-20.45,5.948061,-3.626246,-20.16,5.903781,-3.332272,"(0.82, 131.0, 0.18, 96.0, 0.15, 163.0, 0.85, 1..."
1,0.84,96,0.16,43,0.14,132,0.86,103,87.52,107.06,...,107.17,10.849935,2.044513,-19.54,9.367497,-3.929858,-19.59,9.207073,-3.659274,"(0.84, 96.0, 0.16, 43.0, 0.14, 132.0, 0.86, 10..."
2,0.85,100,0.15,40,0.12,127,0.88,109,91.0,111.16,...,111.32,6.837953,2.290897,-20.16,15.574978,-4.29913,-20.38,15.301523,-4.070624,"(0.85, 100.0, 0.15, 40.0, 0.12, 127.0, 0.88, 1..."
3,0.85,80,0.15,49,0.17,96,0.83,82,75.35,84.38,...,161.45,6.179604,1.695312,-9.03,5.810355,-3.717427,-8.88,5.764646,-3.520548,"(0.85, 157.0, 0.15, 126.0, 0.17, 173.0, 0.83, ..."
4,0.85,92,0.15,40,0.2,111,0.8,90,84.2,94.2,...,94.14,9.3156,1.30269,-10.0,10.167714,-3.460392,-9.91,10.075045,-3.131868,"(0.85, 92.0, 0.15, 40.0, 0.2, 111.0, 0.8, 90.0)"
5,0.85,114,0.15,49,0.2,131,0.8,110,104.25,114.2,...,114.24,9.300667,1.50481,-9.95,14.809642,-3.460392,-10.12,14.340941,-3.376895,"(0.85, 114.0, 0.15, 49.0, 0.2, 131.0, 0.8, 110.0)"
6,0.82,90,0.18,50,0.16,107,0.84,78,82.8,82.64,...,159.47,11.448541,1.704134,0.16,4.735923,-3.520706,0.2,4.457467,-3.220631,"(0.82, 167.0, 0.18, 127.0, 0.16, 184.0, 0.84, ..."
7,0.83,104,0.17,57,0.14,115,0.86,93,96.01,96.08,...,95.92,8.375775,1.826096,-0.07,10.021027,-3.832041,0.06,9.92722,-3.366588,"(0.83, 104.0, 0.17, 57.0, 0.14, 115.0, 0.86, 9..."
8,0.85,113,0.15,61,0.12,116,0.88,103,105.2,104.56,...,181.21,4.585401,2.225802,0.64,14.343214,-4.29913,0.74,14.548012,-3.961913,"(0.85, 190.0, 0.15, 138.0, 0.12, 193.0, 0.88, ..."
9,0.88,59,0.12,24,0.1,67,0.9,43,54.8,45.4,...,122.73,8.101673,2.669114,9.4,4.173654,-5.005405,8.98,4.223338,-4.779732,"(0.88, 136.0, 0.12, 101.0, 0.1, 144.0, 0.9, 12..."


In [454]:
# Write result_df to disk without the row index.
# NOTE(review): "machted" in the file name looks like a typo for "matched"; the path is
# left as is because other code may read the file under this name.
result_df.to_csv('study3_trials_final_version_machted_ev_version3.csv', index=False)

In [458]:

# Load 'best_final_v2.csv' from the working directory into final_df_v2.
final_df_v2 = pd.read_csv('best_final_v2.csv')


In [462]:
# Columns to export from final_df_v2: the condition label plus the EV, SD and
# skewness summary columns.
columns_to_check = ['skew_level','EVA', 'EVB', 'EVD',
                    'EVA_complex', 'EVB_complex', 'EVD_complex',
                    'SDA', 'SDB', 'SDD',
                    'SDA_complex', 'SDB_complex', 'SDD_complex',
                    'skewness_a', 'skewness_b', 'skewness_diff', 
                    'skewness_a_complex', 'skewness_b_complex', 'skewness_diff_complex']

# Report any requested column that final_df_v2 does not have
missing_columns = [col for col in columns_to_check if col not in final_df_v2.columns]
print(f"Missing columns: {missing_columns}")

# Carry on with the columns that do exist, in the requested order
columns_to_select = [col for col in columns_to_check if col in final_df_v2.columns]
df_selected = final_df_v2[columns_to_select]

# Round int/float cells to two decimals and pass every other value through untouched.
# The element-wise function is applied column by column with Series.map because
# DataFrame.applymap is deprecated and emits a FutureWarning on recent pandas.
df_selected_rounded = df_selected.apply(
    lambda column: column.map(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)
)

# Save the rounded summary without the row index
df_selected_rounded.to_csv('best_final_v2_short.csv', index=False)

Missing columns: []


  df_selected_rounded = df_selected.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)
