### Now generate no_skew_vs_no_skew pairs in an SD range comparable to the right_vs_left (rl) and left_vs_right (lr) pairs

In [57]:
import numpy as np
import pandas as pd
import random

# Define the skewness calculation function
def calculate_skewness(probabilities, outcomes):
    """Return the skewness (third standardized moment) of a discrete lottery.

    Args:
        probabilities: Sequence of outcome probabilities.
        outcomes: Sequence of payoffs aligned with ``probabilities``.

    Returns:
        The third central moment divided by the variance to the power 1.5,
        or ``0`` when the computed variance is zero or negative.
    """
    p = np.array(probabilities)
    x = np.array(outcomes)

    mean = np.sum(x * p)
    # Variance via E[X^2] - E[X]^2.
    variance = np.sum((x**2) * p) - mean**2
    if variance <= 0:
        # Degenerate lottery: skewness is undefined, report it as 0.
        return 0

    third_central_moment = np.sum(((x - mean)**3) * p)
    return third_central_moment / (variance**1.5)

# Precompute thresholds for skewness to avoid recalculating in loops
def is_skew_valid(skew_A, skew_B, skew, skew_abs_diff):
    """Check whether a lottery pair fits the requested skewness condition.

    Args:
        skew_A: Skewness of lottery A.
        skew_B: Skewness of lottery B.
        skew: Condition label: ``"right_vs_left"``, ``"left_vs_right"`` or
            ``"no_skew_vs_no_skew"``.
        skew_abs_diff: Difference measure between the two skewness values; it
            must be below 0.5 in every condition.

    Returns:
        A truthy value when both skewness values fall in the ranges required
        by ``skew`` and ``skew_abs_diff < 0.5``; ``False`` otherwise,
        including for an unknown label.
    """
    if skew == "no_skew_vs_no_skew":
        in_range = -0.25 <= skew_A <= 0.25 and -0.25 <= skew_B <= 0.25
    elif skew == "right_vs_left":
        in_range = 1.5 < skew_A < 3 and -3 < skew_B < -1.5
    elif skew == "left_vs_right":
        in_range = -3 < skew_A < -1.5 and 1.5 < skew_B < 3
    else:
        return False
    return in_range and skew_abs_diff < 0.5

# Generate lottery pairs
def generate_lottery_pairs():
    """Build two-outcome lottery pairs by rejection sampling.

    For every combination of skew condition, target EV difference and target
    SD difference, random lottery pairs (A, B) are drawn from the stdlib
    ``random`` module until the required number of accepted pairs is reached.
    Uses the module-level ``calculate_skewness`` and ``is_skew_valid``.

    Returns:
        pandas.DataFrame with one row per accepted pair: probabilities and
        outcomes of both lotteries, their EV / SD / skewness, the A-minus-B
        differences, and the design-cell labels (``skew_level``, ``ev_level``,
        ``sd_level``).

    Note:
        The retry loop has no attempt limit, so the call only returns once
        every active design cell has collected its pairs.
    """
    # Design grid: target EV(A) - EV(B), target SD(A) - SD(B), and skew condition.
    ev_levels = [-20, -10, 0, 10, 20]
    sd_levels = [5, 10, 15]
    skew_levels = ["right_vs_left", "left_vs_right", "no_skew_vs_no_skew"]

    data = []
    for skew in skew_levels:
        for ev in ev_levels:
            for sd in sd_levels:
                # The two skewed conditions are switched off here (0 pairs requested);
                # only no_skew_vs_no_skew collects 5 pairs per design cell.
                max_trials = 0 if skew in ["right_vs_left", "left_vs_right"] else 5
                trials_found = 0
                while trials_found < max_trials:
                    # Two distinct integer outcomes from 2..199, in ascending order.
                    outcomes_A = sorted(random.sample(range(2, 200), 2))
                    # Probability of the lower outcome (2 decimals); the other is its complement.
                    probabilities_A = [round(random.uniform(0.02, 0.98), 2)]
                    probabilities_A.append(round(1 - probabilities_A[0], 2))

                    outcomes_B = sorted(random.sample(range(2, 200), 2))
                    probabilities_B = [round(random.uniform(0.02, 0.98), 2)]
                    probabilities_B.append(round(1 - probabilities_B[0], 2))

                    EV_A = np.dot(probabilities_A, outcomes_A)
                    EV_B = np.dot(probabilities_B, outcomes_B)
                    EV_diff = round(EV_A - EV_B, 2)

                    SD_A = np.sqrt(np.dot(probabilities_A, (np.array(outcomes_A) - EV_A)**2))
                    SD_B = np.sqrt(np.dot(probabilities_B, (np.array(outcomes_B) - EV_B)**2))
                    SD_diff = round(SD_A - SD_B, 2)

                    skew_A = calculate_skewness(probabilities_A, outcomes_A)
                    skew_B = calculate_skewness(probabilities_B, outcomes_B)
                    # Difference between the skewness magnitudes (signs are ignored).
                    skew_abs_diff = abs(abs(skew_A) - abs(skew_B))

                    # Accept the pair only if the skewness fits the condition, the EV and SD
                    # differences are within 1 of their targets, and (no-skew condition only)
                    # SD_A lies in [10, 25] and SD_B in [3, 20]. The last clause is False for
                    # the skewed conditions, so they could never accept a pair here.
                    if is_skew_valid(skew_A, skew_B, skew, skew_abs_diff) and \
                       abs(EV_diff - ev) <= 1 and abs(SD_diff - sd) <= 1 and \
                       (skew == "no_skew_vs_no_skew" and 10 <= SD_A <= 25 and 3 <= SD_B <= 20):
                        data.append({
                            "P_A1": probabilities_A[0], "O_A1": outcomes_A[0],
                            "P_A2": probabilities_A[1], "O_A2": outcomes_A[1],
                            "P_B1": probabilities_B[0], "O_B1": outcomes_B[0],
                            "P_B2": probabilities_B[1], "O_B2": outcomes_B[1],
                            "EV_A": EV_A, "EV_B": EV_B, "EV_diff": EV_diff,
                            "SD_A": SD_A, "SD_B": SD_B, "SD_diff": SD_diff,
                            "skew_A": skew_A, "skew_B": skew_B, "skew_diff": skew_A - skew_B,
                            "skew_level": skew, "ev_level": ev, "sd_level": sd
                        })
                        trials_found += 1

    return pd.DataFrame(data)

# Sample the candidate lottery pairs
lottery_pairs = generate_lottery_pairs()

# Order them by design cell and renumber the rows from 0
lottery_pairs_sorted = (
    lottery_pairs
    .sort_values(by=['skew_level', 'ev_level', 'sd_level'])
    .reset_index(drop=True)
)

# Show the ordered pairs as the cell output
lottery_pairs_sorted

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,EV_diff,SD_A,SD_B,SD_diff,skew_A,skew_B,skew_diff,skew_level,ev_level,sd_level
0,0.56,80,0.44,118,0.50,103,0.50,130,96.72,116.50,-19.78,18.862704,13.500000,5.36,0.241747,0.000000,0.241747,no_skew_vs_no_skew,-20,5
1,0.56,36,0.44,64,0.45,59,0.55,75,48.32,67.80,-19.48,13.898834,7.959899,5.94,0.241747,-0.201008,0.442754,no_skew_vs_no_skew,-20,5
2,0.48,94,0.52,141,0.45,119,0.55,155,118.44,138.80,-20.36,23.481192,17.909774,5.57,-0.080064,-0.201008,0.120943,no_skew_vs_no_skew,-20,5
3,0.51,38,0.49,79,0.45,60,0.55,93,58.09,78.15,-20.06,20.495900,16.417293,4.08,0.040008,-0.201008,0.241016,no_skew_vs_no_skew,-20,5
4,0.53,104,0.47,135,0.54,129,0.46,148,118.57,137.74,-19.17,15.472075,9.469551,6.00,0.120217,0.160514,-0.040298,no_skew_vs_no_skew,-20,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,0.52,91,0.48,132,0.55,86,0.45,96,110.68,90.50,20.18,20.483593,4.974937,15.51,0.080064,0.201008,-0.120943,no_skew_vs_no_skew,20,15
71,0.45,121,0.55,163,0.54,119,0.46,129,144.10,123.60,20.50,20.894736,4.983974,15.91,-0.201008,0.160514,-0.361522,no_skew_vs_no_skew,20,15
72,0.48,16,0.52,62,0.53,11,0.47,29,39.92,19.46,20.46,22.981593,8.983785,14.00,-0.080064,0.120217,-0.200281,no_skew_vs_no_skew,20,15
73,0.55,96,0.45,139,0.49,89,0.51,100,115.35,94.61,20.74,21.392230,5.498900,15.89,0.201008,-0.040008,0.241016,no_skew_vs_no_skew,20,15


In [58]:



from scipy.optimize import minimize
import random
random.seed(42)  # Seed the stdlib RNG
# generate_unique_probs samples with np.random.choice, which ignores the stdlib
# seed; seed NumPy's global RNG as well so the optimisation run is reproducible.
np.random.seed(42)


# Function to calculate skewness
def calculate_skewness(probabilities, outcomes):
    """Return the skewness (third standardized moment) of a discrete lottery.

    Args:
        probabilities: Array-like of outcome probabilities.
        outcomes: Array-like of payoffs aligned with ``probabilities``.

    Returns:
        The third central moment divided by the cubed standard deviation, or
        ``0`` when the computed variance is zero or negative.
    """
    # Accept plain lists as well as arrays (element-wise arithmetic needs arrays).
    probabilities = np.asarray(probabilities)
    outcomes = np.asarray(outcomes)

    mu = np.sum(outcomes * probabilities)
    sigma_squared = np.sum((outcomes**2) * probabilities) - mu**2
    # Round-off can make the variance of a (near-)degenerate lottery slightly
    # negative; np.sqrt would then yield NaN, which a `sigma != 0` test lets through.
    if sigma_squared <= 0:
        return 0

    sigma = np.sqrt(sigma_squared)
    mu_3 = np.sum(((outcomes - mu)**3) * probabilities)
    return mu_3 / (sigma**3)

# Function to calculate EV, SD, and skewness
def lottery_stats(probs, outcomes):
    """Return ``(expected value, standard deviation, skewness)`` of a lottery.

    Args:
        probs: Array of outcome probabilities.
        outcomes: Array of payoffs aligned with ``probs``.

    Returns:
        Tuple ``(ev, sd, skewness)``; the skewness comes from
        ``calculate_skewness``.
    """
    expected_value = np.sum(probs * outcomes)
    deviations = outcomes - expected_value
    variance = np.sum(probs * deviations ** 2)
    return expected_value, np.sqrt(variance), calculate_skewness(probs, outcomes)

# Generate unique probabilities that sum to 1
def generate_unique_probs(n, low=0.02, high=0.4):
    """Draw ``n`` distinct two-decimal probabilities in ``[low, high]`` that sum to 1.

    Candidates are sampled without replacement from a 0.01-step grid using
    NumPy's global RNG, normalised, and rounded to two decimals; the rounding
    residual is folded into the last entry.

    Args:
        n: Number of probabilities to generate.
        low: Smallest admissible probability.
        high: Largest admissible probability.

    Returns:
        numpy.ndarray of ``n`` distinct probabilities, each a two-decimal value.

    Raises:
        ValueError: If the grid holds fewer than ``n`` values, or if no valid
            vector is found within 1000 attempts.
    """
    # 0.01-step grid; rounding removes the float noise np.arange introduces.
    possible_probs = np.unique(np.round(np.arange(low, high + 0.001, 0.01), 2))
    possible_probs = possible_probs[(possible_probs >= low) & (possible_probs <= high)]
    if len(possible_probs) < n:
        raise ValueError("Not enough unique probabilities within the specified bounds.")

    for _ in range(1000):  # Maximum attempts
        probs = np.random.choice(possible_probs, size=n, replace=False)
        probs = np.round(probs / probs.sum(), 2)
        # Fold the rounding residual into the last entry and re-round it.
        # Without the re-rounding the entry carries float noise (for example
        # 0.15000000000000002), which slips past the uniqueness and bounds
        # checks below and ends up in the returned probabilities.
        probs[-1] = np.round(probs[-1] + (1 - probs.sum()), 2)
        if np.all(probs >= low) and np.all(probs <= high) and len(np.unique(probs)) == n:
            return probs
    raise ValueError("Unable to generate unique probabilities within bounds after rounding.")

# Optimization function
def optimize_lottery(original_probs, original_outcomes):
    """Search for a 7-outcome lottery that mimics a simple lottery's moments.

    Up to 1000 attempts are made. Each attempt draws a fresh probability
    vector with ``generate_unique_probs`` and lets SLSQP choose decreasing
    outcomes in [2, 200] whose EV, SD and skewness match the original lottery,
    with a small penalty that favours evenly spaced outcomes. The outcomes are
    then rounded to integers and the first candidate whose statistics remain
    close enough to the original is returned.

    Args:
        original_probs: Array of probabilities of the simple lottery.
        original_outcomes: Array of outcomes of the simple lottery.

    Returns:
        ``(outcomes, probs, stats)`` for the first accepted candidate, where
        ``outcomes`` is an integer array in decreasing order, ``probs`` the
        matching probabilities, and ``stats`` a dict with the original and new
        EV / SD / skewness. ``(None, None, None)`` if no attempt is accepted.
    """
    # Target statistics of the simple lottery
    original_ev, original_sd, original_skew = lottery_stats(original_probs, original_outcomes)

    # Parameters
    n = 7  # Number of outcomes
    iterations = 1000  # Number of attempts
    delta = 2  # Minimum gap between consecutive outcomes
    max_gap = 100  # Maximum allowed gap between consecutive outcomes
    gap_weight = 0.01  # Weight for the gap penalty in the objective function

    # Bounds for outcomes
    lower, upper = 2, 200
    bounds = [(lower, upper)] * n

    # Everything below is identical for every attempt, so build it once.
    desired_gap = (upper - lower) / (n - 1)  # Even spacing across the range
    initial_outcomes = np.linspace(upper, lower, n)  # Initial guess, high to low
    # Outcomes must decrease by at least `delta` and by at most `max_gap`.
    constraints = [
        {'type': 'ineq', 'fun': lambda x, i=i: x[i] - x[i+1] - delta} for i in range(n - 1)
    ] + [
        {'type': 'ineq', 'fun': lambda x, i=i: max_gap - (x[i] - x[i+1])} for i in range(n - 1)
    ]

    for _ in range(iterations):
        try:
            fixed_probs = generate_unique_probs(n)
        except ValueError:
            continue  # No usable probability vector this time; try again

        def objective_outcomes(outcomes):
            # Squared distance to the target moments plus a weak spacing penalty.
            ev, sd, skw = lottery_stats(fixed_probs, outcomes)
            penalty_stats = (ev - original_ev)**2 + (sd - original_sd)**2 + (skw - original_skew)**2
            # np.diff is negative for decreasing outcomes, hence "+ desired_gap".
            penalty_gaps = np.sum((np.diff(outcomes) + desired_gap)**2)
            return penalty_stats + gap_weight * penalty_gaps

        result = minimize(
            objective_outcomes,
            initial_outcomes,
            bounds=bounds,
            constraints=constraints,
            method='SLSQP',
            options={'ftol': 1e-9, 'disp': False}
        )
        if not result.success:
            continue  # Optimizer did not converge for this probability vector

        optimized_outcomes = np.round(result.x).astype(int)

        # Repair what integer rounding may have broken.
        for i in range(1, n):
            # Ensure the gap does not exceed max_gap
            if optimized_outcomes[i - 1] - optimized_outcomes[i] > max_gap:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - max_gap
            # Ensure outcomes stay strictly decreasing
            if optimized_outcomes[i] >= optimized_outcomes[i - 1]:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - int(delta)
            # Ensure within bounds
            if optimized_outcomes[i] < bounds[i][0]:
                optimized_outcomes[i] = bounds[i][0]

        # Clamping to the lower bound can collapse neighbouring outcomes onto
        # the same value; such a candidate no longer has distinct outcomes.
        if np.any(np.diff(optimized_outcomes) >= 0):
            continue

        # Recalculate the statistics with the integer outcomes
        final_ev, final_sd, final_skew = lottery_stats(fixed_probs, optimized_outcomes)

        # Accept when EV and SD are within 1 and skewness within 0.25 of the original
        if (abs(final_ev - original_ev) < 1
                and abs(final_sd - original_sd) < 1
                and abs(final_skew - original_skew) < 0.25):
            return optimized_outcomes, fixed_probs, {
                'original_ev': original_ev,
                'original_sd': original_sd,
                'original_skew': original_skew,
                'new_ev': final_ev,
                'new_sd': final_sd,
                'new_skew': final_skew
            }

    # Every attempt was rejected
    return None, None, None

# Helper: statistics of one lottery (A or B) of a pair, simple and complex version
def _summarise_side(row, side, result_dict):
    """Write the simple and complex statistics of lottery ``side`` into ``result_dict``.

    Args:
        row: Row holding ``P_<side>1``, ``P_<side>2``, ``O_<side>1``, ``O_<side>2``.
        side: ``'A'`` or ``'B'``.
        result_dict: Dict that receives the new entries (mutated in place).

    Returns:
        True when ``optimize_lottery`` found a complex lottery, else False.
    """
    probs = np.array([row[f'P_{side}1'], row[f'P_{side}2']])
    probs = probs / probs.sum()
    outcomes = np.array([row[f'O_{side}1'], row[f'O_{side}2']])

    # Statistics of the original two-outcome lottery
    simple_ev, simple_sd, simple_skew = lottery_stats(probs, outcomes)
    result_dict[f'simple_EV{side}'] = simple_ev
    result_dict[f'simple_SD{side}'] = simple_sd
    result_dict[f'simple_skewness_{side}'] = simple_skew

    # Seven-outcome counterpart
    best_outcomes, best_probs, stats = optimize_lottery(probs, outcomes)
    found = best_outcomes is not None

    if found:
        for k, (outcome, prob) in enumerate(zip(best_outcomes, best_probs), start=1):
            result_dict[f'complex_O{side}{k}'] = int(outcome)  # Ensure integer outcomes
            result_dict[f'complex_P{side}{k}'] = prob
        complex_ev, complex_sd, complex_skew = stats['new_ev'], stats['new_sd'], stats['new_skew']
    else:
        # No complex lottery found: fill every slot with NaN
        for k in range(1, 8):
            result_dict[f'complex_O{side}{k}'] = np.nan
            result_dict[f'complex_P{side}{k}'] = np.nan
        complex_ev = complex_sd = complex_skew = np.nan

    result_dict[f'complex_EV{side}'] = complex_ev
    result_dict[f'complex_SD{side}'] = complex_sd
    result_dict[f'complex_skewness_{side}'] = complex_skew
    return found


# Function to process each row of the DataFrame
def process_row(row):
    """Compute simple and complex statistics for both lotteries of one pair.

    Args:
        row: DataFrame row with the probabilities and outcomes of lotteries A and B.

    Returns:
        pandas.Series with the simple statistics, the complex outcomes,
        probabilities and statistics of A and B, and the A-minus-B differences
        for both versions (NaN for the complex differences unless both
        lotteries were optimised successfully).
    """
    result_dict = {}
    found_A = _summarise_side(row, 'A', result_dict)
    found_B = _summarise_side(row, 'B', result_dict)

    # A-minus-B differences: first for the simple, then for the complex lotteries
    key_templates = ('{}_EV{}', '{}_SD{}', '{}_skewness_{}')
    for version, available in (('simple', True), ('complex', found_A and found_B)):
        for template in key_templates:
            if available:
                difference = (result_dict[template.format(version, 'A')]
                              - result_dict[template.format(version, 'B')])
            else:
                difference = np.nan
            result_dict[template.format(version, 'D')] = difference

    return pd.Series(result_dict)

# Derive the simple/complex statistics for every pair, row by row
new_columns = lottery_pairs_sorted.apply(process_row, axis=1)

# Put them next to the original pair description
lottery_pairs_new = pd.concat([lottery_pairs_sorted, new_columns], axis=1)

# Complex outcomes may be NaN, so store them with the nullable integer dtype
outcome_columns = [f'complex_O{side}{k}' for side in 'AB' for k in range(1, 8)]
lottery_pairs_new[outcome_columns] = lottery_pairs_new[outcome_columns].astype('Int64')





In [61]:
# Save only the rows without any missing value (no CSV index column is written).
lottery_pairs_new.dropna().to_csv('study3_trials_raw_ns.csv', index=False)

In [72]:
lottery_pairs_new

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.56,80,0.44,118,0.50,103,0.50,130,96.72,116.50,...,0.27,116.61,14.049836,0.005338,-19.78,5.362704,0.241747,-19.68,5.577316,0.423431
1,0.56,36,0.44,64,0.45,59,0.55,75,48.32,67.80,...,0.23,67.93,8.817318,-0.086044,-19.48,5.938935,0.442754,-19.54,5.824331,0.216566
2,0.48,94,0.52,141,0.45,119,0.55,155,118.44,138.80,...,0.17,138.79,18.477172,-0.325400,-20.36,5.571419,0.120943,-20.26,5.535930,0.451182
3,0.51,38,0.49,79,0.45,60,0.55,93,58.09,78.15,...,0.18,78.07,17.067076,-0.114431,-20.06,4.078607,0.241016,-20.12,3.924529,-0.083916
4,0.53,104,0.47,135,0.54,129,0.46,148,118.57,137.74,...,0.11,137.86,10.335395,-0.072473,-19.17,6.002524,-0.040298,-19.40,5.823453,0.258914
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,0.52,91,0.48,132,0.55,86,0.45,96,110.68,90.50,...,0.14,90.20,5.951470,0.155407,20.18,15.508656,-0.120943,20.53,15.004605,-0.215926
71,0.45,121,0.55,163,0.54,119,0.46,129,144.10,123.60,...,0.24,123.80,5.751522,0.067360,20.50,15.910762,-0.361522,20.31,15.716080,-0.211612
72,0.48,16,0.52,62,0.53,11,0.47,29,39.92,19.46,...,0.14,19.47,9.908032,0.355383,20.46,13.997807,-0.200281,20.64,13.412729,-0.235741
73,0.55,96,0.45,139,0.49,89,0.51,100,115.35,94.61,...,0.14,94.42,6.453185,-0.009098,20.74,15.893330,0.241016,21.10,15.538850,0.174872


In [81]:


# Keep only the pairs for which every column is populated
df_selected = lottery_pairs_new.dropna()

# Round the numeric columns to two decimals. DataFrame.round leaves non-numeric
# columns untouched and replaces the deprecated element-wise DataFrame.applymap.
df_selected_rounded = df_selected.round(2)

df_selected_before = df_selected_rounded.sort_values(by='skew_level', ascending=True)

# Selection rules, combined into a single row mask
keep = (
    # complex lotteries stay within 0.25 skewness of their simple versions
    ((df_selected_before['simple_skewness_A'] - df_selected_before['complex_skewness_A']).abs() < 0.25)
    & ((df_selected_before['simple_skewness_B'] - df_selected_before['complex_skewness_B']).abs() < 0.25)
    # no 50/50 lotteries
    & (df_selected_before["P_A1"] != 0.5)
    & (df_selected_before["P_B1"] != 0.5)
    # B's first probability and outcome differ from both of A's
    & (df_selected_before["P_B1"] != df_selected_before["P_A1"])
    & (df_selected_before["P_B1"] != df_selected_before["P_A2"])
    & (df_selected_before["O_B1"] != df_selected_before["O_A1"])
    & (df_selected_before["O_B1"] != df_selected_before["O_A2"])
    # skewness difference between A and B stays below 0.5 in both versions
    & (df_selected_before["simple_skewness_D"].abs() < 0.5)
    & (df_selected_before["complex_skewness_D"].abs() < 0.5)
)
filtered_df = df_selected_before[keep]

# Display the filtered dataframe
filtered_df


  df_selected_rounded = df_selected.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)


Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
53,0.52,128,0.48,167,0.45,127,0.55,146,146.72,137.45,...,0.2,137.44,10.38,-0.18,9.27,10.03,0.28,9.3,9.83,0.17
52,0.54,47,0.46,90,0.56,46,0.44,69,66.78,56.12,...,0.18,56.28,12.24,0.0,10.66,10.01,-0.08,10.62,9.79,0.01
51,0.51,60,0.49,103,0.56,61,0.44,83,81.07,70.68,...,0.22,70.51,11.69,0.21,10.39,10.58,-0.2,10.35,10.2,-0.28
50,0.46,23,0.54,55,0.55,25,0.45,38,40.28,30.85,...,0.18,30.69,7.43,0.05,9.43,9.48,-0.36,9.38,9.19,-0.38
49,0.56,144,0.44,180,0.55,138,0.45,163,159.84,149.25,...,0.04,149.17,13.32,0.04,10.59,5.43,0.04,10.81,5.18,0.28
47,0.56,139,0.44,180,0.49,132,0.51,162,157.04,147.3,...,0.16,147.3,15.59,-0.22,9.74,5.35,0.28,9.5,5.4,0.4
45,0.46,145,0.54,176,0.56,142,0.44,163,161.74,151.24,...,0.2,151.31,11.17,0.14,10.5,5.03,-0.4,10.32,5.08,-0.46
44,0.47,87,0.53,130,0.54,104,0.46,118,109.79,110.44,...,0.1,110.07,7.9,-0.06,-0.65,14.48,-0.28,-0.36,14.02,0.06
42,0.48,144,0.52,183,0.45,160,0.55,168,164.28,164.4,...,0.16,164.12,4.95,-0.13,-0.12,15.5,0.12,-0.15,15.15,-0.01
46,0.51,102,0.49,135,0.54,99,0.46,121,118.17,109.12,...,0.28,109.19,11.87,0.28,9.05,5.53,-0.12,8.78,5.45,-0.42


In [83]:


# Draw one random pair per (skew, EV, SD) design cell. GroupBy.sample does this
# directly and avoids the deprecation warning raised by
# groupby(...).apply(lambda x: x.sample(n=1)).
random_trials = (
    filtered_df
    .groupby(['skew_level', 'ev_level', 'sd_level'])
    .sample(n=1)
)

random_trials.to_csv('study3_trials_final_ns.csv', index=False)


random_trials

  .apply(lambda x: x.sample(n=1))


Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
4,0.53,104,0.47,135,0.54,129,0.46,148,118.57,137.74,...,0.11,137.86,10.34,-0.07,-19.17,6.0,-0.04,-19.4,5.82,0.26
9,0.45,116,0.55,159,0.47,147,0.53,169,139.65,158.66,...,0.22,158.69,11.92,-0.19,-19.01,10.41,-0.08,-18.88,10.05,-0.12
11,0.53,43,0.47,87,0.49,78,0.51,90,63.68,84.12,...,0.2,84.14,6.86,0.08,-20.44,15.96,0.16,-20.56,15.79,0.01
19,0.53,73,0.47,107,0.55,87,0.45,113,88.98,98.7,...,0.24,98.85,13.61,0.19,-9.72,4.03,-0.08,-9.74,4.17,-0.24
24,0.56,90,0.44,125,0.48,109,0.52,123,105.4,116.28,...,0.21,116.23,7.59,-0.02,-10.88,10.38,0.32,-10.72,10.56,0.21
27,0.46,60,0.54,100,0.45,86,0.55,97,81.6,92.05,...,0.18,92.06,6.45,-0.44,-10.45,14.46,0.04,-10.51,14.07,0.08
30,0.55,117,0.45,149,0.52,120,0.48,142,131.4,130.56,...,0.2,130.63,11.91,0.05,0.84,4.93,0.12,0.8,4.51,0.13
35,0.53,28,0.47,78,0.51,38,0.49,66,51.5,51.72,...,0.15,51.55,14.78,0.07,-0.22,10.96,0.08,0.0,10.6,0.28
42,0.48,144,0.52,183,0.45,160,0.55,168,164.28,164.4,...,0.16,164.12,4.95,-0.13,-0.12,15.5,0.12,-0.15,15.15,-0.01
45,0.46,145,0.54,176,0.56,142,0.44,163,161.74,151.24,...,0.2,151.31,11.17,0.14,10.5,5.03,-0.4,10.32,5.08,-0.46


In [80]:

# Mean EV and SD of both lotteries within each skew condition of the sampled trials
mean_values_random_trials = (
    random_trials
    .groupby('skew_level', as_index=False)[['EV_A', 'EV_B', 'SD_A', 'SD_B']]
    .mean()
)

mean_values_random_trials

Unnamed: 0,skew_level,EV_A,EV_B,SD_A,SD_B
0,no_skew_vs_no_skew,111.718,111.673333,17.944,7.872667


In [1]:
import numpy as np
import pandas as pd
# Load the previously saved raw trials. read_csv already returns a DataFrame,
# so it does not need to be wrapped in pd.DataFrame(...) again.
dat_na = pd.read_csv('study3_trials_raw.csv')
df_na = dat_na
df_na

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.11,100,0.89,133,0.86,148,0.14,162,129.37,149.96,...,0.23,150.00,5.824088,1.941352,-20.59,5.467544,-4.567896,-20.48,5.457295,-4.197162
1,0.15,113,0.85,140,0.81,154,0.19,166,135.95,156.28,...,0.22,156.54,5.673482,1.638248,-20.33,4.933318,-3.540812,-20.57,4.955210,-3.348683
2,0.13,128,0.87,160,0.84,174,0.16,187,155.84,176.08,...,0.19,175.93,5.734553,1.954150,-20.24,5.995831,-4.055246,-20.33,5.976126,-3.918112
3,0.17,90,0.83,124,0.80,135,0.20,152,118.22,138.40,...,0.11,138.36,7.698727,1.501406,-20.18,5.971515,-3.257035,-20.16,5.762328,-3.146273
4,0.19,44,0.81,83,0.82,92,0.18,117,75.59,96.50,...,0.21,96.34,10.533964,1.533275,-20.91,5.695049,-3.246273,-20.79,5.468770,-2.931367
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,0.83,140,0.17,183,0.18,116,0.82,131,147.31,128.30,...,0.13,128.47,6.650496,-1.485445,19.01,10.389399,3.422889,18.79,10.136177,3.099117
239,0.84,91,0.16,160,0.12,53,0.88,85,102.04,81.16,...,0.04,81.23,11.318882,-2.091176,20.88,14.897049,4.193590,20.77,14.582083,3.709319
240,0.84,86,0.16,142,0.12,59,0.88,77,94.96,74.84,...,0.02,74.85,6.778459,-2.243516,20.12,14.680631,4.193590,19.96,14.447796,3.984241
241,0.83,75,0.17,138,0.15,47,0.85,70,85.71,66.55,...,0.04,66.72,9.146672,-1.844715,19.16,15.452224,3.717427,19.07,15.042291,3.566817


In [3]:

# Keep the pairs whose complex lotteries stay within 0.25 skewness of their
# simple versions, for lottery A and lottery B alike
filtered_df = df_na.loc[
    lambda d: (d['simple_skewness_A'] - d['complex_skewness_A']).abs().lt(0.25)
    & (d['simple_skewness_B'] - d['complex_skewness_B']).abs().lt(0.25)
]

# Show the retained rows that have no missing values
filtered_df.dropna()

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.11,100,0.89,133,0.86,148,0.14,162,129.37,149.96,...,0.23,150.00,5.824088,1.941352,-20.59,5.467544,-4.567896,-20.48,5.457295,-4.197162
1,0.15,113,0.85,140,0.81,154,0.19,166,135.95,156.28,...,0.22,156.54,5.673482,1.638248,-20.33,4.933318,-3.540812,-20.57,4.955210,-3.348683
2,0.13,128,0.87,160,0.84,174,0.16,187,155.84,176.08,...,0.19,175.93,5.734553,1.954150,-20.24,5.995831,-4.055246,-20.33,5.976126,-3.918112
3,0.17,90,0.83,124,0.80,135,0.20,152,118.22,138.40,...,0.11,138.36,7.698727,1.501406,-20.18,5.971515,-3.257035,-20.16,5.762328,-3.146273
4,0.19,44,0.81,83,0.82,92,0.18,117,75.59,96.50,...,0.21,96.34,10.533964,1.533275,-20.91,5.695049,-3.246273,-20.79,5.468770,-2.931367
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,0.83,140,0.17,183,0.18,116,0.82,131,147.31,128.30,...,0.13,128.47,6.650496,-1.485445,19.01,10.389399,3.422889,18.79,10.136177,3.099117
239,0.84,91,0.16,160,0.12,53,0.88,85,102.04,81.16,...,0.04,81.23,11.318882,-2.091176,20.88,14.897049,4.193590,20.77,14.582083,3.709319
240,0.84,86,0.16,142,0.12,59,0.88,77,94.96,74.84,...,0.02,74.85,6.778459,-2.243516,20.12,14.680631,4.193590,19.96,14.447796,3.984241
241,0.83,75,0.17,138,0.15,47,0.85,70,85.71,66.55,...,0.04,66.72,9.146672,-1.844715,19.16,15.452224,3.717427,19.07,15.042291,3.566817


In [55]:
# Draw one random pair per (skew, EV, SD) design cell. GroupBy.sample does this
# directly and avoids the deprecation warning raised by
# groupby(...).apply(lambda x: x.sample(n=1)).
random_trials = (
    filtered_df
    .groupby(['skew_level', 'ev_level', 'sd_level'])
    .sample(n=1)
)

# Group by skew_level in the random_trials dataframe and calculate the mean
mean_values_random_trials = (
    random_trials
    .groupby('skew_level')[['EV_A', 'EV_B', 'SD_A', 'SD_B']]
    .mean()
    .reset_index()
)

mean_values_random_trials

# Note: in the right_vs_left and left_vs_right conditions SD_A lies between
# 17 and 20 and SD_B between 7 and 10.

  .apply(lambda x: x.sample(n=1))


Unnamed: 0,skew_level,EV_A,EV_B,SD_A,SD_B
0,left_vs_right,138.509333,138.213333,16.737613,7.064805
1,no_skew_vs_no_skew,93.759333,93.572,36.555328,26.101772
2,right_vs_left,84.202,84.321333,18.50354,8.424336


In [4]:
# Number of retained pairs in every skew x EV x SD design cell
level_counts = (
    filtered_df
    .groupby(['skew_level', 'ev_level', 'sd_level'])
    .size()
    .rename('count')
    .reset_index()
)

# Display the per-cell counts
level_counts

Unnamed: 0,skew_level,ev_level,sd_level,count
0,left_vs_right,-20,5,7
1,left_vs_right,-20,10,5
2,left_vs_right,-20,15,1
3,left_vs_right,-10,5,8
4,left_vs_right,-10,10,4
5,left_vs_right,-10,15,3
6,left_vs_right,0,5,11
7,left_vs_right,0,10,4
8,left_vs_right,0,15,2
9,left_vs_right,10,5,10
