In [2]:
from scipy.optimize import minimize
import random
import numpy as np
import pandas as pd



# Function to calculate skewness
def calculate_skewness(probabilities, outcomes):
    """Return the skewness (third standardized moment) of a discrete lottery.

    Parameters
    ----------
    probabilities : array-like of float
        Probability attached to each outcome. The values are used as given;
        they are neither validated nor normalised here.
    outcomes : array-like of float
        Payoff of each outcome, aligned element-wise with ``probabilities``.

    Returns
    -------
    float
        ``mu_3 / sigma**3``, or ``0.0`` when the lottery has no spread
        (all probability mass effectively on a single value, or empty input).
    """
    probabilities = np.asarray(probabilities, dtype=float)
    outcomes = np.asarray(outcomes, dtype=float)

    mu = np.sum(outcomes * probabilities)
    deviations = outcomes - mu

    # Centered form of the variance. The shortcut E[X^2] - mu^2 subtracts two
    # nearly equal numbers when outcomes share a large offset, which can wipe
    # out the result or even make it negative (sqrt -> NaN).
    sigma_squared = np.sum(deviations**2 * probabilities)

    # Rounding in `mu` leaves deviations of the order eps * |outcome| even when
    # every outcome is identical, so "zero spread" is tested against that
    # rounding floor instead of against an exact 0.
    second_moment = np.sum(outcomes**2 * probabilities)
    noise_floor = (4 * outcomes.size * np.finfo(float).eps) ** 2 * second_moment
    if abs(sigma_squared) <= noise_floor:
        return 0.0

    sigma = np.sqrt(sigma_squared)
    mu_3 = np.sum(deviations**3 * probabilities)
    return mu_3 / sigma**3

# Function to calculate EV, SD, and skewness
def lottery_stats(probs, outcomes):
    """Summarise a discrete lottery as (expected value, standard deviation, skewness).

    ``probs`` and ``outcomes`` are multiplied element-wise, so they are
    expected to be aligned NumPy arrays. The skewness is delegated to
    ``calculate_skewness``.
    """
    mean = np.sum(probs * outcomes)
    squared_deviation = (outcomes - mean) ** 2
    variance = np.sum(probs * squared_deviation)
    return mean, np.sqrt(variance), calculate_skewness(probs, outcomes)

# Generate unique probabilities that sum to 1
def generate_unique_probs(n, low=0.02, high=0.4, rng=None, max_attempts=1000):
    """Draw ``n`` distinct two-decimal probabilities that sum to 1.

    Candidates are sampled without replacement from the 0.01 grid between
    ``low`` and ``high``, rescaled to sum to 1 and rounded to two decimals.
    Any rounding remainder is added to the last entry. A draw is accepted only
    if every value is still inside ``[low, high]`` and all values are distinct.

    Parameters
    ----------
    n : int
        Number of probabilities to generate.
    low, high : float
        Inclusive bounds for every returned probability.
    rng : optional
        Object exposing ``choice(a, size=..., replace=...)``, for example a
        ``numpy.random.Generator``. When ``None`` the global ``np.random``
        state is used, as before.
    max_attempts : int
        Number of random draws to try before giving up.

    Returns
    -------
    numpy.ndarray
        ``n`` floats, each an exact multiple of 0.01, whose hundredths add up
        to exactly 100.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1, if the grid holds fewer than ``n`` values,
        or if no acceptable draw is found within ``max_attempts`` tries.
    """
    if n < 1:
        raise ValueError("n must be at least 1.")

    possible_probs = np.arange(low, high + 0.001, 0.01)
    possible_probs = np.round(possible_probs, 2)
    possible_probs = possible_probs[(possible_probs >= low) & (possible_probs <= high)]
    # Remove duplicates due to rounding
    possible_probs = np.unique(possible_probs)
    # Ensure there are enough unique probabilities
    if len(possible_probs) < n:
        raise ValueError("Not enough unique probabilities within the specified bounds.")

    sampler = np.random if rng is None else rng
    for _ in range(max_attempts):
        draw = sampler.choice(possible_probs, size=n, replace=False)
        # Work in integer hundredths: the remainder correction and the
        # uniqueness test are then exact instead of being decided by float
        # residue such as 0.15000000000000002 vs 0.15.
        hundredths = np.rint(draw / draw.sum() * 100).astype(int)
        hundredths[-1] += 100 - hundredths.sum()  # Adjust to sum to 1
        probs = hundredths / 100
        within_bounds = np.all(probs >= low) and np.all(probs <= high)
        if within_bounds and len(np.unique(hundredths)) == n:
            return probs
    raise ValueError("Unable to generate unique probabilities within bounds after rounding.")


# Optimization function
def optimize_lottery(original_probs, original_outcomes, *, n=7, iterations=10000,
                     delta=2, max_gap=100, gap_weight=0.01, outcome_bounds=(2, 200),
                     ev_tol=1, sd_tol=1, skew_tol=0.25, seed=None):
    """Search for an ``n``-outcome lottery matching the EV, SD and skewness of a given lottery.

    Each iteration draws a fresh probability vector with
    ``generate_unique_probs`` and runs SLSQP over the outcomes. It minimises
    the squared differences in EV, SD and skewness plus a weighted penalty
    that pulls consecutive gaps towards an even spacing. The solution is
    rounded to integers, repaired so the outcomes stay decreasing, and
    accepted if all three statistics are within tolerance.

    Parameters
    ----------
    original_probs, original_outcomes : numpy.ndarray
        Probabilities and payoffs of the lottery to imitate.
    n : int
        Number of outcomes in the generated lottery (at least 2).
    iterations : int
        Maximum number of probability draws / optimisation runs.
    delta : number
        Minimum gap between consecutive outcomes enforced during optimisation.
    max_gap : number
        Maximum gap allowed between consecutive outcomes.
    gap_weight : float
        Weight of the gap penalty in the objective.
    outcome_bounds : tuple
        ``(lower, upper)`` bound applied to every outcome.
    ev_tol, sd_tol, skew_tol : float
        Strict upper limits on the absolute difference in EV, SD and skewness
        for a candidate to be accepted.
    seed : int or None
        When not ``None``, passed to ``np.random.seed`` before the search, so
        the run is repeatable. Note that this reseeds NumPy's global state.

    Returns
    -------
    tuple or dict
        On success: ``(outcomes, probs, stats)``, where ``stats`` holds the
        keys ``original_ev``, ``original_sd``, ``original_skew``, ``new_ev``,
        ``new_sd`` and ``new_skew``. If no iteration produces an acceptable
        lottery, only a dict with the three ``original_*`` keys is returned.
        Callers must check which shape they received.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 2.
    """
    if n < 2:
        raise ValueError("n must be at least 2 so that gaps between outcomes are defined.")
    if seed is not None:
        np.random.seed(seed)

    # Original lottery statistics
    original_ev, original_sd, original_skew = lottery_stats(original_probs, original_outcomes)
    original_stats = {
        'original_ev': original_ev,
        'original_sd': original_sd,
        'original_skew': original_skew,
    }

    # Everything below depends only on the parameters, so it is built once
    # rather than on every iteration.
    lower, upper = outcome_bounds
    bounds = [(lower, upper)] * n
    desired_gap = (upper - lower) / (n - 1)
    # Initial guess for outcomes (from high to low)
    initial_outcomes = np.linspace(upper, lower, n)
    # Constraints: outcomes decrease by at least delta and by at most max_gap.
    # `i=i` freezes the index inside each lambda.
    constraints = [
        {'type': 'ineq', 'fun': lambda x, i=i: x[i] - x[i + 1] - delta} for i in range(n - 1)
    ] + [
        {'type': 'ineq', 'fun': lambda x, i=i: max_gap - (x[i] - x[i + 1])} for i in range(n - 1)
    ]

    for _ in range(iterations):
        try:
            fixed_probs = generate_unique_probs(n)
        except ValueError:
            continue  # Skip if unable to generate probabilities

        def objective_outcomes(outcomes):
            ev, sd, skw = lottery_stats(fixed_probs, outcomes)
            penalty_stats = (ev - original_ev)**2 + (sd - original_sd)**2 + (skw - original_skew)**2
            # np.diff is negative for decreasing outcomes, hence the "+".
            penalty_gaps = np.sum((np.diff(outcomes) + desired_gap)**2)
            return penalty_stats + gap_weight * penalty_gaps

        result = minimize(
            objective_outcomes,
            initial_outcomes,
            bounds=bounds,
            constraints=constraints,
            method='SLSQP',
            options={'ftol': 1e-9, 'disp': False}
        )
        if not result.success:
            continue  # Skip this iteration if optimization failed

        optimized_outcomes = np.round(result.x).astype(int)

        # Repair effects of rounding, walking from the largest outcome down.
        for i in range(1, n):
            # Ensure the gap does not exceed max_gap
            if optimized_outcomes[i - 1] - optimized_outcomes[i] > max_gap:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - max_gap
            # Ensure outcomes stay strictly decreasing
            if optimized_outcomes[i] >= optimized_outcomes[i - 1]:
                optimized_outcomes[i] = optimized_outcomes[i - 1] - int(delta)
            # Ensure within bounds
            if optimized_outcomes[i] < lower:
                optimized_outcomes[i] = lower

        # Clamping to the lower bound can undo the ordering repair and leave
        # duplicate outcomes; such a candidate is not a valid lottery.
        if np.any(np.diff(optimized_outcomes) >= 0):
            continue

        # Recalculate the statistics with adjusted outcomes
        final_ev, final_sd, final_skew = lottery_stats(fixed_probs, optimized_outcomes)

        # Accept the first candidate whose EV, SD and skewness are all within tolerance.
        if (abs(final_ev - original_ev) < ev_tol
                and abs(final_sd - original_sd) < sd_tol
                and abs(final_skew - original_skew) < skew_tol):
            return optimized_outcomes, fixed_probs, {
                **original_stats,
                'new_ev': final_ev,
                'new_sd': final_sd,
                'new_skew': final_skew,
            }

    # No iteration produced an acceptable lottery.
    return original_stats



# Example usage



# Target lottery: outcome 134 with probability 0.12, outcome 61 with probability 0.88.
probs_A = np.array([0.12, 0.88])
outcomes_A = np.array([134, 61])

# optimize_lottery returns an (outcomes, probs, stats) tuple when a matching
# lottery is found; otherwise it returns only a dict with the original
# EV / SD / skewness.
result = optimize_lottery(probs_A, outcomes_A)
print(result)

{'original_ev': 69.75999999999999, 'original_sd': 23.722192141537004, 'original_skew': 2.3387383286073167}


In [3]:
import numpy as np
import pandas as pd
# Load the trial table; the path is relative, so the CSV must be reachable
# from the notebook's working directory.
dat_na = pd.read_csv('study3_trials_na.csv')
# NOTE(review): pd.read_csv already returns a DataFrame, so this wrap looks
# redundant — confirm nothing relies on df_na being a separate object before
# simplifying.
df_na = pd.DataFrame(dat_na)
# Bare last expression so the notebook renders the frame.
df_na

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.07,48,0.93,96,0.93,111,0.07,140,92.64,113.03,...,,,,,-20.39,4.847793,-6.741211,,,
1,0.11,86,0.89,128,0.89,140,0.11,166,123.38,142.86,...,0.33,142.84,9.123289,2.324576,-19.48,5.006236,-4.985782,,,
2,0.07,9,0.93,86,0.92,97,0.08,148,80.61,101.08,...,,,,,-20.47,5.810367,-6.466887,,,
3,0.07,48,0.93,130,0.92,140,0.08,195,124.26,144.40,...,,,,,-20.14,6.000929,-6.466887,,,
4,0.13,128,0.87,156,0.86,171,0.14,183,152.36,172.68,...,,,,,-20.32,5.252652,-4.275399,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
585,0.89,66,0.11,167,0.09,4,0.91,63,77.11,57.69,...,,,,,19.42,14.717142,5.358203,,,
586,0.91,88,0.09,138,0.11,71,0.89,72,92.50,71.89,...,,,,,20.61,13.996198,5.358203,,,
587,0.90,67,0.10,165,0.09,11,0.91,62,76.80,57.41,...,,,,,19.39,14.804730,5.531979,,,
588,0.91,63,0.09,127,0.08,34,0.92,49,68.76,47.80,...,,,,,20.96,14.246235,5.961593,,,


In [9]:
# Keep only the rows where both 'complex_PB7' and 'complex_PA7' are missing (NaN).
filtered_na_rows = df_na[df_na[['complex_PB7', 'complex_PA7']].isna().all(axis=1)]
# Of those, keep rows where the absolute values of both 'skew_A' and 'skew_B' are less than 3.
filtered_skew_rows = filtered_na_rows[(filtered_na_rows['skew_A'].abs() < 3) & (filtered_na_rows['skew_B'].abs() < 3)]
filtered_skew_rows


Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
6,0.10,6,0.90,108,0.89,110,0.11,189,97.80,118.69,...,,,,,-20.89,5.881709,-5.159557,,,
19,0.09,34,0.91,97,0.90,106,0.10,151,91.33,110.50,...,,,,,-19.17,4.529451,-5.531979,,,
23,0.10,29,0.90,107,0.91,115,0.09,160,99.20,119.05,...,,,,,-19.85,10.521821,-5.531979,,,
30,0.12,21,0.88,125,0.87,124,0.13,197,112.52,133.49,...,,,,,-20.97,9.245849,-4.539132,,,
33,0.11,52,0.89,137,0.89,143,0.11,194,127.65,148.61,...,,,,,-20.96,10.638252,-4.985782,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
577,0.88,46,0.12,93,0.14,29,0.86,31,51.64,30.72,...,,,,,20.92,14.579218,4.413744,,,
583,0.90,88,0.10,183,0.11,39,0.89,82,97.50,77.27,...,,,,,20.23,15.045740,5.159557,,,
585,0.89,66,0.11,167,0.09,4,0.91,63,77.11,57.69,...,,,,,19.42,14.717142,5.358203,,,
586,0.91,88,0.09,138,0.11,71,0.89,72,92.50,71.89,...,,,,,20.61,13.996198,5.358203,,,


In [10]:
# Keep only the rows where both 'complex_PB7' and 'complex_PA7' are missing (NaN).
filtered_na_rows = df_na[df_na[['complex_PB7', 'complex_PA7']].isna().all(axis=1)]
# Of those, keep rows where the absolute values of both 'skew_A' and 'skew_B' are greater than 3.
# NOTE(review): this rebinds `filtered_skew_rows`, which the previous cell assigns
# with the opposite (< 3) filter; the name's meaning depends on which cell ran last.
filtered_skew_rows = filtered_na_rows[(filtered_na_rows['skew_A'].abs() > 3) & (filtered_na_rows['skew_B'].abs() > 3)]
filtered_skew_rows

Unnamed: 0,P_A1,O_A1,P_A2,O_A2,P_B1,O_B1,P_B2,O_B2,EV_A,EV_B,...,complex_PB7,complex_EVB,complex_SDB,complex_skewness_B,simple_EVD,simple_SDD,simple_skewness_D,complex_EVD,complex_SDD,complex_skewness_D
0,0.07,48,0.93,96,0.93,111,0.07,140,92.64,113.03,...,,,,,-20.39,4.847793,-6.741211,,,
2,0.07,9,0.93,86,0.92,97,0.08,148,80.61,101.08,...,,,,,-20.47,5.810367,-6.466887,,,
3,0.07,48,0.93,130,0.92,140,0.08,195,124.26,144.40,...,,,,,-20.14,6.000929,-6.466887,,,
7,0.05,121,0.95,156,0.94,174,0.06,184,154.25,174.60,...,,,,,-20.35,5.253205,-7.834952,,,
10,0.03,2,0.97,36,0.97,55,0.03,58,34.98,55.09,...,,,,,-20.11,5.288204,-11.020755,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,0.93,125,0.07,191,0.07,81,0.93,111,129.62,108.90,...,,,,,20.72,9.185293,6.741211,,,
567,0.98,62,0.02,147,0.02,32,0.98,44,63.70,43.76,...,,,,,19.94,10.220000,13.714286,,,
571,0.92,35,0.08,105,0.08,8,0.92,22,40.60,20.88,...,,,,,19.72,15.192419,6.192562,,,
580,0.94,44,0.06,118,0.05,17,0.95,28,48.44,27.45,...,,,,,20.99,15.176632,7.834952,,,


In [15]:
# Function to calculate skewness
def calculate_skewness(probabilities, outcomes):
    """Return the skewness (third standardized moment) of a discrete lottery.

    NOTE: this cell re-defines a function of the same name from the notebook's
    first code cell; whichever definition ran last is the one in effect.

    Parameters
    ----------
    probabilities : array-like of float
        Probability attached to each outcome. The values are used as given;
        they are neither validated nor normalised here.
    outcomes : array-like of float
        Payoff of each outcome, aligned element-wise with ``probabilities``.

    Returns
    -------
    float
        ``mu_3 / sigma**3``, or ``0.0`` when the lottery has no spread
        (all probability mass effectively on a single value, or empty input).
    """
    probabilities = np.asarray(probabilities, dtype=float)
    outcomes = np.asarray(outcomes, dtype=float)

    mu = np.sum(outcomes * probabilities)
    deviations = outcomes - mu

    # Centered form of the variance. The shortcut E[X^2] - mu^2 subtracts two
    # nearly equal numbers when outcomes share a large offset, which can wipe
    # out the result or even make it negative (sqrt -> NaN).
    sigma_squared = np.sum(deviations**2 * probabilities)

    # Rounding in `mu` leaves deviations of the order eps * |outcome| even when
    # every outcome is identical, so "zero spread" is tested against that
    # rounding floor instead of against an exact 0.
    second_moment = np.sum(outcomes**2 * probabilities)
    noise_floor = (4 * outcomes.size * np.finfo(float).eps) ** 2 * second_moment
    if abs(sigma_squared) <= noise_floor:
        return 0.0

    sigma = np.sqrt(sigma_squared)
    mu_3 = np.sum(deviations**3 * probabilities)
    return mu_3 / sigma**3

# Quick check on a two-outcome lottery: 30 with probability 0.1, 100 with probability 0.9.
# NOTE: this rebinds probs_A / outcomes_A, which the first code cell also assigns.
probs_A = np.array([0.1, 0.9])
outcomes_A = np.array([30, 100])

# Bare last expression so the notebook displays the skewness.
calculate_skewness(probs_A, outcomes_A)

-2.6666666666666665