# **INDEPENDENT NORMAL**

In [3]:
#############

import numpy as np

# Seed NumPy's legacy global generator so repeated runs draw the same numbers
np.random.seed(12345)

# Monte Carlo sample size
N = 3_000_000

# Parameters of the two normals: means first, then standard deviations
mu_x_0 = 1.2
mu_y_0 = 1.1
sigma_x_0 = sigma_y_0 = 1.0

# Candidate thresholds (x*, y*) and the coordinate c_0 of the point (c_0, c_0)
x_star_0, y_star_0 = 1.00236, 1.04249
c_0 = 2

# Line D joins (x_star_0, y_star_0) to (c_0, c_0); its slope is rise over run:
#   slope = (c_0 - y_star_0) / (c_0 - x_star_0)
slope_D_0 = (c_0 - y_star_0) / (c_0 - x_star_0)

def yD_of_x(x):
    """Return the y-coordinate of line D at abscissa ``x``.

    Point-slope form anchored at (x_star_0, y_star_0) with slope slope_D_0;
    all three are read from the enclosing namespace when the function is called.
    """
    run = x - x_star_0
    return y_star_0 + slope_D_0 * run
def xD_of_y(y):
    """Return the x-coordinate of line D at ordinate ``y`` (inverse of yD_of_x).

    slope_D_0 is dy/dx, so dx/dy is its reciprocal. x_star_0, y_star_0 and
    slope_D_0 are read from the enclosing namespace when the function is called.
    """
    dx_per_dy = 1. / slope_D_0
    rise = y - y_star_0
    return x_star_0 + dx_per_dy * rise

# 1) Sampling: N draws of X followed by N draws of Y. X is drawn first so the
#    seeded random stream is consumed in the same order on every run.
X_0 = np.random.normal(loc=mu_x_0, scale=sigma_x_0, size=N)
Y_0 = np.random.normal(loc=mu_y_0, scale=sigma_y_0, size=N)

# 2) Region B, built from two pieces that meet at x = x_star:
#    - for x <= x_star: points on or above the horizontal line F (y = y_star)
#    - for x >= x_star: points on or above line D (y = yD_of_x(x))
left_mask_B_0 = np.logical_and(X_0 <= x_star_0, Y_0 >= y_star_0)
right_mask_B_0 = np.logical_and(X_0 >= x_star_0, Y_0 >= yD_of_x(X_0))
B_mask_0 = np.logical_or(left_mask_B_0, right_mask_B_0)

# 3) Region A, the mirror construction with the roles of x and y swapped:
#    - for y <= y_star: points on or right of the vertical line E (x = x_star)
#    - for y >= y_star: points on or right of line D (x = xD_of_y(y))
down_mask_A_0 = np.logical_and(Y_0 <= y_star_0, X_0 >= x_star_0)
up_mask_A_0 = np.logical_and(Y_0 >= y_star_0, X_0 >= xD_of_y(Y_0))
A_mask_0 = np.logical_or(down_mask_A_0, up_mask_A_0)

# 4) Monte Carlo estimate of E[Y | B]; NaN if no sample landed in B
Y_in_B_0 = Y_0[B_mask_0]
meanY_in_B_0 = Y_in_B_0.mean() if Y_in_B_0.size > 0 else float('nan')

# 5) Monte Carlo estimate of E[X | A]; NaN if no sample landed in A
X_in_A_0 = X_0[A_mask_0]
meanX_in_A_0 = X_in_A_0.mean() if X_in_A_0.size > 0 else float('nan')


# Report: fraction of samples in each region, the thresholds, and the means
print("INDEPENDENT NORMALS")
print("Share of X in A =", X_in_A_0.size/N)
print("Share of Y in B =", Y_in_B_0.size/N)
print("x* =", x_star_0)
print("y* =", y_star_0)
print("E(x) in A =", meanX_in_A_0)
print("E(y) in B =", meanY_in_B_0)



INDEPENDENT NORMALS
Share of X in A = 0.4305116666666667
Share of Y in B = 0.3681163333333333
x* = 1.00236
y* = 1.04249
E(x) in A = 2.000001467830328
E(y) in B = 2.00000768162508


In [4]:
# Starting guess for the thresholds (same value on both axes)
x_star_1 = y_star_1 = 1.1

# Value both conditional means are driven towards
target_mean_1 = 2.0

# Stopping rule: each mean must be within this distance of the target ...
tolerance_1 = 0.001

# ... or the search gives up after this many passes
max_iterations_1 = 1_000

# Function to update x_star and y_star
def update_stars(x_star, y_star, meanX_in_A, meanY_in_B, learning_rate=0.02):
    """Move each threshold by a fraction of its conditional-mean shortfall.

    The shortfall is ``target_mean_1 - mean`` (target_mean_1 is read from the
    enclosing namespace); x_star is adjusted using meanX_in_A and y_star using
    meanY_in_B, each scaled by ``learning_rate``.

    Returns the updated ``(x_star, y_star)`` pair.
    """
    gap_x = target_mean_1 - meanX_in_A
    gap_y = target_mean_1 - meanY_in_B
    x_star += learning_rate * gap_x
    y_star += learning_rate * gap_y
    return x_star, y_star

# Iteratively update x_star and y_star until convergence
# Line-D helpers. They look up x_star_1, y_star_1 and slope_D_1 in the notebook
# namespace each time they are called, so defining them once (rather than on
# every pass through the loop) is enough: each call sees the current iterate.
def yD_of_x(x):
    """Return the y-coordinate of line D at x for the current iterate."""
    return y_star_1 + slope_D_1 * (x - x_star_1)

def xD_of_y(y):
    """Return the x-coordinate of line D at y for the current iterate."""
    return x_star_1 + (1./slope_D_1) * (y - y_star_1)

# Why the loop stopped: "converged", "empty_region" or "max_iterations".
# Recorded so the report below cannot claim convergence when the loop merely
# ran out of iterations.
stop_reason_1 = "max_iterations"

for iteration_1 in range(max_iterations_1):
    # Line D joins the current (x*, y*) to (target, target); refresh its slope
    slope_D_1 = (target_mean_1 - y_star_1) / (target_mean_1 - x_star_1)

    # Region B: on/above y = y* left of x*, on/above line D right of x*
    left_mask_B_1  = (X_0 <= x_star_1) & (Y_0 >= y_star_1)
    right_mask_B_1 = (X_0 >= x_star_1) & (Y_0 >= yD_of_x(X_0))
    B_mask_1 = left_mask_B_1 | right_mask_B_1

    # Region A: on/right of x = x* below y*, on/right of line D above y*
    down_mask_A_1  = (Y_0 <= y_star_1) & (X_0 >= x_star_1)
    up_mask_A_1    = (Y_0 >= y_star_1) & (X_0 >= xD_of_y(Y_0))
    A_mask_1 = down_mask_A_1 | up_mask_A_1

    # Estimate E[Y | B] (NaN when no sample falls in B)
    Y_in_B_1 = Y_0[B_mask_1]
    meanY_in_B_1 = np.mean(Y_in_B_1) if len(Y_in_B_1) > 0 else float('nan')

    # Estimate E[X | A] (NaN when no sample falls in A)
    X_in_A_1 = X_0[A_mask_1]
    meanX_in_A_1 = np.mean(X_in_A_1) if len(X_in_A_1) > 0 else float('nan')

    # A NaN mean would be written into x*/y* by update_stars, after which every
    # comparison against them is False and the remaining iterations are wasted.
    # Stop here and keep the last valid thresholds instead.
    if np.isnan(meanX_in_A_1) or np.isnan(meanY_in_B_1):
        stop_reason_1 = "empty_region"
        break

    # Check for convergence: both means within tolerance of the target
    if abs(meanY_in_B_1 - target_mean_1) < tolerance_1 and abs(meanX_in_A_1 - target_mean_1) < tolerance_1:
        stop_reason_1 = "converged"
        break

    # Update x_star and y_star
    x_star_1, y_star_1 = update_stars(x_star_1, y_star_1, meanX_in_A_1, meanY_in_B_1)

print("ITERATIONS ON INDEPENDENT NORMALS")
print("")
if stop_reason_1 == "converged":
    print("Converged after", iteration_1 + 1, "iterations")
elif stop_reason_1 == "empty_region":
    print("NOT converged: region A or B held no samples at iteration", iteration_1 + 1)
else:
    # The loop ended right after an update, so x*/y* are one step ahead of the
    # means and shares printed below.
    print("NOT converged within", max_iterations_1, "iterations;",
          "x*, y* include one more update than the means shown")
print("Share of X in A =", len(X_in_A_1)/N)
print("Share of Y in B =", len(Y_in_B_1)/N)
print("x* =", x_star_1)
print("y* =", y_star_1)
print("E(x) in A =", meanX_in_A_1)
print("E(y) in B =", meanY_in_B_1)

ITERATIONS ON INDEPENDENT NORMALS

Converged after 409 iterations
Share of X in A = 0.43007833333333334
Share of Y in B = 0.367787
x* = 1.0046437609623038
y* = 1.0444846721397816
E(x) in A = 2.0009999404210497
E(y) in B = 2.0008704279696143


# **CORRELATED NORMAL**

In [5]:
############# POSITIVE CORRELATION #############

import numpy as np

# Seed NumPy's legacy global generator so repeated runs draw the same numbers
np.random.seed(12345)

# Monte Carlo sample size
N = 3_000_000

# Parameters of the bivariate normal: means, standard deviations, correlation
mu_x_2 = 1.2
mu_y_2 = 1.1
sigma_x_2 = sigma_y_2 = 1.0
rho_2 = 0.99

# Candidate thresholds (x*, y*) and the coordinate c_0_2 of the point (c_0_2, c_0_2)
x_star_2, y_star_2 = 1.191, 1.193
c_0_2 = 2

# Line D joins (x_star_2, y_star_2) to (c_0_2, c_0_2); its slope is rise over run:
#   slope = (c_0_2 - y_star_2) / (c_0_2 - x_star_2)
slope_D_2 = (c_0_2 - y_star_2) / (c_0_2 - x_star_2)

def yD_of_x(x):
    """Return the y-coordinate of line D at abscissa ``x``.

    Point-slope form anchored at (x_star_2, y_star_2) with slope slope_D_2;
    all three are read from the enclosing namespace when the function is called.
    """
    run = x - x_star_2
    return y_star_2 + slope_D_2 * run
def xD_of_y(y):
    """Return the x-coordinate of line D at ordinate ``y`` (inverse of yD_of_x).

    slope_D_2 is dy/dx, so dx/dy is its reciprocal. x_star_2, y_star_2 and
    slope_D_2 are read from the enclosing namespace when the function is called.
    """
    dx_per_dy = 1. / slope_D_2
    rise = y - y_star_2
    return x_star_2 + dx_per_dy * rise

# 1) Sampling: N joint draws of (X, Y) from a bivariate normal whose covariance
#    matrix has the variances on the diagonal and rho*sigma_x*sigma_y off it.
mean_2 = np.array([mu_x_2, mu_y_2])
cov_2 = np.array([
    [sigma_x_2**2, rho_2*sigma_x_2*sigma_y_2],
    [rho_2*sigma_x_2*sigma_y_2, sigma_y_2**2],
])
# The draw has one row per sample; transposing splits it into the X and Y columns
X_2, Y_2 = np.random.multivariate_normal(mean=mean_2, cov=cov_2, size=N).T


# 2) Region B, built from two pieces that meet at x = x_star:
#    - for x <= x_star: points on or above the horizontal line F (y = y_star)
#    - for x >= x_star: points on or above line D (y = yD_of_x(x))
left_mask_B_2 = np.logical_and(X_2 <= x_star_2, Y_2 >= y_star_2)
right_mask_B_2 = np.logical_and(X_2 >= x_star_2, Y_2 >= yD_of_x(X_2))
B_mask_2 = np.logical_or(left_mask_B_2, right_mask_B_2)

# 3) Region A, the mirror construction with the roles of x and y swapped:
#    - for y <= y_star: points on or right of the vertical line E (x = x_star)
#    - for y >= y_star: points on or right of line D (x = xD_of_y(y))
down_mask_A_2 = np.logical_and(Y_2 <= y_star_2, X_2 >= x_star_2)
up_mask_A_2 = np.logical_and(Y_2 >= y_star_2, X_2 >= xD_of_y(Y_2))
A_mask_2 = np.logical_or(down_mask_A_2, up_mask_A_2)

# 4) Monte Carlo estimate of E[Y | B]; NaN if no sample landed in B
Y_in_B_2 = Y_2[B_mask_2]
meanY_in_B_2 = Y_in_B_2.mean() if Y_in_B_2.size > 0 else float('nan')

# 5) Monte Carlo estimate of E[X | A]; NaN if no sample landed in A
X_in_A_2 = X_2[A_mask_2]
meanX_in_A_2 = X_in_A_2.mean() if X_in_A_2.size > 0 else float('nan')

# Report: fraction of samples in each region, the thresholds, and the means
print("Share of X in A =", X_in_A_2.size/N)
print("Share of Y in B =", Y_in_B_2.size/N)
print("x* =", x_star_2)
print("y* =", y_star_2)
print("E(x) in A =", meanX_in_A_2)
print("E(y) in B =", meanY_in_B_2)



Share of X in A = 0.391681
Share of Y in B = 0.119893
x* = 1.191
y* = 1.193
E(x) in A = 1.999553962971532
E(y) in B = 1.9968068038928537


In [6]:
############# ITERATIVE SOLUTION #############

# Starting guess for the thresholds (same value on both axes)
x_star_3 = y_star_3 = 1.1

# Value both conditional means are driven towards
target_mean_3 = 2.0

# Stopping rule: each mean must be within this distance of the target ...
tolerance_3 = 0.001

# ... or the search gives up after this many passes
max_iterations_3 = 1_000

# Function to update x_star and y_star
def update_stars(x_star, y_star, meanX_in_A, meanY_in_B, learning_rate=0.04):
    """Move each threshold by a fraction of its conditional-mean shortfall.

    The shortfall is ``target_mean_3 - mean`` (target_mean_3 is read from the
    enclosing namespace); x_star is adjusted using meanX_in_A and y_star using
    meanY_in_B, each scaled by ``learning_rate``.

    Returns the updated ``(x_star, y_star)`` pair.
    """
    gap_x = target_mean_3 - meanX_in_A
    gap_y = target_mean_3 - meanY_in_B
    x_star += learning_rate * gap_x
    y_star += learning_rate * gap_y
    return x_star, y_star

# Iteratively update x_star and y_star until convergence

# Line-D helpers. They look up x_star_3, y_star_3 and slope_D_3 in the notebook
# namespace each time they are called, so defining them once (rather than on
# every pass through the loop) is enough: each call sees the current iterate.
def yD_of_x(x):
    """Return the y-coordinate of line D at x for the current iterate."""
    return y_star_3 + slope_D_3 * (x - x_star_3)

def xD_of_y(y):
    """Return the x-coordinate of line D at y for the current iterate."""
    return x_star_3 + (1./slope_D_3) * (y - y_star_3)

# Why the loop stopped: "converged", "empty_region" or "max_iterations".
# Recorded so the report below cannot claim convergence when the loop merely
# ran out of iterations.
stop_reason_3 = "max_iterations"

for iteration_3 in range(max_iterations_3):
    # Line D joins the current (x*, y*) to (target, target); refresh its slope
    slope_D_3 = (target_mean_3 - y_star_3) / (target_mean_3 - x_star_3)

    # Region B: on/above y = y* left of x*, on/above line D right of x*
    left_mask_B_3  = (X_2 <= x_star_3) & (Y_2 >= y_star_3)
    right_mask_B_3 = (X_2 >= x_star_3) & (Y_2 >= yD_of_x(X_2))
    B_mask_3 = left_mask_B_3 | right_mask_B_3

    # Region A: on/right of x = x* below y*, on/right of line D above y*
    down_mask_A_3  = (Y_2 <= y_star_3) & (X_2 >= x_star_3)
    up_mask_A_3    = (Y_2 >= y_star_3) & (X_2 >= xD_of_y(Y_2))
    A_mask_3 = down_mask_A_3 | up_mask_A_3

    # Estimate E[Y | B] (NaN when no sample falls in B)
    Y_in_B_3 = Y_2[B_mask_3]
    meanY_in_B_3 = np.mean(Y_in_B_3) if len(Y_in_B_3) > 0 else float('nan')

    # Estimate E[X | A] (NaN when no sample falls in A)
    X_in_A_3 = X_2[A_mask_3]
    meanX_in_A_3 = np.mean(X_in_A_3) if len(X_in_A_3) > 0 else float('nan')

    # A NaN mean would be written into x*/y* by update_stars, after which every
    # comparison against them is False and the remaining iterations are wasted.
    # Stop here and keep the last valid thresholds instead.
    if np.isnan(meanX_in_A_3) or np.isnan(meanY_in_B_3):
        stop_reason_3 = "empty_region"
        break

    # Check for convergence: both means within tolerance of the target
    if abs(meanY_in_B_3 - target_mean_3) < tolerance_3 and abs(meanX_in_A_3 - target_mean_3) < tolerance_3:
        stop_reason_3 = "converged"
        break

    # Update x_star and y_star
    x_star_3, y_star_3 = update_stars(x_star_3, y_star_3, meanX_in_A_3, meanY_in_B_3)

print("ITERATIONS ON CORRELATED NORMALS")
print("")
if stop_reason_3 == "converged":
    print("Converged after", iteration_3 + 1, "iterations")
elif stop_reason_3 == "empty_region":
    print("NOT converged: region A or B held no samples at iteration", iteration_3 + 1)
else:
    # The loop ended right after an update, so x*/y* are one step ahead of the
    # means and shares printed below.
    print("NOT converged within", max_iterations_3, "iterations;",
          "x*, y* include one more update than the means shown")
print("Share of X in A =", len(X_in_A_3)/N)
print("Share of Y in B =", len(Y_in_B_3)/N)
print("x* =", x_star_3)
print("y* =", y_star_3)
print("E(x) in A =", meanX_in_A_3)
print("E(y) in B =", meanY_in_B_3)




ITERATIONS ON CORRELATED NORMALS

Converged after 163 iterations
Share of X in A = 0.3916656666666667
Share of Y in B = 0.11983266666666667
x* = 1.1910787241786869
y* = 1.1935377302301617
E(x) in A = 1.9990266553906717
E(y) in B = 1.9990332544584042


In [7]:
############# NEGATIVE CORRELATION #############

import numpy as np

# Seed NumPy's legacy global generator so repeated runs draw the same numbers
np.random.seed(12345)

# Monte Carlo sample size
N = 3_000_000

# Parameters of the bivariate normal: means, standard deviations, correlation
mu_x_4 = 1.2
mu_y_4 = 1.1
sigma_x_4 = sigma_y_4 = 1.0
rho_4 = -0.99

# Candidate thresholds (x*, y*) and the coordinate c_0_4 of the point (c_0_4, c_0_4)
x_star_4, y_star_4 = 1.015, 1.065
c_0_4 = 2

# Line D joins (x_star_4, y_star_4) to (c_0_4, c_0_4); its slope is rise over run:
#   slope = (c_0_4 - y_star_4) / (c_0_4 - x_star_4)
slope_D_4 = (c_0_4 - y_star_4) / (c_0_4 - x_star_4)

def yD_of_x(x):
    """Return the y-coordinate of line D at abscissa ``x``.

    Point-slope form anchored at (x_star_4, y_star_4) with slope slope_D_4;
    all three are read from the enclosing namespace when the function is called.
    """
    run = x - x_star_4
    return y_star_4 + slope_D_4 * run
def xD_of_y(y):
    """Return the x-coordinate of line D at ordinate ``y`` (inverse of yD_of_x).

    slope_D_4 is dy/dx, so dx/dy is its reciprocal. x_star_4, y_star_4 and
    slope_D_4 are read from the enclosing namespace when the function is called.
    """
    dx_per_dy = 1. / slope_D_4
    rise = y - y_star_4
    return x_star_4 + dx_per_dy * rise

# 1) Sampling: N joint draws of (X, Y) from a bivariate normal whose covariance
#    matrix has the variances on the diagonal and rho*sigma_x*sigma_y off it.
mean_4 = np.array([mu_x_4, mu_y_4])
cov_4 = np.array([
    [sigma_x_4**2, rho_4*sigma_x_4*sigma_y_4],
    [rho_4*sigma_x_4*sigma_y_4, sigma_y_4**2],
])
# The draw has one row per sample; transposing splits it into the X and Y columns
X_4, Y_4 = np.random.multivariate_normal(mean=mean_4, cov=cov_4, size=N).T


# 2) Region B, built from two pieces that meet at x = x_star:
#    - for x <= x_star: points on or above the horizontal line F (y = y_star)
#    - for x >= x_star: points on or above line D (y = yD_of_x(x))
left_mask_B_4 = np.logical_and(X_4 <= x_star_4, Y_4 >= y_star_4)
right_mask_B_4 = np.logical_and(X_4 >= x_star_4, Y_4 >= yD_of_x(X_4))
B_mask_4 = np.logical_or(left_mask_B_4, right_mask_B_4)

# 3) Region A, the mirror construction with the roles of x and y swapped:
#    - for y <= y_star: points on or right of the vertical line E (x = x_star)
#    - for y >= y_star: points on or right of line D (x = xD_of_y(y))
down_mask_A_4 = np.logical_and(Y_4 <= y_star_4, X_4 >= x_star_4)
up_mask_A_4 = np.logical_and(Y_4 >= y_star_4, X_4 >= xD_of_y(Y_4))
A_mask_4 = np.logical_or(down_mask_A_4, up_mask_A_4)

# 4) Monte Carlo estimate of E[Y | B]; NaN if no sample landed in B
Y_in_B_4 = Y_4[B_mask_4]
meanY_in_B_4 = Y_in_B_4.mean() if Y_in_B_4.size > 0 else float('nan')

# 5) Monte Carlo estimate of E[X | A]; NaN if no sample landed in A
X_in_A_4 = X_4[A_mask_4]
meanX_in_A_4 = X_in_A_4.mean() if X_in_A_4.size > 0 else float('nan')

# Report: fraction of samples in each region, the thresholds, and the means
print("Share of X in A =", X_in_A_4.size/N)
print("Share of Y in B =", Y_in_B_4.size/N)
print("x* =", x_star_4)
print("y* =", y_star_4)
print("E(x) in A =", meanX_in_A_4)
print("E(y) in B =", meanY_in_B_4)



Share of X in A = 0.528269
Share of Y in B = 0.470284
x* = 1.015
y* = 1.065
E(x) in A = 1.9519922012286823
E(y) in B = 1.9429191333746652


In [8]:
############# ITERATIVE SOLUTION #############

# Starting guess for the thresholds (same value on both axes)
x_star_5 = y_star_5 = 1.1

# Value both conditional means are driven towards
target_mean_5 = 2.0

# Stopping rule: each mean must be within this distance of the target ...
tolerance_5 = 0.001

# ... or the search gives up after this many passes
max_iterations_5 = 1_000

# Function to update x_star and y_star

def update_stars(x_star, y_star, meanX_in_A, meanY_in_B, learning_rate=0.1):
    """Move each threshold by a fraction of its conditional-mean shortfall.

    The shortfall is ``target_mean_5 - mean`` (target_mean_5 is read from the
    enclosing namespace); x_star is adjusted using meanX_in_A and y_star using
    meanY_in_B, each scaled by ``learning_rate``.

    Returns the updated ``(x_star, y_star)`` pair.
    """
    gap_x = target_mean_5 - meanX_in_A
    gap_y = target_mean_5 - meanY_in_B
    x_star += learning_rate * gap_x
    y_star += learning_rate * gap_y
    return x_star, y_star

# Iteratively update x_star and y_star until convergence

# Line-D helpers. They look up x_star_5, y_star_5 and slope_D_5 in the notebook
# namespace each time they are called, so defining them once (rather than on
# every pass through the loop) is enough: each call sees the current iterate.
def yD_of_x(x):
    """Return the y-coordinate of line D at x for the current iterate."""
    return y_star_5 + slope_D_5 * (x - x_star_5)

def xD_of_y(y):
    """Return the x-coordinate of line D at y for the current iterate."""
    return x_star_5 + (1./slope_D_5) * (y - y_star_5)

# Why the loop stopped: "converged", "empty_region" or "max_iterations".
# Recorded so the report below cannot claim convergence when the loop merely
# ran out of iterations.
stop_reason_5 = "max_iterations"

for iteration_5 in range(max_iterations_5):
    # Line D joins the current (x*, y*) to (target, target); refresh its slope
    slope_D_5 = (target_mean_5 - y_star_5) / (target_mean_5 - x_star_5)

    # Region B: on/above y = y* left of x*, on/above line D right of x*
    left_mask_B_5  = (X_4 <= x_star_5) & (Y_4 >= y_star_5)
    right_mask_B_5 = (X_4 >= x_star_5) & (Y_4 >= yD_of_x(X_4))
    B_mask_5 = left_mask_B_5 | right_mask_B_5

    # Region A: on/right of x = x* below y*, on/right of line D above y*
    down_mask_A_5  = (Y_4 <= y_star_5) & (X_4 >= x_star_5)
    up_mask_A_5    = (Y_4 >= y_star_5) & (X_4 >= xD_of_y(Y_4))
    A_mask_5 = down_mask_A_5 | up_mask_A_5

    # Estimate E[Y | B] (NaN when no sample falls in B)
    Y_in_B_5 = Y_4[B_mask_5]
    meanY_in_B_5 = np.mean(Y_in_B_5) if len(Y_in_B_5) > 0 else float('nan')

    # Estimate E[X | A] (NaN when no sample falls in A)
    X_in_A_5 = X_4[A_mask_5]
    meanX_in_A_5 = np.mean(X_in_A_5) if len(X_in_A_5) > 0 else float('nan')

    # A NaN mean would be written into x*/y* by update_stars, after which every
    # comparison against them is False and the remaining iterations are wasted.
    # Stop here and keep the last valid thresholds instead.
    if np.isnan(meanX_in_A_5) or np.isnan(meanY_in_B_5):
        stop_reason_5 = "empty_region"
        break

    # Check for convergence: both means within tolerance of the target
    if abs(meanY_in_B_5 - target_mean_5) < tolerance_5 and abs(meanX_in_A_5 - target_mean_5) < tolerance_5:
        stop_reason_5 = "converged"
        break

    # Update x_star and y_star
    x_star_5, y_star_5 = update_stars(x_star_5, y_star_5, meanX_in_A_5, meanY_in_B_5)

print("ITERATIONS ON NEGATIVE CORRELATED NORMALS")
print("")
if stop_reason_5 == "converged":
    print("Converged after", iteration_5 + 1, "iterations")
elif stop_reason_5 == "empty_region":
    print("NOT converged: region A or B held no samples at iteration", iteration_5 + 1)
else:
    # The loop ended right after an update, so x*/y* are one step ahead of the
    # means and shares printed below.
    print("NOT converged within", max_iterations_5, "iterations;",
          "x*, y* include one more update than the means shown")
print("Share of X in A =", len(X_in_A_5)/N)
print("Share of Y in B =", len(Y_in_B_5)/N)
print("x* =", x_star_5)
print("y* =", y_star_5)
print("E(x) in A =", meanX_in_A_5)
print("E(y) in B =", meanY_in_B_5)




ITERATIONS ON NEGATIVE CORRELATED NORMALS

Converged after 81 iterations
Share of X in A = 0.4994843333333333
Share of Y in B = 0.4376386666666667
x* = 1.1962650611742887
y* = 1.250792645768163
E(x) in A = 1.9990440695090503
E(y) in B = 1.999013559601772
