In [1]:
import cvxpy as cp
import numpy as np
import dsp
import mosek
import scipy.stats as stats
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from numpy.linalg import det, solve
from sklearn.model_selection import train_test_split
from multiprocessing import Pool
from tqdm import tqdm  # Import tqdm for progress display

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#CVXPY call this the logistic function
def logistic(x):
    return np.log(1 + np.exp(x))

def sigmoid(x):
    return 1/(1+np.exp(-x))

In [3]:


def DRO(X, y, K, max_iter=1000, tol=1e-6, eps=0.5, P_list=None, theta0_star=None, model="CN"):
    """
    Parameters:
    X: list of numpy arrays, each element is a matrix corresponding to different domains.
    y: list of numpy arrays, response variables for each domain (not used in this function).
    K: int, number of classes.
    max_iter: int, maximum number of iterations.
    tol: float, tolerance for convergence.
    eps: float, learning rate for gradient descent.
    P_list: list of numpy arrays, probabilities for logistic regression.
    theta0_star: numpy array, initial theta.
    model: str, one of "ID" or "CN", determining the update rule for theta.
    
    Returns:
    A dictionary with theta, gamma, and the number of iterations.
    """
    
    # 检查 model 参数是否在允许的选项中
    if model not in ["ID", "CN"]:
        raise ValueError(f"Invalid model '{model}'. Choose either 'ID' or 'CN'.")
    
    L = len(X)
    p = X[0].shape[1]
    theta0 = np.zeros((p, K))
    theta = theta0
    n_Q = sum([x.shape[0] for x in X])
    X_mat = np.vstack(X)  # Combine all X matrices into one big matrix
    
    gamma0 = np.ones(L) / L

    # Step 3: Main loop
    mu_list = []
    for l in range(L):
        Xl = X[l]
        Pl = P_list[l]
        mu_list.append(Xl.T @ Pl / n_Q)
    
    mu_list_k = [np.column_stack([mu_list[l][:, k] for l in range(L)]) for k in range(K)]
    
    iter = 0
    s = 1
    gamma = gamma0
    theta = theta0_star if theta0_star is not None else np.zeros((p, K))
    
    while s > tol and iter < max_iter:
        iter += 1
        
        # Step 3.1: Compute Exp and q_mat
        Exp = np.exp(X_mat @ theta - np.max(X_mat @ theta, axis=1, keepdims=True))
        Sexp = np.sum(Exp, axis=1)
        q_mat = Exp / Sexp[:, None]
        
        # Step 3.2: Compute v_mat
        v_mat = X_mat.T @ q_mat / n_Q
        
        # Step 3.3: Compute Sig_list
        Sig_list = []
        for k in range(K):
            Dkk = np.diag(q_mat[:, k] - q_mat[:, k]**2)
            Sig_k = X_mat.T @ Dkk @ X_mat / n_Q
            while det(Sig_k) <= 1e-2:
                Sig_k += 1e-2 * np.eye(p)
            Sig_list.append(Sig_k)
        
        # Step 3.4: Update gamma using CVXPY
        A = np.zeros((L, L))
        for k in range(K):
            A += mu_list_k[k].T @ solve(Sig_list[k], mu_list_k[k])
        
        while det(A) <= 1e-2:
            A += 1e-3 * np.eye(L)
        
        b = np.zeros(L)
        for k in range(K):
            b += mu_list_k[k].T @ solve(Sig_list[k], v_mat[:, k])
        
        gamma_var = cp.Variable(L)
        objective = cp.Minimize(0.5 * cp.quad_form(gamma_var, A) - b @ gamma_var)
        constraints = [cp.sum(gamma_var) == 1, gamma_var >= 0]
        prob = cp.Problem(objective, constraints)
        prob.solve()
        gamma = gamma_var.value
        
        # Step 3.5: Update theta based on the selected model
        theta_old = theta.copy()
        for k in range(1, K):
            grad = mu_list_k[k] @ gamma - v_mat[:, k]
            if model == "CN":
                theta[:, k] += eps * solve(Sig_list[k], grad)
            elif model == "ID":
                theta[:, k] += eps * grad
        
        # Step 3.6: Update s (convergence check)
        s = np.sqrt(np.sum((theta_old - theta) ** 2))
    
    gamma = np.array(gamma).flatten()
    return {'theta': theta, 'gamma': gamma, 'iter': iter}

# 示例用法
# 定义一些模拟数据
#X = [np.random.randn(10, 3), np.random.randn(15, 3)]  # 两个域的特征矩阵
#y = [np.random.randint(0, 2, 10), np.random.randint(0, 2, 15)]  # 响应变量 (在本函数中不使用)
#P_list = [np.random.rand(10, 2), np.random.rand(15, 2)]  # 对数几率模型的概率
#theta0_star = np.random.rand(3, 2)  # 初始 theta

# 选择模型 "CN" 或 "ID"
#result_id = DRO(X, y, 2, P_list=P_list, theta0_star=theta0_star, model="ID")
#print(result_id)
#result_cn = DRO(X, y, 2, P_list=P_list, theta0_star=theta0_star, model="CN")
#print(result_cn)

# 示例调用
#X = [np.random.randn(100, 10) for _ in range(3)]
#y = [np.random.randint(0, 2, 100) for _ in range(3)]
#K = 3
#P_list = [np.random.rand(100, K) for _ in range(3)]
#theta0_star = np.random.rand(10, K)
#result_id = DRO(X, y, K, P_list=P_list, theta0_star=theta0_star,model="ID")
#print(result_id)
#result_cn = DRO(X, y, K, P_list=P_list, theta0_star=theta0_star,model="CN")
#print(result_cn)


In [4]:
def calculate_predictions(X, theta):
    if np.ndim(theta) == 1:
        theta_column = np.reshape(theta,(len(theta),1))
    else:
        theta_column = theta
    zeros_column = np.zeros((len(theta),1))
    theta_cc = np.hstack([zeros_column,theta_column]) 
    # calculate logits (n * K)
    logits = np.dot(X, theta_cc)
    
    # calculate softmax probability p_pred (n * K)
    exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))  
    p_pred = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)
    
    # classification prediction y_pred (n, )
    y_pred = np.argmax(p_pred, axis=1)
    
    return p_pred, y_pred

# eg
# X = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) 
# theta = np.array([[0.5, -0.5], [1.0, -1.0]])  
# theta = np.array([0.5, -0.5]) 
# p_pred, y_pred = calculate_predictions(X, theta)
# print("p_pred:\n", p_pred)
# print("y_pred:\n", y_pred) # from [0,1,2]

In [5]:
def cross_entropy_loss(y_true, y_pred_prob):
    """
    Calculate the cross-entropy loss.

    Parameters:
    y_true: numpy array of shape (n_samples, 1), the true labels (0 or 1).
    y_pred_prob: numpy array of shape (n_samples, 2), the predicted probabilities for each class (class 0 and class 1).

    Returns:
    Cross-entropy loss as a float.
    """
    # Ensure that y_pred_prob values are within (0, 1) to avoid log overflow.
    epsilon = 1e-15
    y_pred_prob = np.clip(y_pred_prob, epsilon, 1 - epsilon)
    
    # Extract the probabilities corresponding to the positive class (class 1).
    y_pred_prob_positive = y_pred_prob[:, 1]
    
    # Calculate the cross-entropy loss.
    loss = -np.mean(y_true * np.log(y_pred_prob_positive) + (1 - y_true) * np.log(1 - y_pred_prob_positive))
    
    return loss

def zero_one_loss(y_true, y_pred):
    """
    Calculate the 0-1 loss (error rate).

    Parameters:
    y_true: numpy array of shape (n_samples, 1), the true labels.
    y_pred: numpy array of shape (n_samples, 1), the predicted labels.

    Returns:
    0-1 loss (error rate) as a float.
    """
    # Compute the error rate.
    error_rate = np.mean(y_true != y_pred)
    
    return error_rate

In [7]:

Error_var_id = []
Sd_var_id = []
Error_var_cn = []
Sd_var_cn = []

zero_one_loss_id_dsp_test = []
zero_one_loss_cn_dsp_test = []
zero_one_loss_id_dsp_train = []
zero_one_loss_cn_dsp_train = []

cross_entropy_loss_id_dsp_test = []
cross_entropy_loss_cn_dsp_test = []
cross_entropy_loss_id_dsp_train = []
cross_entropy_loss_cn_dsp_train = []


size = 20 
base = 500
rep = 10
p = 3

beta_1 = np.array([0.9,0.2,0.2])
beta_2 = np.array([0.2,0.9,0.2])
beta_3 = np.array([0.2,0.2,0.9])

for j in range(size):
    print("j= ",j)
    importance = [] ## 
    weighting = [] ## gamma
    variable = [] ## theta
    wei_id = []
    wei_cn = []
    var_id = []
    var_cn = []
    yPred_id = []
    yPred_cn = []
    yPred_dsp = []
    pPred_id = []
    pPred_cn = []
    pPred_dsp = []
    y_test_true = []
    y_train_true = []
    y_Pred_id_list = []
    y_Pred_cn_list = []
    y_Pred_dsp_list = []
    y_True_list = []
    p_Pred_id_list = []
    p_Pred_cn_list = []
    p_Pred_dsp_list = []
    yFit_dsp = []
    yFit_id = []
    yFit_cn = []
    pFit_dsp = []
    pFit_id = []
    pFit_cn = []
    for i in range(rep):
        n = base*(j+1)
        X_1 = np.random.uniform(-10, 10, (n, 3))
        X_2 = np.random.uniform(-10, 10, (n, 3))
        X_3 = np.random.uniform(-10, 10, (n, 3))

        logits_1 = X_1.dot(beta_1)
        probs_1 = sigmoid(logits_1)
        y_1 = np.random.binomial(1, probs_1)

        logits_2 = X_2.dot(beta_2)
        probs_2 = sigmoid(logits_2)
        y_2 = np.random.binomial(1, probs_2)

        logits_3 = X_3.dot(beta_3)
        probs_3 = sigmoid(logits_3)
        y_3 = np.random.binomial(1, probs_3)

        X = [X_1,X_2,X_3]
        X_mat = np.vstack(X)
        print(np.shape(X_mat))
        y = [y_1,y_2,y_3]

        # data splitting
        X_train_list = []
        X_test_list = []
        y_train_list = []
        y_test_list = []

        for X_i, y_i in zip(X, y):
            X_train, X_test, y_train, y_test = train_test_split(X_i, y_i, test_size=0.3, random_state=42)
            X_train_list.append(X_train)
            X_test_list.append(X_test)
            y_train_list.append(y_train)
            y_test_list.append(y_test)

        y_test_true.append(np.vstack(y_test_list))
        y_train_true.append(np.vstack(y_train_list))
        X_test_mat = np.vstack(X_test_list)
        print(np.shape(X_test_mat))
        X_train_mat = np.vstack(X_train_list)
        print(np.shape(X_train_mat))
        
        
        model1=LogisticRegression()
        model2=LogisticRegression()
        model3=LogisticRegression()
        model1.fit(X_train_list[0], y_train_list[0])
        model2.fit(X_train_list[1], y_train_list[1])
        model3.fit(X_train_list[2], y_train_list[2])
        phat_1=model1.predict_proba(X_train_list[0])
        phat_2=model2.predict_proba(X_train_list[1])
        phat_3=model3.predict_proba(X_train_list[2])


        P_list = [phat_1,phat_2,phat_3]
        K=2
        theta0_star = np.random.rand(p, K)
        ## DRO sol
        res_id = DRO(X_train_list, y_train_list, K, P_list=P_list, theta0_star=theta0_star, model="ID")
        res_cn = DRO(X_train_list, y_train_list, K, P_list=P_list, theta0_star=theta0_star, model="CN")
        var_id.append(res_id["theta"][:, 1:])
        var_cn.append(res_cn["theta"][:, 1:])
        wei_id.append(res_id["gamma"])
        wei_cn.append(res_cn["gamma"])
        p_pred,y_pred = calculate_predictions(X_test_mat, res_id["theta"][:, 1:])
        ppred_id = p_pred
        pPred_id.append(ppred_id)
        ypred_id = y_pred
        yPred_id.append(ypred_id)
        p_pred,y_pred = calculate_predictions(X_train_mat, res_id["theta"][:, 1:])
        pfit_id = p_pred
        pFit_id.append(pfit_id)
        yfit_id = y_pred
        yFit_id.append(yfit_id)
        p_pred,y_pred = calculate_predictions(X_test_mat, res_cn["theta"][:, 1:])
        ypred_cn = y_pred
        yPred_cn.append(ypred_cn)
        ppred_cn = p_pred
        pPred_cn.append(ppred_cn)
        p_pred,y_pred = calculate_predictions(X_train_mat, res_cn["theta"][:, 1:])
        yfit_cn = y_pred
        yFit_cn.append(yfit_cn)
        pfit_cn = p_pred
        pFit_cn.append(pfit_cn)
        
        
        ## DSP sol
        q = cp.Variable(3, nonneg=True) # gamma
        b = cp.Variable(3) # theta
        b_column = cp.reshape(b, (3, 1))
        zero_column = np.zeros((3, 1))
        B = cp.hstack([zero_column,b_column])

        logodds_1 = X_train_list[0] @ B
        logodds_2 = X_train_list[1] @ B
        logodds_3 = X_train_list[2] @ B
        logodds_vs = cp.vstack([X_train_list[0],X_train_list[1],X_train_list[2]])@ B 


        imp_1 = cp.sum(cp.multiply(phat_1, logodds_1))

        imp_2 = cp.sum(cp.multiply(phat_2, logodds_2))

        imp_3 = cp.sum(cp.multiply(phat_3, logodds_3))

        f_1 = dsp.saddle_inner(q,cp.hstack([imp_1,imp_2,imp_3])/ (0.7*n))

        f_2 = cp.sum(cp.log_sum_exp(logodds_vs,axis=1))/(n*3*0.7)

        f=f_1-f_2

        obj = dsp.MinimizeMaximize(f)
        constraints = [cp.sum(q) == 1]
        prob = dsp.SaddlePointProblem(obj,constraints,[q],[b])

        try:
            prob.solve(solver="MOSEK")
            importance.append(prob.value)
            weighting.append(q.value)
            variable.append(b.value)
            p_pred,y_pred = calculate_predictions(X_test_mat, b.value)
            ypred_dsp = y_pred
            yPred_dsp.append(ypred_dsp)
            ppred_dsp = p_pred
            pPred_dsp.append(ppred_dsp)
            p_pred,y_pred = calculate_predictions(X_train_mat, b.value)
            yfit_dsp = y_pred
            yFit_dsp.append(yfit_dsp)
            pfit_dsp = p_pred
            pFit_dsp.append(pfit_dsp)
            print(i)

            #site_1 = y_1*(X_1 @ b.value) - logistic(X_1 @ b.value) - y_1*np.log(p1_bar) - (1-y_1)*np.log(1-p1_bar)
            #site_2 = y_2*(X_2 @ b.value) - logistic(X_2 @ b.value) - y_2*np.log(p2_bar) - (1-y_2)*np.log(1-p2_bar)
            #site_3 = y_3*(X_3 @ b.value) - logistic(X_3 @ b.value) - y_3*np.log(p3_bar) - (1-y_3)*np.log(1-p3_bar)

            #site_importance = np.vstack([site_1,site_2,site_3])
            #site_importance = site_importance.T
            #cov = np.cov(site_importance, rowvar = False)

            #var = np.dot(q.value, np.dot(cov, q.value.T))
            #std_clt = np.sqrt(var/1000)
            #stds.append(std_clt)

        except Exception as e:
            print(f"Error on iteration {i}: {e}")
            continue
     

    
    
    # parameter error
    error_var_id = np.mean((np.hstack(var_id).T-np.vstack(variable))**2)
    Error_var_id.append(error_var_id)
    sd_var_id = np.sqrt(np.var(np.hstack(var_id)))
    Sd_var_id.append(sd_var_id)
    error_var_cn = np.mean((np.hstack(var_cn).T-np.vstack(variable))**2)
    Error_var_cn.append(error_var_cn)
    sd_var_cn = np.sqrt(np.var(np.hstack(var_cn)))
    Sd_var_cn.append(sd_var_cn)
    
    # 0-1 loss
    zero_one_loss_id_dsp_test.append(zero_one_loss(np.hstack(y_test_list), yPred_id[-1]))
    zero_one_loss_cn_dsp_test.append(zero_one_loss(np.hstack(y_test_list), yPred_cn[-1]))
    zero_one_loss_id_dsp_train.append(zero_one_loss(np.hstack(y_train_list), yFit_id[-1]))
    zero_one_loss_cn_dsp_train.append(zero_one_loss(np.hstack(y_train_list), yFit_cn[-1]))

    # cross-entropy loss
    cross_entropy_loss_id_dsp_test.append(cross_entropy_loss(np.hstack(y_test_list), np.vstack(pPred_id)))
    cross_entropy_loss_cn_dsp_test.append(cross_entropy_loss(np.hstack(y_test_list), np.vstack(pPred_cn)))
    cross_entropy_loss_id_dsp_train.append(cross_entropy_loss(np.hstack(y_train_list), np.vstack(pFit_id)))
    cross_entropy_loss_cn_dsp_train.append(cross_entropy_loss(np.hstack(y_train_list), np.vstack(pFit_cn)))


j=  0
(1500, 3)
(450, 3)
(1050, 3)
0
(1500, 3)
(450, 3)
(1050, 3)
1
(1500, 3)
(450, 3)
(1050, 3)
2
(1500, 3)
(450, 3)
(1050, 3)
3
(1500, 3)
(450, 3)
(1050, 3)
4
(1500, 3)
(450, 3)
(1050, 3)
5
(1500, 3)
(450, 3)
(1050, 3)
6
(1500, 3)
(450, 3)
(1050, 3)
7
(1500, 3)
(450, 3)
(1050, 3)
8
(1500, 3)
(450, 3)
(1050, 3)
9


ValueError: operands could not be broadcast together with shapes (450,) (4500,) 

In [8]:
# Function to run experiments for a given sample size
def run_experiment(j):
    print(f"Running experiment for sample size index {j}")
    
    # Result containers for each repetition
    var_id, var_cn, variable = [], [], []
    yPred_id, yPred_cn, yPred_dsp = [], [], []
    pPred_id, pPred_cn, pPred_dsp = [], [], []
    yFit_id, yFit_cn, yFit_dsp = [], [], []
    pFit_id, pFit_cn, pFit_dsp = [], [], []
    y_test_true, y_train_true = [], []

    for i in range(rep):
        n = base * (j + 1)
        X_1 = np.random.uniform(-10, 10, (n, 3))
        X_2 = np.random.uniform(-10, 10, (n, 3))
        X_3 = np.random.uniform(-10, 10, (n, 3))

        logits_1 = X_1.dot(beta_1)
        probs_1 = sigmoid(logits_1)
        y_1 = np.random.binomial(1, probs_1)

        logits_2 = X_2.dot(beta_2)
        probs_2 = sigmoid(logits_2)
        y_2 = np.random.binomial(1, probs_2)

        logits_3 = X_3.dot(beta_3)
        probs_3 = sigmoid(logits_3)
        y_3 = np.random.binomial(1, probs_3)

        X = [X_1, X_2, X_3]
        y = [y_1, y_2, y_3]

        # Data splitting
        X_train_list, X_test_list, y_train_list, y_test_list = [], [], [], []

        for X_i, y_i in zip(X, y):
            X_train, X_test, y_train, y_test = train_test_split(X_i, y_i, test_size=0.3, random_state=42)
            X_train_list.append(X_train)
            X_test_list.append(X_test)
            y_train_list.append(y_train)
            y_test_list.append(y_test)

        y_test_true.append(np.vstack(y_test_list))
        y_train_true.append(np.vstack(y_train_list))
        X_test_mat = np.vstack(X_test_list)
        X_train_mat = np.vstack(X_train_list)

        # Train logistic regression models
        model1 = LogisticRegression()
        model2 = LogisticRegression()
        model3 = LogisticRegression()
        model1.fit(X_train_list[0], y_train_list[0])
        model2.fit(X_train_list[1], y_train_list[1])
        model3.fit(X_train_list[2], y_train_list[2])
        phat_1 = model1.predict_proba(X_train_list[0])
        phat_2 = model2.predict_proba(X_train_list[1])
        phat_3 = model3.predict_proba(X_train_list[2])

        P_list = [phat_1, phat_2, phat_3]
        K = 2
        theta0_star = np.random.rand(p, K)

        # DRO solutions
        res_id = DRO(X_train_list, y_train_list, K, P_list=P_list, theta0_star=theta0_star, model="ID")
        res_cn = DRO(X_train_list, y_train_list, K, P_list=P_list, theta0_star=theta0_star, model="CN")
        var_id.append(res_id["theta"][:, 1:])
        var_cn.append(res_cn["theta"][:, 1:])

        # ID model predictions
        p_pred, y_pred = calculate_predictions(X_test_mat, res_id["theta"][:, 1:])
        pPred_id.append(p_pred)
        yPred_id.append(y_pred)
        p_pred, y_pred = calculate_predictions(X_train_mat, res_id["theta"][:, 1:])
        pFit_id.append(p_pred)
        yFit_id.append(y_pred)

        # CN model predictions
        p_pred, y_pred = calculate_predictions(X_test_mat, res_cn["theta"][:, 1:])
        pPred_cn.append(p_pred)
        yPred_cn.append(y_pred)
        p_pred, y_pred = calculate_predictions(X_train_mat, res_cn["theta"][:, 1:])
        pFit_cn.append(p_pred)
        yFit_cn.append(y_pred)

        # DSP solution
        q = cp.Variable(3, nonneg=True)  # gamma
        b = cp.Variable(3)  # theta
        b_column = cp.reshape(b, (3, 1))
        zero_column = np.zeros((3, 1))
        B = cp.hstack([zero_column, b_column])

        logodds_1 = X_train_list[0] @ B
        logodds_2 = X_train_list[1] @ B
        logodds_3 = X_train_list[2] @ B
        logodds_vs = cp.vstack([X_train_list[0], X_train_list[1], X_train_list[2]]) @ B 

        imp_1 = cp.sum(cp.multiply(phat_1, logodds_1))
        imp_2 = cp.sum(cp.multiply(phat_2, logodds_2))
        imp_3 = cp.sum(cp.multiply(phat_3, logodds_3))

        f_1 = cp.sum(cp.hstack([imp_1, imp_2, imp_3])) / (0.7 * n)
        f_2 = cp.sum(cp.log_sum_exp(logodds_vs, axis=1)) / (n * 3 * 0.7)
        f = f_1 - f_2

        obj = cp.Minimize(f)
        constraints = [cp.sum(q) == 1]
        prob = cp.Problem(obj, constraints)

        try:
            prob.solve(solver="MOSEK")
            variable.append(b.value)
            p_pred, y_pred = calculate_predictions(X_test_mat, b.value)
            pPred_dsp.append(p_pred)
            yPred_dsp.append(y_pred)
            p_pred, y_pred = calculate_predictions(X_train_mat, b.value)
            pFit_dsp.append(p_pred)
            yFit_dsp.append(y_pred)
        except Exception as e:
            print(f"Error on DSP solve iteration {i}: {e}")
            continue

    # Calculate errors and losses
    error_var_id = np.mean((np.hstack(var_id).T - np.vstack(variable))**2)
    error_var_cn = np.mean((np.hstack(var_cn).T - np.vstack(variable))**2)
    sd_var_id = np.sqrt(np.var(np.hstack(var_id)))
    sd_var_cn = np.sqrt(np.var(np.hstack(var_cn)))

    zero_one_loss_id_dsp_test = zero_one_loss(np.hstack(y_test_true), yPred_id[-1])
    zero_one_loss_cn_dsp_test = zero_one_loss(np.hstack(y_test_true), yPred_cn[-1])
    zero_one_loss_id_dsp_train = zero_one_loss(np.hstack(y_train_true), yFit_id[-1])
    zero_one_loss_cn_dsp_train = zero_one_loss(np.hstack(y_train_true), yFit_cn[-1])

    cross_entropy_loss_id_dsp_test = cross_entropy_loss(np.hstack(y_test_true), np.vstack(pPred_id))
    cross_entropy_loss_cn_dsp_test = cross_entropy_loss(np.hstack(y_test_true), np.vstack(pPred_cn))
    cross_entropy_loss_id_dsp_train = cross_entropy_loss(np.hstack(y_train_true), np.vstack(pFit_id))
    cross_entropy_loss_cn_dsp_train = cross_entropy_loss(np.hstack(y_train_true), np.vstack(pFit_cn))

    return {
            "Error_var_id": error_var_id,
            "Sd_var_id": sd_var_id,
            "Error_var_cn": error_var_cn,
            "Sd_var_cn": sd_var_cn,
            "Zero_one_loss_id_dsp_test": zero_one_loss_id_dsp_test,
            "Zero_one_loss_cn_dsp_test": zero_one_loss_cn_dsp_test,
            "Zero_one_loss_id_dsp_train": zero_one_loss_id_dsp_train,
            "Zero_one_loss_cn_dsp_train": zero_one_loss_cn_dsp_train,
            "Cross_entropy_loss_id_dsp_test": cross_entropy_loss_id_dsp_test,
            "Cross_entropy_loss_cn_dsp_test": cross_entropy_loss_cn_dsp_test,
            "Cross_entropy_loss_id_dsp_train": cross_entropy_loss_id_dsp_train,
            "Cross_entropy_loss_cn_dsp_train": cross_entropy_loss_cn_dsp_train,
        }





In [9]:
# Main program
if __name__ == "__main__":
    # Parameters
    size = 20
    base = 500
    rep = 10
    p = 3
    beta_1 = np.array([0.9, 0.2, 0.2])
    beta_2 = np.array([0.2, 0.9, 0.2])
    beta_3 = np.array([0.2, 0.2, 0.9])

    # List to store results
    results = []
    print("set up")
    # Setup tqdm progress bar
    with tqdm(total=size * rep) as pbar:
        def update(*a):
            pbar.update()

        # Function to run experiments in parallel
        def run_experiments_parallel():
            with Pool(processes=None) as pool:
                results = list(tqdm(pool.imap_unordered(run_experiment, range(size)), total=size))
            return results

        # Run experiments
        print("start")
        results = run_experiments_parallel()

    # Print or use results as needed
    print("Results:", results)

set up


  0%|          | 0/200 [00:00<?, ?it/s]

start




In [52]:
error_var_id = np.mean((np.hstack(var_id).T-np.vstack(variable))**2)/(0.7*n)/3
sd_var_id = np.sqrt(np.var(np.hstack(var_id))/(0.7*n)/3)
print("sd of id: ",sd_var_id)
error_var_cn = np.mean((np.hstack(var_cn).T-np.vstack(variable))**2)/(0.7*n)/3
sd_var_cn = np.sqrt(np.var(np.hstack(var_cn))/(0.7*n)/3)
print("sd of cn: ",sd_var_cn)
print("L-2 error of id: ",error_var_id)
print("L-2 error of cn: ",error_var_cn)

sd of id:  0.006925646434470042
sd of cn:  0.006925646434470042
L-2 error of id:  0.00012396863395844335
L-2 error of cn:  0.00012396863395844335


In [71]:
y_Pred_id = np.vstack(yPred_id) # (300*3*10)*1
#y_Pred_id = np.reshape(y_Pred_id.T,(30000,1))
y_Pred_cn = np.vstack(yPred_cn) # (300*3*10)*1
#y_Pred_cn = np.reshape(y_Pred_cn.T,(30000,1))
y_Pred_dsp = np.vstack(yPred_dsp) # (300*3*10)*1 
#y_Pred_dsp = np.reshape(y_Pred_dsp.T,(30000,1))
y_True = np.vstack(y_test_true) # (300*3*10)*1 对

p_Pred_id = np.vstack(pPred_id) # (300*3*10)*2 ？ 30000*2
p_Pred_cn = np.vstack(pPred_cn) # (300*3*10)*2
p_Pred_dsp = np.vstack(pPred_dsp) # (300*3*10)*2


# Cross-Entropy Loss
# cel_id = cross_entropy_loss(y_True, p_Pred_id)
# cel_cn = cross_entropy_loss(y_True, p_Pred_cn)
# cel_dsp = cross_entropy_loss(y_True, p_Pred_dsp)
#cel_id_dsp = cross_entropy_loss(y_Pred_dsp, p_Pred_id)
#cel_cn_dsp = cross_entropy_loss(y_Pred_dsp, p_Pred_cn)
#print("Cross-Entropy Loss, id-dsp: ", cel_id_dsp)
#print("Cross-Entropy Loss, cn-dsp: ", cel_cn_dsp)


# 0-1 loss
bl_id_dsp = zero_one_loss(y_Pred_dsp, y_Pred_id)
bl_cn_dsp = zero_one_loss(y_Pred_dsp, y_Pred_cn)
print("0-1 loss, id-dsp: ", bl_id_dsp)
print("0-1 loss, cn-dsp: ", bl_cn_dsp)

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 1500 and the array at index 20 has size 3000

In [76]:
np.shape(y_Pred_id)

(1, 30)

In [None]:
    # new list
    y_Pred_id = np.vstack(yPred_id) # (300*3*10)*1
    #y_Pred_id = np.reshape(y_Pred_id.T,(30000,1))
    y_Pred_cn = np.vstack(yPred_cn) # (300*3*10)*1
    #y_Pred_cn = np.reshape(y_Pred_cn.T,(30000,1))
    y_Pred_dsp = np.vstack(yPred_dsp) # (300*3*10)*1 
    #y_Pred_dsp = np.reshape(y_Pred_dsp.T,(30000,1))
    y_True = np.vstack(y_test_true) # (300*3*10)*1 对

    p_Pred_id = np.vstack(pPred_id) # (300*3*10)*2 ？ 30000*2
    p_Pred_cn = np.vstack(pPred_cn) # (300*3*10)*2
    p_Pred_dsp = np.vstack(pPred_dsp) # (300*3*10)*2
    y_Pred_id_list.append(y_Pred_id )
    y_Pred_cn_list.append(y_Pred_cn)
    y_Pred_dsp_list.append(y_Pred_dsp)
    y_True_list.append(y_True)
    p_Pred_id_list.append(p_Pred_id)
    p_Pred_cn_list.append(p_Pred_cn)
    p_Pred_dsp_list.append(p_Pred_dsp)

In [67]:
y_True = np.vstack(y_test_true) # (300*3*10)*1
np.shape(y_True)
#p_Pred_id = np.vstack(pPred_id) # (300*3*10)*2
#np.shape(p_Pred_id)

(30, 300)

## Purely Homogenerous Regime
L=3,p=4,K=2
证明id足够，其他等价，step 2只有在初值离谱的情况下才有用


In [4]:
importance = [] ## 
weighting = [] ## gamma
variable = [] ## theta
wei_id = []
wei_cn = []
var_id = []
var_cn = []
stds = []

size = 10 
base = 10
rep = 1
p = 4

beta_1 = np.array([0.5,-0.5,1,-1])
beta_3 = beta_2 = beta_1

for j in range(size):
    print("j=",j)
    for i in range(rep):
        n = base*(j+1)
        X_3=X_2=X_1 = np.random.multivariate_normal(np.zeros(p), np.eye(p),n)

        logits_3=logits_2=logits_1 = X_1.dot(beta_1)
        probs_3=probs_2=probs_1 = sigmoid(logits_1)
        y_3=y_2=y_1 = np.random.binomial(1, probs_1)


        model1=LogisticRegression()
        model2=LogisticRegression()
        model3=LogisticRegression()
        model1.fit(X_1, y_1)
        model2.fit(X_2, y_2)
        model3.fit(X_3, y_3)
        phat_1=model1.predict_proba(X_1)
        phat_2=model2.predict_proba(X_2)
        phat_3=model3.predict_proba(X_3)

        X = [X_1,X_2,X_3]
        y = [y_1,y_2,y_3]
        P_list = [phat_1,phat_2,phat_3]
        K=2
        theta0_star = np.random.rand(p, K)
        ## DRO_id sol
        res_id = DRO(X, y, K, P_list=P_list, theta0_star=theta0_star, model="ID")
        res_cn = DRO(X, y, K, P_list=P_list, theta0_star=theta0_star, model="CN")
        var_id.append(res_id["theta"][:, 1:])
        var_cn.append(res_cn["theta"][:, 1:])
        wei_id.append(res_id["gamma"])
        wei_cn.append(res_cn["gamma"])

        ## DSP sol
        q = cp.Variable(3, nonneg=True) # gamma
        b = cp.Variable(p) # theta
        b_column = cp.reshape(b, (p, 1))
        zero_column = np.zeros((p, 1))
        B = cp.hstack([zero_column,b_column])

        logodds_1 = X_1 @ B
        logodds_2 = X_2 @ B
        logodds_3 = X_3 @ B
        logodds_vs = cp.vstack([X_1,X_2,X_3])@ B 


        imp_1 = cp.sum(cp.multiply(phat_1, logodds_1))

        imp_2 = cp.sum(cp.multiply(phat_2, logodds_2))

        imp_3 = cp.sum(cp.multiply(phat_3, logodds_3))

        f_1 = dsp.saddle_inner(q,cp.hstack([imp_1,imp_2,imp_3])/ n)

        f_2 = cp.sum(cp.log_sum_exp(logodds_vs,axis=1))/(n*3)

        f=f_1-f_2

        obj = dsp.MinimizeMaximize(f)
        constraints = [cp.sum(q) == 1]
        prob = dsp.SaddlePointProblem(obj,constraints,[q],[b])

        try:
            prob.solve(solver="MOSEK")
            importance.append(prob.value)
            weighting.append(q.value)
            variable.append(b.value)
            print(i)

            #site_1 = y_1*(X_1 @ b.value) - logistic(X_1 @ b.value) - y_1*np.log(p1_bar) - (1-y_1)*np.log(1-p1_bar)
            #site_2 = y_2*(X_2 @ b.value) - logistic(X_2 @ b.value) - y_2*np.log(p2_bar) - (1-y_2)*np.log(1-p2_bar)
            #site_3 = y_3*(X_3 @ b.value) - logistic(X_3 @ b.value) - y_3*np.log(p3_bar) - (1-y_3)*np.log(1-p3_bar)

            #site_importance = np.vstack([site_1,site_2,site_3])
            #site_importance = site_importance.T
            #cov = np.cov(site_importance, rowvar = False)

            #var = np.dot(q.value, np.dot(cov, q.value.T))
            #std_clt = np.sqrt(var/1000)
            #stds.append(std_clt)

        except Exception as e:
            print(f"Error on iteration {i}: {e}")
            continue

j= 0
0
j= 1
0
j= 2
0
j= 3
0
j= 4
0
j= 5
0
j= 6
0
j= 7
0
j= 8
0
j= 9
0


In [22]:
for j in range(size):
    variable_j = variable[rep*(j-1):(rep*j),:]
    var_id_j = var_id

(100, 4)

Expression(AFFINE, UNKNOWN, ())