In [1]:
# basic (built-in) Python packages
import numpy as np
from numpy.linalg import norm
import pandas as pd
import matplotlib.pyplot as plt

# advanced (built-in) Python packages
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from scipy.optimize import minimize

# my implemented Python functions and classes
from data.generate_data import generate_data
from model.BaseModel import BaseModel
from model.Initial import Initial
from model.OS import OS
from model.MS import MS
from model.ORACLE_beta import ORACLE_beta
from model.ORACLE_sigma import ORACLE_sigma

In [2]:
def compute_rmse(est, true):
    est = est.ravel()
    true = true.ravel()
    assert est.shape == true.shape
    length = len(est)
    rmse = norm(est - true) / np.sqrt(length)
    return rmse

# Hyperparameters

In [3]:
seed = 1
np.random.seed(seed=seed)

N = 2000        # the size of the unlabeled dataset
r = 0.5
n = int(N * r)            # pilot sample size
alpha = 0.5 # n**(-0.1)  
print(f"alpha={alpha:.4f}")

p = 20          # feature dimension
K = 1           # (K+1) classes
M = int(0.5 * int(n / np.log(n)))          # the size of the annotator pool #int(n / np.log(n))
print(f"[n*alpha={int(n*alpha)}/N={N}]")
print(f"[M={M}] vs [n*alpha={int(n*alpha)}]")

alpha=0.5000
[n*alpha=500/N=2000]
[M=72] vs [n*alpha=500]


# Data Generation

In [4]:
beta, sigma, theta, X, Y, X1, X2, Y1, Y2, A1, AY1 = generate_data(K, p, N, n, M, alpha, seed=0)

True Labels 0    1084
1     916
dtype: int64 



In [5]:
sigma

array([0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
       0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
       0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 2. , 2. , 2. ,
       2. , 2. , 2. , 2. , 2. , 2. , 2. , 2. , 2. , 2. , 2. , 2. , 2. ,
       2. , 2. , 2. , 2. , 2. , 2. , 2. , 2. , 2. , 2. , 2. , 2. , 2. ,
       2. , 2. , 2. , 2. , 2. , 2. , 2. ])

# Initial Estimator

In [6]:
init_model = Initial(X1, AY1, A1, K)
init_beta, init_sigma, init_betams = init_model.init_param()

In [8]:
init_beta_rmse = compute_rmse(init_beta, beta[1:])
init_sigma_rmse = compute_rmse(init_sigma, sigma)
print(f"Init beta:  {init_beta_rmse:.7f}")
print(f"Init sigma: {init_sigma_rmse:.7f}")

Init beta:  0.0058207
Init sigma: 0.1998974


# One Step / Two Step / Multiple Step

- OS:

In [9]:
os_model = OS(X1, AY1, A1, K, init_beta, init_sigma)
os_beta, os_sigma = os_model.update_alg(max_steps=1, tol=1e-5)

os_beta_rmse = compute_rmse(os_beta, beta[1:])
os_sigma_rmse = compute_rmse(os_sigma, sigma)
print(f"\nOS beta: {os_beta_rmse:.7f}")
print(f"OS sigma: {os_sigma_rmse:.7f}")

######## [Step 1] ########
norm(gradient): 1.4860345

OS beta: 0.0061072
OS sigma: 0.1361674


- TS:

In [11]:
ts_model = MS(X1, AY1, A1, K, init_beta, init_sigma)
ts_beta, ts_sigma = ts_model.update_alg(max_steps=2, tol=1e-5, true_beta=beta[1:].ravel())

######## [Step 1] ########
norm(gradient): 1.4860345
RMSE(beta): 0.0273124
######## [Step 2] ########
norm(gradient): 0.1432535
RMSE(beta): 0.0245739


In [12]:
ts_beta_rmse = compute_rmse(ts_beta, beta[1:])
ts_sigma_rmse = compute_rmse(ts_sigma, sigma)
print(f"TS beta: {ts_beta_rmse:.7f}")
print(f"TS sigma: {ts_sigma_rmse:.7f}")

TS beta: 0.0054949
TS sigma: 0.1361674


- MS:

In [13]:
ms_model = MS(X1, AY1, A1, K, init_beta, init_sigma)
ms_beta, ms_sigma = ms_model.update_alg(max_steps=3, tol=1e-5, true_beta=beta[1:].ravel(), echo=False)

In [14]:
ms_beta_rmse = compute_rmse(ms_beta, beta[1:])
ms_sigma_rmse = compute_rmse(ms_sigma, sigma)
print(f"MS beta:  {ms_beta_rmse:.7f}")
print(f"MS sigma: {ms_sigma_rmse:.7f}")

MS beta:  0.0054983
MS sigma: 0.1361674


# Oracle_beta

In [15]:
orab_model = ORACLE_beta(X1, AY1, A1, K, init_beta, sigma)
oracle_beta = orab_model.update_alg(max_steps=5, tol=1e-5, true_beta=beta[1:])

oracle_beta_rmse = compute_rmse(oracle_beta, beta[1:])
print(f"Oracle beta: {oracle_beta_rmse:.7f}")

######## [Step 1] ########
norm(gradient): 0.9505189
RMSE(beta): 0.0264329
######## [Step 2] ########
norm(gradient): 0.0131490
RMSE(beta): 0.0268268
######## [Step 3] ########
norm(gradient): 0.0000107
RMSE(beta): 0.0268272
Oracle beta: 0.0053082


In [16]:
# diff_mom, diff_son = orabeta_model.check(beta[1:], sigma)
# plt.boxplot([diff_mom, diff_son.ravel()])
# plt.show()

# Oracle_sigma

In [17]:
oras_model = ORACLE_sigma(X1, AY1, A1, K, init_beta, init_sigma)
oracle_sigma = oras_model.update_alg(max_steps=10, tol=1e-5, true_sigma=sigma)

######## [Step 1] ########
norm(sigma): 0.1412845
RMSE(sigma): 1.1554190
######## [Step 2] ########
norm(sigma): 0.0259453
RMSE(sigma): 1.0943676
######## [Step 3] ########
norm(sigma): 0.0017965
RMSE(sigma): 1.0910246
######## [Step 4] ########
norm(sigma): 0.0000207
RMSE(sigma): 1.0909568
######## [Step 5] ########
norm(sigma): 0.0000000
RMSE(sigma): 1.0909567


In [18]:
oracle_sigma_rmse = compute_rmse(oracle_sigma, sigma)
print(f"Oracle sigma: {oracle_sigma_rmse:.7f}")

Oracle sigma: 0.1285705


# Results

In [19]:
print(f"Init beta: {init_beta_rmse:.7f}")
print(f"OS   beta: {os_beta_rmse:.7f}")
print(f"TS   beta: {ts_beta_rmse:.7f}")
print(f"MS   beta: {ms_beta_rmse:.7f}")
print(f"Ora  beta: {oracle_beta_rmse:.7f}")

Init beta: 0.0058207
OS   beta: 0.0061072
TS   beta: 0.0054949
MS   beta: 0.0054983
Ora  beta: 0.0053082


In [21]:
print(f"Init sigma: {init_sigma_rmse:.7f}")
print(f"OS   sigma: {os_sigma_rmse:.7f}")
print(f"TS   sigma: {ts_sigma_rmse:.7f}")
print(f"MS   sigma: {ms_sigma_rmse:.7f}")
print(f"Ora  sigma: {oracle_sigma_rmse:.7f}")

Init sigma: 0.1998974
OS   sigma: 0.1361674
TS   sigma: 0.1361674
MS   sigma: 0.1361674
Ora  sigma: 0.1285705


# Compare Experiments

In [5]:
p = 20                           # feature dimension
K = 1                            # (K+1) classes
r = 0.5

In [6]:
B = 1000
RMSE_results = []
NAME_results = []

In [7]:
alpha_list = [0.25, 0.75]
N_list = [2000, 5000, 10000,]

In [None]:
for N in N_list:
    n = int(N * r)                 # pilot sample size
    M = int(0.5 * n / np.log(n))   # the size of the annotator pool
    
    for alpha in alpha_list:
        print(f"[n*alpha={int(n*alpha)}/N={N}]")
        print(f"[M={M}] vs [n*alpha={int(n*alpha)}]")
        
        for seed in range(0, int(B/2)):
            RMSE_list = [seed, n, alpha, M, K]
            NAME_list = ["seed", "n", "alpha", "M", "K"]
            np.random.seed(seed)
            beta, sigma, theta, X, Y, X1, X2, Y1, Y2, A1, AY1 = generate_data(K, p, N, n, M, alpha, seed=seed)
        
            # Initial Estimator
            print(f"\n########## Initial Estimator ##########")
            init_model = Initial(X1, AY1, A1, K)
            init_beta, init_sigma, init_betams = init_model.init_param()
            init_beta_rmse = compute_rmse(init_beta, beta[1:])
            init_sigma_rmse = compute_rmse(init_sigma, sigma)
            RMSE_list += [init_beta_rmse, init_sigma_rmse]
            NAME_list += ["init_beta", "init_sigma"]
            
            # OS (One-Step) Estimator 
            print(f"\n########## OS Estimator ##########")
            os_model = OS(X1, AY1, A1, K, init_beta, init_sigma)
            os_beta, os_sigma = os_model.update_alg(max_steps=1, tol=1e-5, true_beta=beta[1:].ravel())
            os_beta_rmse = compute_rmse(os_beta, beta[1:])
            os_sigma_rmse = compute_rmse(os_sigma, sigma)
            RMSE_list += [os_beta_rmse, os_sigma_rmse]
            NAME_list += ["os_beta", "os_sigma"]
        
            # TS (Two-Step) Estimator 
            print(f"\n########## TS Estimator ##########")
            ts_model = MS(X1, AY1, A1, K, init_beta, init_sigma)
            ts_beta, ts_sigma = ts_model.update_alg(max_steps=2, tol=1e-5, true_beta=beta[1:].ravel())
            ts_beta_rmse = compute_rmse(ts_beta, beta[1:])
            ts_sigma_rmse = compute_rmse(ts_sigma, sigma)
            RMSE_list += [ts_beta_rmse, ts_sigma_rmse]
            NAME_list += ["ts_beta", "ts_sigma"]
        
            # MS (Multiple-Step) Estimator 
            print(f"\n########## MS Estimator ##########")
            ms_model = MS(X1, AY1, A1, K, init_beta, init_sigma)
            ms_beta, ms_sigma = ms_model.update_alg(max_steps=3, tol=1e-5, true_beta=beta[1:].ravel())
            ms_beta_rmse = compute_rmse(ms_beta, beta[1:])
            ms_sigma_rmse = compute_rmse(ms_sigma, sigma)
            RMSE_list += [ms_beta_rmse, ms_sigma_rmse]
            NAME_list += ["ms_beta", "ms_sigma"]
        
            # Oracle_beta
            print(f"\n########## ORACLE_beta Estimator ##########")
            oracle_model = ORACLE_beta(X1, AY1, A1, K, init_beta, sigma)
            oracle_beta = oracle_model.update_alg(max_steps=10, tol=1e-5, true_beta=beta[1:])
            oracle_beta_rmse = compute_rmse(oracle_beta, beta[1:])
            
            # Oracle_sigma
            print(f"\n########## ORACLE_sigma Estimator ##########")
            oracle_model = ORACLE_sigma(X1, AY1, A1, K, beta[1:], init_sigma)
            oracle_sigma = oracle_model.update_alg(max_steps=10, tol=1e-5, true_sigma=sigma)
            oracle_sigma_rmse = compute_rmse(oracle_sigma, sigma)
            RMSE_list += [oracle_beta_rmse, oracle_sigma_rmse]
            NAME_list += ["oracle_beta", "oracle_sigma"]
        
            # Print Results
            print()
            print(f"Init beta: {init_beta_rmse:.7f}")
            print(f"Ora  beta: {oracle_beta_rmse:.7f}")
            print(f"OS   beta: {os_beta_rmse:.7f}")
            print(f"TS   beta: {ts_beta_rmse:.7f}")
            print(f"MS   beta: {ms_beta_rmse:.7f}")
            print()
            print(f"Init sigma: {init_sigma_rmse:.7f}")
            print(f"Ora  sigma: {oracle_sigma_rmse:.7f}")
            print(f"OS   sigma: {os_sigma_rmse:.7f}")
            print(f"TS   sigma: {ts_sigma_rmse:.7f}")
            print(f"MS   sigma: {ms_sigma_rmse:.7f}")
        
            # Record Results
            RMSE_results.append(RMSE_list)
            a = pd.DataFrame(RMSE_results, columns=NAME_list)
            a.to_csv(f"./results/[K={K}]rmse_data3.csv")

[n*alpha=250/N=2000]
[M=72] vs [n*alpha=250]
True Labels 0    1084
1     916
dtype: int64 


########## Initial Estimator ##########

########## OS Estimator ##########
######## [Step 1] ########
norm(gradient): 1.3033490
RMSE(beta): 0.0187813

########## TS Estimator ##########
######## [Step 1] ########
norm(gradient): 1.3033490
RMSE(beta): 0.0187813
######## [Step 2] ########
norm(gradient): 0.3184381
RMSE(beta): 0.0221279

########## MS Estimator ##########
######## [Step 1] ########
norm(gradient): 1.3033490
RMSE(beta): 0.0187813
######## [Step 2] ########
norm(gradient): 0.3184381
RMSE(beta): 0.0221279
######## [Step 3] ########
norm(gradient): 0.1963616
RMSE(beta): 0.0217777

########## ORACLE_beta Estimator ##########
######## [Step 1] ########
norm(gradient): 0.8618527
RMSE(beta): 0.0243329
######## [Step 2] ########
norm(gradient): 0.0200554
RMSE(beta): 0.0247440
######## [Step 3] ########
norm(gradient): 0.0000597
RMSE(beta): 0.0247461
######## [Step 4] ########
norm(gradien