In [1]:
# default_exp gradientMethod

# Gradient Based Estimation

$\hat{\alpha_i}$: the local distCurve estimate for the $i^{th}$ bag

$\hat{\alpha_{c_i}}$: the $i^{th}$ global distCurve estimate, computed from a bootstrapped sample

$w_{ji}$: the contribution of bag j to the $i^{th}$ global estimate

$\tilde{\alpha_i}$: the expected global class prior given the current contribution values and local estimates for each bag

$\tilde{\alpha_i} = \frac{w_{1i} \cdot \hat{\alpha_1} \cdot n_1 + \dots + w_{Ni} \cdot \hat{\alpha_N} \cdot n_N}{w_{1i} \cdot n_1 + \dots + w_{Ni} \cdot n_N} $



Loss for cluster $c_i$


$\mathcal{L}_{c_i} = \frac{1}{2}(\tilde{\alpha_i} - \hat{\alpha_{c_i}})^2$

    def gradientMethod(ds):
        alphaHat : init alphaHat for each bag
        alpha_C : get K global alpha estimates
        init W randomly
        for each iteration:
            # calculate loss given the current values of alphaHat and w
            loss = lossFunction(w[:,1], alpha_C[1]) + ... + lossFunction(w[:,K], alpha_C[K])
            # update alphaHat
            alphaHat = alphaHat - eta * grad(loss)
            # calculate the loss given the current w and new alphaHats
            loss = lossFunction(1) + ... + lossFunction(K)
            w = w - eta * grad(loss)
            getMAE(alphaHat, alpha)

In [2]:
# export
from tqdm.notebook import tqdm

import autograd.numpy as np
from autograd import grad
import autograd.scipy.stats as agss

import matplotlib.pyplot as plt


from multiinstance.dataset_utils import buildDataset
from multiinstance.utils import *
from multiinstance.distanceApproaches import *
from multiinstance.agglomerative_clustering import AgglomerativeClustering
from numba import set_num_threads

import scipy.stats as ss
from multiinstance.data.realData import buildDataset as getRealDS

from glob import glob

import scipy.stats as ss

KeyboardInterrupt: 

In [None]:
# Cap the number of threads numba may use for parallel regions at 8.
set_num_threads(8)

In [None]:
def bimodal():
    """Draw one sample from an equal-weight mixture of Beta(2, 10) and Beta(10, 3).

    A fair coin flip is drawn first and selects which Beta component is
    sampled, so exactly two random draws are consumed per call.
    """
    useFirstComponent = np.random.binomial(1, .5) == 1
    shapeA, shapeB = (2, 10) if useFirstComponent else (10, 3)
    return np.random.beta(shapeA, shapeB)

In [None]:
# export
def getGlobalAlphaHat(dsi,reps=10):
    """Estimate the dataset-wide class prior ``reps`` times via bootstrapping.

    For every repetition a bootstrap sample is drawn from each of the
    ``dsi.N`` bags (``getBootstrapSample(*dsi.getBag(i))``), the first and
    second components of those samples are concatenated across bags, and the
    pooled arrays are handed to ``estimate``. Only the first value returned by
    ``estimate`` is kept.

    Returns a 1-D array of length ``reps`` holding one estimate per repetition.
    """
    estimates = np.zeros(reps)
    progress = tqdm(range(reps),total=reps,desc="getting global estimates")
    for rep in progress:
        # One bootstrap draw per bag, then split the pairs into two tuples.
        perBagSamples = [getBootstrapSample(*dsi.getBag(int(bagIdx)))
                         for bagIdx in range(dsi.N)]
        posParts, unlabeledParts = list(zip(*perBagSamples))
        pooledPos = np.concatenate(posParts)
        pooledUnlabeled = np.concatenate(unlabeledParts)
        estimates[rep], _ = estimate(pooledPos, pooledUnlabeled)
    return estimates
    

def initDS(ds_size=100,n_alpha_ests=50, nP=None, nU=None,
           alphaDistr=lambda: np.random.uniform(0.1,.5),posMean=None, negMean=None,cov=None):
    """Build a synthetic dataset and attach local and global alpha estimates.

    Parameters
    ----------
    ds_size, nP, nU, alphaDistr, posMean, negMean, cov :
        Forwarded unchanged to ``buildDataset``.
    n_alpha_ests : int
        Number of bootstrapped estimates to compute; forwarded to ``addEsts``.

    Returns
    -------
    The dataset object returned by ``addEsts``.
    """
    dsi = buildDataset(ds_size,alphaDistr=alphaDistr, nP=nP,
                       nU=nU,posMean=posMean, negMean=negMean,cov=cov)
    # Forward n_alpha_ests explicitly: previously the argument was dropped and
    # addEsts silently fell back to its own default.
    return addEsts(dsi, n_alpha_ests=n_alpha_ests)

def addEsts(dsi,n_alpha_ests=10):
    """Attach bootstrapped class-prior estimates to ``dsi`` and return it.

    Sets three attributes on ``dsi``:

    * ``alphaHats``: the first output of ``getBagAlphaHats``, reshaped to
      ``n_alpha_ests`` columns;
    * ``curves``: the second output of ``getBagAlphaHats``;
    * ``globalAlphaHats``: ``n_alpha_ests`` estimates from ``getGlobalAlphaHat``.
    """
    bagEstimates, dsi.curves = getBagAlphaHats(dsi, numbootstraps=n_alpha_ests)
    # One row per bag, one column per bootstrap repetition.
    dsi.alphaHats = bagEstimates.reshape((-1, n_alpha_ests))
    dsi.globalAlphaHats = getGlobalAlphaHat(dsi, reps=n_alpha_ests)
    return dsi

In [None]:
# export
def aL0(w,n,globalAlphaHats,a0,varLambda=.1, anchorLambda=.1):
    """Build the loss that is minimized over the local alpha estimates.

    The returned closure keeps ``w`` fixed and scores a candidate array of
    local estimates ``a`` as the sum of:

    * for each (weights, global estimate) pair taken from ``w`` and
      ``globalAlphaHats``: half the squared difference between the global
      estimate and ``sum(weights * a * n) / sum(n * weights)``;
    * ``varLambda`` times the summed variance of ``a`` along axis 1;
    * ``anchorLambda`` times the summed squared difference between ``a`` and
      ``a0``.

    Parameters
    ----------
    w : sequence of weight arrays, paired element-wise with ``globalAlphaHats``.
    n : array of sizes multiplied element-wise with the weights and estimates.
    globalAlphaHats : sequence of global estimates.
    a0 : anchor values subtracted from the candidate estimates.
    varLambda, anchorLambda : coefficients of the two regularizers.

    Returns
    -------
    callable mapping the local estimates to a scalar loss.
    """
    def loss(localAlphaHats):
        total = 0
        for weights, globalEst in zip(w, globalAlphaHats):
            normalizer = 1 / np.sum(np.multiply(n, weights))
            weightedPositives = np.sum(
                np.multiply(np.multiply(weights, localAlphaHats), n))
            expectedGlobal = normalizer * weightedPositives
            total = total + .5 * np.square(globalEst - expectedGlobal)
        spreadPenalty = varLambda * np.sum(np.var(localAlphaHats, axis=1))
        anchorPenalty = anchorLambda * np.sum(np.square(localAlphaHats - a0))
        return total + spreadPenalty + anchorPenalty
    return loss

def wL0(localAlphaHats, n, globalAlphaHats,regLambda=0, wOneLambda=0):
    """Build the loss that is minimized over the contribution weights.

    The returned closure keeps the local estimates fixed and scores a weight
    array ``w`` as the sum of:

    * for each (weights, global estimate) pair taken from ``w`` and
      ``globalAlphaHats``: half the squared difference between the global
      estimate and ``sum(weights * localAlphaHats * n) / sum(n * weights)``;
    * for each such pair: ``wOneLambda * 0.5 * sum((weights - 1) ** 2)``;
    * ``regLambda * np.linalg.norm(w)``.

    Parameters
    ----------
    localAlphaHats : array of local estimates, held constant.
    n : array of sizes multiplied element-wise with the weights and estimates.
    globalAlphaHats : sequence of global estimates, paired with ``w``.
    regLambda, wOneLambda : coefficients of the two regularizers.

    Returns
    -------
    callable mapping ``w`` to a scalar loss.
    """
    def loss(w):
        total = 0
        for weights, globalEst in zip(w, globalAlphaHats):
            weightedSizes = np.sum(np.multiply(n, weights))
            weightedPositives = np.sum(
                np.multiply(np.multiply(weights, localAlphaHats), n))
            expectedGlobal = (1 / weightedSizes) * weightedPositives
            total = total + .5 * np.square(globalEst - expectedGlobal)
            # Penalize the squared distance of every individual weight from 1.
            total = total + wOneLambda * .5 * np.sum(np.square(weights - 1))
        return total + regLambda * np.linalg.norm(w)
    return loss

def g1(dsi, n_epochs=100,eta=1,varLambda=.1, anchorLambda=.1, regLambda=1e-5, wOneLambda=1):
    """Alternating gradient descent on the local estimates and the weights.

    Starting from ``dsi.alphaHats`` and uniformly random weights in
    [0.01, 1), each epoch:

    1. takes one gradient step of size ``eta`` on the local estimates using
       the loss from ``aL0`` and clamps the result to [0, 1];
    2. takes one gradient step of size ``eta`` on the weights using the loss
       from ``wL0`` evaluated at the updated estimates.

    After every epoch the mean absolute error between the row-wise mean of
    the estimates and ``dsi.trueAlphas`` is recorded.

    Returns
    -------
    dict with keys
        ``"maes"``: list of MAE values (initial value followed by one per epoch),
        ``"alphaHats"``: final local estimates,
        ``"weights"``: final weights,
        ``"baseline_mae"``: MAE of ``dsi.globalAlphaHats.mean()`` against
        ``dsi.trueAlphas``.
    """
    globalAlphaHats = dsi.globalAlphaHats
    trueAlphas = dsi.trueAlphas.flatten()

    def bagMAE(estimates):
        # Error of the per-row mean estimate against the true alphas.
        return np.mean(np.abs(estimates.mean(1) - trueAlphas))

    # The initial estimates double as the anchor for the alpha loss.
    a0 = dsi.alphaHats
    a = a0
    # Repeat each bag's size across the estimate columns so n matches a.
    n = np.tile(dsi.numU.reshape((-1,1)), (1,a.shape[1]))
    # One weight array shaped like n per global estimate.
    w = np.random.uniform(low=0.01, high=1,size=(len(globalAlphaHats),) + n.shape)

    maes = [bagMAE(a)]
    for _ in tqdm(range(n_epochs), total=n_epochs,leave=False):
        # Alpha step (weights held fixed), then clamp into [0, 1].
        alphaLossFn = aL0(w,n,globalAlphaHats,a0,varLambda=varLambda, anchorLambda=anchorLambda)
        a = np.clip(a - eta * grad(alphaLossFn)(a), 0, 1)
        # Weight step (updated alphas held fixed).
        wLossFn = wL0(a,n,globalAlphaHats,regLambda=regLambda, wOneLambda=wOneLambda)
        w = w - eta * grad(wLossFn)(w)
        maes.append(bagMAE(a))

    baselineMAE = np.mean(np.abs(trueAlphas - globalAlphaHats.mean()))
    return {"maes":maes,
            "alphaHats": a,
            "weights": w,
            "baseline_mae": baselineMAE}

In [None]:
def yangDistributionDifference(posMean, negMean, cov, p=1):
    """
    Sample-based evaluation of Eq. (7) from :

    Yang, R., Jiang, Y., Mathews, S. et al.
    Data Min Knowl Disc (2019) 33: 995.
    https://doi.org/10.1007/s10618-019-00622-6

    Draws 1000 points from each of two multivariate normals that share
    ``cov`` (means ``posMean`` and ``negMean``), evaluates both densities at
    both samples, and combines them through ``_yangHelper`` and ``_yH2``.
    ``p`` is passed through to ``_yH2`` as the exponent.
    """
    sampleSize = 1000
    # Positive sample is drawn before the negative one.
    posSample = np.random.multivariate_normal(mean=posMean, cov=cov, size=sampleSize)
    negSample = np.random.multivariate_normal(mean=negMean, cov=cov, size=sampleSize)

    def density(points, mean):
        # Density of the normal with the given mean and the shared covariance.
        return ss.multivariate_normal.pdf(points, mean=mean, cov=cov)

    # Naming: <density>PDF_<sample the density is evaluated on>.
    negPDF_neg = density(negSample, negMean)
    posPDF_neg = density(negSample, posMean)
    negPDF_pos = density(posSample, negMean)
    posPDF_pos = density(posSample, posMean)

    zeros = np.zeros(sampleSize)
    _, pdfDiffNeg_NEG, pdfMax_NEG = _yangHelper(negPDF_neg, posPDF_neg, zeros)
    pdfDiffPos_POS, _, pdfMax_POS = _yangHelper(negPDF_pos, posPDF_pos, zeros)
    return _yH2(pdfDiffNeg_NEG, negPDF_neg,
                pdfDiffPos_POS, posPDF_pos,
                posPDF_neg, negPDF_pos,
                pdfMax_NEG, pdfMax_POS,
                p, sampleSize)

def _yangHelper(negPDF,posPDF,z):
    """Split the density difference into its one-sided parts.

    Returns a 3-tuple, in this order:

    * ``max(posPDF - negPDF, z)`` element-wise,
    * ``max(negPDF - posPDF, z)`` element-wise,
    * ``max(negPDF, posPDF)`` element-wise.
    """
    negMinusPos = negPDF - posPDF
    negExcess = np.maximum(negMinusPos, z)
    posExcess = np.maximum(-1 * negMinusPos, z)
    envelope = np.maximum(negPDF, posPDF)
    return posExcess, negExcess, envelope

def _yH2(pdfDiffNeg_NEG, negPDF_NEG, pdfDiffPos_POS, posPDF_POS, posPDF_NEG, negPDF_POS, pdfMax_NEG, pdfMax_POS,p,sampleSize):
    """Combine the sampled density terms into the final ratio.

    Numerator: the ``p``-norm of the two mean ratios
    ``mean(pdfDiffNeg_NEG / negPDF_NEG)`` and ``mean(pdfDiffPos_POS / posPDF_POS)``.

    Denominator: the sum over both samples of ``pdfMax`` divided by the
    equal-weight mixture of the two densities at that sample, divided by
    ``2 * sampleSize``.

    Returns numerator / denominator.
    """
    def raiseTo(base, exponent):
        # Same form as a plain power, with the exponent shaped like the base.
        return np.power(base, np.ones_like(base) * exponent)

    negTerm = np.mean(pdfDiffNeg_NEG / negPDF_NEG)
    posTerm = np.mean(pdfDiffPos_POS / posPDF_POS)
    dPHat = raiseTo(raiseTo(negTerm, p) + raiseTo(posTerm, p), 1/p)

    # Equal-weight mixture density evaluated on each sample.
    mixtureOnNeg = (posPDF_NEG * 0.5) + (negPDF_NEG * 0.5)
    mixtureOnPos = (posPDF_POS * 0.5) + (negPDF_POS * 0.5)
    ratioSum = np.sum(pdfMax_NEG / mixtureOnNeg) + np.sum(pdfMax_POS / mixtureOnPos)
    denominator = ratioSum / (2 * sampleSize)
    return dPHat / denominator

In [None]:
# export 
def plotResults(dsi,res):
    """Plot the outcome of a gradient-method run in a two-panel figure.

    Left panel: the MAE trajectory ``res["maes"]`` with a horizontal line at
    the MAE obtained by using the mean global estimate for every bag.

    Right panel ("Alphas"): for every bag, the range and mean of the final
    estimates ``res["alphaHats"]`` (red), the range and mean of the initial
    estimates ``dsi.alphaHats`` (blue) and the true alpha (green), plus the
    range and mean of ``dsi.globalAlphaHats`` (black) and
    ``dsi.trueGlobalClassPrior`` (orange). The vertical extent of each mark
    is derived from the bag's ``dsi.numU`` value.

    Parameters
    ----------
    dsi : dataset object providing ``trueAlphas``, ``globalAlphaHats``,
        ``alphaHats``, ``numU`` and ``trueGlobalClassPrior``.
    res : dict with at least the keys ``"maes"`` and ``"alphaHats"``.

    Shows the figure with ``plt.show()`` and returns nothing.
    """
    # plot results
    fig,ax = plt.subplots(1,2,figsize=(12,4))
    # Plot MAEs
    ax[0].plot(res["maes"],label="gradient")
    # add global baseline
    globalMAE = np.mean(np.abs(dsi.trueAlphas - dsi.globalAlphaHats.mean()))
    ax[0].hlines(globalMAE, 0,len(res["maes"]),color="black",label="global")
    ax[0].legend()
    ax[0].set_title("Gradient Method MAE")
    # Plot final alphaHat
    N = len(dsi.numU)
    # NOTE: K is only referenced by the commented-out weights plot below.
    K = len(dsi.globalAlphaHats)
    for i in range(N):
        # Red band: min-to-max range of the final estimates for bag i.
        ax[1].fill_between(np.array([res["alphaHats"][i].min(),
                                     res["alphaHats"][i].max()]),
                           y1=0,
                           y2=dsi.numU[i]+.25,
                            alpha=.25,color="red")
        # Red line: mean of the final estimates for bag i.
        ax[1].vlines(res["alphaHats"][i].mean(),0,dsi.numU[i]+1.5,color="red")
    # Black line and band: mean and range of the global estimates.
    ax[1].vlines(dsi.globalAlphaHats.mean(),
                 0,
                 max(dsi.numU),
                 color="black",label=r"$\hat{\alpha_{c_i}}$")
    ax[1].fill_between(np.array([
        dsi.globalAlphaHats.min(),
        dsi.globalAlphaHats.max()]),
    y1=0,y2=np.max(dsi.numU),color="black",alpha=.25)
    
    for i in range(N):
        # Blue band: min-to-max range of the initial estimates for bag i.
        ax[1].fill_between(np.array([
            dsi.alphaHats[i].min(),
            dsi.alphaHats[i].max()
        ]), y1=0,y2=dsi.numU[i],color="blue",alpha=.25)
    # Blue lines: per-bag mean of the initial estimates.
    ax[1].vlines(dsi.alphaHats.mean(1),
                 0,
                 dsi.numU-.15,
                 color="blue",label=r"$\hat{\alpha}_0$")
    # Green lines: true per-bag alphas; orange line: true global class prior.
    ax[1].vlines(dsi.trueAlphas,0,dsi.numU - .25,color="green",label=r"$\alpha$")
    ax[1].vlines(dsi.trueGlobalClassPrior,0,dsi.numU.max(),color="orange",label=r"$\alpha_c$")
    ax[1].set_title("Alphas")
#     ax[1].set_xlim(0,1)
    # Anchor the legend outside the right edge of the axes.
    ax[1].legend(loc="upper right", bbox_to_anchor=(1.25, 1))
    # plot weights
    #ax[2].vlines(res["weights"],0,np.tile(dsi.numU,(K,1)))
    plt.show()

## The global estimate is always closer to the larger bag, drawing small bags away from their true class prior

In [None]:
# Two single-bag datasets with fixed class priors: alpha = .2 with nU=100,
# and alpha = .8 with nU=1000 (presumably nU is the unlabeled-set size; confirm
# against buildDataset).
ds0 = buildDataset(size=1,
             alphaDistr=lambda: np.random.choice([.2]),
             nP=50,nU=100,
             posMean=1,negMean=5,cov=1)

ds1 = buildDataset(size=1,
             alphaDistr=lambda: np.random.choice([.8]),
             nP=75,nU=1000,
             posMean=1,negMean=5,cov=1)

# Combine both bags into ds0 (the result of merge is not reassigned, so this
# relies on merge modifying ds0 in place).
ds0.merge(ds1)

# A single bootstrapped estimate per bag and a single global estimate.
ds0 = addEsts(ds0,n_alpha_ests=1)

# Same run twice with all regularizers off except the weight-to-one penalty:
# g0Res keeps it (wOneLambda=1), g1Res disables it (wOneLambda=0).
g0Res = g1(ds0,n_epochs=1000,varLambda=0, anchorLambda=0,regLambda=0, wOneLambda=1)
plotResults(ds0,g0Res)
g1Res = g1(ds0,n_epochs=1000,varLambda=0, anchorLambda=0,regLambda=0, wOneLambda=0)
plotResults(ds0,g1Res)


In [None]:
# Final-epoch MAE of the wOneLambda=1 run (g0Res) and the wOneLambda=0 run (g1Res).
g0Res["maes"][-1], g1Res["maes"][-1]

In [None]:
# Final local estimates and weights of the wOneLambda=1 run.
g0Res["alphaHats"], g0Res["weights"]

In [None]:
# Final local estimates and weights of the wOneLambda=0 run.
g1Res["alphaHats"], g1Res["weights"]

## Estimated Expected Global Class Prior

In [None]:
# sum(alphaHat * w * n) / sum(w * n) using the weights of the first global estimate.
# The denominator flattens the whole weights array, so it only lines up with
# ds0.numU because this experiment has exactly one global estimate and one
# estimate column (n_alpha_ests=1).
np.sum(np.multiply(np.multiply(g0Res["alphaHats"],
                               g0Res["weights"][0]),
                   ds0.numU.reshape((-1,1)))) / g0Res["weights"].flatten().dot(ds0.numU)

## True Global Class Prior

In [None]:
# Average of the true per-bag alphas weighted by ds0.numU.
aC = ds0.trueAlphas.flatten().dot(ds0.numU) / ds0.numU.sum()

In [None]:
# Display the numU-weighted true global class prior.
aC

## Estimated Global Class Prior

In [None]:
# Mean of the bootstrapped global estimates stored on ds0.
ds0.globalAlphaHats.mean()

## MAE in Global Estimates compared to true global alpha

In [None]:
# Mean absolute difference between aC and each global estimate.
np.mean(np.abs(aC - ds0.globalAlphaHats))

In [None]:
# Accumulators of (MAE * number of bags), so dividing by N later gives a
# bag-weighted average across repetitions.
# NOTE(review): ae1 and ae2 are initialized but never updated in this cell;
# their updates are commented out below.
ae1,ae2,ae3,ae4, bae,lae = 0,0,0,0,0,0
N = 0
for rep in tqdm(range(10),total=10,desc="reps"):
    print("rep: {}".format(rep))
    # build dataset
    n_epochs = 1000
    dsi = initDS(ds_size=100,n_alpha_ests=50,alphaDistr=lambda:np.random.uniform(.05,.95))
    # g4: small weight-to-one penalty; g3: no penalty. All other regularizers off.
    g4Res = g1(dsi,n_epochs=n_epochs,varLambda=0, anchorLambda=0, regLambda=0, wOneLambda=.01)
    plotResults(dsi,g4Res)
    g3Res = g1(dsi,n_epochs=n_epochs,varLambda=0, anchorLambda=0, regLambda=0, wOneLambda=0)
    plotResults(dsi,g3Res)
#     ae1 += g1Res["maes"][-1] * dsi.N
#     ae2 += g2Res["maes"][-1] * dsi.N
    # Final-epoch MAE of each variant.
    ae3 += g3Res["maes"][-1] * dsi.N
    ae4 += g4Res["maes"][-1] * dsi.N
    # Baselines: the global estimate, and the initial local estimates (epoch 0).
    bae += g4Res["baseline_mae"] * dsi.N
    lae += g4Res["maes"][0] * dsi.N
    N += dsi.N

In [None]:
# Report the bag-weighted average MAE of each variant and baseline.
# Only g3 and g4 are accumulated by the experiment loop; the former "g1" and
# "g2" rows always printed 0.0000 because ae1 and ae2 are never updated.
print(" g3: {:.4f}\n g4: {:.4f}\n global: {:.4f}\n local: {:.4f}".format(ae3/N, ae4/N, bae / N, lae/N))

In [None]:
# export
def initRealDS(fileName, size=100,n_alpha_ests=10):
    """Build a dataset from a real data file and attach alpha estimates.

    Parameters
    ----------
    fileName : path passed to ``getRealDS``.
    size : passed to ``getRealDS`` as its second positional argument.
    n_alpha_ests : int
        Number of bootstrapped estimates to compute; forwarded to ``addEsts``.

    The samplers handed to ``getRealDS`` draw ``nP`` uniformly from the
    integers 1..4, ``nU`` uniformly from the integers 20..29, and alpha
    uniformly from [0.05, 1).

    Returns
    -------
    The dataset object returned by ``addEsts``.
    """
    dsi = getRealDS(fileName,size,
                    nPDistr=lambda: np.random.choice(np.arange(1,5).astype(int)),
                    nUDistr=lambda: np.random.choice(np.arange(20,30).astype(int)),
                    alphaDistr=lambda: np.random.uniform(0.05,1))
    # Forward n_alpha_ests explicitly: previously the argument was ignored and
    # addEsts always used its own default.
    return addEsts(dsi, n_alpha_ests=n_alpha_ests)

In [None]:
# NOTE(review): `fileN` is not defined anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel; it looks like a leftover and can
# probably be removed. Confirm before deleting.
fileN

In [None]:
# Accumulators of (MAE * number of bags) across datasets.
# NOTE(review): ae1 is initialized but never updated in this cell.
ae0,ae1,bae,lae = 0, 0, 0, 0
N = 0
# fileNames = glob("/home/dzeiberg/ClassPriorEstimation/rawDatasets/*.mat")[1:]
# NOTE(review): hard-coded absolute path; the loop silently does nothing if
# the directory does not exist on this machine.
fileNames = glob("/ssdata/ClassPriorEstimationPrivate/data/rawDatasets/*.mat")
# Process the files in random order (shuffles the list in place).
np.random.shuffle(fileNames)
for fileName in tqdm(fileNames, total=len(fileNames),desc="reps"):
    # Dataset name = file name without directory and ".mat" extension.
    name = fileName.split("/")[-1].replace(".mat","")
    # build dataset
    size = 100
    dsi = initRealDS(fileName,size=size,
                     n_alpha_ests=10)
    print("dataset: {}".format(name))
    # number of gradient epochs
    n_epochs = 500
    # Run gradient method
    g1Results = g1(dsi,n_epochs=n_epochs, varLambda=0, anchorLambda=0, regLambda=0, wOneLambda=0.1)
    plotResults(dsi,g1Results)
    # Final-epoch MAE, global-estimate baseline, and initial local-estimate MAE.
    ae0 += g1Results["maes"][-1] * dsi.N
    bae += g1Results["baseline_mae"] * dsi.N
    lae += g1Results["maes"][0] * dsi.N
    N += dsi.N

In [None]:
# print("gradient1: {}\n gradient2: {}\n global: {}\n local: {}".format(ae0/N, ae1/N, bae / N, lae/N))