In [None]:
# default_exp gradientMethod

# Gradient Based Estimation

$\hat{\alpha_i}$: the local distCurve estimate for the $i^{th}$ bag

$\hat{\alpha_{c_i}}$: the $i^{th}$ global distCurve estimate, computed from a bootstrapped sample

$w_{ji}$: the contribution of bag j to the $i^{th}$ global estimate

$\tilde{\alpha_i}$: the expected global class prior given the current contribution values and local estimates for each bag

$\tilde{\alpha_i} = \frac{w_{1i} \cdot \hat{\alpha_1} \cdot n_1 + \dots + w_{Ni} \cdot \hat{\alpha_N} \cdot n_N}{w_{1i} \cdot n_1 + \dots + w_{Ni} \cdot n_N} $



Loss for cluster $c_i$


$\mathcal{L}_{c_i} = \frac{1}{2}(\tilde{\alpha_i} - \hat{\alpha_{c_i}})^2$

    def gradientMethod(ds):
        alphaHat : init alphaHat for each bag
        alpha_C : get K global alpha estimates
        init W randomly
        for each iteration:
            # calculate loss given the current values of alphaHat and w
            loss = lossFunction(w[:,1], alpha_C[1]) + ... + lossFunction(w[:,K], alpha_C[K])
            # update alphaHat
            alphaHat = alphaHat - eta * grad(loss)
            # calculate the loss given the current w and new alphaHats
            loss = lossFunction(1) + ... + lossFunction(K)
            w = w - eta * grad(loss)
            getMAE(alphaHat, alpha)

In [None]:
# export
from tqdm.notebook import tqdm

import autograd.numpy as np
from autograd import grad
import autograd.scipy.stats as agss

import matplotlib.pyplot as plt


from multiinstance.dataset_utils import buildDataset
from multiinstance.utils import *
from multiinstance.distanceApproaches import *
from multiinstance.agglomerative_clustering import AgglomerativeClustering
from numba import set_num_threads

import scipy.stats as ss
from multiinstance.data.realData import buildDataset as getRealDS

from glob import glob

In [None]:
# Ask numba for up to 20 threads, but never more than it was launched with:
# set_num_threads raises ValueError when the request exceeds NUMBA_NUM_THREADS,
# which would break this cell on machines with fewer than 20 hardware threads.
from numba import config as numbaConfig
set_num_threads(min(20, numbaConfig.NUMBA_NUM_THREADS))

In [None]:
def bimodal():
    """Draw one sample from a two-component beta mixture.

    A fair coin (a single Bernoulli(0.5) draw) picks the component:
    Beta(2, 10) when the coin comes up 1, Beta(10, 3) otherwise.
    """
    pickFirstMode = np.random.binomial(1, .5)
    if not pickFirstMode:
        return np.random.beta(10, 3)
    return np.random.beta(2, 10)

In [None]:
# export
def getAlphaHat(dsi,reps=10):
    """Estimate the class prior of the whole dataset by pooling every bag.

    Parameters
    ----------
    dsi : object exposing `N` and `getBag(i)`; the results of `getBag` are
        split into a positive part and an unlabeled part per bag.
    reps : int
        Forwarded to `getEsts` as its third positional argument.

    Returns
    -------
    The first element of the pair returned by `getEsts`.
    """
    bags = [dsi.getBag(int(bagIdx)) for bagIdx in range(dsi.N)]
    # Transpose the list of per-bag results into two parallel tuples.
    positiveParts, unlabeledParts = zip(*bags)
    pooledPositives = np.concatenate(positiveParts)
    pooledUnlabeled = np.concatenate(unlabeledParts)
    estimates, _ignored = getEsts(pooledPositives, pooledUnlabeled, reps)
    return estimates
    

In [None]:
def initDS(ds_size=100,n_alpha_ests=50, nP=None, nU=None,
           alphaDistr=lambda: np.random.uniform(0.1,.5),posMean=None, negMean=None,cov=None):
    """Build a synthetic dataset and attach per-bag and global alpha estimates.

    Parameters
    ----------
    ds_size : passed to `buildDataset` as its first positional argument.
    n_alpha_ests : used both as `numbootstraps` for `getBagAlphaHats` and as
        `reps` for `getAlphaHat`.
    nP, nU, alphaDistr, posMean, negMean, cov : forwarded unchanged to
        `buildDataset` as keyword arguments.

    Returns
    -------
    The dataset returned by `buildDataset`, with `alphaHats`, `curves` and
    `globalAlphaHats` attributes set on it.
    """
    builderOptions = dict(alphaDistr=alphaDistr, nP=nP, nU=nU,
                          posMean=posMean, negMean=negMean, cov=cov)
    dataset = buildDataset(ds_size, **builderOptions)
    # NOTE: an addTransformScores(dataset) step is disabled in this notebook.
    bagEstimates = getBagAlphaHats(dataset, numbootstraps=n_alpha_ests)
    dataset.alphaHats, dataset.curves = bagEstimates
    dataset.globalAlphaHats = getAlphaHat(dataset, reps=n_alpha_ests)
    return dataset

In [None]:
# export

def getAlphaLoss(w,n, alphaHats):
    """Build the squared-error loss over the local alpha estimates, weights fixed.

    Parameters
    ----------
    w : iterable of weight vectors, one per entry of `alphaHats`.
    n : vector of sizes, dotted against the weights and weighted estimates.
    alphaHats : iterable of target (global) estimates, paired with `w` by zip.

    Returns
    -------
    A function `loss(localAlphaHats)` giving the sum over (w_i, target_i) of
    0.5 * (target_i - tilde_i)**2, where
    tilde_i = dot(localAlphaHats * w_i, n) / dot(w_i, n).
    """
    def loss(localAlphaHats):
        def clusterError(weights, target):
            # Multiply by the reciprocal, as the original does, to keep the
            # floating-point result identical.
            normalizer = 1 / np.dot(weights, n)
            weightedSum = np.dot(np.multiply(localAlphaHats, weights), n)
            return .5 * np.square(target - normalizer * weightedSum)
        return sum(clusterError(wi, aH) for wi, aH in zip(w, alphaHats))
    return loss

def getAlphaLossWithLL(w,n, alphaHats,aMLEVals, bMLEVals, locMLE, scaleMLE):
    """Build the alpha loss: squared error to the global estimates minus a
    small per-bag beta-density term.

    Parameters
    ----------
    w : iterable of weight vectors, one per entry of `alphaHats`.
    n : vector of sizes aligned with the flat `localAlphaHats` vector.
    alphaHats : iterable of target (global) estimates, paired with `w` by zip.
    aMLEVals, bMLEVals, locMLE, scaleMLE : per-bag beta parameters
        (shape a, shape b, location, scale), one entry per bag.

    Returns
    -------
    A function `loss(localAlphaHats)`. `localAlphaHats` is the flat vector of
    local estimates laid out bag after bag (all estimates of bag 0, then
    bag 1, ...); its length must be a multiple of `len(aMLEVals)`.

    Notes
    -----
    The second term subtracts 1e-3 times the summed beta *density* (pdf) of
    each bag's standardized estimates. It is a likelihood term, not a
    log-likelihood, despite the "LL" in the name.
    """
    numBags = len(aMLEVals)

    def loss(localAlphaHats):
        lossVal = 0
        for wi, aH in zip(w, alphaHats):
            tilde = (1 / np.dot(wi,n)) * np.dot(np.multiply(localAlphaHats,wi),n)
            lossVal = lossVal + .5 * np.square(aH - tilde)
        if numBags:
            # Regroup the flat vector into one row per bag, so every estimate
            # of bag k is scored under bag k's fitted beta. Zipping the flat
            # vector directly would pair element k with bag k and drop the rest.
            perBag = np.reshape(localAlphaHats, (numBags, -1))
            for bagIdx in range(numBags):
                standardized = (perBag[bagIdx] - locMLE[bagIdx]) / scaleMLE[bagIdx]
                density = agss.beta.pdf(standardized,
                                        a=aMLEVals[bagIdx], b=bMLEVals[bagIdx])
                lossVal = lossVal - 1e-3 * density.sum()
        return lossVal
    return loss
    
def getWLoss(a,n, alphaHats, regLambda=1e-5):
    """Build the loss over the weight matrix, local alpha estimates fixed.

    Parameters
    ----------
    a : vector of local alpha estimates.
    n : vector of sizes, dotted against the weights and weighted estimates.
    alphaHats : iterable of target (global) estimates, paired with rows of `w`.
    regLambda : multiplier on `np.linalg.norm(w)` added to the loss.

    Returns
    -------
    A function `loss(w)` giving the sum over rows w_i of
    0.5 * (target_i - dot(a * w_i, n) / dot(w_i, n))**2
    plus `regLambda * np.linalg.norm(w)`.
    """
    def loss(w):
        def clusterError(weights, target):
            # Reciprocal-then-multiply mirrors the original operation order.
            scale = 1 / np.dot(weights, n)
            weightedSum = np.dot(np.multiply(a, weights), n)
            return .5 * np.square(target - scale * weightedSum)
        fitError = sum(clusterError(wi, aH) for wi, aH in zip(w, alphaHats))
        return fitError + regLambda * np.linalg.norm(w)
    return loss

In [None]:
def g2(dsi, n_epochs=100,eta=1,regLambda=1e-5):
    """Alternate gradient steps on the local alpha estimates and the weights.

    Parameters
    ----------
    dsi : dataset exposing `numU`, `alphaHats` (2-D, one row per bag),
        `globalAlphaHats` and `trueAlphas`.
    n_epochs : number of alternating (alpha step, weight step) rounds.
    eta : step size used for both updates.
    regLambda : forwarded to `getWLoss`.

    Returns
    -------
    list of length `n_epochs + 1`: the mean absolute error between the
    row-means of the local estimates and `dsi.trueAlphas`, before the first
    round and after each round.
    """
    numBags = dsi.numU.shape[0]  # read as in the original; not used afterwards
    clusterTargets = dsi.globalAlphaHats
    localEsts = dsi.alphaHats
    estsPerBag = localEsts.shape[1]
    # Repeat each bag's size once per estimate so it lines up with the
    # flattened estimate matrix.
    bagSizes = np.tile(dsi.numU.reshape((-1,1)), (1,estsPerBag)).flatten()
    weights = np.random.uniform(low=0.01, high=1,
                                size=(len(clusterTargets), bagSizes.shape[0]))

    def currentMAE():
        return np.mean(np.abs(localEsts.mean(1) - dsi.trueAlphas.flatten()))

    maes = [currentMAE()]
    for _ in tqdm(range(n_epochs), total=n_epochs):
        # Alpha step: weights held fixed.
        alphaStep = grad(getAlphaLoss(weights, bagSizes, clusterTargets))
        localEsts = localEsts - eta * alphaStep(localEsts.flatten()).reshape(localEsts.shape)
        # Weight step: uses the freshly updated estimates.
        weightStep = grad(getWLoss(localEsts.flatten(), bagSizes, clusterTargets,
                                   regLambda=regLambda))
        weights = weights - eta * weightStep(weights)
        maes.append(currentMAE())
    return maes

In [None]:
def g2LL(dsi, n_epochs=100,eta=1):
    """Alternating gradient method with a per-bag beta-density term and clipped steps.

    Each epoch: fit a beta distribution (a, b, loc, scale) to every bag's row
    of local estimates, take a clipped gradient step on the local estimates
    using the loss from `getAlphaLossWithLL`, then take a clipped gradient
    step on the weights using the loss from `getWLoss` (default regLambda).

    Parameters
    ----------
    dsi : dataset exposing `numU`, `alphaHats` (2-D, one row per bag),
        `globalAlphaHats` and `trueAlphas`.
    n_epochs : number of alternating rounds.
    eta : step size applied to the clipped gradients of both updates.

    Returns
    -------
    list of length `n_epochs + 1`: mean absolute error between the row-means
    of the local estimates and `dsi.trueAlphas`, before the first round and
    after each round.

    Raises
    ------
    AssertionError if the local estimates contain inf (checked before each
    fit) or inf/NaN (checked after each alpha step). Any exception from the
    fit is re-raised after printing the offending bag's estimates.
    """
    NBags = dsi.numU.shape[0]
    globalAlphaHats = dsi.globalAlphaHats
    # initialize values for gradient method
    a = dsi.alphaHats
    # repeat each bag's unlabeled count once per estimate, flattened so it
    # lines up element-for-element with a.flatten()
    n = np.tile(dsi.numU.reshape((-1,1)), (1,a.shape[1])).flatten()
    # one weight row per global estimate, one column per flattened local estimate
    w = np.random.uniform(low=0.01, high=1,size=(len(globalAlphaHats),
                                                 n.shape[0]))
    # MAE of the unmodified local estimates (row means vs. true alphas)
    maes = [np.mean(np.abs(a.mean(1) - dsi.trueAlphas.flatten()))]
    for i in tqdm(range(n_epochs), total=n_epochs):
        # per-bag beta parameters, refit every epoch from the current estimates
        aMLEVals = np.zeros(NBags)
        bMLEVals = np.zeros_like(aMLEVals)
        locMLE =np.zeros_like(aMLEVals)
        scaleMLE = np.zeros_like(aMLEVals)
        for bagNum in range(NBags):
            try:
                # checks the whole matrix (not just this bag) on every iteration
                assert ~np.any(np.isinf(a))
                # scipy's beta.fit result is unpacked as (a, b, loc, scale)
                aMLEVals[bagNum],bMLEVals[bagNum],locMLE[bagNum],scaleMLE[bagNum] = ss.beta.fit(a[bagNum])
            except:
                # show the estimates that failed, then propagate the error
                print(a[bagNum])
                raise
        # A iteration
        alphaLossFn = getAlphaLossWithLL(w,n,globalAlphaHats,aMLEVals, bMLEVals, locMLE, scaleMLE)
        alphaGrad = grad(alphaLossFn)
        # gradient is taken w.r.t. the flattened estimates, then reshaped back
        agrad = alphaGrad(a.flatten()).reshape(a.shape)
        # clip each gradient component to [-1, 1] before stepping
        a = a - eta * np.maximum(np.ones_like(agrad) * -1, np.minimum(np.ones(agrad.shape),agrad))
#         print(a)
        # fail fast if the step produced non-finite estimates
        assert ~np.isinf(a).any() and ~np.isnan(a).any()
        # W iteration
        wLossFn = getWLoss(a.flatten(),n,globalAlphaHats)
        # disabled alternative: drive the weight loss with the fitted beta means
#         mu = np.tile(aMLEVals / (aMLEVals + bMLEVals), (1,a.shape[1]))
#         wLossFn = getWLoss(mu.flatten(),n,globalAlphaHats)
        wGrad = grad(wLossFn)
        wgradval = wGrad(w)
        # clip each weight-gradient component to [-1, 1]
        wgradval = np.maximum(np.ones_like(wgradval) * -1, np.minimum(wgradval, np.ones_like(wgradval)))
        w = w - eta * wgradval
        maes.append(np.mean(np.abs(a.mean(1) - dsi.trueAlphas.flatten())))
    return maes

In [None]:
def yangDistributionDifference(posMean, negMean, cov, p=1):
    """
    Sample-based distribution difference between two Gaussians that share
    a covariance, per Eq. (7) from :

    Yang, R., Jiang, Y., Mathews, S. et al.
    Data Min Knowl Disc (2019) 33: 995.
    https://doi.org/10.1007/s10618-019-00622-6

    Draws 1000 points from each of N(posMean, cov) and N(negMean, cov),
    evaluates both densities on both samples, and combines them through
    `_yangHelper` and `_yH2`. `p` is forwarded to `_yH2`.
    """
    sampleSize = 1000
    # Positive draws first, then negative: same RNG consumption order as before.
    posSample = np.random.multivariate_normal(mean=posMean, cov=cov, size=sampleSize)
    negSample = np.random.multivariate_normal(mean=negMean, cov=cov, size=sampleSize)

    def density(points, mean):
        return ss.multivariate_normal.pdf(points, mean=mean, cov=cov)

    negPDF_neg, posPDF_neg = density(negSample, negMean), density(negSample, posMean)
    negPDF_pos, posPDF_pos = density(posSample, negMean), density(posSample, posMean)
    zeros = np.zeros(sampleSize)
    # Only the "negative excess" is needed on the negative sample and the
    # "positive excess" on the positive sample; the other output is discarded.
    _, pdfDiffNeg_NEG, pdfMax_NEG = _yangHelper(negPDF_neg, posPDF_neg, zeros)
    pdfDiffPos_POS, _, pdfMax_POS = _yangHelper(negPDF_pos, posPDF_pos, zeros)
    return _yH2(pdfDiffNeg_NEG, negPDF_neg, pdfDiffPos_POS, posPDF_pos,
                posPDF_neg, negPDF_pos, pdfMax_NEG, pdfMax_POS, p, sampleSize)

def _yangHelper(negPDF,posPDF,z):
        pdfDiff = negPDF - posPDF
        pdfDiffNeg = np.maximum(pdfDiff, z)
        minus1 = -1 * pdfDiff
        pdfDiffPos = np.maximum(minus1, z)
        pdfMax = np.maximum(negPDF, posPDF)
        return pdfDiffPos, pdfDiffNeg, pdfMax

def _yH2(pdfDiffNeg_NEG, negPDF_NEG, pdfDiffPos_POS, posPDF_POS, posPDF_NEG, negPDF_POS, pdfMax_NEG, pdfMax_POS,p,sampleSize):
        numerator1 = np.mean(pdfDiffNeg_NEG / negPDF_NEG)
        numerator2 = np.mean(pdfDiffPos_POS / posPDF_POS)
        sumVecs = np.power(numerator1, np.ones_like(numerator1) * p) + np.power(numerator2, np.ones_like(numerator2) * p)
        dPHat = np.power(sumVecs, np.ones_like(sumVecs) * (1/p))
        dTermNeg = (posPDF_NEG * 0.5) + (negPDF_NEG * 0.5)
        dTermPos = (posPDF_POS * 0.5) + (negPDF_POS * 0.5)
        denominator = (np.sum(pdfMax_NEG / dTermNeg) + np.sum(pdfMax_POS / dTermPos)) / (2 * sampleSize)
        return dPHat / denominator

In [None]:
# for rep in tqdm(range(10),total=10,desc="reps"):
#     # build dataset
#     n_epochs = 60
#     n_epochs2 = 60
#     dsi = initDS(ds_size=50, n_alpha_ests=10)
#     # Run gradient method
#     maes = g2(dsi,n_epochs=n_epochs)
#     # Run gradient method with the beta-likelihood term
#     maes2 = g2LL(dsi,n_epochs=n_epochs2)
#     # plot results
#     fig,ax = plt.subplots(1,5,figsize=(20,4))
#     # Plot MAEs
#     ax[0].plot(maes,label="gradient")
#     ax[1].plot(maes2, label="gradient w/ LL")
#     globalMAE = np.mean(np.abs(dsi.trueAlphas - dsi.globalAlphaHats.mean()))
#     ax[0].hlines(globalMAE, 0,len(maes),color="black")
#     ax[1].hlines(globalMAE, 0,len(maes2),color="black")
#     ax[0].legend()
#     ax[0].set_title("Gradient Method MAE")
#     ax[1].set_title("Gradient Method with LL MAE")
#     ax[2].hist(dsi.numP)
#     ax[2].set_title("Num Positive")
#     ax[3].hist(dsi.numU)
#     ax[3].set_title("Num Unlabeled")
#     ax[4].hist([h[:n].sum() for h,n in zip(dsi.hiddenLabels, dsi.numU)])
#     ax[4].set_title("Num Unlabeled Positive")
#     fig.suptitle("Distr Distance: {:.4f}    dim:{}".format(yangDistributionDifference(dsi.posDistMean,dsi.negDistMean,dsi.cov),
#                                                           dsi.posDistMean.shape))
#     plt.savefig("figs/nb_10/fig_{}.pdf".format(rep),format="pdf")
#     plt.show()

In [None]:
# Scratch check: draw a single integer from {0, 1, 2, 3, 4}; the same
# choice-over-arange pattern supplies the bag-size distributions passed to getRealDS.
np.random.choice(np.arange(0,5).astype(int))

In [None]:
def initRealDS(filename, size=100,n_alpha_ests=10):
    """Build a dataset from a real data file and attach alpha estimates.

    Parameters
    ----------
    filename : path of the data file, passed to `getRealDS` as its first
        positional argument.
    size : passed to `getRealDS` as its second positional argument.
    n_alpha_ests : used both as `numbootstraps` for `getBagAlphaHats` and as
        `reps` for `getAlphaHat`.

    Returns
    -------
    The dataset returned by `getRealDS`, with `alphaHats`, `curves` and
    `globalAlphaHats` attributes set on it.

    Notes
    -----
    Bag sizes are drawn per call to the supplied samplers: positives from
    {1, ..., 4}, unlabeled from {20, ..., 29}, and the class prior from
    Uniform(0.05, 1).
    """
    # Use the argument, not a same-sounding global: the original body read
    # `fileName`, which only resolved when a caller's loop variable had that name.
    dsi = getRealDS(filename,size,
                    nPDistr=lambda: np.random.choice(np.arange(1,5).astype(int)),
                    nUDistr=lambda: np.random.choice(np.arange(20,30).astype(int)),
                    alphaDistr=lambda: np.random.uniform(0.05,1))
    dsi.alphaHats,dsi.curves = getBagAlphaHats(dsi,numbootstraps=n_alpha_ests)
    dsi.globalAlphaHats = getAlphaHat(dsi,reps=n_alpha_ests)
    return dsi

In [None]:
# Experiment: for every real dataset, run the gradient method and the
# agglomerative-clustering baseline, then save a six-panel summary figure.
import os

# NOTE(review): machine-specific absolute path. Also, glob does not promise an
# ordering, so which file the [1:] slice skips may vary between runs; confirm intent.
fileNames = glob("/home/dzeiberg/ClassPriorEstimation/rawDatasets/*.mat")[1:]
figDir = "figs/nb_10/real/reg-2"
# savefig does not create missing directories; make sure the target exists
# before spending thousands of epochs on a dataset.
os.makedirs(figDir, exist_ok=True)
for fileName in tqdm(fileNames, total=len(fileNames),desc="reps"):
    name = fileName.split("/")[-1].replace(".mat","")
    # build dataset
    n_epochs = 5000
    size = 100
    dsi = initRealDS(fileName,size=size,
                     n_alpha_ests=10)
    # Run gradient method
    maes = g2(dsi,n_epochs=n_epochs,eta=.1,regLambda=0.01)
    # Run the clustering baseline
    agg = AgglomerativeClustering(dsi, 0.5)
    agg.cluster()
    aggMAES = agg.meanAbsErrs
    fig,ax = plt.subplots(1,6,figsize=(24,4))
    # Plot MAEs
    ax[0].plot(maes,label="gradient")
    ax[1].plot(aggMAES,label="clustering")
    # Reference line: error of using the mean global estimate for every bag
    globalMAE = np.mean(np.abs(dsi.trueAlphas - dsi.globalAlphaHats.mean()))
    ax[0].hlines(globalMAE, 0,len(maes),color="black",label="global")
    ax[1].hlines(globalMAE, 0,len(aggMAES),color="black",label="global")
    ax[0].set_ylim(0,1)
    ax[1].set_ylim(0,1)
    # The curves carry labels; draw the legends so they are actually shown.
    ax[0].legend()
    ax[1].legend()
    ax[0].set_title("Gradient Method MAE")
    ax[1].set_title("Clustering Method MAE")
    ax[2].hist(dsi.numP)
    ax[2].set_title("Num Positive")
    ax[3].hist(dsi.numU)
    ax[3].set_title("Num Unlabeled")
    # hidden labels are truncated to each bag's unlabeled count before summing
    ax[4].hist([h[:n].sum() for h,n in zip(dsi.hiddenLabels, dsi.numU)])
    ax[4].set_title("Num Unlabeled Positive")
    ax[5].hist(dsi.trueAlphas)
    ax[5].set_title("Class Priors")
    plt.suptitle(name)
    plt.savefig("{}/{}.pdf".format(figDir, name),format="pdf")
    plt.show()
    # Release the figure so a long run over many datasets does not pile them up.
    plt.close(fig)


The number of iterations needed to plateau seems to scale with the number of bags

converging to the global estimate MAE seems to indicate that the local estimates are just becoming the global ones
    reducing the number of iterations won't do the trick
    adding more weight regularization 
    