In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score

from easydict import EasyDict

import scipy.stats as ss

In [None]:
from glob import glob

In [None]:
from multiinstance.data.realData import buildDataset as buildReal
from multiinstance.utils import addTransformScores, addGlobalEsts, getBagAlphaHats,getTransformScores

In [None]:
# Locations of the two Gaussian score components used throughout the notebook:
# getM draws the instances it labels 0 around mu0 and those it labels 1 around mu1.
mu0 = 1
mu1 = 2

In [None]:
NP= 2
NU = 10
alpha=.25
S0 = NP / (NP + NU)
S1 = 1 - S0
C = np.random.normal(loc=5,scale=1, size=NP)
M,mL = getM(NU,alpha)

In [None]:
def getM(NU,alpha):
    M = np.concatenate((np.random.normal(loc=mu1,size=int(alpha*NU)),
                        np.random.normal(loc=mu0, size=int((1-alpha) * NU))))
    mL = np.concatenate((np.ones(int(alpha*NU)),
                                   np.zeros(int((1-alpha) * NU))))
    return M, mL

In [None]:
def f(x,alpha_):
    """Two-component Gaussian mixture density evaluated at ``x``.

    Weight ``alpha_`` goes to the unit-scale normal centred at ``mu1`` and
    ``1 - alpha_`` to the unit-scale normal centred at ``mu0``.
    """
    mu1Part = ss.norm.pdf(x, loc=mu1, scale=1)
    mu0Part = ss.norm.pdf(x, loc=mu0)
    return alpha_ * mu1Part + (1 - alpha_) * mu0Part

def g(x):
    """Density at ``x`` of the unit-scale normal centred at ``mu1``."""
    density = ss.norm.pdf(x, loc=mu1)
    return density

def tau(x,alpha_):
    """Return ``S1 / ((f(x, alpha_) / g(x)) * S0 + S1)``.

    ``S0`` and ``S1`` are the module-level proportions; ``f`` is the mixture
    density and ``g`` the density of the ``mu1`` component.
    """
    densityRatio = f(x, alpha_) / g(x)
    denominator = densityRatio * S0 + S1
    return S1 / denominator

def posterior(x,alpha_,tau_=None, sos1=None):
    """Posterior computed from tau as ``alpha_ * sos1 * tau_ / (1 - tau_)``.

    Parameters
    ----------
    x : scores; only used when ``tau_`` has to be computed.
    alpha_ : class prior plugged into the formula.
    tau_ : optional precomputed tau values; defaults to ``tau(x, alpha_)``.
    sos1 : optional ratio multiplying the odds; defaults to ``S0 / S1``.
    """
    tau_ = tau(x, alpha_) if tau_ is None else tau_
    sos1 = S0 / S1 if sos1 is None else sos1
    odds = tau_ / (1 - tau_)
    return alpha_ * sos1 * odds

def truePosterior(x,alpha_):
    """Closed-form posterior of the ``mu1`` component at ``x`` under prior ``alpha_``.

    Computed as ``alpha_ * f1 / (alpha_ * f1 + (1 - alpha_) * f0)``, where ``f1`` and
    ``f0`` are the unit-scale normal densities centred at ``mu1`` and ``mu0``.
    """
    mu1Mass = alpha_ * ss.norm.pdf(x, loc=mu1)
    mu0Mass = (1 - alpha_) * ss.norm.pdf(x, loc=mu0)
    return mu1Mass / (mu1Mass + mu0Mass)

In [None]:
# Sanity check: compare the tau-based posterior with the closed-form one on both
# score samples (C first, then M); displays one boolean per sample.
tuple(np.allclose(posterior(sample, alpha), truePosterior(sample, alpha))
      for sample in (C, M))

In [None]:
def auc(scores, alphas, ds):
    """ROC AUC of the tau-based posteriors pooled over all bags.

    Parameters
    ----------
    scores : sequence with one array of scores per bag.
    alphas : sequence with the class prior to use for each bag.
    ds : object exposing ``N``, ``numU`` and ``hiddenLabels``; the first
        ``numU[i]`` hidden labels of bag ``i`` are taken as ground truth.
    """
    perBag = []
    for bagScores, bagAlpha in zip(scores, alphas):
        perBag.append(posterior(bagScores, bagAlpha))
    yScore = np.concatenate(perBag)
    yTrue = np.concatenate([ds.hiddenLabels[bag][:ds.numU[bag]]
                            for bag in range(ds.N)])
    return roc_auc_score(yTrue, yScore)

In [None]:
# Minimal one-bag dataset wrapper so that auc() can be run on the sample M.
# The labels repeat getM's construction: int(alpha*NU) ones, then int((1-alpha)*NU) zeros.
dsi = EasyDict()
dsi.N = 1
dsi.hiddenLabels = [np.concatenate([np.ones(int(alpha * NU)),
                                    np.zeros(int((1 - alpha) * NU))])]
dsi.numU = [len(bagLabels) for bagLabels in dsi.hiddenLabels]

auc([M], [alpha], dsi)

In [None]:
# Synthetic multi-bag comparison: AUC with a noisy per-bag prior ("local") versus a
# single noisy size-weighted prior shared by every bag ("global").
# NOTE(review): the perturbed priors are not clipped, so they can leave [0, 1] — confirm intended.
d2 = EasyDict()
d2.N = 100
alphas = np.random.beta(2, 2, size=d2.N)
d2.numU = np.random.poisson(lam=100, size=d2.N) + 1
scores, d2.hiddenLabels = zip(*(getM(n, a) for n, a in zip(d2.numU, alphas)))
print("local: {:.3f}".format(
    auc(scores, np.random.normal(0, .1, size=d2.N) + alphas, d2)))
gAlpha = np.dot(alphas, d2.numU) / np.sum(d2.numU) + np.random.normal(0, .06)
print("global: {:.3f}".format(
    auc(scores, gAlpha * np.ones(d2.N), d2)))

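**FIXME — unresolved merge conflict.** `<<<<<<< HEAD`: the `aucReal` cell below is the `HEAD` side of the conflict; the same definition appears again after the `=======` marker.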

In [None]:
def aucReal(ds, alphas_):
    """ROC AUC of posteriors rebuilt from each bag's transform scores.

    Parameters
    ----------
    ds : dataset exposing ``N``, ``numU``, ``numP`` and ``hiddenLabels``.
    alphas_ : one class prior per bag.

    For every bag the posterior is ``alpha * (numU / numP) * tau / (1 - tau)``, where
    tau is the second item returned by ``getTransformScores`` for that bag.
    Infinite values are replaced by 1.
    NOTE(review): finite values above 1 are left untouched, so a replaced entry can
    rank below them — confirm this is intended.
    """
    transformed = [getTransformScores(ds, bag) for bag in range(ds.N)]
    _, taus = zip(*transformed)
    sizeRatios = ds.numU / ds.numP

    def bagPosterior(alphaHat, ratio, bagTaus):
        # Same operand order as alpha * ratio * (tau / (1 - tau)).
        odds = bagTaus / (1 - bagTaus)
        post = alphaHat * ratio * odds
        post[np.isinf(post)] = 1
        return post

    posteriors = np.concatenate([bagPosterior(a, s, t)
                                 for a, s, t in zip(alphas_, sizeRatios, taus)])
    labels = np.concatenate([ds.hiddenLabels[bag][:ds.numU[bag]]
                             for bag in range(ds.N)])
    return roc_auc_score(labels, posteriors)

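**FIXME — unresolved merge conflict.** `=======`: end of the `HEAD` side; the cells below belong to the incoming side of the conflict.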

In [None]:
# Build a dataset from the abalone file and attach transform scores, global
# estimates and per-bag alpha estimates.
# The realData builder is imported in this notebook under the alias `buildReal`
# (the bare name `buildDataset` is never bound), so the alias is used here.
ds = buildReal("/ssdata/ClassPriorEstimationPrivate/data/rawDatasets/abalone.mat",size=2,
               nPDistr=lambda:np.random.choice([10]),
               nUDistr=lambda:np.random.choice([50]),)

ds = addTransformScores(ds)
ds = addGlobalEsts(ds)
# `addBagAlphaHats` is not imported anywhere in the notebook; use the imported
# getBagAlphaHats the same way the dataset loops do.
# NOTE(review): confirm the bootstrap count wanted for this small dataset.
ds.alphaHats, ds.curves = getBagAlphaHats(ds, numbootstraps=100)

In [None]:
def aucReal(ds, alphas_):
    """ROC AUC of posteriors rebuilt from each bag's transform scores.

    Parameters
    ----------
    ds : dataset exposing ``N``, ``numU``, ``numP`` and ``hiddenLabels``.
    alphas_ : one class prior per bag.

    For every bag the posterior is ``alpha * (numU / numP) * tau / (1 - tau)``, where
    tau is the second item returned by ``getTransformScores`` for that bag.
    Infinite values are replaced by 1.
    NOTE(review): finite values above 1 are left untouched, so a replaced entry can
    rank below them — confirm this is intended.
    """
    transformed = [getTransformScores(ds, bag) for bag in range(ds.N)]
    _, taus = zip(*transformed)
    sizeRatios = ds.numU / ds.numP

    def bagPosterior(alphaHat, ratio, bagTaus):
        # Same operand order as alpha * ratio * (tau / (1 - tau)).
        odds = bagTaus / (1 - bagTaus)
        post = alphaHat * ratio * odds
        post[np.isinf(post)] = 1
        return post

    posteriors = np.concatenate([bagPosterior(a, s, t)
                                 for a, s, t in zip(alphas_, sizeRatios, taus)])
    labels = np.concatenate([ds.hiddenLabels[bag][:ds.numU[bag]]
                             for bag in range(ds.N)])
    return roc_auc_score(labels, posteriors)

**FIXME — unresolved merge conflict.** `>>>>>>> 76a8683cdc145054e4f8b6a1f4525a5b89f04434`: end of the incoming side of the conflict opened above.

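**FIXME — unresolved merge conflict.** `<<<<<<< HEAD`: the dataset-loop cell below is the `HEAD` side; the same loop appears again after the `=======` marker.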

In [None]:
# One AUC per dataset for each way of choosing the class prior:
# "local" uses a per-bag estimate, "global" a single estimate shared by all bags.
a = {"local":[],
     "global":[]}

# The loop variable is not called `f`: that name is the mixture-density function
# defined earlier in the notebook, and rebinding it to a path string would break
# tau()/posterior() for any cell run after this one.
for matFile in glob("/ssdata/ClassPriorEstimationPrivate/data/rawDatasets/*.mat"):
    dsi = buildReal(matFile,16,
                    alphaDistr=lambda: np.random.uniform(.25,.75),
                    nPDistr=lambda: 1 + np.random.poisson(25),
                    nUDistr=lambda: 1 + np.random.poisson(50))
    dsi = addTransformScores(dsi)
    dsi = addGlobalEsts(dsi,reps=100)
    dsi.alphaHats, dsi.curves = getBagAlphaHats(dsi,
                                                numbootstraps=100)
    # Local: mean of alphaHats along axis 1 (presumably over bootstraps — TODO confirm).
    a["local"].append(aucReal(dsi, dsi.alphaHats.mean(1)))
    # Global: the mean global estimate repeated for every bag.
    a["global"].append(aucReal(dsi, dsi.globalAlphaHats.mean() * np.ones(dsi.N)))
    print("local: {:.3f}    global: {:.3f}".format(a["local"][-1],
                                                   a["global"][-1]))
    # Running average over the datasets processed so far.
    print("~~~~ AVG: local: {:.3f}    global: {:.3f}".format(np.mean(a["local"]),
                                                   np.mean(a["global"])))

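**FIXME — unresolved merge conflict.** `=======`: end of the `HEAD` side; the cells below belong to the incoming side of the conflict.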

In [None]:
# NOTE(review): `g1` is not defined or imported in any cell visible in this notebook —
# confirm where it comes from before relying on this cell after a kernel restart.
res = g1(ds)

In [None]:
# Mean absolute difference between the axis-1 mean of res["alphaHats"] and the
# flattened true alphas of `ds` (presumably one value per bag — TODO confirm).
np.mean(np.abs(res["alphaHats"].mean(1) - ds.trueAlphas.flatten()))

In [None]:
# One AUC per dataset for each way of choosing the class prior:
# "local" uses a per-bag estimate, "global" a single estimate shared by all bags.
a = {"local":[],
     "global":[]}

# The loop variable is not called `f`: that name is the mixture-density function
# defined earlier in the notebook, and rebinding it to a path string would break
# tau()/posterior() for any cell run after this one.
for matFile in glob("/ssdata/ClassPriorEstimationPrivate/data/rawDatasets/*.mat"):
    dsi = buildReal(matFile,16,
                    alphaDistr=lambda: np.random.uniform(.25,.75),
                    nPDistr=lambda: 1 + np.random.poisson(25),
                    nUDistr=lambda: 1 + np.random.poisson(50))
    dsi = addTransformScores(dsi)
    dsi = addGlobalEsts(dsi,reps=100)
    dsi.alphaHats, dsi.curves = getBagAlphaHats(dsi,
                                                numbootstraps=100)
    # Local: mean of alphaHats along axis 1 (presumably over bootstraps — TODO confirm).
    a["local"].append(aucReal(dsi, dsi.alphaHats.mean(1)))
    # Global: the mean global estimate repeated for every bag.
    a["global"].append(aucReal(dsi, dsi.globalAlphaHats.mean() * np.ones(dsi.N)))
    print("local: {:.3f}    global: {:.3f}".format(a["local"][-1],
                                                   a["global"][-1]))
    # Running average over the datasets processed so far.
    print("~~~~ AVG: local: {:.3f}    global: {:.3f}".format(np.mean(a["local"]),
                                                   np.mean(a["global"])))

**FIXME — unresolved merge conflict.** `>>>>>>> 76a8683cdc145054e4f8b6a1f4525a5b89f04434`: end of the incoming side of the conflict opened above.

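**FIXME — stray merge-conflict marker.** `>>>>>>> 7a09ce788d5b4cc25e59ea0f95ecc79cebaa4bd8`: closing marker with no matching `<<<<<<<` opener for this commit anywhere in the visible notebook.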

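**FIXME — unresolved merge conflict.** `<<<<<<< HEAD`: the cells below are the `HEAD` side; the dataset loop appears again after the `=======` marker.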

In [None]:
# Mean absolute difference between the axis-1 mean of ds.alphaHats and the
# flattened true alphas of `ds` (presumably one value per bag — TODO confirm).
np.mean(np.abs(ds.alphaHats.mean(1) - ds.trueAlphas.flatten()))

In [None]:
# One AUC per dataset for each way of choosing the class prior:
# "local" uses one prior per bag, "global" a single prior shared by all bags.
a = {"local":[],
     "global":[]}

# The loop variable is not called `f`: that name is the mixture-density function
# defined earlier in the notebook, and rebinding it to a path string would break
# tau()/posterior() for any cell run after this one.
for matFile in glob("/ssdata/ClassPriorEstimationPrivate/data/rawDatasets/*.mat"):
    dsi = buildReal(matFile,16,
                    alphaDistr=lambda: np.random.uniform(.25,.75),
                    nPDistr=lambda: 1 + np.random.poisson(25),
                    nUDistr=lambda: 1 + np.random.poisson(50))
    dsi = addTransformScores(dsi)
    # No estimation in this variant (addGlobalEsts / getBagAlphaHats are not run):
    # the true bag priors stand in for the local estimates and their
    # numU-weighted average for the global one.
    globalAlpha = np.dot(dsi.trueAlphas.flatten(), dsi.numU) / np.sum(dsi.numU)

    a["local"].append(aucReal(dsi, dsi.trueAlphas.flatten()))
    a["global"].append(aucReal(dsi, globalAlpha * np.ones(dsi.N)))
    print("local: {:.3f}    global: {:.3f}".format(a["local"][-1],
                                                   a["global"][-1]))
    # Running average over the datasets processed so far.
    print("~~~~ AVG: local: {:.3f}    global: {:.3f}".format(np.mean(a["local"]),
                                                   np.mean(a["global"])))

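**FIXME — unresolved merge conflict.** `=======`: end of the `HEAD` side; the cells below belong to the incoming side, and no closing `>>>>>>>` marker follows in the visible notebook.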

In [None]:
# One AUC per dataset for each way of choosing the class prior:
# "local" uses one prior per bag, "global" a single prior shared by all bags.
a = {"local":[],
     "global":[]}

# The loop variable is not called `f`: that name is the mixture-density function
# defined earlier in the notebook, and rebinding it to a path string would break
# tau()/posterior() for any cell run after this one.
for matFile in glob("/ssdata/ClassPriorEstimationPrivate/data/rawDatasets/*.mat"):
    dsi = buildReal(matFile,16,
                    alphaDistr=lambda: np.random.uniform(.25,.75),
                    nPDistr=lambda: 1 + np.random.poisson(25),
                    nUDistr=lambda: 1 + np.random.poisson(50))
    dsi = addTransformScores(dsi)
    # No estimation in this variant (addGlobalEsts / getBagAlphaHats are not run):
    # the true bag priors stand in for the local estimates and their
    # numU-weighted average for the global one.
    globalAlpha = np.dot(dsi.trueAlphas.flatten(), dsi.numU) / np.sum(dsi.numU)

    a["local"].append(aucReal(dsi, dsi.trueAlphas.flatten()))
    a["global"].append(aucReal(dsi, globalAlpha * np.ones(dsi.N)))
    print("local: {:.3f}    global: {:.3f}".format(a["local"][-1],
                                                   a["global"][-1]))
    # Running average over the datasets processed so far.
    print("~~~~ AVG: local: {:.3f}    global: {:.3f}".format(np.mean(a["local"]),
                                                   np.mean(a["global"])))

In [None]:
# Largest absolute gap between the axis-1 mean of dsi.alphaHats and the true alphas
# of the last dataset left in `dsi`.
# NOTE(review): the loop cell directly above does not assign dsi.alphaHats, so the
# attribute may be missing or stale here — confirm.
np.abs(dsi.alphaHats.mean(1) - dsi.trueAlphas.flatten()).max()

In [None]:
# Largest absolute gap between the size-weighted globalAlpha and any single true alpha;
# both globalAlpha and dsi are left over from the last iteration of the loop above.
np.abs(globalAlpha - dsi.trueAlphas.flatten()).max()