In [None]:
from multiinstance.utils import *
from multiinstance.distanceApproaches import *
from multiinstance.data.syntheticData import buildDataset
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from dist_curve.curve_constructor import makeCurve
from tqdm.notebook import tqdm

In [None]:
from scipy.spatial.distance import pdist, squareform

In [None]:
import scipy.stats as ss

In [None]:
from dist_curve.model import getTrainedEstimator
# Module-level estimator shared by getBagEst and getGlobalEst, which both call
# estimator.predict(...) on a normalized curve reshaped to a single row.
estimator = getTrainedEstimator()

In [None]:
def getUnlabeledInstanceProbs(bagNum, wassMat, ds,v1=True):
    """Build sampling probabilities over the unlabeled instances of all bags.

    Bag ``j`` receives the similarity ``exp(-wassMat[bagNum, j])`` to the
    reference bag ``bagNum``. That weight is attached to each of the
    ``ds.numU[j]`` unlabeled instances of bag ``j``; the per-instance weights
    are laid out in bag order and normalized to sum to one.

    Parameters
    ----------
    bagNum : int
        Row of ``wassMat`` belonging to the reference bag.
    wassMat : array-like, 2-D
        Pairwise bag distance matrix. Only row ``bagNum``, columns
        ``0 .. ds.N - 1`` are read.
    ds : object
        Must expose ``N`` (number of bags) and ``numU`` (integer count of
        unlabeled instances per bag).
    v1 : bool, default True
        If True, each instance weight is the bag similarity divided by the bag
        size, so a bag's total mass equals its similarity. If False, every
        instance carries the full bag similarity, so larger bags get more mass.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``sum(ds.numU[:ds.N])`` whose entries sum to 1,
        suitable as the ``p`` argument of ``np.random.choice``.
    """
    # exp(-d) equals 1 / exp(d) but does not overflow for large distances.
    bagSims = np.exp(-np.asarray(wassMat, dtype=float)[bagNum, :ds.N])
    counts = np.asarray(ds.numU)[:ds.N]
    perInstance = bagSims / counts if v1 else bagSims
    # Expand one weight per bag into one weight per unlabeled instance.
    scores = np.repeat(perInstance, counts)
    # The original computed this value but never returned it, so callers got
    # None and np.random.choice silently fell back to uniform sampling.
    return scores / scores.sum()

In [None]:
def getBagEst(bagNum, wassMat, ds, numRepeats=100,v1=True):
    """Average the estimator's prediction over weighted bootstraps of the pooled unlabeled set.

    The positive and unlabeled parts returned by ``ds.getBag(i)`` are pooled
    across all ``len(ds.numP)`` bags. On each of ``numRepeats`` repetitions the
    pooled unlabeled rows are resampled with replacement using the weights from
    ``getUnlabeledInstanceProbs(bagNum, wassMat, ds, v1=v1)``, a curve is built
    with ``makeCurve`` against the full positive pool, normalized to sum to
    one, and passed to the module-level ``estimator``.

    NOTE: ``np.random.choice`` treats ``p=None`` as uniform sampling, so if
    ``getUnlabeledInstanceProbs`` returns None the bootstrap is unweighted.

    Returns the mean of the ``numRepeats`` predictions.
    """
    posParts, unlParts = [], []
    for bagIdx in range(len(ds.numP)):
        posBag, unlBag = ds.getBag(bagIdx)
        posParts.append(posBag)
        unlParts.append(unlBag)
    P = np.concatenate(posParts)
    U = np.concatenate(unlParts)

    sampleWeights = getUnlabeledInstanceProbs(bagNum, wassMat, ds,v1=v1)
    nUnlabeled = U.shape[0]
    candidateIdxs = np.arange(nUnlabeled)

    ests = np.zeros(numRepeats)
    progress = tqdm(range(numRepeats),total=numRepeats,leave=False, desc="repeating bootstrapping for bag {}".format(bagNum))
    for rep in progress:
        # Weighted bootstrap of the unlabeled pool; the positive pool is used as-is.
        drawn = np.random.choice(candidateIdxs, size=nUnlabeled, replace=True, p=sampleWeights)
        rawCurve = makeCurve(P, U[drawn])
        normCurve = (rawCurve / rawCurve.sum()).reshape((1,-1))
        ests[rep] = estimator.predict(normCurve)
    return ests.mean()

In [None]:
def getGlobalEst(ds,numRepeats=10):
    """Average the estimator's prediction over uniform bootstraps of the pooled data.

    The positive and unlabeled parts returned by ``ds.getBag(i)`` are pooled
    across all ``len(ds.numP)`` bags. On each of ``numRepeats`` repetitions both
    pools are resampled uniformly with replacement (unlabeled first, then
    positive), a curve is built with ``makeCurve``, normalized to sum to one,
    and passed to the module-level ``estimator``.

    Returns the mean of the ``numRepeats`` predictions.
    """
    bagPairs = [ds.getBag(bagIdx) for bagIdx in range(len(ds.numP))]
    P = np.concatenate([posBag for posBag, _ in bagPairs])
    U = np.concatenate([unlBag for _, unlBag in bagPairs])

    nPos, nUnl = P.shape[0], U.shape[0]
    ests = np.zeros(numRepeats)
    progress = tqdm(range(numRepeats),total=numRepeats,leave=False, desc="repeating bootstrapping for global est")
    for rep in progress:
        # Draw order matters for the global RNG stream: unlabeled indices first, then positive.
        unlDraw = np.random.choice(np.arange(nUnl), size=nUnl, replace=True)
        UBoot = U[unlDraw]
        posDraw = np.random.choice(np.arange(nPos),size=nPos, replace=True)
        PBoot = P[posDraw]
        rawCurve = makeCurve(PBoot,UBoot)
        normCurve = (rawCurve / rawCurve.sum()).reshape((1,-1))
        ests[rep] = estimator.predict(normCurve)
    return np.mean(ests)

In [None]:
def getExpectedAlpha(ds):
    """Return the average of ``ds.trueAlphas`` weighted by ``ds.numU``.

    Computed as ``ds.numU.dot(ds.trueAlphas)`` divided by ``ds.numU.sum()``.
    """
    weightedAlphaSum = ds.numU.dot(ds.trueAlphas)
    totalUnlabeled = ds.numU.sum()
    return weightedAlphaSum / totalUnlabeled

In [None]:
# Experiment configuration.
NReps = 10      # number of datasets built with buildDataset
DSize = 100     # size passed to buildDataset; also the column count of the error arrays
NBagReps = 10   # bootstrap repetitions per getBagEst call

# One row per dataset repetition, one column per bag.
errShape = (NReps, DSize)
absErrs = np.zeros(errShape)        # |getBagEst(v1=False) - true alpha|
absErrs1 = np.zeros(errShape)       # |getBagEst(v1=True) - true alpha|
globalAbsErrs = np.zeros(errShape)  # |getGlobalEst - true alpha|

for rep in tqdm(range(NReps),total=NReps,leave=False,desc="dataset repetition"):
    dsi = addTransformScores(buildDataset(DSize,alphaDistr=lambda: np.random.uniform(0.01,0.5)))
    wassMat = getWassersteinMat(dsi)

    # A single pooled estimate is compared against every bag's true alpha.
    globalEst = getGlobalEst(dsi)
    globalAbsErrs[rep] = np.abs(dsi.trueAlphas - globalEst).flatten()

    for bagNum in tqdm(range(dsi.N),total=dsi.N,leave=False, desc="processing bags for ds {}".format(rep)):
        # v1=False is evaluated before v1=True for each bag, keeping the
        # consumption order of the global RNG fixed.
        for errs, useV1 in ((absErrs, False), (absErrs1, True)):
            alphaHat = getBagEst(bagNum, wassMat, dsi, numRepeats=NBagReps,v1=useV1)
            errs[rep,bagNum] = np.abs(alphaHat - dsi.trueAlphas[bagNum])
        

In [None]:
# Mean absolute error, in order: global estimate, per-bag (v1=False), per-bag (v1=True).
tuple(np.mean(errArr) for errArr in (globalAbsErrs, absErrs, absErrs1))

In [None]:
# Order bags by true alpha, then show the pairwise distances between the true
# alphas (pdist's default metric) with rows and columns in that order.
order = np.argsort(dsi.trueAlphas.flatten())
alphaDists = squareform(pdist(dsi.trueAlphas))
sns.heatmap(alphaDists[np.ix_(order, order)])