In [None]:
from multiinstance.utils import *

import numpy as np

from dist_curve.curve_constructor import makeCurve

import scipy.stats as ss

import matplotlib.pyplot as plt

In [None]:
# Evaluation grid reused by the density overlays in the cells below.
rnge = np.arange(-10, 15, .1)
# Reference densities: two Gaussians with standard deviation 2, centred at 5 and at -1.
plt.plot(rnge, ss.norm(loc=5, scale=2).pdf(rnge))
plt.plot(rnge, ss.norm(loc=-1, scale=2).pdf(rnge))

In [None]:
def getSample(nPos=100, nUnlabeled=1000):
    """
    Draw one synthetic positive/unlabeled (PU) sample from 1-D Gaussians.

    The labeled positives are biased through double sampling: a mean is first
    drawn from N(5, 2), then the positives are drawn with unit scale around
    that mean. The unlabeled set concatenates points from N(5, 1) (positive
    component, listed first) and N(-1, 1) (negative component), mixed with a
    class prior alpha ~ Beta(2, 2).

    Args:
        - nPos: number of labeled positives to draw
        - nUnlabeled: total number of unlabeled points to draw
    Return:
        - pos: (nPos,) float
        - unlabeled: (nUnlabeled,) float
        - alpha : float, the drawn class prior; the realized positive
                  fraction is round(alpha * nUnlabeled) / nUnlabeled
    """
    P, U = 5, -1
    posMean = np.random.normal(loc=P, scale=2)
    pos = np.random.normal(loc=posMean, size=nPos)
    alpha = np.random.beta(2, 2)
    # Derive the negative count from the total rather than truncating both
    # parts independently; int(alpha*N) + int((1-alpha)*N) is N-1 for almost
    # every alpha, which silently dropped one unlabeled point.
    nUnlabeledPos = int(round(alpha * nUnlabeled))
    nUnlabeledNeg = nUnlabeled - nUnlabeledPos
    unlabeled = np.concatenate((np.random.normal(loc=P, size=nUnlabeledPos),
                                np.random.normal(loc=U, size=nUnlabeledNeg)))
    return pos, unlabeled, alpha

In [None]:
def minmax(c):
    """
    Return a 0-1 scaled copy of the vector c.

    Args:
        - c: array-like of numbers (must be non-empty)
    Return:
        - array of the same shape with the minimum mapped to 0 and the maximum
          mapped to 1; all zeros when every entry of c is equal
    """
    c = np.asarray(c)
    low = c.min()
    span = c.max() - low
    # A constant vector has zero range; return zeros instead of the NaNs a
    # 0/0 division would produce.
    if span == 0:
        return np.zeros_like(c, dtype=float)
    return (c - low) / span

In [None]:
# Draw one PU sample and compare the positives (blue) with the unlabeled set (red),
# overlaying the N(5, 2) density for reference.
p,u,alpha = getSample()
plt.hist(p,color="blue",density=True)
plt.hist(u,color="red",density=True,alpha=.5)
plt.plot(rnge, ss.norm.pdf(rnge,5,2))
plt.show()
# Rescale the curve with the shared minmax helper instead of repeating the
# formula inline, matching how the later cells normalize their curves.
curve = minmax(makeCurve(p.reshape((-1,1)),u.reshape((-1,1))))
plt.plot(curve)
# Mark the true class prior; the factor of 100 presumably maps alpha onto the
# curve's index axis — TODO confirm against makeCurve's output length.
plt.vlines(100*alpha,0,1)
alphaHats,curves = getEsts(p.reshape((-1,1)),u.reshape((-1,1)))
# Mean absolute error of the returned estimates against the true alpha.
print(np.mean(np.abs(alphaHats - alpha)))

In [None]:
# Second, independent PU sample: same diagnostics as for the first sample.
p2, u2, alpha2 = getSample()
plt.hist(p2, density=True, color="blue")
plt.hist(u2, density=True, color="red", alpha=.5)
plt.plot(rnge, ss.norm.pdf(rnge, loc=5, scale=2))
plt.show()
plt.clf()
# Min-max scaled curve for this sample, with its true class prior marked.
curve = makeCurve(np.reshape(p2, (-1, 1)), np.reshape(u2, (-1, 1)))
curve = minmax(curve)
plt.plot(curve)
plt.vlines(100 * alpha2, ymin=0, ymax=1)
alphaHats, curves = getEsts(np.reshape(p2, (-1, 1)), np.reshape(u2, (-1, 1)))
# Mean absolute error of the returned estimates against this sample's alpha.
print(np.mean(np.abs(alphaHats - alpha2)))

In [None]:
# Pool the positives of both samples and compare their histogram with the
# N(5, 2) density.
plt.hist(np.concatenate((p,p2)),color="blue",density=True)
# plt.hist(u2,color="red",density=True,alpha=.5)
plt.plot(rnge, ss.norm.pdf(rnge,5,2))
plt.show()
plt.clf()
# Min-max scaled curve for the pooled positives against the pooled unlabeled points.
curve0 = minmax(makeCurve(np.concatenate((p,p2)).reshape((-1,1)),
                         np.concatenate((u,u2)).reshape((-1,1))))
plt.plot(curve0)
# Vertical markers: each sample's class prior and their equal-weight average
# (scaled by 100, presumably to match the curve's index axis — TODO confirm).
plt.vlines(np.array([alpha,
                     alpha2,
                     alpha*.5+alpha2*.5])*100,0,1)
alphaHats,curves = getEsts(np.concatenate((p,p2)).reshape((-1,1)),
                         np.concatenate((u,u2)).reshape((-1,1)))
# NOTE(review): the mean pooled estimate is compared with each individual
# alpha, not with the averaged prior marked above — confirm this is intended.
print(np.mean(np.abs(alphaHats.mean() - np.array([alpha,alpha2]))))

In [None]:
# Collect the labeled positives from 100 independent draws. A comprehension
# consumes the same random stream as an explicit loop but does not rebind the
# notebook-level p, u and alpha that the pooled-sample cell reads, so
# re-running earlier cells after this one stays consistent.
ps = [getSample()[0] for _ in range(100)]

plt.hist(np.concatenate(ps),density=True)
# NOTE(review): getSample draws each bag mean from N(5, 2) and then the points
# with unit scale around it, so the pooled positives need not follow N(5, 2)
# exactly — confirm which reference density is intended here.
plt.plot(rnge, ss.norm.pdf(rnge,5,2))

In [None]:
def getBiasedPos(size=1000):
    """
    Draw `size` points from a random mixture of N(-3, 1), N(0, 1) and N(3, 1).

    Mixture weights are drawn from a flat Dirichlet; the per-component counts
    are then drawn from a multinomial with those weights, so they always sum
    to `size` and each component's expected share equals its weight.

    Args:
        - size: total number of points to draw
    Return:
        - p: (size,) float, points grouped by component (loc -3 first, then 0, then 3)
        - assignments: (size,) float, component index (0, 1 or 2) of each point in p
    """
    locs = (-3, 0, 3)
    weights = np.random.dirichlet(np.ones(len(locs)))
    # Truncating weights * size per component usually left fewer than `size`
    # points, so p and assignments disagreed in length; multinomial counts
    # sum exactly to `size`.
    counts = np.random.multinomial(size, weights)
    p = np.concatenate([np.random.normal(loc=loc, size=n)
                        for loc, n in zip(locs, counts)])
    # Label each point with its component index, in the same order as p.
    # The previous slice bounds used raw per-component counts instead of
    # cumulative offsets, which mislabeled points.
    assignments = np.repeat(np.arange(len(locs), dtype=float), counts)
    return p, assignments

In [None]:
# Pool 1000 small biased draws (10 points requested each) and compare their
# histogram with the equal-weight mixture of the three unit-scale Gaussians.
p, assign = zip(*(getBiasedPos(size=10) for _ in range(1000)))
p = np.concatenate(p)
_ = plt.hist(p, bins=50, density=True)
r = np.arange(-5, 8, .1)
plt.plot(r, sum(ss.norm.pdf(r, loc=mu) / 3 for mu in (-3, 0, 3)))

In [None]:
# Fresh sample: plot the unscaled curve built with num_curves_to_average=1
# and mark the true class prior (scaled by 100) on it.
p, u, alpha = getSample()
plt.plot(makeCurve(np.reshape(p, (-1, 1)),
                   np.reshape(u, (-1, 1)),
                   num_curves_to_average=1))
plt.vlines(100 * alpha, ymin=0, ymax=40)