In [None]:
from multiinstance.em import generateBags

import numpy as np
import matplotlib.pyplot as plt

import scipy.stats as ss
from sklearn.cluster import KMeans

from multiinstance.utils import estimate

Starting with a simple case: two bags, each drawn from two pairs of positive/negative components.

In [None]:
# 2-D component means. Index j pairs posMeans[j] with negMeans[j]:
# densityRatio looks both lists up with the same component number.
posMeans = [
    [-2, -6],
    [6, 6],
]
negMeans = [
    [0, -2],
    [4, 2],
]

In [None]:
# Generate the synthetic bags from the component means defined above.
# NOTE(review): the leading positional 2 is presumably the number of bags
# (the plotting cell draws two panels) -- confirm against generateBags.
bags = generateBags(
    2,
    pos_means=posMeans,
    neg_means=negMeans,
    NPos=2000,
    NUnlabeled=10000,
)

In [None]:
# One panel per bag: unlabeled instances in blue with the labeled positives
# drawn on top in red. Titles, axis labels and a legend are set so the figure
# can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots(1, 2, figsize=(10, 4))
for bagNum, (b, a) in enumerate(zip(bags, ax)):
    a.scatter(b.x_unlabeled[:, 0], b.x_unlabeled[:, 1],
              color="blue", alpha=.5, label="unlabeled")
    a.scatter(b.X_pos[:, 0], b.X_pos[:, 1],
              color="red", alpha=.5, label="labeled positive")
    a.set(title="Bag {}".format(bagNum), xlabel="$x_1$", ylabel="$x_2$")
    a.legend()

## Run K-Means Globally

In [None]:
# Pool the labeled positives of every bag into a single array
# (the per-bag arrays are joined along axis 0, i.e. rows are stacked).
positivesPerBag = [bag.X_pos for bag in bags]
globalPositives = np.concatenate(positivesPerBag, axis=0)

In [None]:
# Fit one cluster per positive component on the pooled labeled positives.
# random_state pins the centroid initialisation so that Restart & Run All
# reproduces the same clustering (and the same cluster numbering) every time;
# n_init is spelled out because its default differs between scikit-learn
# releases.
kmeans = KMeans(n_clusters=len(posMeans), n_init=10, random_state=0).fit(globalPositives)

In [None]:
# Display the fitted centroids (one row per cluster) for a visual sanity check.
kmeans.cluster_centers_

## Cluster Unlabeled Instances

In [None]:
# Tag every bag with the K-Means cluster predicted for each of its unlabeled
# instances. The bag object is mutated directly: the enumerate index was only
# being used to look the same object up again.
for b in bags:
    b.unlabeled_cluster_assignment = kmeans.predict(b.x_unlabeled)

## Run DistCurve on Each Cluster

In [None]:
# For each K-Means cluster, gather its labeled positives and its unlabeled
# instances across all bags, then pass both to `estimate`.
# NOTE(review): per the section heading `estimate` is presumably the DistCurve
# class-prior estimator; it returns a pair and only the first item is stored.
NClusters = len(kmeans.cluster_centers_)
global_etas = np.zeros(NClusters)
for k in range(NClusters):
    # Unlabeled rows, from every bag, that were predicted to be in cluster k.
    unlabeledParts = [
        bag.x_unlabeled[bag.unlabeled_cluster_assignment == k] for bag in bags
    ]
    clusterUnlabeled = np.concatenate(unlabeledParts)
    # Labeled positives that K-Means put in cluster k while fitting.
    clusterPositives = globalPositives[kmeans.labels_ == k]
    etaForCluster, _ = estimate(clusterPositives, clusterUnlabeled)
    global_etas[k] = etaForCluster

## Run NNPU to get Posterior

## Convert Posterior To Component Density Ratio

In [None]:
def densityRatio(xi, compNum):
    """Return p(xi | negative) / p(xi | positive) for component ``compNum``.

    Both class-conditional densities are 2-D Gaussians with identity
    covariance, centred on ``negMeans[compNum]`` and ``posMeans[compNum]``
    (module-level lists). The ratio is formed from the two log-densities and
    exponentiated once at the end.
    """
    identityCov = np.eye(2)
    logDensityNeg = ss.multivariate_normal.logpdf(
        xi, mean=negMeans[compNum], cov=identityCov
    )
    logDensityPos = ss.multivariate_normal.logpdf(
        xi, mean=posMeans[compNum], cov=identityCov
    )
    logRatio = logDensityNeg - logDensityPos
    return np.exp(logRatio)

## Run EM Algorithm to get alpha, pi and rho

In [None]:
# Per-bag EM: for every component estimate eta (the positive fraction among
# the unlabeled points assigned to that component's cluster), then combine the
# etas with the cluster proportions gamma to obtain alpha, pi and rho.
# Each result is printed next to the matching attribute stored on the bag
# (b.alpha / b.pi / b.rho -- presumably the ground truth used to generate it).

N_EM_ITERS = 50  # fixed number of EM updates per (bag, component)

# K-Means numbers its clusters arbitrarily, whereas densityRatio indexes
# posMeans/negMeans by component number. Match every component to the cluster
# whose centroid is closest to that component's positive mean (K-Means was fit
# on positives only) so the right Gaussians are applied to the right points.
clusterCenters = kmeans.cluster_centers_
compToCluster = [
    int(np.argmin(np.linalg.norm(clusterCenters - np.asarray(mu), axis=1)))
    for mu in posMeans
]

for b in bags:
    assignment = b.unlabeled_cluster_assignment
    etas = []
    gammas = []
    for compNum, clusterNum in enumerate(compToCluster):
        inCluster = assignment == clusterNum
        # gamma: share of THIS bag's unlabeled points in the cluster. Taking
        # the mean of the mask keeps one entry per component even when the
        # cluster is empty in this bag.
        gammas.append(inCluster.mean())
        if not inCluster.any():
            # No points to average over; the component carries zero mass here.
            etas.append(0.0)
            continue
        # densityRatio accepts a whole (n, 2) array, so evaluate it once.
        ratios = np.atleast_1d(densityRatio(b.x_unlabeled[inCluster], compNum))
        eta_i_j = 0.5
        for _ in range(N_EM_ITERS):
            # E-step: eta / den is each point's posterior of being positive;
            # M-step: the new eta is the mean of those posteriors.
            den = eta_i_j + (1 - eta_i_j) * ratios
            eta_i_j = np.mean(eta_i_j / den)
        etas.append(eta_i_j)

    eta_j = np.array(etas)
    gamma_j = np.array(gammas)
    # alpha: overall positive fraction among the bag's unlabeled points.
    alpha_j = eta_j.dot(gamma_j)
    print("alpha: {:.3f}, {:.3f}".format(alpha_j,b.alpha))
    # pi: component proportions among the positives.
    pi_j = np.multiply(eta_j, gamma_j) / alpha_j
    print("pi",pi_j, b.pi)
    # rho: component proportions among the negatives.
    rho_j = np.multiply(1 - eta_j, gamma_j) / (1 - alpha_j)
    print("rho",rho_j, b.rho)