In [1]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
def plot_error(rpca_errs, amc_errs, idx, iters, vals_list):
    """
    Plot a line graph with 95% CI shaded!
    """

    #create pandas dataframe
    m_list = vals_list
    rpca_errs_dataframe = pd.DataFrame([obj[idx] for obj in rpca_errs])
    rpca_errs_dataframe['m'] = pd.Series(m_list*iters, index=rpca_errs_dataframe.index)
    amc_errs_dataframe = pd.DataFrame([obj[idx] for obj in amc_errs])
    amc_errs_dataframe['m'] = pd.Series(m_list*iters, index=amc_errs_dataframe.index)

    #create plot
    sns.lineplot(x='m', y=0, data=rpca_errs_dataframe, marker=True, palette=sns.color_palette("GnBu_d",2))
    sns.lineplot(x='m', y=0, data=amc_errs_dataframe, marker=True, palette = sns.color_palette("GnBu_d",2))
    if idx == 6:
        plt.ylabel("Accuracy (%)")
        plt.title('Overall Accuracy')
    if idx == 0:
        plt.ylabel("Accuracy (%)")
        plt.title('Head Slice Accuracy')
    if idx == 3:
        plt.ylabel("Accuracy (%)")
        plt.title('Head-Torso Overlap Accuracy')

    plt.xlabel("Overlap Portion of Head-Torso")
    plt.legend(['Model 0', 'Model 1'])
    
    sns.despine()
    plt.tight_layout()

In [4]:
def eval_model(model,X,L,Y,overlap_idx):
        #overall accuracy of the model
        overall_score = model.score(X, Y) 
        
        #accuracy of the model on the portion each LF covers
        slice_scores = model.score_on_LF_slices(X, Y, L) 
        try: 
            #accuracy of the model on the overlap between head and torso LF
            overlap_scores = model.score_on_LF_slices(X[overlap_idx,:], Y[overlap_idx], L[overlap_idx,:]) 
        except: 
            overlap_scores = [0,0,0] 
        return  [list(slice_scores)+list(overlap_scores)+list([overall_score])][0]

In [5]:
from random import shuffle

def shuffle_matrices(matrices):
    """Shuffle each member of a list of matrices having the same first dimension
    (along first dimension) according o the same shuffling order."""
    N = matrices[0].shape[0]
    idxs = list(range(N))
    shuffle(idxs)
    out = []
    for M in matrices:
        if M.shape[0] != N:
            raise ValueError("All matrices must have same first dimension.")
        out.append(M[idxs])
    return out

In [6]:
def generate_dataset(mu_0, mu_1, n=1000, plot=False):
    """Generate (x, y) mixture of gaussians data, where x \in R^d and 
    y \in {-1, 1}."""
    dim = mu_0.shape[0]
    nc = n // 2
    I = np.diag(np.ones(dim))

    # Generate data
    X_0 = multivariate_normal(mu_0, I, size=nc)
    X_1 = multivariate_normal(mu_1, I, size=nc)

    # Plot the data
    if plot:
        plt.scatter(X_0[:,0], X_0[:,1])
        plt.scatter(X_1[:,0], X_1[:,1])
        plt.show()

    # Shuffle and merge data
    return shuffle_matrices([
        np.vstack([X_0, X_1]),
        np.array(np.concatenate([-np.ones(nc), np.ones(nc)]), dtype=int)])

In [7]:
def generate_multi_mode_data(n, mus, props, labels, plot=False):
    """Generate multi-mode data
    
    Args:
        - n: [int] Number of data points to generate
        - mus: [list of d-dim np.arrays] centers of the modes
        - props: [list of floats] proportion of data in each mode
        - labels: [list of ints] class label of each mode
        - plot: [bool] Whether to plot the data
    
    Returns:
        - X: [n x d-dim array] Data points
        - Y: [n-dim array] Data labels
        - C: [n-dim array] Index of the mode each data point belongs to
    """
    assert sum(props) == 1.0
    ns = [int(n*prop) for prop in props]
    d = mus[0].shape[0]
    I_d = np.diag(np.ones(d))

    # Generate data
    Xu = [np.random.multivariate_normal(mu, I_d, size=ni) for mu, ni in zip(mus, ns)]
    Yu = [l*np.ones(ni) for ni, l in zip(ns, labels)]
    Cu = [i*np.ones(ni) for i, ni in enumerate(ns)]

    # Plot the data
    if plot:
        for Xi in Xu:
            plt.scatter(Xi[:, 0], Xi[:, 1])
        plt.show()

    # Generate labels and shuffle
    return shuffle_matrices([np.vstack(Xu), np.hstack(Yu), np.hstack(Cu)])

In [8]:
def generate_label_matrix(n, accs, covs, Y, C, overlap_portion=0.3, overlap_acc=1.0):
    """Generate label matrix. We assume that the last LF is the head LF and the 
    one before it is the torso LF it will interact with.
    
    Args:
        - n: [int] Number of data points
        - accs: [list of floats] accuracies of LFs
        #TODO: covs isn't the overall coverage, but coverage on the associated mode
        - covs: [list of floats] coverage for each LF for its mode
        - Y: [n-dim array] Data labels
        - C: [n-dim array] Index of the mode each data point belongs to
        - overlap_portion: [float] % of "head" LF that overlaps with "torso" LF
        TODO: Not using overlap_acc yet!
        - overlap_acc: [float] Accuracy of torso LF | head LF on overlap_portion
    
    Returns:
        - L: [n x d-dim array] Data points
        - overlap_idx: [n-dim array] Index of where head and torso LF overlap
    """
    m = np.shape(accs)[0]

    # Construct a label matrix with given accs and covs
    L = np.zeros((n, m))
    for i in range(n):
        j = int(C[i])
        if np.random.random() < covs[j]:
            if np.random.random() < accs[j]:
                L[i, j] = Y[i]
            else:
                L[i, j] = -Y[i]
                
    #Change labeling patterns of LF[-2] and LF[-1] so they have some overlap
    for i in range(n):
        j = int(C[i])
        if j == int(np.max(C)):
            if np.random.random() < overlap_portion:
                L[i, j-1] = -Y[i] #downvote LF1 on overlap
                L[i, j] = Y[i] #upvote LF2 on overlap

    overlap_idx = [i for i in range(n) if (L[i,-2] != 0 and L[i,-1] != 0)]
    accs_emp = np.array([np.mean(L[:,j] == Y)/np.mean(L[:,j] != 0) for j in range(m)])
    return L, overlap_idx

In [9]:
def generate_data(N,accs,covs,overlap_portion=0.3):
    """
    Generate simulation data with given params. 
    #TODO: HARDCODE LF[-2] and LF[-1] as head and torso LF
    Args:
        - N: [int] total number of datapoints (samples)
        - accs: [list of floats] accuracies for LFs
        - covs: [list of floats] coverages for LFs
        - overlap_portion: [float] % of "head" LF that overlaps with "torso" LF
    """
    #feature and label generation
    mus = [
    np.array([-3, 0]), # Mode 1: Y = -1
    np.array([3, 0]), # Mode 2: Y = 1
    np.array([6, -3]) # Mode 3: Y = -1
    ]
    props = [0.25, 0.5, 0.25]
    labels = [-1, 1, -1]
    
    X, Y, C = generate_multi_mode_data(N, mus, props, 
                                               labels, plot=False)
    
    #labeling function generation
    L, overlap_idx = generate_label_matrix(N, accs, covs, 
                                           Y, C, overlap_portion=overlap_portion)
    
    return X,Y,L,overlap_idx

In [10]:
N = 10000
covs = np.array([0.9, 0.9, 0.9])
m = np.shape(covs)[0]
model_0_scores = []
model_1_scores = []
model_2_scores = []

overlap_portion = 0.05
accs = np.array([0.75, 0.75, 0.75])
X,Y,L,overlap_idx = generate_data(N,accs,covs,overlap_portion)

In [11]:
def train_models(X,L, accs):
    """
    Trains baseline, oracle, and attention model
    Args:
        - X: features
        - L: LF matrix
        - accs: [list of floats] accuracies for LFs
    Returns:
        - model_[0,1,2]: trained baseline, oracle, and attention model
    """

    m = np.shape(L)[1] #num LFs
    d = X.shape[1] #num features

    #baseline model, no attention
    model_0 = SliceDPModel(d, LinearModule, m, accs, r=4, rw=False)
    model_0.train(X, L, batch_size=1000, n_epochs=250, lr=0.1, print_every=250)

    #oracle, manual reweighting
    #TODO: currently hardcode weights so LF[-1] has double the weight
    weights = np.ones(m)
    weights[-1] = 2.0
    model_1 = SliceDPModel(d, LinearModule, m, accs, r=4, rw=True, L_weights=list(weights))
    model_1.train(X, L, batch_size=1000, n_epochs=250, lr=0.1, print_every=250)

    #our model, with attention
    model_2 = SliceDPModel(d, LinearModule, m, accs, r=2, rw=True)
    model_2.train(X, L, batch_size=1000, n_epochs=250, lr=0.1, print_every=250)
    
    return model_0, model_1, model_2

In [12]:
class LinearModule(nn.Module):
    def __init__(self, input_dim, output_dim, bias=False):
        super().__init__()
        self.input_layer = nn.Linear(input_dim, output_dim, bias=bias)
    
    def forward(self, x):
        return self.input_layer(x)

In [13]:
class Classifier(nn.Module):

    def predict(self, x):
        yp = self.predict_proba(x).squeeze().detach().numpy()
        return np.where(yp > 0.5, 1, -1)

    def score(self, X_np, Y_np):
        X = self._convert_np_data(X_np)
        return np.where(self.predict(X) == Y_np, 1, 0).sum() / X.shape[0]
    
    def score_on_LF_slices(self, X_np, Y_np, L_np):
        """Return the score for each coverage set of each LF"""
        m = L_np.shape[1]
        Yp = np.tile(self.predict(self._convert_np_data(X_np)), (m, 1))
        Yp = np.abs(L_np).T * Yp
        return 0.5 * (Yp @ Y_np / np.sum(np.abs(L_np), axis=0) + 1)
    
    def train(self, X_np, L_np, batch_size=10, n_epochs=10, lr=0.01, 
        momentum=0.9, print_every=10):
        """Train a standard supervised model using SGD with momentum."""
        X, L = map(self._convert_np_data, [X_np, L_np])

        # Create DataLoader
        train_loader = DataLoader(TensorDataset(X, L), batch_size=batch_size)

        # Set optimizer as SGD w/ momentum
        optimizer = optim.SGD(self.parameters(), lr=lr, momentum=momentum)

        # Train model
        for epoch in range(n_epochs):
            running_loss = 0.0
            for batch, data in enumerate(train_loader):

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward + backward + optimize
                loss = self.loss(*data)
                loss.backward()
                optimizer.step()
                running_loss += loss.detach()
            
            # Print loss every 10 epochs
            if epoch % print_every == 0 or epoch == n_epochs - 1:
                avg_loss = running_loss / batch
                print(f"[Epoch {epoch}] Loss: {avg_loss:0.3f}")
        
        print('Finished Training')
    
    def _convert_np_data(self, X_np, convert_binary=False):
        X_np = np.copy(X_np)

        # Optionally: Convert from {-1,1} --> {0,1}
        if convert_binary:
            X_np[X_np == -1] = 0

        return torch.from_numpy(X_np).float()
    
    def print_params(self):
        for name, param in self.named_parameters():
            print("\n", name, param)

class SliceDPModel(Classifier):
    def __init__(self, input_dim, input_module_class, m, accs, r=1, rw=False, L_weights=[]):
        """Online / joint data programming model
        Assumes balanced, binary class problem with conditionally ind. LFs that
        output binary labels or abstain, \lambda_i \in {-1,0,1}
        Args:
            - input_dim: Input data vector dimension
            - input_module_class: Class that initializes with args (input_dim,
                output_dim)
            - m: Number of label sources
            - accs: The LF accuracies, computed offline
            - r: Intermediate representation dimension
            - rw: Whether to use reweighting of representation for Y_head
            - L_weights: If provided, manually weight L_heads using L_weights
        """
        super().__init__()
        self.k = 1 # Fixed here for binary setting
        self.m = m
        self.r = r
        self.rw = rw
        self.L_weights = torch.from_numpy(np.array(L_weights, dtype=np.float32))

        # Basic binary loss function
        self.loss_fn = nn.BCEWithLogitsLoss(reduce=False)

        # Input module
        self.input_layer = nn.Sequential(
            input_module_class(input_dim, self.r),
            nn.Sigmoid()
        )

        # Attach an [r, m] linear layer to predict the labels of the LFs
        self.L_head = nn.Linear(self.r, self.m, bias=False)

        # Attach the "DP head" which outputs the final prediction
        y_d = 2 * self.r if self.rw else self.r
        self.Y_head = nn.Linear(y_d, self.k, bias=False)

        # Start by getting the DP marginal probability of Y=1, using the
        # provided LF accuracies, accs, and assuming cond. ind., binary LFs
        self.w = torch.from_numpy(np.log(accs / (1-accs))).float()

    def forward_L(self, x):
        """Returns the unnormalized predictions of the L_head layer."""
        return self.L_head(self.input_layer(x))
    
    def forward_Y(self, x):
        """Returns the output of the Y head only, over re-weighted repr."""
        b = x.shape[0]
        xr = self.input_layer(x)

        # Concatenate with the LF attention-weighted representation as well
        if self.rw:

            # A is the [bach_size, 1, m] Tensor representing the relative
            # "confidence" of each LF on each example
            # NOTE: Should we be taking an absolute value / centering somewhere
            # before here to capture the "confidence" vs. prediction...?
            A = F.softmax(self.forward_L(x)).unsqueeze(1)

            # We then project the A weighting onto the respective features of
            # the L_head layer, and add these attention-weighted features to Xr
            if self.L_weights.shape[0] > 0:
                # Manually reweight
                W = self.L_weights.repeat(4,1).transpose(0,1).repeat(b, 1, 1)
            else:
                # Use learned weights from L_head
                W = self.L_head.weight.repeat(b, 1, 1)

            xr = torch.cat([xr, torch.bmm(A, W).squeeze()], 1)

        # Return the list of head outputs + DP head
        return self.Y_head(xr).squeeze()
    
    def loss(self, x, L):
        """Returns the loss consisting of summing the LF + DP head losses
        Args:
            - x: A [batch_size, d] torch Tensor
            - L: A [batch_size, m] torch Tensor with elements in {-1,0,1}
        """

        # Convert label matrix to [0,1] scale, and create abstain mask
        L_01 = (L + 1) / 2
        L_mask = torch.abs(L_01)
        nb = torch.sum(L_mask)
        
        # LF heads loss
        # NOTE: Here, we only add non-abstains to the loss
        # loss_1 = torch.sum(self.loss_fn(self.forward_L(x), L_01) * L_mask) / nb
        # NOTE: Here, we add *all* data points to the loss
        loss_1 = torch.mean(self.loss_fn(self.forward_L(x), L_01))

        # Compute the noise-aware DP loss w/ the reweighted representation
        # Note: Need to convert L from {0,1} --> {-1,1}
        loss_2 = torch.mean(
            self.loss_fn(self.forward_Y(x), F.sigmoid(2 * L @ self.w))
        )

        # Just take the unweighted sum of these for now...
        return (10*loss_1 + loss_2) / 2
    
    def predict_proba(self, x):
        return F.sigmoid(self.forward_Y(x))

In [14]:
N = 10000
covs = np.array([0.9, 0.9, 0.9])
m = np.shape(covs)[0]
model_0_scores = []
model_1_scores = []
model_2_scores = []

overlap_portion = 0.05
accs = np.array([0.75, 0.75, 0.75])
X,Y,L,overlap_idx = generate_data(N,accs,covs,overlap_portion)

#train the models and score the models (I KNOW ITS UGLY)
model_0, model_1, model_2 = train_models(X,L,accs)
for model, m_scores in zip([model_0, model_1, model_2],[model_0_scores, model_1_scores, model_2_scores]):
    a,b,c,d,e,f,g = eval_model(model,X,L,Y,overlap_idx)
    m_scores.append([a,b,c,d,e,f,g])

[Epoch 0] Loss: 4.177
[Epoch 249] Loss: 4.012
Finished Training
[Epoch 0] Loss: 4.245
[Epoch 249] Loss: 4.011
Finished Training
[Epoch 0] Loss: 4.316
[Epoch 249] Loss: 4.061
Finished Training


In [15]:
import pandas as pd

scores = pd.DataFrame([[model_0_scores[0][i] for i in [0,1,2,5,6]], [model_1_scores[0][i] for i in [0,1,2,5,6]], [model_2_scores[0][i] for i in [0,1,2,5,6]]])
scores.columns = (['LF0_slice', 'LF1_slice', 'LF2_slice', 'LF1+2_overlap', 'Overall'])
scores.index = (['Baseline', 'Manual', 'Ours'])
scores

Unnamed: 0,LF0_slice,LF1_slice,LF2_slice,LF1+2_overlap,Overall
Baseline,0.995111,0.985046,0.966578,0.968254,0.9833
Manual,0.995556,0.985262,0.969251,0.968254,0.9842
Ours,0.999556,0.983962,0.976827,0.97619,0.9861
