In [None]:
%matplotlib inline
import sys, os, pdb, warnings
sys.path.insert(0, './core/')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from minilib import *
from utmLib import utils

# Notebook-wide display settings: fixed-point numpy printing with 4 decimals
# and wide lines, and wide pandas tables limited to 15 columns.
np.set_printoptions(suppress=True, linewidth=120, precision=4)
pd.set_option('display.max_columns', 15)
pd.set_option('display.width', 1000)
# Default matplotlib figure size; run_exp overrides it for the completion plots.
plt.rcParams["figure.figsize"] = [10,2]

# Smoke-test switch: when truthy, the data-loading cell truncates every split
# to its first 1000 samples and run_exp searches a smaller hyper-parameter grid.
testing = 0

In [None]:
def visualize_imgs(img_array, rows = 2, cols = 8, selected = None, label_array = None):
    """Show a ``rows x cols`` grid of flattened square grayscale images.

    Parameters
    ----------
    img_array : sequence of array-like
        Flattened images; a 2-D ndarray or a plain list of 1-D arrays. Each
        image must hold a perfect-square number of pixels (e.g. 196 -> 14x14,
        784 -> 28x28).
    rows, cols : int
        Grid layout; ``rows * cols`` images are drawn.
    selected : array-like of int, optional
        Indices into ``img_array`` to draw (only the first ``rows * cols`` are
        used). When omitted, indices are drawn at random without replacement.
    label_array : sequence, optional
        If given, the labels of the drawn images are printed as one list.

    Raises
    ------
    AssertionError
        If fewer than ``rows * cols`` images / indices are available.
    ValueError
        If the images are not square once un-flattened.
    """
    n_show = rows * cols

    # Infer the side length from the pixel count instead of hard-coding 14/28.
    n_pixels = np.asarray(img_array[0]).size
    side = int(round(np.sqrt(n_pixels)))
    if side * side != n_pixels:
        raise ValueError(f'cannot reshape array of size {n_pixels} into a square image')

    # random select some examples for display if not specified
    if selected is None:
        # len() works for both ndarrays and lists (run_exp passes a list).
        assert(len(img_array) >= n_show)
        selected = np.random.choice(len(img_array), n_show, replace = False)
    else:
        # Accept lists / ranges as well as ndarrays.
        selected = np.asarray(selected)
        assert(selected.size >= n_show)

    labels = []
    plt.figure()

    for k, idx in enumerate(selected[:n_show], start = 1):
        plt.subplot(rows, cols, k)
        plt.imshow(np.asarray(img_array[idx]).reshape(side, side), cmap='gray' )
        if label_array is not None:
            labels.append(label_array[idx])
        plt.axis('off')

    if len(labels):
        print(labels)
    plt.show()

In [None]:
def adversial_step(masses, delta = 0.5):
    """Compute adversarial sample weights under an entropy-style budget.

    Weights have the form ``w_i ∝ exp(-masses_i / alpha)`` and are rescaled to
    sum to ``N = masses.size``, so samples with lower mass get larger weight.
    ``alpha`` is found by bisection as (approximately) the smallest temperature
    for which ``sum_i w_i * log(w_i) <= N * delta``.

    Parameters
    ----------
    masses : array-like
        Per-sample (log-)masses.
    delta : float
        Per-sample budget; must be non-negative, because ``sum w log w >= 0``
        for weights that sum to ``N``.

    Returns
    -------
    numpy.ndarray
        Weights with the same shape as ``masses``, summing to ``N``.

    Raises
    ------
    ValueError
        If ``delta`` is negative (the constraint could never be met and the
        search would not terminate).
    """
    masses = np.asarray(masses, dtype = float)
    N = masses.size
    if N == 0:
        return masses.copy()
    if delta < 0:
        raise ValueError('delta must be non-negative')
    M = N * delta

    def get_w(alpha):
        # Shift by the maximum before exponentiating: mathematically a no-op
        # after normalisation, but avoids inf/inf = nan for very negative masses.
        z = -masses / alpha
        z = z - np.max(z)
        w = np.exp(z)
        w = w / np.sum(w) * N
        return w

    def valid(alpha):
        w = get_w(alpha)
        # log(w ** w) == w * log(w); evaluating it in this form avoids the
        # float overflow of w ** w once a weight exceeds ~143, which used to
        # make feasible alphas look infeasible. 0 * log(0) is taken as 0.
        safe_w = np.where(w > 0, w, 1.0)
        return np.sum(w * np.log(safe_w)) <= M

    # use the idea of binary search over the temperature alpha
    l = 0.01
    r = 2 ** 10

    # need to guarantee that r is big enough
    while not valid(r):
        r = r * 2

    # binary search a valid alpha in range [l,r]; r always stays valid
    while r - l > 1e-2:
        m = (l+r) / 2

        if valid(m):
            r = m
        else:
            l = m

    return get_w(r)


In [None]:
def py_learning_step(step, R, data_loader, model, optimizer, scheduler, max_epoch):
    """Run ``max_epoch`` weighted training epochs of ``model`` over ``data_loader``.

    ``R`` holds one weight per training sample. Each batch from ``data_loader``
    is an ``(X, Y, indices)`` triple, and the weights for the batch are looked
    up with ``indices`` and passed to ``model.loss`` as ``R``. The scheduler is
    stepped once per epoch. ``step`` is only used as a label in the progress
    line printed when the module-level ``VERBOSE`` flag is truthy.

    Returns ``None``; the model and optimizer are updated in place.
    """
    model.train()
    # Column vector of float32 weights living on the model's device.
    weights = torch.from_numpy( R.astype('f4').reshape(-1, 1) ).to(model.device)

    for e in range(max_epoch):
        acc_loss, total = 0, 0

        for X, Y, batch_idx in data_loader:
            X = X.to(model.device)
            Y = Y.to(model.device)

            optimizer.zero_grad()
            prediction = model.forward(X)
            batch_loss = model.loss(prediction, Y, R = weights[batch_idx])
            batch_loss.backward()
            optimizer.step()

            # accumulate statistics for the progress report
            acc_loss += batch_loss.item()
            total += X.shape[0]

        # one scheduler tick per epoch
        scheduler.step()
        if VERBOSE:
            print(f'Step {step}, Learning epoch {e}, avg loss: {acc_loss/total}', end = '\r')
    

In [None]:
from models.ours.Gaussians import MultivariateGaussain

def mg_learning_step(R, data, **kwargs):
    """Fit a single multivariate Gaussian to weighted data.

    ``R`` supplies one weight per row of ``data`` (shape ``(N, D)``). The mean
    is ``sum_i R_i x_i / N`` and the covariance is
    ``sum_i R_i (x_i - mu)(x_i - mu)^T / N``; both divide by ``N`` rather than
    by ``sum(R)``. Extra keyword arguments are accepted and ignored so the
    function can be used interchangeably with ``mixmg_learning_step``.

    Returns the ``MultivariateGaussain`` object with ``mu`` and ``S`` set.
    """
    n_samples, n_dims = data.shape
    weights = R.reshape(n_samples, 1)

    gaussian = MultivariateGaussain()
    gaussian.mu = np.mean( weights * data, axis = 0)

    # centre the data, then weight one side of the outer product
    centered = data - gaussian.mu.reshape(1, n_dims)
    gaussian.S = centered.T @ (centered * weights / n_samples)
    return gaussian

def mixmg_learning_step(R, data, n_comp, **kwargs):
    """Advance a weighted Gaussian-mixture fit by a few EM iterations.

    Required keyword arguments:

    * ``step`` / ``total`` -- index of the current outer step and their count.
    * ``adv`` -- whether adversarial re-weighting is active.
    * ``wrapper`` -- one-element list caching the ``MixMGLearner`` between
      calls; the learner is created and fitted on ``data`` when the slot is
      ``None``.

    Number of EM iterations: ``total`` when ``adv`` is false; otherwise 16 on
    the first and last outer step and 4 on every step in between.

    Returns ``get_model()`` of the cached learner.
    """
    step = kwargs['step']
    total = kwargs['total']
    adv = kwargs['adv']
    wrapper = kwargs['wrapper']

    # lazily create the learner the first time this wrapper is used
    if wrapper[0] is None:
        wrapper[0] = MixMGLearner(max_iter = 0, n_components = n_comp, reg_covar = 1e-4).fit(data)
    learner = wrapper[0]

    if adv:
        # spend more EM iterations on the boundary steps
        niter = 16 if (step == 0 or step == total-1) else 4
    else:
        niter = total

    remaining = niter
    while remaining > 0:
        learner._estep()
        learner._mstep(data, R)
        remaining -= 1

    return learner.get_model()

In [None]:
# load all digits datasets
import numpy as np
import loader
from functools import partial
from copy import deepcopy
from collections import defaultdict

# Locations of the three digit datasets, relative to the notebook.
mnist_dir = './data/digits/mnist'
ch74_dir = './data/digits/chars74k'
dida_dir = './data/digits/dida'
# Passed to loader.read_chars74k below (presumably samples per digit class -- confirm in loader).
NUM_PER_CLASS = 50
# Forwarded as `down_sample` to the loader helpers for every dataset.
DOWN_SAMPLE = False

# Fix the global numpy seed so that the shuffles/splits/perturbations below are
# reproducible. Every later np.random call in this cell consumes this stream,
# so reordering statements changes the resulting datasets.
np.random.seed(3)

# load all dataset and convert into certain format
(mnist_train, mnist_train_label), (mnist_test, mnist_test_label) = loader.read_mnist(mnist_dir, \
                                                                     down_sample = DOWN_SAMPLE, with_label = True)

# chars74k: the loader yields (image, label) pairs; each image is converted with
# loader.transform_to_mnist (normalize = False for this dataset).
ch74 = loader.read_chars74k(ch74_dir, NUM_PER_CLASS, with_label = True)
ch74_imgs, ch74_labels = list(zip(*ch74))
ch74 = np.array(list(map(partial(loader.transform_to_mnist, down_sample = DOWN_SAMPLE, normalize = False) ,ch74_imgs)))
ch74_labels = np.array(ch74_labels, dtype = 'i4')

# dida: same pipeline, but with normalize = True and 1000 as the loader's second
# argument (its meaning is defined in loader.read_dida -- TODO confirm).
dida = loader.read_dida(dida_dir, 1000, with_label = True)
dida_imgs, dida_labels = list(zip(*dida))
dida = np.array(list(map(partial(loader.transform_to_mnist, down_sample = DOWN_SAMPLE, normalize = True) ,dida_imgs)))
dida_labels = np.array(dida_labels, dtype = 'i4')

# augment mnist dataset for both black and white background:
# stack the inverted images (1 - x) under the originals and duplicate the labels.
mnist_train = np.vstack([mnist_train, 1-mnist_train])
mnist_test =  np.vstack([mnist_test, 1-mnist_test])
mnist_train_label = np.hstack([mnist_train_label, mnist_train_label])
mnist_test_label = np.hstack([mnist_test_label, mnist_test_label])

# gather all dataset together in one dictionary: all_datasets[name][split] = (data, labels)
all_datasets = defaultdict(dict)
all_datasets['mnist']['train'] = (mnist_train, mnist_train_label)
all_datasets['mnist']['test'] = (mnist_test, mnist_test_label)
all_datasets['dida']['all'] = (dida, dida_labels)
all_datasets['ch74']['all'] = (ch74, ch74_labels)

# Shuffle every dataset together with its labels, then derive extra splits:
#   * 'train' (mnist)      -> 80% 'train' + 20% 'val'
#   * 'all'   (dida, ch74) -> 50% 'train' + 50% 'test' (the 'all' entry is kept)
#   * anything else ('test') is only shuffled.
for ds_name in all_datasets:
    # Snapshot the keys: new splits are inserted into this dict inside the loop.
    current_splits = list(all_datasets[ds_name].keys())
    for split in current_splits:
        data, label = all_datasets[ds_name][split]
        label = label.astype('i8')
        # One shared permutation keeps images and labels aligned.
        inds = np.arange(data.shape[0])
        np.random.shuffle(inds)
        all_datasets[ds_name][split] = (data[inds], label[inds])
        
        if split == 'train':
            # split into train and val
            train_ratio = 0.8
            names = ['train', 'val']
        elif split == 'all':
            # split into train and test for dida and ch74
            train_ratio = 0.5
            names = ['train', 'test']
        else:
            # train_ratio == 0 is the sentinel for "no further split"
            train_ratio = 0
        
        if train_ratio == 0:
            continue
        
        # Re-read the shuffled arrays and cut them at the train/other boundary.
        data, label = all_datasets[ds_name][split]
        size = int(data.shape[0] * train_ratio)
        na,nb = names
        all_datasets[ds_name][na] = (data[:size], label[:size])
        all_datasets[ds_name][nb] = (data[size:], label[size:])


# Build two perturbed copies of the (shuffled) mnist test images. Both reuse
# the test labels, and row i of each copy corresponds to row i of the test set.

# 1) 'adv_gaussain': add Gaussian noise (std 0.5) clipped to [-0.2, 0.2], then
#    clip the result back into the valid pixel range [0, 1].
mnist_adv_gaussain = deepcopy(all_datasets['mnist']['test'][0])
mnist_adv_gaussain += np.clip(np.random.normal(size = mnist_adv_gaussain.shape, scale = 0.5), -0.2, 0.2)
mnist_adv_gaussain = np.clip(mnist_adv_gaussain, 0, 1)
all_datasets['mnist']['adv_gaussain'] = (mnist_adv_gaussain, all_datasets['mnist']['test'][1] )

# 2) 'adv_pixel': in every image, overwrite 10% of the pixels (chosen without
#    replacement) with uniform random values in [0, 1).
n_jitter_pixel = int(mnist_train.shape[1] * 0.1)
mnist_adv_pixel =  deepcopy(all_datasets['mnist']['test'][0])
for item in mnist_adv_pixel:
    # `item` is a row of mnist_adv_pixel, so the assignment edits the copy in place.
    selected = np.random.choice(item.size, size = n_jitter_pixel, replace = False)
    item[selected] = np.random.uniform( size = selected.size )
all_datasets['mnist']['adv_pixel'] = (mnist_adv_pixel, all_datasets['mnist']['test'][1] )

# Visualize the datasets: for every split, check that images and labels line up
# and that all pixel values lie in [0, 1], print basic statistics, and show a
# random sample of images with their labels.
for ds_name in all_datasets:
    for split in all_datasets[ds_name]:
        dataset, labels = all_datasets[ds_name][split]
        assert(dataset.shape[0] == labels.size)
        print('The number of samples in {} is {}'.format(f'{ds_name} {split}' ,dataset.shape[0]))
        assert(np.all(0 <= dataset) and np.all(dataset <= 1))
        print(f"Pixel value range is from {np.min(dataset)} to {np.max(dataset)}.")
        visualize_imgs(dataset, label_array = labels)
        
        # reduce size if testing: keep only the first 1000 samples of the split
        # (only values of existing keys are replaced, so iterating the dict is safe)
        if testing:
            all_datasets[ds_name][split] = (dataset[:1000], labels[:1000])


In [None]:
# Map every split into the embedded space using a pre-trained auto encoder.
# NOTE(review): utils.pkload presumably unpickles the file; only load trusted
# artifacts, since unpickling can execute arbitrary code.
# NOTE: this cell overwrites all_datasets in place, so running it twice would
# apply the transform to already-embedded features. Re-run the loading cell first.
data_transform = utils.pkload('./output/cnn_auto_encoder.pkl')
for name in all_datasets:
    for split in all_datasets[name]:
        X,y = all_datasets[name][split]
        # Replace the raw images by their encoded features; labels are untouched.
        F = data_transform.transform(X)
        all_datasets[name][split] = (F,y)
        

In [None]:
from torch.utils.data import DataLoader, Dataset

class MyData(Dataset):
    """Dataset that splits the columns of a 2-D array into inputs and targets.

    ``_x`` and ``_y`` are the column selections for ``X`` and ``Y``. Each item
    is the triple ``(X[ind], Y[ind], ind)``; the index is returned so callers
    can look up per-sample quantities such as training weights.
    """

    def __init__(self, data, _x, _y):
        self.total = data.shape[0]
        self.X, self.Y = data[:, _x], data[:, _y]

    def __len__(self):
        """Number of rows in the underlying array."""
        return self.total

    def __getitem__(self, ind):
        """Return ``(inputs, targets, index)`` for row ``ind``."""
        inputs, targets = self.X[ind], self.Y[ind]
        return inputs, targets, ind

def five_number_statistic(logmass):
    """Summarise a sample as ``[p25, median, p75, mean, std]``.

    Despite the name this is not the classic min/max five-number summary: it
    returns the three quartiles followed by the mean and the (population)
    standard deviation, each rounded to 4 decimals.
    """
    quartiles = np.percentile(logmass, [25,50,75])
    summary = (*quartiles, np.mean(logmass), np.std(logmass))
    return list(np.round(summary, 4))

In [None]:
# evaluation function for image classification
def evaluate(model, img_arr, device = None):
    """Classify flattened 14x14 images and return the predicted class indices.

    Parameters
    ----------
    model : callable
        Maps a ``(batch, 1, 14, 14)`` tensor to per-class scores of shape
        ``(batch, n_classes)``.
    img_arr : numpy.ndarray
        Images, reshaped internally to ``(-1, 1, 14, 14)``. The dtype is passed
        through unchanged -- NOTE(review): the classifier presumably expects
        float32; confirm against the caller.
    device : str or torch.device, optional
        Device on which to run. When omitted, the device of the model's first
        parameter is used, falling back to CPU for parameter-less callables.
        (``run_exp`` calls this function without a device.)

    Returns
    -------
    numpy.ndarray
        1-D array with the arg-max class of every image.
    """
    if device is None:
        try:
            device = next(model.parameters()).device
        except (AttributeError, StopIteration, TypeError):
            device = torch.device('cpu')

    imgs = img_arr.reshape(-1, 1, 14, 14)
    if imgs.shape[0] == 0:
        # torch.cat raises on an empty list of batches; return an empty result.
        return np.empty(0, dtype = np.int64)

    test = torch.from_numpy(imgs).to(device)
    test_loader = torch.utils.data.DataLoader(test, batch_size=1000, shuffle=False, drop_last=False)

    ret = []
    with torch.no_grad():
        for X in test_loader:
            outputs = model(X)
            # index of the highest score along the class dimension
            _, predicted = torch.max(outputs, 1)
            ret.append(predicted)
    ret = torch.cat(ret, dim = 0)
    ret = ret.cpu().numpy()
    return ret


In [None]:
def train_model(train_data, val_data, train_conf, model_conf, lr, wd, device):
    """Train one conditional model with optional adversarial re-weighting.

    The procedure alternates ``train_conf.n_step`` times between

    1. a learning step -- fit p(x) with the module-level ``px_learning_step``
       and train the network for p(y|x) for ``n_epoch`` epochs, both weighted
       by the current sample weights ``R``; and
    2. an adversarial step -- recompute ``R`` from the per-sample log-masses
       with ``adversial_step``.

    Parameters
    ----------
    train_data, val_data : numpy.ndarray
        Training / validation matrices; columns are indexed by
        ``model_conf.xid`` and ``model_conf.yid``.
    train_conf : object
        Needs ``batch_size``, ``init_epoch``, ``n_epoch``, ``n_step``,
        ``adversial`` and ``delta``.
    model_conf : object
        Needs ``xid``, ``yid``, ``num_parents`` and ``structure``. It is not
        modified: a shallow copy receives the ``device`` attribute.
    lr, wd : float
        Peak learning rate of the one-cycle schedule and Adam weight decay.
    device : str
        Torch device used for training.

    Returns
    -------
    (cnet, score)
        The trained ``ContCNet`` (network moved back to CPU) and its mean
        log-mass on ``val_data``.

    Raises
    ------
    ValueError
        If ``train_conf.n_step < 1`` (no model would be built).
    """
    # run_exp hands the same model_conf object to several worker threads, each
    # with a different device; writing `device` onto the shared object would
    # race, so work on a private shallow copy instead.
    from copy import copy as _shallow_copy
    model_conf = _shallow_copy(model_conf)
    model_conf.device = device
    xid = model_conf.xid
    yid = model_conf.yid
    num_parents = model_conf.num_parents
    structure = model_conf.structure
    
    batch_size = train_conf.batch_size
    init_epoch = train_conf.init_epoch
    n_epoch = train_conf.n_epoch
    n_step = train_conf.n_step
    adv = train_conf.adversial
    delta = train_conf.delta

    if n_step < 1:
        # Previously this surfaced as an UnboundLocalError on `cnet` after the
        # whole initial training had already run.
        raise ValueError('train_conf.n_step must be at least 1')
    
    model = PGNN( len(xid) , num_parents, model_conf)
    model.moveto(device)
    model.loss = partial(model.weighted_loss_func, G = structure)
    
    # init
    px_container = [None]  # for mixmg px part use only in concurrent environment
    R = np.ones( shape = (train_data.shape[0], ) )
    # The scheduler is stepped once per epoch: init_epoch times during the
    # initial training plus n_epoch times in each of the n_step outer steps.
    # (The former `init_epoch + n_epoch + n_step - 1` is only equal to this when
    # n_epoch == 1 or n_step == 1; otherwise OneCycleLR ran out of steps.)
    total_epochs = init_epoch + n_epoch * n_step
    
    optimizer = torch.optim.Adam(model.parameters(), weight_decay = wd )
    # `verbose` is left at its default (the argument is deprecated in recent PyTorch).
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr, anneal_strategy='cos', pct_start=0.25,
                            epochs= total_epochs, steps_per_epoch = 1)

    py = NeuralNetCondMG()
    py.nn = model
    py.gbn = GBN(structure).fit(train_data[:, yid], var_thresh = 1e-2)

    # init training of NN
    data_loader = DataLoader( MyData(train_data, xid, yid) , batch_size = batch_size)
    py_learning_step('Init', R, data_loader, model, optimizer, scheduler, max_epoch = init_epoch)

    # beginning of (adversarially robust) training
    for step in range(n_step):
        # conduct learning step first
        model.train()
        mg = px_learning_step(R, train_data[:, xid], step = step, total = n_step,
                              adv = adv, wrapper = px_container)
        py_learning_step(step, R, data_loader, model, optimizer, scheduler, max_epoch = n_epoch)
        model.eval()

        # evaluate the mass of each data
        cnet = ContCNet(mg, py, xid, yid)
        masses = cnet.mass(train_data, logmode = True)

        # conduct adversarial step
        R = adversial_step(masses, delta = delta)

        # verify that R satisfies the constraints
        sat1 = np.sum(R) <= ( train_data.shape[0] + EPS )
        # sum(log(R ** R)) == sum(R * log(R)); this form does not overflow for
        # large weights (R ** R becomes inf beyond ~143). 0 * log(0) counts as 0.
        neg_entropy = np.sum( R * np.log( np.where(R > 0, R, 1.0) ) )
        sat2 = neg_entropy <= ( train_data.shape[0] * delta + EPS )
        
        if not (sat1 and sat2):
            warnings.warn('Constrains not satisfied during adversial step.')
            break   

    model.eval()
    model.moveto('cpu')
    score = cnet.mass(val_data, logmode = True).mean()
    
    return cnet, score

In [None]:
from joblib import Parallel, delayed
from itertools import product
from copy import deepcopy

def run_exp(train_data, val_data, train_conf, nn_conf,
            completion = False, param = None, 
            n_test = 200, n_rows = 2, n_cols = 10):
    """Train a model (optionally with a small grid search) and evaluate it.

    Parameters
    ----------
    train_data, val_data : numpy.ndarray
        Training and validation matrices.
    train_conf : object
        Training configuration forwarded to ``train_model``.
    nn_conf : object
        Network configuration; deep-copied before the learned graph structure
        is attached, so the caller's object is left untouched.
    completion : bool
        ``False`` -> print log-likelihood statistics on every ``test`` / ``adv``
        split of the global ``all_datasets``. ``True`` -> run the image
        completion test instead, which needs the global ``img_classifier``.
    param : tuple, optional
        ``(learning_rate, weight_decay)``. When omitted, a grid is searched with
        two worker threads on ``cuda:0`` / ``cuda:1`` and the combination with
        the best validation score is kept.
    n_test : int
        Number of images per split used in the completion test.
    n_rows, n_cols : int
        Layout of the grid of completed images that is displayed.

    Returns
    -------
    None
    """
    xid = nn_conf.xid
    yid = nn_conf.yid
    model_conf = deepcopy(nn_conf)
    structure = create_graph(train_data[:, yid], 
                    max_parents = max(MIN_PARENT, int(PARENT_RATIO * train_data.shape[1])), 
                    corr_thresh = CORR_THRESH)

    model_conf.num_parents = [len(structure.V[i].parents) for i in range(structure.N)]
    model_conf.structure = structure

    # do hyper selection if param is not specified
    if param is None:
        if testing:
            learning_rates = [5e-3, 1e-2]
            weight_decays = [1e-4]
        else:
            learning_rates = [1e-2, 3e-3, 1e-3]
            weight_decays = [1e-4, 1e-5, 0]

        all_params = list(product(learning_rates, weight_decays))

        # alternate the two GPUs between consecutive parameter combinations
        results = Parallel(n_jobs = 2, prefer = 'threads')(
            delayed(train_model)(train_data, val_data, train_conf, model_conf,
                    *comb, 'cuda:{}'.format(i%2)) 
            for i, comb in enumerate(all_params)
        )

        models, scores = list(zip(*results))
        ind = np.argmax(scores)
        cnet = models[ind]
        print('Best hyper parameter is: {}'.format(all_params[ind]))
    else:
        cnet = train_model(train_data, val_data, train_conf, model_conf, *param, 'cuda:0')[0]

    # conduct LL test if not completion task 
    if not completion:
        # evaluate the loglikelihood 
        for ds_name in all_datasets:
            for split in all_datasets[ds_name]:
                if not ('adv' in split or 'test' in split):
                    continue
                dataset, labels = all_datasets[ds_name][split]
                
                cur_mass = cnet.mass(dataset, logmode = True)
                print('-'*100)
                print(f'LL on {ds_name} {split}:')
                print('p25:{} median:{} p75:{} Mean:{} Std:{}'.format(*five_number_statistic(cur_mass)))
                print('-'*100)
        return

    
    # conduct image completion test, assume it is always given 
    # (fixed: the parameters are n_cols / n_rows, not ncols / nrows)
    plt.rcParams["figure.figsize"] = [n_cols, n_rows]
    mnist_test = all_datasets['mnist']['test'][0]

    # evaluate() requires a device; use the one the classifier lives on.
    # NOTE(review): assumes img_classifier is a torch module -- confirm.
    try:
        clf_device = next(img_classifier.parameters()).device
    except (AttributeError, StopIteration):
        clf_device = 'cpu'
    
    for ds_name in all_datasets:
        for split in all_datasets[ds_name]:
            if not ('adv' in split or 'test' in split):
                continue

            dataset, labels = all_datasets[ds_name][split]
            name = ds_name + ' ' + split

            print('-'*100)
            print(f'Completion on {name}')
            # re-seed so every split uses the same selection procedure
            np.random.seed(7)
            selected = np.random.choice(dataset.shape[0], n_test, replace = False)
            preds = []
            rmses = []
            # dataset[selected] is a copy, so masking `item` leaves the dataset intact
            for i, item in enumerate(dataset[selected]):
                if 'adv_' in name:
                    # ground truth of a perturbed image is the clean mnist test image
                    gt = mnist_test[selected[i]][cnet.yids]
                else:
                    gt = item[cnet.yids]
                item[cnet.yids] = np.nan
                img = cnet.map_via_cutset_sampling(item, cnet.yids)
                # use gradient ascent to optimize the assignment for restricted domain 
                img = cnet.optimize_assignment(img, cnet.yids)
                img = np.clip(img, 0, 1)
                preds.append( img )
                diff = img[cnet.yids] - gt
                rmses.append( np.sqrt( np.mean( np.square(diff) )) )

            print('RMSE - p25:{} median:{} p75:{} Mean:{} Std:{}'.format(*five_number_statistic(rmses)))
            pred_labels = evaluate(img_classifier, np.array(preds), clf_device)
            acc = np.sum(pred_labels == labels[selected]) / pred_labels.size
            print('On {} {} image complection, accuracy is {}'.format(ds_name, split, acc))

            # (fixed: n_rows / n_cols / n_test instead of undefined nrows / ncols / N_TEST)
            visualize_imgs(preds, n_rows, n_cols, np.arange(n_test))
            print('-' * 100)


# Experiment: Log-likelihoods

In [None]:
# define some common parameters across experiments
import torch
from models.ours.ContCNet import ContCNet
from utmLib.ml.GBN import GBN
from models.ours.NNCondMG import MyObject, create_graph, PGNN, NeuralNetCondMG
from utmLib import utils

############################################################
# Global variables read by the functions defined in earlier cells.
# run_exp: max_parents = max(MIN_PARENT, int(PARENT_RATIO * n_columns)),
# and CORR_THRESH is passed to create_graph as corr_thresh.
PARENT_RATIO = 0.3
CORR_THRESH = 0.09
MIN_PARENT = 3
# Tolerance used by train_model when checking the constraints on the weights R.
EPS = 1e-2
# When truthy, py_learning_step prints a progress line after every epoch.
VERBOSE = True
# Learner for p(x) used by train_model: a single Gaussian (commented out) or a
# 3-component Gaussian mixture.
# px_learning_step = mg_learning_step
px_learning_step = partial(mixmg_learning_step, n_comp = 3)
############################################################
# Classifier needed by run_exp(..., completion = True); it is disabled here, so
# the completion test would fail on the undefined name until these are restored.
# img_classifier = utils.pkload('output/lenet5.pkl')
# img_classifier.eval()


In [None]:
# Features of the mnist train / validation splits (labels are not used here).
train_data = all_datasets['mnist']['train'][0]
val_data = all_datasets['mnist']['val'][0]

# find cutset variables using heuristical algorithm
# xid: columns returned by variance_methods (the conditioning / "x" variables);
# yid: all remaining columns.
COND_RATIO = 0.3
MIN_COND_NUM = 5
xid = list( variance_methods( train_data , (COND_RATIO, MIN_COND_NUM) ) )
yid = list( np.setdiff1d( np.arange(train_data.shape[1]), xid) ) 

############################################################
# Network configuration handed to PGNN through run_exp / train_model.
pgnn_conf = MyObject()
pgnn_conf.depth = 3
pgnn_conf.drop_out = 0.0
pgnn_conf.compress_rate = 2
pgnn_conf.prec_thresh = (1e-2, 1e+2)
pgnn_conf.feature_size = len(yid)
pgnn_conf.max_header_size = len(yid)
pgnn_conf.xid = xid
pgnn_conf.yid = yid

# Training schedule: 50 initial epochs, then 100 adversarial steps of one epoch
# each, with budget delta = 1.0 for the adversarial re-weighting.
train_conf = MyObject()
train_conf.init_epoch = 50
train_conf.n_step = 100
train_conf.n_epoch = 1
train_conf.batch_size = 512
train_conf.adversial = True
train_conf.delta = 1.0

# run experiment (hyper-parameter search + log-likelihood report)
run_exp(train_data, val_data, train_conf, pgnn_conf)

In [None]:
# Non-adversarial baseline: same total number of epochs, but run as a single
# outer step, so the sample weights are never used for re-training.
# NOTE: this mutates the shared train_conf, and the swap below is not
# idempotent -- running this cell a second time swaps n_step / n_epoch back.
train_conf.adversial = False
train_conf.n_step, train_conf.n_epoch = train_conf.n_epoch, train_conf.n_step
run_exp(train_data, val_data, train_conf, pgnn_conf)
