In [None]:
# Use PCI bus order for CUDA device numbering, then expose no CUDA device to this
# process (CUDA_VISIBLE_DEVICES is set to an empty list before anything else is imported).
import os 
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]=""
# NOTE(review): every other name used below (np, pd, tf, plt, h5py, tda, Pipeline, the
# sklearn / xgboost estimators, diag_to_dict, RMGD, ...) is presumably provided by this
# wildcard import - confirm against utils.py.
from utils import *

# 0. Read data


Persistence diagrams are assumed to be in .hdf5 file format with filtrations given as keys (i.e. "alpha", "rips"...) and homological dimensions given as sub-keys (i.e. "0", "1"...). These keys and sub-keys lead to a dictionary, whose keys correspond to data indexes starting at 0. For instance, the 10th 0-dimensional persistence diagram computed with the rips filtration, which is an array of size (num_pts, 2), is accessed with data["rips"]["0"]["10"].

Features are given in .csv file format, with the first columns giving the targets.

Provide names of different tasks (as in the corresponding .csv file).

In [None]:
# One (target column name in the .csv file, task type) pair per task.
tasks = [
    ("topic", "classification"),
]

Read train data.

In [None]:
# Location of the training table (targets + features), loaded as a data frame.
path_to_train_feat = "../datasets/reddit/train.csv"
train_feat = pd.read_csv(filepath_or_buffer=path_to_train_feat)

In [None]:
# Location of the training persistence diagrams; the file is opened read-only and
# handed to diag_to_dict for conversion.
path_to_train_diag = "../datasets/reddit/train_diag.hdf5"
train_diag = diag_to_dict(h5py.File(path_to_train_diag, mode="r"))

Specify whether the test set is 1. a fraction of the train set (True) / 2. a separate set with possibly missing labels (False).

In [None]:
# True: the test set is carved out of the training data (see the split below).
# False: a separate test set is read from its own files.
use_fraction_of_train_for_test = True

If test set is fraction of training set, specify ratio.

In [None]:
# Fraction of the training rows held out as test set (only used when
# use_fraction_of_train_for_test is True).
test_size = 0.2

if use_fraction_of_train_for_test == True:
    train_num_pts = train_feat.shape[0]
    # Shuffle all row indices, then cut the permutation in two: the first `limit`
    # indices form the test set, the remaining ones the train set.
    perm = np.random.permutation(train_num_pts)
    # Built-in int() replaces the np.int alias, which was deprecated in NumPy 1.20
    # and removed in NumPy 1.24 (where it raises AttributeError).
    limit = int(test_size * train_num_pts)
    test_sub, train_sub = perm[:limit], perm[limit:] #np.sort(perm[:limit]), np.sort(perm[limit:])
    # From here on both counts describe the split, not the full file.
    train_num_pts, test_num_pts = len(train_sub), len(test_sub)

Else, read test set.

In [None]:
# Separate test set: load its table and record the sizes of both sets.
if not use_fraction_of_train_for_test:
    path_to_test_feat = "../../../../Documents/datasets/bridge/7/train.csv"
    test_feat = pd.read_csv(filepath_or_buffer=path_to_test_feat)
    train_num_pts = train_feat.shape[0]
    test_num_pts = test_feat.shape[0]

In [None]:
# Separate test set: load its persistence diagrams and count the diagrams of both sets.
if use_fraction_of_train_for_test == False:
    path_to_test_diag = "../../../../Documents/datasets/bridge/7/train_diag.hdf5"
    test_diag = diag_to_dict(h5py.File(path_to_test_diag, "r"))
    # Materialise the keys as a list: in Python 3 the view returned by .keys() cannot be
    # indexed, so the original `train_diag.keys()[0]` raised a TypeError.
    filt = list(train_diag.keys())
    # The number of diagrams is read off the first diagram type
    # (assumes every type holds the same number of diagrams - TODO confirm).
    train_num_pts, test_num_pts = len(train_diag[filt[0]]), len(test_diag[filt[0]])

Convert data frame into numpy array.

In [None]:
# Drop the leading 1 + len(tasks) columns of the table; what remains is the feature matrix.
train_F = np.array(train_feat)[:, (1 + len(tasks)):]
num_features = train_F.shape[1]

# Same column selection for the separate test table, when there is one.
if not use_fraction_of_train_for_test:
    test_F = np.array(test_feat)[:, (1 + len(tasks)):]

In [None]:
# Accumulators filled by the model sections below: per-model train predictions,
# per-model test predictions, and the fitted models themselves.
list_train_pred = []
list_test_pred = []
list_model = []

Visualization.

In [None]:
# Keep the finite points of the "0_degree" training diagrams.
diag_example = tda.DiagramSelector(limit = np.inf, point_type = "finite").fit_transform(train_diag["0_degree"])

# Fit a min-max scaler on those diagrams only to read off the coordinate ranges.
pre = tda.DiagramPreprocessor(use=True, scaler=MinMaxScaler()).fit(diag_example)
[mx,my] = pre.scaler.data_min_
[Mx,My] = pre.scaler.data_max_
print("Minimum x = " + str(mx) + ", Maximum x = " + str(Mx) + ", Minimum y = " + str(my) + ", Maximum y = " + str(My))
#[mx],[Mx] = pre.scaler.data_min_, pre.scaler.data_max_
#print("Minimum x = " + str(mx) + ", Maximum x = " + str(Mx))

# Smallest and largest number of points found in a single diagram
# (0 and 1e10 are the starting values of the running max / min).
cardinals = [diag_example[i].shape[0] for i in range(len(diag_example))]
max_card = max([0] + cardinals)
min_card = min([1e10] + cardinals)
print("Min cardinal = " + str(min_card) + ", Max cardinal = " + str(max_card))

In [None]:
# Display pandas' summary statistics of the training table.
train_feat.describe()

# 1. Model with Features

Reset vectors and size of train and test sets.

In [None]:
# Keep one point out of every `train_ratio` (resp. `test_ratio`); 1 keeps everything.
train_ratio = 1
test_ratio = 1
print("Num train points = " + str(len(range(0, train_num_pts, train_ratio))))
print("Num test points  = " + str(len(range(0, test_num_pts, test_ratio))))

Read labels.

In [None]:
# Task solved in this section: target column name and task type.
task_name, task_type = tasks[0]

# Raw targets, restricted to the (sub-sampled) train / test rows.
train_full_labels = train_feat[task_name]
if use_fraction_of_train_for_test == True:
    train_labels = train_full_labels[train_sub][::train_ratio]
    test_labels =  train_full_labels[test_sub][::test_ratio]
else:
    train_labels = train_full_labels[::train_ratio]
    
if task_type == "classification":
    # A single encoder is fitted on the train labels and re-used for the test labels, so a
    # class gets the same integer code in both sets (the neural-network section of this
    # notebook encodes its labels the same way). Fitting a second encoder on the test
    # labels, as was done before, gives different codes whenever the two sets do not
    # contain exactly the same classes.
    # A test class that never occurs in the train labels now raises a ValueError.
    label_encoder = LabelEncoder()
    train_labels = np.array(label_encoder.fit_transform(train_labels))
    if use_fraction_of_train_for_test == True:            
        test_labels = np.array(label_encoder.transform(test_labels))
    
print("Task: " + task_name + " " + task_type + "\n")

In [None]:
# Blocks of columns (features, diagram vectorizations) to be concatenated later.
train_V = []
test_V = []

Add features.

In [None]:
# The feature matrix is the first block of columns of the train / test vectors.
if not use_fraction_of_train_for_test:
    # Separate test set: rows come from the two feature matrices.
    train_V.append(train_F[::train_ratio, :])
    test_V.append(test_F[::test_ratio, :])
else:
    # Test set carved out of the train set: both row sets come from train_F.
    train_V.append(train_F[train_sub, :][::train_ratio, :])
    test_V.append(train_F[test_sub, :][::test_ratio, :])

Compute vectorizations.

In [None]:
# Three-step pipeline: select finite points, apply the birth-persistence transform,
# vectorize. The "Vectorizer" step is a placeholder replaced through `param` below.
vectorize_diagram = Pipeline([
    ("Separator",  tda.DiagramSelector(limit=np.inf, point_type="finite")),
    ("Rotator",    tda.DiagramPreprocessor(scaler=tda.BirthPersistenceTransform())),
    ("Vectorizer", tda.Landscape()),
])

# Actual configuration: a 30x30 persistence image whose weight function returns the
# second coordinate of a point.
param = dict(
    Rotator__use=True,
    Vectorizer=tda.PersistenceImage(),
    Vectorizer__resolution=[30,30],
    Vectorizer__bandwidth=0.01,
    Vectorizer__weight=lambda x: x[1],
    #Vectorizer__im_range=[0.0, 0.13, 0.0, 1.0]
)

vectorize_diagram = vectorize_diagram.set_params(**param)

In [None]:
# Diagram types to vectorize; each one adds a block of columns to train_V / test_V.
diagram_types  = ["0_handmade"]

for dt in diagram_types:

    # Vectorize every training diagram of this type and save the result under the type name.
    train_full_D_i = vectorize_diagram.fit_transform(train_diag[dt])
    np.save(dt, train_full_D_i)

    if not use_fraction_of_train_for_test:
        # Separate test set: its diagrams go through the already fitted pipeline.
        train_D_i = train_full_D_i[::train_ratio, :]
        test_D_i  = vectorize_diagram.transform(test_diag[dt])[::test_ratio, :]
    else:
        # Both sets are rows of the same vectorization.
        train_D_i = train_full_D_i[train_sub, :][::train_ratio, :]
        test_D_i  = train_full_D_i[test_sub, :][::test_ratio, :]

    train_V.append(train_D_i)
    test_V.append(test_D_i)

Read vectorizations.

In [None]:
# Precomputed vectorizations stored as .npy files; "test" is only read when the test set
# is a separate one, in which case it needs one entry per "train" entry.
paths = {"train": [#"../../../../Documents/datasets/bridge/s3/train-0_alpha-PI-20-20-linear-5.npy",
                   #"../../../../Documents/datasets/bridge/s3/train-1_alpha-PI-20-20-linear-5.npy",
                   "../../../../Documents/datasets/bridge/s10/train-1_alpha-PI-20-20-linear-5.npy"], 
         "test":  []}

for i, train_path in enumerate(paths["train"]):
    train_full_D_i = np.load(train_path)

    if not use_fraction_of_train_for_test:
        train_D_i = train_full_D_i[::train_ratio, :]
        test_D_i  = np.load(paths["test"][i])[::test_ratio, :]
    else:
        train_D_i = train_full_D_i[train_sub, :][::train_ratio, :]
        test_D_i  = train_full_D_i[test_sub, :][::test_ratio, :]

    train_V.append(train_D_i)
    test_V.append(test_D_i)

Concatenate.

In [None]:
# Glue the column blocks side by side: one row per data point.
train_V, test_V = np.concatenate(train_V, axis=1), np.concatenate(test_V, axis=1)

Train and test.

In [None]:
# Candidate predictors (sklearn, xgboost), all built with their default arguments
if task_type == "classification":
    list_of_feature_models = [
        ("SVM", SVC()),
        ("RF", RandomForestClassifier()),
        ("AB", AdaBoostClassifier()),
        ("XGB", XGBClassifier()),
    ]
elif task_type == "regression":
    list_of_feature_models = [
        ("RF", RandomForestRegressor()),
        ("AB", AdaBoostRegressor()),
        ("XGB", XGBRegressor()),
    ]

#pre = hp.choice("pre", [[], [pca("pre.pca")]])
#if task_type == "classification" or task_type == "metric":
#    clf = hp.choice("clf", [svc("clf.svc"), xgboost_classification("clf.xgb")])
#    model = HyperoptEstimator(classifier = clf, preprocessing = pre)
#    model = HyperoptEstimator(classifier = any_classifier("clf"), preprocessing = any_preprocessing("pre"))
#if task_type == "regression":
#    reg = hp.choice("reg", [svr("reg.svr"), xgboost_regression("reg.xgb")])
#    model = HyperoptEstimator(regressor = any_classifier("reg"), preprocessing = any_preprocessing("pre"))
#    model = HyperoptEstimator(regressor = reg, preprocessing = pre)

for model_name, model in list_of_feature_models:

    # Fit on the train vectors and remember the fitted estimator
    model.fit(train_V, train_labels)
    list_model.append(model)

    # Predict on both sets; predictions are stored as column vectors for later aggregation
    train_pred = model.predict(train_V)
    test_pred = model.predict(test_V)
    list_train_pred.append(train_pred[:, None])
    list_test_pred.append(test_pred[:, None])

    # Predictions are written to disk only when the test set is a separate one
    if not use_fraction_of_train_for_test:
        np.savetxt(task_name + "-" + model_name, test_pred)

    # Scores on the train set
    print("Train score of " + model_name + " = " + str(model.score(train_V, train_labels)))
    if task_type == "regression":
        print("Train MSE of " + model_name +" = " + str(mean_squared_error(train_pred, train_labels)))

    # Scores on the test set (its labels are only available when it comes from the train set)
    if use_fraction_of_train_for_test:

        print("Test score of " + model_name + " = " + str(model.score(test_V, test_labels)))
        if task_type == "regression":
            print("Test MSE of " + model_name + " = " + str(mean_squared_error(test_pred, test_labels)))
            plot_regression_result(test_labels, test_pred)
        elif task_type == "classification":
            plot_confusion_matrix(confusion_matrix(test_labels, test_pred))

    print("\n\n")

Visualization.

In [None]:
# Scatter plot of one training diagram, followed by the last 900 entries of its
# vector shown as a 30x30 image flipped along axis 0.
idx = 100
example_diag = train_diag["0_handmade"][train_sub[idx]]
plt.scatter(example_diag[:, 0], example_diag[:, 1])
plt.show()
example_image = np.reshape(train_V[idx, -900:], [30, 30])
plt.imshow(np.flip(example_image, 0).astype(float))

In [None]:
# Position of the random forest in list_model (models are appended in the order in
# which they were trained above).
index_of_RF = 1
# list_model stores the fitted estimators themselves, not (name, estimator) pairs.
# The former `list_model[index_of_RF][1]` therefore indexed *into* the forest and
# returned the importances of one single tree instead of those of the whole model.
fi = list_model[index_of_RF].feature_importances_
# Importances of the last 900 columns shown as a 30x30 image flipped along axis 0.
plt.imshow(np.flip(np.reshape(fi[-900:], [30,30]), 0))

# 2. Model with Kernels

Reset kernel matrices and size of train and test sets.

In [None]:
# Keep one point out of every `train_ratio` (resp. `test_ratio`); 1 keeps everything.
train_ratio = 1
test_ratio = 1
print("Num train points = " + str(len(range(0, train_num_pts, train_ratio))))
print("Num test  points = " + str(len(range(0, test_num_pts, test_ratio))))

Read labels.

In [None]:
# Task solved in this section: target column name and task type.
task_name, task_type = tasks[0]

# Raw targets, restricted to the (sub-sampled) train / test rows.
train_full_labels = train_feat[task_name]
if use_fraction_of_train_for_test == True:
    train_labels = train_full_labels[train_sub][::train_ratio]
    test_labels =  train_full_labels[test_sub][::test_ratio]
else:
    train_labels = train_full_labels[::train_ratio]
    
if task_type == "classification":
    # A single encoder is fitted on the train labels and re-used for the test labels, so a
    # class gets the same integer code in both sets (the neural-network section of this
    # notebook encodes its labels the same way). Fitting a second encoder on the test
    # labels, as was done before, gives different codes whenever the two sets do not
    # contain exactly the same classes.
    # A test class that never occurs in the train labels now raises a ValueError.
    label_encoder = LabelEncoder()
    train_labels = np.array(label_encoder.fit_transform(train_labels))
    if use_fraction_of_train_for_test == True:            
        test_labels = np.array(label_encoder.transform(test_labels))
    
print("Task: " + task_name + " " + task_type + "\n")

In [None]:
# Kernel matrices (one per data source) to be stacked later.
train_M = []
test_M = []

Add kernel on features.

In [None]:
# RBF kernel on the tabular features, skipped when there is no feature column.
if train_F.shape[1] > 0:
    if not use_fraction_of_train_for_test:
        # Separate test set: test rows against train rows.
        train_M.append(pairwise_kernels(X = train_F[::train_ratio], metric = "rbf")[np.newaxis])
        test_M.append(pairwise_kernels(X = test_F[::test_ratio],
                                       Y = train_F[::train_ratio], metric = "rbf")[np.newaxis])
    else:
        # Both sets are rows of train_F.
        train_M.append(pairwise_kernels(X = train_F[train_sub, :][::train_ratio], metric = "rbf")[np.newaxis])
        test_M.append(pairwise_kernels(X = train_F[test_sub, :][::test_ratio],
                                       Y = train_F[train_sub, :][::train_ratio], metric = "rbf")[np.newaxis])

Computation of kernels.

In [None]:
# Two-step pipeline: select finite points, then compute a kernel matrix between diagrams.
kernelize_diagram = Pipeline([
    ("Separator",  tda.DiagramSelector(limit=np.inf, point_type="finite")),
    ("Kernelizer", tda.SlicedWasserstein()),
])

# Kernel and its parameters, applied through set_params.
param_kernel = dict(
    Kernelizer=tda.SlicedWasserstein(),
    Kernelizer__num_directions=10,
    Kernelizer__bandwidth=10.0,
)

kernelize_diagram = kernelize_diagram.set_params(**param_kernel)

In [None]:
# Diagram types to kernelize; each one adds a kernel matrix to train_M / test_M.
diagram_types  = ["1_alpha"]

for dt in diagram_types:

    # Kernel matrix between all training diagrams of this type, saved under the type name.
    train_full_K_i = kernelize_diagram.fit_transform(train_diag[dt])
    np.save(dt, train_full_K_i)

    if not use_fraction_of_train_for_test:
        # Separate test set: kernel values come from the fitted pipeline.
        train_K_i = train_full_K_i[::train_ratio, ::train_ratio]
        test_K_i  = kernelize_diagram.transform(test_diag[dt])[::test_ratio, ::train_ratio]
    else:
        # Rows: train (resp. test) points; columns: always train points.
        train_K_i = train_full_K_i[train_sub, :][:, train_sub][::train_ratio, ::train_ratio]
        test_K_i  = train_full_K_i[test_sub, :][:, train_sub][::test_ratio, ::train_ratio]

    train_M.append(train_K_i[np.newaxis])
    test_M.append(test_K_i[np.newaxis])

Reading of kernels.

In [None]:
# Precomputed kernel matrices stored as .npy files; "test" is only read when the test
# set is a separate one, in which case it needs one entry per "train" entry.
paths = {"train": ["../../../../Documents/datasets/bridge/s3/train-1_alpha-SW-10-20.npy"], 
         "test":  []}

for i, train_path in enumerate(paths["train"]):
    train_full_K_i = np.load(train_path)

    if not use_fraction_of_train_for_test:
        train_K_i = train_full_K_i[::train_ratio, ::train_ratio]
        test_K_i  = np.load(paths["test"][i])[::test_ratio, ::train_ratio]
    else:
        # Rows: train (resp. test) points; columns: always train points.
        train_K_i = train_full_K_i[train_sub, :][:, train_sub][::train_ratio, ::train_ratio]
        test_K_i  = train_full_K_i[test_sub, :][:, train_sub][::test_ratio, ::train_ratio]

    train_M.append(train_K_i[np.newaxis])
    test_M.append(test_K_i[np.newaxis])

Concatenate.

In [None]:
# Stack the kernel matrices along a new leading axis: one slice per data source.
train_M, test_M = np.concatenate(train_M, axis=0), np.concatenate(test_M, axis=0)

Learning and applying best kernel combination.

In [None]:
mkl = RMGD()

if train_M.shape[0] != 1:
    # Several kernels: learn their combination with the multiple kernel learner.
    if task_type == "regression":
        # Targets are binned with the edges of their histogram before being passed on.
        hist, bin_edges    = np.histogram(train_labels)
        train_targets_mkl  = np.digitize(train_labels, bin_edges)
    elif task_type == "classification":
        train_targets_mkl  = train_labels
    train_K = mkl.arrange_kernel(train_M, train_targets_mkl)
    # The test kernels are averaged with the weights found on the train kernels.
    test_K  = np.average(test_M, axis=0, weights=mkl.weights)
    print(mkl.weights)
else:
    # A single kernel: nothing to combine.
    train_K, test_K = train_M[0], test_M[0]

Train and test.

In [None]:
# Candidate predictors (sklearn), all working on precomputed kernel matrices
if task_type == "classification":
    list_of_kernel_models = [
        ("SVM", SVC(kernel = "precomputed")),
    ]
elif task_type == "regression":
    list_of_kernel_models = [
        ("SVM", SVR(kernel = "precomputed")),
        ("KR", KernelRidge(kernel = "precomputed")),
    ]

#if task_type == "classification" or task_type == "metric":
#    mod_list, param_mod = {"SVM": SVC()}, {"SVM": {"kernel": ["precomputed"]}}
#if task_type == "regression":
#    mod_list   = {"SVM": SVR(), "Ridge": KernelRidge()}
#    param_mod  = {"SVM": {"kernel": ["precomputed"]}, "Ridge": {"kernel": ["precomputed"]}}
#model = PredictorSelector(mod_list, param_mod)

for model_name, model in list_of_kernel_models:

    # Fit on the train kernel and remember what fit() returned
    fn = model.fit(train_K, train_labels)
    list_model.append(fn)

    # Predict on both sets; predictions are stored as column vectors for later aggregation
    train_pred = fn.predict(train_K)
    test_pred = fn.predict(test_K)
    list_train_pred.append(train_pred[:, None])
    list_test_pred.append(test_pred[:, None])

    # Predictions are written to disk only when the test set is a separate one
    if not use_fraction_of_train_for_test:
        np.savetxt(task_name + "-" + model_name, test_pred)

    # Scores on the train set
    print("Train score of " + model_name + " = " + str(fn.score(train_K, train_labels)))
    if task_type == "regression":
        print("Train MSE of " + model_name + " = " + str(mean_squared_error(train_pred, train_labels)))

    # Scores on the test set (its labels are only available when it comes from the train set)
    if use_fraction_of_train_for_test:

        print("Test score of " + model_name + " = " + str(fn.score(test_K, test_labels)))
        if task_type == "regression":
            print("Test MSE of " + model_name + " = " + str(mean_squared_error(test_pred, test_labels)))
            plot_regression_result(test_labels, test_pred)
        if task_type in ("classification", "metric"):
            plot_confusion_matrix(confusion_matrix(test_labels, test_pred))

    print("\n\n")

# 3. Neural Net Model

Reset network.

In [None]:
# Start from an empty TensorFlow graph.
tf.reset_default_graph()
# Switch for the tensorboard summaries written during training.
tensorboard = False
# Placeholder -> array dictionaries: whole train set, whole test set, current mini-batch.
feed_train = dict()
feed_test = dict()
feed_epoch = dict()

Add labels.

In [None]:
# Task solved in this section: target column name and task type.
task_name, task_type = tasks[0]

ohe = OneHotEncoder(sparse = False)
le = LabelEncoder()

# Raw targets of the train (and, if it is carved out of the train set, test) rows.
train_full_labels = train_feat[task_name]
if not use_fraction_of_train_for_test:
    train_labels = train_full_labels
else:
    train_labels = train_full_labels[train_sub]
    test_labels =  train_full_labels[test_sub]

if task_type == "classification":
    # Integer codes, then one-hot rows; both encoders are fitted on the train labels
    # and only applied to the test labels.
    train_labels = ohe.fit_transform(le.fit_transform(train_labels).reshape(-1, 1))
    if use_fraction_of_train_for_test:
        test_labels = ohe.transform(le.transform(test_labels).reshape(-1, 1))
elif task_type == "regression":
    # Targets become a single column.
    train_labels = np.reshape(train_labels, [-1,1])
    if use_fraction_of_train_for_test:
        test_labels = np.reshape(test_labels, [-1,1])

# Width of the label matrix = number of network outputs.
num_labels = train_labels.shape[1]
with tf.device("/cpu:0"):
    label = tf.placeholder(tf.float32, shape = [None, num_labels], name = "labels")

feed_train[label] = train_labels
if use_fraction_of_train_for_test:
    feed_test[label] = test_labels

print("Task: " + task_name + " " + task_type + ", " + str(num_labels) + " label(s)\n")

### 3.1 Input Diagrams

Necessary preprocessing for tensorflow
1. preprocess diagrams,
2. pad diagrams with nans so that dimensions agree, 
3. compute masks i.e. locations of nans, 
4. replace nans with zeros,
5. compute integer version of masks.

In [None]:
# Diagram types fed to the network as finite / essential channels, and the maximal
# number of points stored per diagram for each of the two families.
diagram_types_fin, diagram_types_ess  = ["0_degree"], []
Nfin, Ness = 1900, 1162

preprocess_fin = Pipeline([("Separator",     tda.DiagramSelector(limit = np.inf, point_type = "finite")),
                           ("ProminentPts",  tda.ProminentPoints(use = True, num_pts = Nfin)),
                           ("Preprocessor",  tda.DiagramPreprocessor(use = True, scaler = MinMaxScaler()))
                          ])
preprocess_ess = Pipeline([("Separator",     tda.DiagramSelector(limit = np.inf, point_type = "essential")),
                           #("Preprocessor",  tda.DiagramPreprocessor(use = True, scaler = MinMaxScaler()))
                          ])

num_filt_fin, num_filt_ess = len(diagram_types_fin), len(diagram_types_ess)
num_diag_train, num_diag_full = train_labels.shape[0], train_full_labels.shape[0]
# test_labels only exists when the test set is carved out of the train set; a separate
# test set is sized by the diagram count recorded when it was read.
if use_fraction_of_train_for_test == True:
    num_diag_test = test_labels.shape[0]
else:
    num_diag_test = test_num_pts


def _pad_diagrams(diagrams, num_diag, max_pts, dim):
    """Zero-pad the first `num_diag` diagrams to `max_pts` rows and build their masks.

    Returns `(padded, mask)` of shapes `[num_diag, max_pts, dim]` and `[num_diag, max_pts]`;
    `mask` is 1 on rows holding a real point and 0 on padding. A diagram with more than
    `max_pts` points is cut to its first `max_pts` rows, in both the array and the mask.
    """
    padded, mask = np.zeros([num_diag, max_pts, dim]), np.zeros([num_diag, max_pts])
    for i in range(num_diag):
        diag = diagrams[i]
        num_kept = min(max_pts, diag.shape[0])
        padded[i,:num_kept,:] = diag[:num_kept]
        mask[i,:num_kept] = 1
    return padded, mask


def _build_channels(diagram_types, preprocess, max_pts, dim):
    """Build the network inputs of one diagram family (finite or essential).

    Returns `(train_D, train_m, test_D, test_m)`: diagram arrays of shape
    `[num_diag, len(diagram_types), max_pts, dim]` and masks of shape
    `[num_diag, len(diagram_types), max_pts]`, with one channel per diagram type.
    """
    num_filt = len(diagram_types)
    train_D = np.zeros([num_diag_train, num_filt, max_pts, dim])
    train_m = np.zeros([num_diag_train, num_filt, max_pts])
    test_D  = np.zeros([num_diag_test,  num_filt, max_pts, dim])
    test_m  = np.zeros([num_diag_test,  num_filt, max_pts])

    for idx_filt, dt in enumerate(diagram_types):

        # The preprocessing is fitted on the training diagrams only.
        train_full_D = preprocess.fit_transform(train_diag[dt])
        train_full_pad_D, train_full_mask = _pad_diagrams(train_full_D, num_diag_full, max_pts, dim)

        if use_fraction_of_train_for_test == True:
            train_pad_D, train_mask = train_full_pad_D[train_sub,:], train_full_mask[train_sub,:]
            test_pad_D, test_mask = train_full_pad_D[test_sub,:], train_full_mask[test_sub,:]
        else:
            train_pad_D, train_mask = train_full_pad_D, train_full_mask
            # transform, not fit_transform: the test diagrams must go through the
            # preprocessing fitted on the train diagrams instead of refitting it on the test set.
            test_D_list = preprocess.transform(test_diag[dt])
            test_pad_D, test_mask = _pad_diagrams(test_D_list, num_diag_test, max_pts, dim)

        train_D[:,idx_filt,:,:], test_D[:,idx_filt,:,:] = train_pad_D, test_pad_D
        train_m[:,idx_filt,:],   test_m[:,idx_filt,:]   = train_mask,  test_mask

    return train_D, train_m, test_D, test_m


# Finite points have two coordinates, essential points a single one.
train_D_fin, train_m_fin, test_D_fin, test_m_fin = _build_channels(diagram_types_fin, preprocess_fin, Nfin, 2)
train_D_ess, train_m_ess, test_D_ess, test_m_ess = _build_channels(diagram_types_ess, preprocess_ess, Ness, 1)

### 3.2 Network

Definition of permutation invariant and equivariant layers. A deep set network is then defined as a combination of such layers.

In [None]:
def permutation_invariant_layer(inp, dimension):
    """Apply the same affine map followed by tanh to every point of every diagram.

    `inp` is read as a rank-3 tensor [batch, num_pts, dimension_before]. The weight matrix
    stacks cos(theta) and sin(theta) as its two rows, so the einsum below requires
    dimension_before == 2. Returns a tensor of shape [batch, num_pts, dimension].

    Variables "t" (the angles) and "b" (the bias) are created in the current variable
    scope, so the caller has to provide a distinct scope for every call.
    """
    # dimension_before is read but not used below.
    dimension_before, num_pts = inp.shape[2].value, inp.shape[1].value
    with tf.device("/cpu:0"):
        #w = tf.get_variable("w",shape=[dimension_before,dimension],initializer=tf.random_uniform_initializer(-1.0,1.0))
        #b = tf.get_variable("b",shape=[1,dimension],initializer=tf.random_uniform_initializer(-1.0,1.0))
        # (u, v) is the point that every unit maps to zero at initialization.
        u, v = 0.5, 0.5
        theta = tf.get_variable("t",shape=[1,dimension],initializer=tf.random_uniform_initializer(-np.pi/2,np.pi/2))
        cosines, sines = tf.cos(theta), tf.sin(theta)
        # w is a function of theta (not a variable of its own); b is a separate variable
        # whose initial value -u*cos(theta) - v*sin(theta) is derived from theta.
        w, b = tf.concat([cosines,sines],0),tf.get_variable("b",initializer=tf.cast(-u*cosines-v*sines,dtype=tf.float32))
    with tf.device("/GPU:0"):
        return tf.nn.tanh(tf.reshape(tf.einsum("ijk,kl->ijl", inp, w) + b, [-1, num_pts, dimension]))
        
def permutation_equivariant_layer(inp, dimension):
    """Centre each diagram on its coordinate-wise maximum, then apply a dense map + tanh.

    `inp` is read as a rank-3 tensor [batch, num_pts, dimension_before]; the result has
    shape [batch, num_pts, dimension] and goes through batch normalization.

    Variables "w" and "b" are created in the current variable scope, so the caller has to
    provide a distinct scope for every call.
    """
    dimension_before, num_pts = inp.shape[2].value, inp.shape[1].value
    with tf.device("/cpu:0"):
        w = tf.get_variable("w",shape=[dimension_before,dimension],initializer=tf.random_uniform_initializer(-1.0,1.0))
        b = tf.get_variable("b",shape=[1,dimension],initializer=tf.random_uniform_initializer(-1.0,1.0))
    with tf.device("/GPU:0"):
        # Maximum over the points axis, repeated for every point so it can be subtracted.
        inp_max = tf.tile(tf.reshape(tf.reduce_max(inp, 1), [-1, 1, dimension_before]), [1, num_pts, 1])
        l = tf.nn.tanh(tf.einsum("ijk,kl->ijl", inp-inp_max, w) + b)
        # NOTE(review): no `training` argument is passed to batch_normalization - confirm
        # that the resulting mode is the intended one.
        return tf.layers.batch_normalization(tf.reshape(l,[-1, num_pts, dimension]))
    
def deep_set_network(ls_dgm, name, diag, mask, weight_mat, perm_inv_layers, perm_eq_layers, keep):
    """Build one deep-set channel per diagram type and append its output to `ls_dgm`.

    Parameters
    ----------
    ls_dgm : list
        Output list; one tensor of shape [batch, keep * final_dim] is appended per channel.
    name : str
        Prefix of the variable scopes created for the layers.
    diag : tensor [batch, num_channels, N, point_dim]
        Padded diagrams.
    mask : tensor [batch, num_channels, N]
        Mask multiplied into the layer outputs (expected to be 0 on padded rows).
    weight_mat : tensor [point_dim, 1]
        Linear form giving the weight of each point.
    perm_inv_layers, perm_eq_layers : list of int
        Output dimension of each successive invariant / equivariant layer.
    keep : int
        Number of largest values kept per output dimension (top-k over the points axis).

    Returns nothing; the result is delivered through `ls_dgm`.
    """
    num_diag_t, N = diag.shape[1].value, diag.shape[2].value
    for i in range(num_diag_t):
        
        diag_i, mask_i = diag[:,i,:,:], mask[:,i,:]
        # Per-point weight, computed from the raw coordinates before any layer is applied.
        weight_i = tf.reshape(tf.einsum("ijk,kl->ijl", diag_i, weight_mat), [-1,N])
        
        # Layer j takes its width from entry j of the layer list. The width used to be
        # indexed with the channel index i, which raised an IndexError as soon as there
        # were more channels than layers and otherwise gave every layer of channel i
        # the same width.
        for j in range(len(perm_inv_layers)):
            with tf.variable_scope(name + str(i) + "-perm_inv-" + str(j)):
                diag_i = permutation_invariant_layer(diag_i, perm_inv_layers[j])
                
        for j in range(len(perm_eq_layers)):
            with tf.variable_scope(name + str(i) + "-perm_eq-" + str(j)):
                diag_i = permutation_equivariant_layer(diag_i, perm_eq_layers[j])
    
        final_dim                = diag_i.shape[2].value    
        # Broadcast weights and masks over the feature axis, then apply both.
        tiled_weight_i           = tf.tile(tf.reshape(weight_i, [-1,N,1]), [1,1,final_dim])
        tiled_mask_i             = tf.tile(tf.reshape(mask_i,   [-1,N,1]), [1,1,final_dim])
        weighted_masked_layer    = tf.multiply(tf.multiply(diag_i, tiled_mask_i), tiled_weight_i)
        #weighted_masked_layer    = tf.multiply(diag_i, tiled_mask_i)
        # top_k works on the last axis, hence the transpose to [batch, final_dim, N].
        weighted_masked_layer_t  = tf.transpose(weighted_masked_layer, perm = [0,2,1])
        values, _                = tf.nn.top_k(weighted_masked_layer_t, k = keep)  
        
        ls_dgm.append(tf.reshape(values, [-1, keep*final_dim]))    

Architecture is multi-channel deep set network followed by a fully connected layer.

In [None]:
# Placeholders for the padded diagrams and their masks, one channel per diagram type.
with tf.device("/cpu:0"):
    diag_fin = tf.placeholder(tf.float32, shape = [None, len(diagram_types_fin), Nfin, 2],  name = "finite_diagrams")
    mask_fin = tf.placeholder(tf.float32, shape = [None, len(diagram_types_fin), Nfin],     name = "finite_masks")
    diag_ess = tf.placeholder(tf.float32, shape = [None, len(diagram_types_ess), Ness, 1],  name = "essential_diagrams")
    mask_ess = tf.placeholder(tf.float32, shape = [None, len(diagram_types_ess), Ness],     name = "essential_masks")
    
# Register the arrays fed to each placeholder for the train and the test set.
feed_test[diag_fin], feed_train[diag_fin] = test_D_fin, train_D_fin
feed_test[mask_fin], feed_train[mask_fin] = test_m_fin, train_m_fin
feed_test[diag_ess], feed_train[diag_ess] = test_D_ess, train_D_ess
feed_test[mask_ess], feed_train[mask_ess] = test_m_ess, train_m_ess

# Each call appends one tensor per channel to list_diagrams.
# Finite points are weighted by (second coordinate - first coordinate), essential points by their only coordinate.
list_diagrams = []
deep_set_network(list_diagrams, "fin", diag_fin, mask_fin, tf.constant([[-1.0],[1.0]]), [50], [], 99)
deep_set_network(list_diagrams, "ess", diag_ess, mask_ess, tf.constant([[1.0]]),        [10], [20], 10)

# Concatenate all channels
vector = tf.concat(list_diagrams, 1)

# Fully connected
# NOTE(review): dropout and batch normalization are built without any train / test switch,
# so the same graph is evaluated for training and for scoring - confirm this is intended.
vector = tf.nn.dropout(vector, 0.9)
vector = tf.layers.batch_normalization(vector)
# `fn` holds the raw network outputs (one column per label).
fn     = tf.layers.dense(vector, num_labels)

### 3.3 Losses and optimization.

In [None]:
# Classification: softmax cross-entropy on the raw outputs, plus the fraction of rows
# whose largest output matches the largest label entry.
if task_type == "classification":
    loss     = tf.reduce_mean(  tf.nn.softmax_cross_entropy_with_logits_v2(labels = label, logits = fn)  )
    accuracy = tf.reduce_mean(  tf.cast(tf.equal(tf.argmax(fn,1), tf.argmax(label,1)), dtype = tf.float32)  )
    if tensorboard == True:
        tf.summary.scalar("accuracy", accuracy)
    
# Regression: mean squared error between outputs and labels.
if task_type == "regression":
    loss = tf.losses.mean_squared_error(fn, label)

if tensorboard == True:
    tf.summary.scalar("loss", loss)

In [None]:
# Plain gradient descent on the loss with a fixed learning rate of 0.1.
opt = tf.train.GradientDescentOptimizer(learning_rate = 0.1)
train_step = opt.minimize(loss)

In [None]:
# Single op gathering every summary declared above (only needed for tensorboard).
if tensorboard == True:
    merged = tf.summary.merge_all()

# List the trainable variables and the total number of scalar parameters they hold.
print("Variables = " + str(tf.trainable_variables()))
print("\nNumber of variables = " + str(np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()])))

### 3.4 Learning best predictor and evaluation on train and test sets.

In [None]:
# Number of gradient steps, reporting period (in steps) and mini-batch size.
nb_epoch    = 10000
print_every = 100
batch_size  = 128
# Debug plot of the lines learnt by the first layer of a finite channel.
plot_lines  = False

# Test labels are only fed when the test set is carved out of the train set, so the
# test score can only be evaluated in that case.
has_test_labels = (use_fraction_of_train_for_test == True)

# The `with` block closes the session on exit; no explicit sess.close() is needed.
with tf.Session(config=tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)) as sess:
    
    if tensorboard == True:
        train_writer = tf.summary.FileWriter('./train', sess.graph)
        test_writer = tf.summary.FileWriter('./test')
    
    # Initialize parameters
    sess.run(tf.global_variables_initializer())
            
    # Score to print
    if task_type == "classification":
        score = accuracy
    if task_type == "regression":
        score = loss
    
    # Learning best predictor
    for ep in range(nb_epoch):
            
        # One gradient step on a random mini-batch. The batch is capped at the train set
        # size, since sampling without replacement fails when asked for more points than exist.
        data_epoch = np.random.choice(train_num_pts, min(batch_size, train_num_pts), replace = False)
        for k in feed_train.keys():
            feed_epoch[k] = feed_train[k][data_epoch]

        train_step.run(feed_dict = feed_epoch)
            
        if ep % print_every == 0: 
            if tensorboard == True:
                train_summary, train_score = sess.run([merged, score], feed_dict = feed_train)
                train_writer.add_summary(train_summary, ep)
                if has_test_labels:
                    test_summary, test_score = sess.run([merged, score], feed_dict = feed_test)
                    test_writer.add_summary(test_summary, ep)
            else:
                train_score = sess.run(score, feed_dict = feed_train)
                if has_test_labels:
                    test_score  = sess.run(score, feed_dict = feed_test)
                
            print("\nTrain score on epoch " + str(ep) + " = " + str(train_score))
            if has_test_labels:
                print("Test score on epoch " + str(ep) + "  = " + str(test_score))
            
            if plot_lines == True:
                # Diagrams to draw, channel to inspect, number of units whose line is drawn.
                idx_d, idx_f, num_lines = [0,50,70], 0, 2
                b = sess.run(tf.get_collection(tf.GraphKeys.VARIABLES, "fin"+str(idx_f)+"-perm_inv-0/b")[0])
                t = sess.run(tf.get_collection(tf.GraphKeys.VARIABLES, "fin"+str(idx_f)+"-perm_inv-0/t")[0])
                #w = sess.run(tf.get_collection(tf.GraphKeys.VARIABLES, "fin"+str(idx_f)+"-perm_inv-0/w")[0])
                w = np.concatenate([np.cos(t),np.sin(t)],0)
                [x1, x2, y1, y2] = [0,1,0,1]
                plt.figure()
                for idx in idx_d:
                    xs, ys = train_D_fin[idx,idx_f,:,0], train_D_fin[idx,idx_f,:,1]
                    plt.scatter(xs, ys)
                # Line w[0]*x + w[1]*y + b = 0 of each unit, drawn between x1 and x2.
                for i in range(num_lines):
                    plt.plot([x1,x2],[(-b[0,i]-w[0,i]*x1)/(w[1,i]), (-b[0,i]-w[0,i]*x2)/(w[1,i])])
                plt.axis([x1, x2, y1, y2])
                plt.show()    
            
    # Prediction
    train_pred, test_pred = sess.run(fn, feed_dict = feed_train), sess.run(fn, feed_dict = feed_test)
    if use_fraction_of_train_for_test == False:
        # File named after the task (the former `task` was an undefined name).
        np.savetxt(task_name, test_pred)
          
    # Evaluation on train set (always possible, as in the other sections of the notebook)
    print("\nTrain score = " + str(sess.run(score, feed_dict = feed_train)))
    if task_type == "regression":
        plot_regression_result(train_labels, train_pred)
    if task_type == "classification":
        plot_confusion_matrix(confusion_matrix(np.argmax(train_labels,1), np.argmax(train_pred,1)))
        
    # Evaluation on test set (needs the test labels)
    if has_test_labels:
        print("Test score  = " + str(sess.run(score, feed_dict = feed_test)))
        if task_type == "regression":
            plot_regression_result(test_labels, test_pred)
        if task_type == "classification":
            plot_confusion_matrix(confusion_matrix(np.argmax(test_labels,1), np.argmax(test_pred,1)))

    # Flush and release the summary files.
    if tensorboard == True:
        train_writer.close()
        test_writer.close()

# 4. Aggregating models

In [None]:
# One column per trained model: its predictions become the features of the aggregated model.
train_agg, test_agg = np.concatenate(list_train_pred, axis=1), np.concatenate(list_test_pred, axis=1)

In [None]:
# Targets of the aggregated model. They are rebuilt from the raw label column because the
# neural-network section overwrites train_labels / test_labels with one-hot matrices
# (classification) or single-column arrays (regression); sklearn's confusion_matrix does
# not accept one-hot targets. The rows selected here are the ones the feature and kernel
# models were trained on, i.e. the rows behind train_agg / test_agg.
train_full_labels = train_feat[task_name]
if use_fraction_of_train_for_test == True:
    train_labels = train_full_labels[train_sub][::train_ratio]
    test_labels =  train_full_labels[test_sub][::test_ratio]
else:
    train_labels = train_full_labels[::train_ratio]

if task_type == "classification":
    # One encoder, fitted on the train labels and re-used for the test labels, so that a
    # class has the same integer code in both sets.
    label_encoder = LabelEncoder()
    train_labels = np.array(label_encoder.fit_transform(train_labels))
    if use_fraction_of_train_for_test == True:
        test_labels = np.array(label_encoder.transform(test_labels))

# Predictor (sklearn, hyperopt)       
if task_type == "classification" or task_type == "metric":
    model = RandomForestClassifier()
if task_type == "regression":
    model = LinearRegression()
        
# Learning best predictor
model.fit(train_agg, train_labels)

# Prediction
train_pred, test_pred = model.predict(train_agg), model.predict(test_agg)

# Save predictions
if use_fraction_of_train_for_test == False:
    np.savetxt(task_name, test_pred)
    
# Evaluation on train set
print("Train score = " + str(model.score(train_agg, train_labels)))
if task_type == "regression":
    print("Train MSE = " + str(mean_squared_error(train_pred, train_labels)))
        
# Evaluation on test set
if use_fraction_of_train_for_test == True:
            
    print("Test score  = " + str(model.score(test_agg, test_labels)))
            
    if task_type == "regression":
        print("Test MSE = " + str(mean_squared_error(test_pred, test_labels)))
        plot_regression_result(test_labels, test_pred)
                
    if task_type == "classification":
        plot_confusion_matrix(confusion_matrix(test_labels, test_pred))
        
print("\n\n")