# Coreset for Neural Networks

## Import Libraries

In [71]:
import numpy as np
import pandas as pd
import math
from sklearn.linear_model import LinearRegression
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.losses import BinaryCrossentropy
from sklearn.metrics import log_loss
from sklearn.preprocessing import OneHotEncoder

## Data PreProcessing

### Data Extraction

In [72]:
data_init = pd.read_csv("bank-full.csv")

In [73]:
arr_init = data_init.to_numpy()

In [74]:
arr= []
for i in range(arr_init.shape[0]):
  cur = arr_init[i][0]
  arr.append(cur.split(';'))

In [75]:
for i in range(len(arr)):
  for j in range(len(arr[0])):
    try:
      arr[i][j] = int(arr[i][j])
    except:
      arr[i][j] = str(arr[i][j])
      arr[i][j] = arr[i][j].strip('"')

In [76]:
arr = np.array(arr,dtype='object')

In [77]:
arr.shape

(45211, 17)

In [78]:
X =arr[:,:-1]
Y = arr[:,-1]

In [79]:
X[0]

array([58, 'management', 'married', 'tertiary', 'no', 2143, 'yes', 'no',
       'unknown', 5, 'may', 261, 1, -1, 0, 'unknown'], dtype=object)

In [80]:
Y = np.reshape(Y, (-1,1))

In [81]:
for i in range(Y.shape[0]):
  if(Y[i]=='no'):
    Y[i,0]=0
  else:
    Y[i,0] = 1

In [82]:
  Y

array([[0],
       [0],
       [0],
       ...,
       [1],
       [0],
       [0]], dtype=object)

### One-Hot Enocding

In [83]:
OneHotEncoder(
    categories='auto',  # Categories per feature
    drop=None, # Whether to drop one of the features
    sparse=True, # Will return sparse matrix if set True
    # dtype= <class 'numpy.float64'>, # Desired data type of the output
    handle_unknown='error' # Whether to raise an error
)
enc = OneHotEncoder()

In [84]:
for j in range(X.shape[1]):
  if(type(X[0][j]) != type(1)):
    X_j_new = enc.fit_transform(np.reshape(X[:,j],(-1,1)))
    X_j_new = X_j_new.toarray()
    X = np.concatenate((X,X_j_new),axis=1)


In [85]:
X_aug = X
p=0
for j in range(X_aug.shape[1]):
  if(type(X[0][j]) not in [type(1),type(1.0)] ):
    X_aug = np.delete(X_aug,j-p,1)
    p+=1
X = X_aug

In [86]:
X[0]

array([58, 2143, 5, 261, 1, -1, 0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0,
       0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0,
       0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
       0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], dtype=object)

## Function Definitions

In [87]:
def sigmoid(z):
    z = z[0]
    a =   np.exp(z)/(1+np.exp(z)) if z<0 else (1/(1+np.exp(-z)))
    return a
def sigmoid_der(z):
    return sigmoid(z)*(1-sigmoid(z))

In [88]:
def linear(z):
    return z
def linear_der(z):
    return 1

In [89]:
def tanh(z):
    return np.where( z<0, (np.exp(z) - 1/np.exp(z) ) / (np.exp(z) + 1/np.exp(z)) , ( 1/np.exp(-z) - np.exp(-z) ) / (np.exp(-z) + 1/np.exp(-z)) )
def tanh_der(z):
    return 1- tanh(z)**2

In [90]:
def relu(z):
    # print(z)
    return z if z>0 else 0.1*z
def relu_der(z):
    z = z[0]
    if(z==0):
        z = z + 1e-10
    return 1 if z>0 else 0.1

In [91]:
def a(W,b,g,a_l_1):
    z_l = np.matmul(W,a_l_1) + b
    a_l=np.ones((z_l.shape[0],z_l.shape[1]))
    for i in range(z_l.shape[0]):
        a_l[i] = g(z_l[i])
    return a_l,z_l

In [92]:
def compute(X,W,B,G,N):
    A=[]
    Z=[]
    a_l = z_l = X.T
    A.append(a_l)
    Z.append(z_l)
    for l in range(1,len(N)):
        a_l,z_l = a(W[l-1],B[l-1],G[l-1],a_l)
        # print(a_l)
        A.append(np.array(a_l))
        Z.append(np.array(z_l))
    return A,Z

In [93]:
def binary_entropy_loss(truth_labels, predicted_labels):
    epsilon = 1e-15
    predicted_labels = np.clip(predicted_labels, epsilon, 1 - epsilon)
    N = predicted_labels.shape[0]
    truth_labels = np.reshape(truth_labels,(-1,1))
    # print(truth_labels.shape,predicted_labels.shape)
    loss = -np.sum(truth_labels * np.log(predicted_labels +1e-15) + (1 - truth_labels) * np.log(1 - predicted_labels+1e-15)) / N
    return loss

In [94]:
def partial_derivative_binary_entropy_loss(truth_labels, predicted_labels):
    N = predicted_labels.shape[0]
    return (predicted_labels - 2*predicted_labels*truth_labels + truth_labels) / (predicted_labels * (1 - predicted_labels) * N +1e-10)

## Coreset Construction


In [162]:
def coresetConstr(X,Y,W_anc,B_anc,R,n,eps,delta,l,L):
    G = [relu,relu,sigmoid]
    G_der = [ relu_der,relu_der,sigmoid_der ]
    Y_predicted=[]
    M = - 1e+8

    for i in range(0,X.shape[0]):
        Der = 0
        # forward propogation
        A,Z = compute(np.array([X[i]]),W_anc,B_anc,G,n)
        Y_predicted.append(A[3][0][0])
        #backward propogation
        dc_dz_last = np.multiply(partial_derivative_binary_entropy_loss(Y[i],A[3]) ,G_der[2](Z[3]))
        der_3 = np.matmul(dc_dz_last,A[2].T)
        Der += np.sum(der_3**2)
        dc_dz_second= np.multiply(np.matmul(W_anc[2].T,dc_dz_last) , G_der[1](Z[2]))
        der_2 = np.matmul(dc_dz_second,A[1].T)
        Der+= np.sum(der_2**2)
        dc_dz_first = np.multiply(np.matmul(W_anc[1].T,dc_dz_second) , G_der[0](Z[1]))
        der_1 = np.matmul(dc_dz_first,A[0].T)
        Der+= np.sum(der_1**2)
        if(Der>M):
            M=Der

    Y = np.reshape( np.array(Y,dtype='float'), (Y.shape[0]) )
    Y_predicted = np.reshape( np.array(Y_predicted,dtype='float'), (np.array(Y_predicted,dtype='float').shape[0]) )

    H = log_loss(Y,Y_predicted)

    N = int(np.log(X.shape[0]))
    W = np.zeros((X.shape[0]))
    M = M**(1/len(G))
    P = []
    for i in range(N-1):
        P.append([])
    for i in range(X.shape[0]):
        loss =-1* (Y[i] * math.log2(Y_predicted[i] +1e-15) + (1 - Y[i]) * math.log2(1 - Y_predicted[i]+1e-15))
        rel_loss = loss/H
        if(rel_loss<=0):
          rel_loss = -rel_loss + 1e-10
        if(math.log2(rel_loss)>0):
          P[int(math.log2(rel_loss))].append(i)
        else:
          P[0].append(i)

    for i in range(len(P)):
        Q_i_s = (H * 2**(i-1) + M*R +L*R**2)**2*delta**(-2) * np.log(1/l)
        if(Q_i_s<len(P[i])):
            Q_i = np.random.choice(P[i],int(Q_i_s))
        else:
            Q_i = P[i]

        for i in Q_i:
            W[i] = int(Q_i_s)/len(P)

    return W


In [163]:
N = [X.shape[1],5,5,1]
G = [relu,relu,sigmoid]
W = []
B = []
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2)
for l in range(1,len(N)):
    W_l = np.random.rand(N[l],N[l-1])/10
    B_l = np.zeros((N[l],1))
    # print(W_l.shape,B_l.shape)
    W.append(W_l)
    B.append(B_l)
Weights = coresetConstr(X_train,Y_train,W,B,1,N,0.5,1,0.8,1)

In [164]:
size = 0
for i in range(X_train.shape[0]):
    if(Weights[i]!=0):
        size+=1

In [165]:
size

1899

In [166]:
X_train = np.asarray(X_train).astype('float32')
Y_train = np.asarray(Y_train).astype('float32')
Weights = np.asarray(Weights).astype('float32')
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train,Weights))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)

In [167]:
train_dataset

<_BatchDataset element_spec=(TensorSpec(shape=(None, 51), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))>

In [168]:
tf.random.set_seed(1234)  # applied to achieve consistent results
model = Sequential(
    [
        Dense(10, activation = 'relu',   name = "L1"),
        Dense(5, activation = 'relu', name = "L2"),
        Dense(1,activation = 'sigmoid', name='L3')
    ]
)
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(0.001),
)

In [187]:
model.fit(X_train,Y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7e4961f621d0>

In [188]:
model.get_weights()[1].shape

(10,)

In [189]:
X_test = np.asarray(X_test).astype('float32')
Y_test = np.asarray(Y_test).astype('float32')
Y_predicted = model.predict(X_test)



In [190]:
Y_predicted

array([[0.27522287],
       [0.09540436],
       [0.00782357],
       ...,
       [0.03653059],
       [0.01503961],
       [0.0034468 ]], dtype=float32)

In [191]:
Y_test = Y_test.reshape(-1,1)

In [192]:
log_loss(Y_test,Y_predicted)

0.2664012062153163

In [175]:
for i in range(Y_predicted.shape[0]):
  if(Y_predicted[i,0]>=0.5):
    Y_predicted[i,0]=1
  else:
    Y_predicted[i,0]=0

In [176]:
np.sum(np.where((Y_test[:,0] == Y_predicted[:,0]),1,0))/Y_test.shape[0]

0.8737144752847507

## Sequential Coresets

In [177]:
def difference(W,B,Weights):
  diff =0
  n = 0
  for i in range(len(W)):
    diff+= np.sum((W[i].T - Weights[2*i])**2)
    diff+= np.sum((np.reshape(B[i],(-1,)) - Weights[2*i+1])**2)
    n+= W[i].shape[0]*W[i].shape[1] + B[i].shape[0]
  return diff**(1/2)


In [199]:
def seqCoreSets(X,Y,R,eps,delta,l,L):
  N = [X.shape[1],5,5,1]
  G = [relu,relu,sigmoid]
  W = []
  B = []
  Weight_layers = []
  for l in range(1,len(N)):
    W_l = np.random.rand(N[l],N[l-1])/1000
    B_l = np.zeros((N[l],1))
    W_l_i = [W_l.T,B_l[:,0]]
    # print(W_l.shape,B_l.shape)
    W.append(W_l)
    B.append(B_l)
    Weight_layers.append(W_l_i)
  W_cur = W
  B_cur = B
  X = np.asarray(X).astype('float32')
  Y = np.asarray(Y).astype('float32')
  tf.random.set_seed(1234)  # applied to achieve consistent results
  model = Sequential(
      [
          Dense(5, activation = 'relu', weights = Weight_layers[0], name = "L1"),
          Dense(5, activation = 'relu',  weights = Weight_layers[1],name = "L2"),
          Dense(1,activation = 'sigmoid', weights = Weight_layers[2], name='L3')
      ]
  )
  model.compile(
      loss=tf.keras.losses.BinaryCrossentropy(),
      optimizer=tf.keras.optimizers.Adam(0.0005),
  )
  model.build(input_shape = X.shape)
  model.set_weights([Weight_layers[0][0],Weight_layers[0][1],Weight_layers[1][0],Weight_layers[1][1],Weight_layers[2][0],Weight_layers[2][1]])
  Weights_cur = model.get_weights()
  H= 100
  first=True
  s_id = 0
  while(H>0.3 and s_id<10):
    s_id+=1
    print("set id:",s_id)

    Weights = coresetConstr(X,Y,W,B,1,N,0.5,1,0.8,100)

    size = 0

    for i in range(X.shape[0]):
        if(Weights[i]!=0):
            size+=1
    print("size:",size)

    Weights = np.asarray(Weights).astype('float32')

    train_dataset = tf.data.Dataset.from_tensor_slices((X,Y,Weights))
    train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)

    diff=R-1
    epochs = 0
    while(diff<R and H>0.3 and epochs<10):
      model.build()
      model.set_weights(Weights_cur)

      epochs+=1
      print("epoch:",epochs)

      history = model.fit(train_dataset,epochs=1)

      Weights_cur = model.get_weights()
      diff = difference(W,B,Weights_cur)
      print("diff:",diff)

    Y_predicted = model.predict(X)
    Y_predicted = np.reshape(Y_predicted,(Y_predicted.shape[0]))
    Y = np.reshape(Y,(Y.shape[0]))
    H = log_loss(Y,Y_predicted)
    Weights_cur = model.get_weights()
    for i in range(len(W)):

      W[i] = Weights_cur[2*i].T
      B[i] = np.reshape(Weights_cur[2*i+1],(-1,1))

    print("H:",H)

  return Weights

In [200]:
Weights= seqCoreSets(X_train,Y_train,0.5,0.5,1,0.8,10)

set id: 1
size: 2192
epoch: 1
diff: 0.23301066636794227
epoch: 2
diff: 0.4419070421986361
epoch: 3
diff: 0.6353816308265584
H: 0.4986983898723994
set id: 2
size: 3955
epoch: 1
diff: 0.271633093737475
epoch: 2
diff: 0.4867529794281466
epoch: 3
diff: 0.6699559728547362
H: 0.3908584030406151
set id: 3
size: 3981
epoch: 1
diff: 0.15973830350888246
epoch: 2
diff: 0.29361498448848417
epoch: 3
diff: 0.4080082186217088
epoch: 4
diff: 0.4954625416886715
epoch: 5
diff: 0.5639307581521705
H: 0.3595878331313354
set id: 4
size: 3955
epoch: 1
diff: 0.03570496974348725
epoch: 2
diff: 0.065017222125416
epoch: 3
diff: 0.08846247394551142
epoch: 4
diff: 0.10270202040212499
epoch: 5
diff: 0.10390722679453467
epoch: 6
diff: 0.11433565715785401
epoch: 7
diff: 0.11883580580522185
epoch: 8
diff: 0.11375415272389794
epoch: 9
diff: 0.11063063198370839
epoch: 10
diff: 0.11051010878500832
H: 0.35827945902535147
set id: 5
size: 3970
epoch: 1
diff: 0.0003827810287475586
epoch: 2
diff: 0.0036724805502997256
epoch: 

In [201]:
Weights

array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)

In [202]:
size = 0
for i in range(X_train.shape[0]):
    if(Weights[i]!=0):
        print(Weights[i],end=' ')
        size+=1

253.0 259.33334 259.33334 253.0 253.0 253.0 253.0 259.33334 253.0 259.33334 259.33334 253.0 253.0 253.0 253.0 253.0 253.0 259.33334 259.33334 259.33334 259.33334 259.33334 253.0 253.0 253.0 259.33334 253.0 253.0 259.33334 253.0 253.0 253.0 259.33334 253.0 253.0 253.0 253.0 259.33334 253.0 259.33334 259.33334 253.0 253.0 253.0 253.0 253.0 253.0 253.0 259.33334 253.0 253.0 253.0 253.0 253.0 253.0 259.33334 259.33334 253.0 253.0 253.0 259.33334 253.0 253.0 259.33334 259.33334 253.0 259.33334 259.33334 259.33334 259.33334 259.33334 253.0 253.0 253.0 259.33334 253.0 259.33334 253.0 259.33334 259.33334 259.33334 253.0 259.33334 259.33334 253.0 253.0 259.33334 253.0 253.0 253.0 259.33334 253.0 259.33334 253.0 259.33334 253.0 253.0 253.0 259.33334 259.33334 253.0 259.33334 259.33334 259.33334 259.33334 259.33334 259.33334 253.0 259.33334 259.33334 259.33334 253.0 259.33334 259.33334 259.33334 253.0 259.33334 259.33334 253.0 259.33334 253.0 259.33334 259.33334 259.33334 253.0 259.33334 253.0 25

In [203]:
size

3983

In [204]:
X_train.shape[0]

36168

### Model Output

In [205]:
X_train = np.asarray(X_train).astype('float32')
Y_train = np.asarray(Y_train).astype('float32')
Weights = np.asarray(Weights).astype('float32')
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train,Weights))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)

In [206]:
train_dataset

<_BatchDataset element_spec=(TensorSpec(shape=(None, 51), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))>

In [207]:
tf.random.set_seed(1234)  # applied to achieve consistent results
model = Sequential(
    [
        Dense(10, activation = 'relu',   name = "L1"),
        Dense(5, activation = 'relu', name = "L2"),
        Dense(1,activation = 'sigmoid', name='L3')
    ]
)
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(0.001),
)

In [208]:
model.fit(X_train,Y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7e494b2e0c10>

In [212]:
model.get_weights()[1]

array([ 0.53797835, -0.10412534, -0.06277367, -0.04400063,  0.32237226,
       -0.26154304, -0.2775164 ,  0.37491566,  0.25920147, -0.09035591],
      dtype=float32)

In [213]:
X_test = np.asarray(X_test).astype('float32')
Y_test = np.asarray(Y_test).astype('float32')
Y_predicted = model.predict(X_test)



In [214]:
Y_predicted

array([[0.3036783 ],
       [0.03031088],
       [0.00664023],
       ...,
       [0.00657426],
       [0.00840195],
       [0.00403276]], dtype=float32)

In [215]:
Y_test = Y_test.reshape(-1,1)

In [216]:
for i in range(Y_predicted.shape[0]):
  if(Y_predicted[i,0]>=0.5):
    Y_predicted[i,0]=1
  else:
    Y_predicted[i,0]=0

In [217]:
np.sum(np.where((Y_test[:,0] == Y_predicted[:,0]),1,0))/Y_test.shape[0]

0.8886431493973239