# Coreset for Neural Networks

In [None]:
import numpy as np
import pandas as pd
import math
from sklearn.linear_model import LinearRegression
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.losses import BinaryCrossentropy

In [None]:
# Read the svm-format text file: one record per line, laid out as
# "label name:value name:value ...".
with open("CNN.txt", mode="r") as fp:
    svmformat_list = fp.read().splitlines()

# One {'label': ...} dict and one {feature_name: value} dict per record.
pandas_label_list = []
pandas_feature_list = []
for record in svmformat_list:
    tokens = record.split()
    # The first token is the label; it is kept as the raw string here.
    pandas_label_list.append({'label': tokens[0]})
    # Remaining tokens are "name:value" pairs; values are parsed as floats.
    pandas_feature_list.append(
        {name: float(value) for name, value in (tok.split(':') for tok in tokens[1:])}
    )

In [None]:
# One row per record, one column per feature name; a feature missing from a
# record becomes NaN in that row.
data = pd.DataFrame.from_dict(pandas_feature_list)

In [None]:
# Display the assembled feature table.
data

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,128,203,694,779,88,324,689,886,141,487
0,29.0,3.821209,1.567568,13.547628,7.242389,0.019883,0.012195,0.067241,0.049107,3406.866211,...,,,,,,,,,,
1,25.0,3.052969,1.641484,22.334589,15.734018,0.023027,0.010731,0.077000,0.045884,3324.158203,...,,,,,,,,,,
2,82.0,1.601274,1.508805,5.860583,3.301121,0.025948,0.006956,0.082317,0.044845,3771.984131,...,,,,,,,,,,
3,25.0,4.819368,2.879584,41.382828,24.448074,0.014387,0.007596,0.069875,0.046916,3301.686035,...,,,,,,,,,,
4,29.0,2.768753,1.797319,13.338054,9.980667,0.011506,0.007269,0.100647,0.067401,3266.021484,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22540,30.0,2.808389,1.475269,9.408182,3.851741,0.011282,0.010490,0.116250,0.075856,3421.109131,...,,,,,,,,,,
22541,32.0,4.830225,1.974286,7.508844,3.180844,0.015507,0.014104,0.127734,0.071251,3479.608887,...,,,,,,,,,,
22542,32.0,5.308545,2.534434,10.520031,4.518093,0.017690,0.013312,0.104492,0.063117,3484.652588,...,,,,,,,,,,
22543,42.0,0.426690,0.192054,2.405270,1.100260,0.013657,0.008062,0.106696,0.046419,3616.494629,...,,,,,,,,,,


In [None]:
# Split the feature matrix into fully-populated columns and columns that
# contain at least one NaN.  NaN is the only value that differs from itself,
# so `col != col` flags the missing entries.
arr = data.to_numpy()
X = arr  # alias, not a copy: later in-place edits of X also change arr
has_nan = [bool(np.any(X[:, c] != X[:, c])) for c in range(X.shape[1])]
# Indices of the columns that need imputation.
col_not_comp = [c for c, flagged in enumerate(has_nan) if flagged]
# Matrix made of the complete columns only (samples x complete features).
X_complete = np.array([arr[:, c] for c, flagged in enumerate(has_nan) if not flagged]).T

In [None]:
# Replace every NaN in the incomplete columns with that column's mean
# (computed over the non-NaN entries).  X is modified in place.
for j in col_not_comp:
    # The temporary is deliberately not called `a`: a module-level `a` would
    # overwrite the layer helper `a(W, b, g, a_l_1)` if this cell were re-run
    # after that helper has been defined.
    col_mean = np.nanmean(X[:, j])
    # Self-inequality is True exactly at the NaN entries.
    missing = X[:, j] != X[:, j]
    X[missing, j] = col_mean

In [None]:
# Column vector of labels, one row per record.  The labels are stored as
# strings in pandas_label_list and are converted on assignment into the
# float array.
Y = np.zeros((len(pandas_label_list), 1))
for row, entry in enumerate(pandas_label_list):
    Y[row, 0] = entry['label']

In [None]:
# Display the shape of the label column vector.
Y.shape

(22545, 1)

In [None]:
def sigmoid(z):
    """Return the logistic sigmoid of ``z[0]``.

    Only the first element of ``z`` is read.  The branch on the sign picks the
    form of the formula whose exponential cannot overflow.
    """
    first = z[0]
    if first < 0:
        exp_z = np.exp(first)
        return exp_z / (1 + exp_z)
    return 1 / (1 + np.exp(-first))


def sigmoid_der(z):
    """Return the sigmoid derivative s * (1 - s), with s = sigmoid(z)."""
    s = sigmoid(z)
    return s * (1 - s)

In [None]:
def linear(z):
    """Identity activation: hand back the input unchanged."""
    return z


def linear_der(z):
    """Derivative of the identity activation: the constant 1 for any input."""
    return 1

In [None]:
def tanh(z):
    """Return the elementwise hyperbolic tangent of ``z``.

    Delegates to ``np.tanh``.  The previous hand-written quotient of
    exponentials evaluated both ``np.where`` branches for every element and
    produced inf/inf = NaN once ``np.exp`` under- or overflowed (|z| above
    roughly 709), whereas the correct limits are -1 and 1.
    """
    return np.tanh(z)


def tanh_der(z):
    """Return the elementwise derivative of tanh, 1 - tanh(z)**2."""
    return 1 - tanh(z) ** 2

In [None]:
def relu(z):
    """Leaky ReLU with negative slope 0.1, applied elementwise.

    Returns ``z`` where ``z > 0`` and ``0.1 * z`` elsewhere.  Works on scalars
    and on arrays of any shape; the previous scalar ``if`` raised on arrays
    with more than one element.
    """
    z = np.asarray(z)
    return np.where(z > 0, z, 0.1 * z)


def relu_der(z):
    """Elementwise derivative of the leaky ReLU: 1 where ``z >= 0``, else 0.1.

    The value at exactly 0 is 1, as before.  The result has the same shape as
    ``z``; the previous version looked at ``z[0]`` only, so a whole layer of
    pre-activations received the derivative of its first unit.
    """
    z = np.asarray(z)
    return np.where(z >= 0, 1.0, 0.1)

In [None]:
def a(W,b,g,a_l_1):
    """Run one dense layer and return ``(a_l, z_l)``.

    ``z_l`` is ``W @ a_l_1 + b``.  ``a_l`` is a float array of the same 2-D
    shape whose row ``i`` is filled with ``g(z_l[i])``; the activation ``g``
    is therefore called once per row of the pre-activation.
    """
    z_l = W @ a_l_1 + b
    n_rows, n_cols = z_l.shape[0], z_l.shape[1]
    a_l = np.ones((n_rows, n_cols))
    for row_idx in range(n_rows):
        a_l[row_idx] = g(z_l[row_idx])
    return a_l, z_l

In [None]:
def compute(X,W,B,G,N):
    """Forward pass through the network; returns ``(A, Z)``.

    ``A[0]`` and ``Z[0]`` are both ``X.T``.  For each layer ``k`` in
    ``1 .. len(N) - 1`` the lists receive the activation and pre-activation
    produced by ``a(W[k-1], B[k-1], G[k-1], previous_activation)``.  Only the
    length of ``N`` is used here.
    """
    current = X.T
    A = [current]
    Z = [current]
    for layer in range(1, len(N)):
        current, pre_activation = a(W[layer - 1], B[layer - 1], G[layer - 1], current)
        A.append(np.array(current))
        Z.append(np.array(pre_activation))
    return A, Z

In [None]:
def binary_entropy_loss(truth_labels, predicted_labels):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Both inputs are reshaped to column vectors before they are combined, so
    1-D and (N, 1) inputs can be mixed freely.  Previously only the labels
    were reshaped, and 1-D predictions broadcast against the (N, 1) labels
    into an (N, N) matrix, giving a wrong sum.

    Args:
        truth_labels: array-like of target labels.
        predicted_labels: array-like of predicted probabilities; clipped to
            [1e-15, 1 - 1e-15] before the logarithm.

    Returns:
        The summed cross-entropy divided by the number of predictions.
    """
    epsilon = 1e-15
    predicted_labels = np.reshape(predicted_labels, (-1, 1))
    truth_labels = np.reshape(truth_labels, (-1, 1))
    predicted_labels = np.clip(predicted_labels, epsilon, 1 - epsilon)
    N = predicted_labels.shape[0]
    # The extra 1e-15 inside the logs is kept so results for already
    # well-shaped inputs stay bit-for-bit what they were.
    loss = -np.sum(
        truth_labels * np.log(predicted_labels + 1e-15)
        + (1 - truth_labels) * np.log(1 - predicted_labels + 1e-15)
    ) / N
    return loss

In [None]:
def partial_derivative_binary_entropy_loss(truth_labels, predicted_labels):
    """Loss-derivative term with respect to the predicted probability.

    Computes ``(p - 2*p*t + t) / (p * (1 - p) * N + 1e-10)`` with
    ``N = predicted_labels.shape[0]``.  For labels in {0, 1} the numerator is
    ``1 - p`` (t = 1) or ``p`` (t = 0), so the result is non-negative.  The
    ``1e-10`` keeps the denominator away from zero.
    """
    n_rows = predicted_labels.shape[0]
    numerator = predicted_labels - 2 * predicted_labels * truth_labels + truth_labels
    denominator = predicted_labels * (1 - predicted_labels) * n_rows + 1e-10
    return numerator / denominator

## Coreset Construction


In [None]:
def _rowwise_derivative(g_der, z):
    """Evaluate ``g_der`` on each row of ``z`` and stack the results as a column.

    Applying the derivative one row at a time (the same way ``a`` applies
    activations) ties every unit's derivative to its own pre-activation,
    whether ``g_der`` is written for a single value or works elementwise.
    """
    return np.reshape([g_der(row) for row in z], (-1, 1))


def coresetConstr(X,Y,W_anc,B_anc,R,n,eps,delta,l,L):
    """Build per-sample coreset weights around the anchor parameters.

    The network is fixed to three layers with activations
    ``[relu, relu, sigmoid]``.

    Steps:
      1. For every sample, run a forward pass at the anchor and accumulate the
         squared entries of the three weight-gradient matrices (bias gradients
         are not included).  ``M`` is the largest such value; it is printed.
      2. ``H`` is the binary cross-entropy of the anchor predictions.
      3. Samples are bucketed by ``int(log2(d))``, where ``d`` is the value of
         ``partial_derivative_binary_entropy_loss`` for that sample.
      4. From bucket ``i`` up to
         ``(H*2**(i-1) + M*R + L*R**2)**2 * delta**-2 * log(1/l)`` indices are
         drawn with ``np.random.choice`` (with replacement); the whole bucket
         is taken when it is smaller than that.

    Args:
        X: 2-D array, one sample per row.
        Y: labels, indexable per sample (``Y[i]``).
        W_anc, B_anc: per-layer anchor weight matrices and bias columns.
        R, delta, l, L: constants of the sample-size formula above.
        n: layer-size list forwarded to ``compute``.
        eps: accepted but not used.

    Returns:
        1-D float array of length ``X.shape[0]``: non-zero for selected
        samples, 0 elsewhere.

    Fixes relative to the previous version:
      * ``binary_entropy_loss`` received predictions as labels and labels as
        predictions; the order is corrected.
      * Every sample was bucketed by the prediction of sample 3
        (``Y_predicted[3]``) instead of its own.
      * The bucket index is clamped to the valid range (it could raise
        IndexError or wrap around through negative indexing).
      * Activation derivatives are evaluated per unit rather than from the
        first unit of each layer.
      * An empty ``X`` returns an empty weight vector instead of failing.
    """
    G = [relu,relu,sigmoid]
    G_der = [ relu_der,relu_der,sigmoid_der ]
    n_samples = X.shape[0]
    if n_samples == 0:
        return np.zeros(0)

    Y_predicted=[]
    M = - 1e+8
    for i in range(n_samples):
        # forward propagation
        A,Z = compute(np.array([X[i]]),W_anc,B_anc,G,n)
        Y_predicted.append(A[3][0][0])
        # backward propagation
        dc_dz_last = np.multiply(
            partial_derivative_binary_entropy_loss(Y[i],A[3]),
            _rowwise_derivative(G_der[2], Z[3]))
        der_3 = np.matmul(dc_dz_last,A[2].T)
        dc_dz_second = np.multiply(
            np.matmul(W_anc[2].T,dc_dz_last),
            _rowwise_derivative(G_der[1], Z[2]))
        der_2 = np.matmul(dc_dz_second,A[1].T)
        dc_dz_first = np.multiply(
            np.matmul(W_anc[1].T,dc_dz_second),
            _rowwise_derivative(G_der[0], Z[1]))
        der_1 = np.matmul(dc_dz_first,A[0].T)
        Der = np.sum(der_3**2) + np.sum(der_2**2) + np.sum(der_1**2)
        if(Der>M):
            M=Der

    # Both arguments are passed as column vectors so the result does not
    # depend on how binary_entropy_loss handles 1-D input.
    H = binary_entropy_loss(
        np.array(Y,dtype='float').reshape(-1, 1),
        np.array(Y_predicted,dtype='float').reshape(-1, 1))
    print(M)

    # At least one bucket, even for very small inputs.
    n_layers = max(int(np.log(n_samples)) - 1, 1)
    P = [[] for _ in range(n_layers)]
    for i in range(n_samples):
        magnitude = float(np.squeeze(
            partial_derivative_binary_entropy_loss(Y[i],np.array([[Y_predicted[i]]]))))
        # A non-positive or NaN magnitude has no logarithm; it goes to bucket 0.
        level = math.log(magnitude, 2) if magnitude > 0 else 0
        P[int(min(max(level, 0), n_layers - 1))].append(i)

    sample_weights = np.zeros((n_samples))
    for layer, members in enumerate(P):
        Q_i_s = (H * 2**(layer-1) + M*R +L*R**2)**2*delta**(-2) * np.log(1/l)
        if(Q_i_s<len(members)):
            Q_i = np.random.choice(members,int(Q_i_s))
        else:
            Q_i = members
        for idx in Q_i:
            # NOTE(review): the weight is the sample budget divided by the
            # number of buckets; importance sampling would normally use
            # len(members) / len(Q_i).  Left unchanged -- confirm the intent.
            sample_weights[idx] = int(Q_i_s)/len(P)
    return sample_weights


In [None]:
# Recode the negative class from -1 to 0 (in place) so the labels are 0/1.
Y[Y[:, 0] == -1, 0] = 0

In [None]:
# Layer sizes: input width, two hidden layers of 5 units, one output unit.
N = [X.shape[1], 5, 5, 1]
G = [relu, relu, sigmoid]
# Hold out 20% of the samples for testing.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
# Anchor parameters: small uniform-random weights of shape
# (units_out, units_in) and zero bias columns, one pair per layer.
W = [np.random.rand(n_out, n_in) / 100 for n_in, n_out in zip(N[:-1], N[1:])]
B = [np.zeros((n_out, 1)) for n_out in N[1:]]
# Per-sample coreset weights for the training split at this anchor.
Weights = coresetConstr(X_train, Y_train, W, B, 1, N, 0.5, 1, 0.8, 100)

In [None]:
# Number of training samples that received a non-zero coreset weight.
size = sum(1 for k in range(X_train.shape[0]) if Weights[k] != 0)

In [None]:
# Display the number of samples with a non-zero weight.
size

7138

In [None]:
# Cast features, labels and coreset weights to float32, then bundle them as
# (features, label, weight) triples, shuffled and batched in groups of 64.
X_train, Y_train, Weights = (
    np.asarray(values).astype('float32') for values in (X_train, Y_train, Weights)
)
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, Y_train, Weights))
    .shuffle(buffer_size=1024)
    .batch(64)
)

In [None]:
# Display the dataset's element spec (features, labels, weights).
train_dataset

<_BatchDataset element_spec=(TensorSpec(shape=(None, 229), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))>

In [None]:
tf.random.set_seed(1234)  # applied to achieve consistent results
# Three dense layers: 10 and 5 ReLU units, then one sigmoid output unit.
model = Sequential()
model.add(Dense(10, activation='relu', name="L1"))
model.add(Dense(5, activation='relu', name="L2"))
model.add(Dense(1, activation='sigmoid', name='L3'))
# Binary cross-entropy loss, optimised with Adam at learning rate 0.001.
bce_loss = tf.keras.losses.BinaryCrossentropy()
adam = tf.keras.optimizers.Adam(0.001)
model.compile(loss=bce_loss, optimizer=adam)

In [None]:
# Train for 50 epochs on the complete training split.
# NOTE(review): `train_dataset` (which carries the coreset sample weights) is
# built earlier but is not used by this call, so every training sample is
# used with equal weight -- confirm whether
# `model.fit(train_dataset, epochs=50)` was intended.
model.fit(X_train,Y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fadbba335b0>

In [None]:
# Display the shape of the second array returned by get_weights().
model.get_weights()[1].shape

(10,)

In [None]:
# Cast the held-out split to float32 and get the model's outputs for it.
X_test, Y_test = (np.asarray(split).astype('float32') for split in (X_test, Y_test))
Y_predicted = model.predict(X_test)



In [None]:
# Display the raw model outputs (before thresholding).
Y_predicted

array([[0.60977596],
       [0.84141755],
       [0.538474  ],
       ...,
       [0.9365319 ],
       [0.38304555],
       [0.82219046]], dtype=float32)

In [None]:
# Make sure the test labels are a column vector so column 0 can be compared
# with column 0 of the predictions.
Y_test = Y_test.reshape(-1,1)

In [None]:
# Turn the outputs into hard 0/1 predictions in place: >= 0.5 becomes 1,
# everything else becomes 0.
Y_predicted[:, 0] = np.where(Y_predicted[:, 0] >= 0.5, 1, 0)

In [None]:
# Test accuracy: fraction of rows where the thresholded prediction equals
# the label.
np.sum(np.where((Y_test[:,0] == Y_predicted[:,0]),1,0))/Y_test.shape[0]

0.8460856065646485

## Sequential Coresets

In [None]:
def difference(W,B,Weights):
  """Euclidean distance between two parameter sets.

  Args:
    W: list of weight matrices; ``W[i]`` is transposed before comparison.
    B: list of bias arrays; ``B[i]`` is flattened before comparison.
    Weights: flat list laid out as ``[kernel_0, bias_0, kernel_1, bias_1, ...]``
      where ``Weights[2*i]`` matches ``W[i].T`` and ``Weights[2*i+1]`` matches
      the flattened ``B[i]``.

  Returns:
    Square root of the summed squared differences over all layers.

  The parameter counter ``n`` that the earlier version accumulated but never
  used has been removed.
  """
  diff = 0
  for i, W_i in enumerate(W):
    diff += np.sum((W_i.T - Weights[2*i])**2)
    diff += np.sum((np.reshape(B[i],(-1,)) - Weights[2*i+1])**2)
  return diff**(1/2)


In [None]:
def seqCoreSets(X,Y,R,eps,delta,l,L):
  """Train the 3-layer network on a sequence of coresets.

  Each round:
    1. builds coreset weights at the current anchor ``(W, B)`` with
       ``coresetConstr``;
    2. trains the Keras model one epoch at a time on the weighted data until
       its parameters are at least ``R`` away from the anchor (measured by
       ``difference``);
    3. moves the anchor to the trained parameters and recomputes ``H``.
  Rounds repeat while ``H > 10``.  The coreset size and ``H`` are printed
  every round.

  Args:
    X, Y: training features (one sample per row) and labels.
    R: distance from the anchor that ends a round.
    eps, delta, l, L: forwarded to ``coresetConstr``.

  Returns:
    The float32 coreset weight vector of the last round.

  Fixes relative to the previous version:
    * ``X`` and ``Y`` are used throughout; they used to be overwritten with
      the module-level ``X_train`` / ``Y_train``.
    * The layer loop no longer reuses the name ``l``, which clobbered the
      ``l`` parameter.
    * ``eps``, ``delta``, ``l`` and ``L`` are forwarded instead of the
      hard-coded 0.5, 1, 0.8, 100.
    * Unused locals were removed.
  """
  N = [X.shape[1],5,5,1]
  W = []
  B = []
  Weight_layers = []
  for layer in range(1,len(N)):
    W_l = np.random.rand(N[layer],N[layer-1])/100
    B_l = np.zeros((N[layer],1))
    W.append(W_l)
    B.append(B_l)
    # Kernels are passed to the layers as the transpose of W_l (inputs x
    # units) and biases as flat vectors.
    Weight_layers.append([W_l.T,B_l[:,0]])
  X = np.asarray(X).astype('float32')
  Y = np.asarray(Y).astype('float32')
  tf.random.set_seed(1234)  # applied to achieve consistent results
  model = Sequential(
      [
          Dense(5, activation = 'relu', weights = Weight_layers[0], name = "L1"),
          Dense(5, activation = 'relu',  weights = Weight_layers[1],name = "L2"),
          Dense(1,activation = 'sigmoid', weights = Weight_layers[2], name='L3')
      ]
  )
  model.compile(
      loss=tf.keras.losses.BinaryCrossentropy(),
      optimizer=tf.keras.optimizers.Adam(0.001),
  )
  H = 100  # starting value above the threshold so the first round always runs

  while(H>10):
    # NOTE(review): the radius passed to coresetConstr stays at the literal 1
    # used before, not the R argument -- confirm which one is intended.
    Weights = coresetConstr(X,Y,W,B,1,N,eps,delta,l,L)
    size = int(np.count_nonzero(Weights))
    print("size",size)
    Weights = np.asarray(Weights).astype('float32')
    train_dataset = tf.data.Dataset.from_tensor_slices((X,Y,Weights))
    train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)
    # Start below R so at least one epoch runs, then keep training until the
    # parameters have moved R away from the anchor.
    # NOTE(review): there is no epoch cap; if training stalls inside the
    # radius this loop does not terminate.
    diff = R-1
    while(diff<R):
      model.fit(train_dataset,epochs=1)
      diff = difference(W,B,model.get_weights())
    Y_predicted = model.predict(X)
    # NOTE(review): predictions are passed in the `truth_labels` position and
    # labels in the `predicted_labels` position.  The `H > 10` threshold looks
    # calibrated against this ordering, so it is left unchanged -- confirm and
    # recalibrate the threshold if the order is corrected.
    H = binary_entropy_loss(np.array(Y_predicted,dtype='float'),np.array(Y,dtype='float'))
    # Move the anchor to the trained parameters, back in (units x inputs) /
    # column-vector layout.
    Weights_cur = model.get_weights()
    for i in range(len(W)):
      W[i] = Weights_cur[2*i].T
      B[i] = np.reshape(Weights_cur[2*i+1],(-1,1))
    print("H",H)
  return Weights

In [None]:
# Run the sequential coreset training with R=4, eps=0.5, delta=1, l=0.8, L=100.
Weights= seqCoreSets(X_train,Y_train,4,0.5,1,0.8,100)

38.738279561684784
size 7616
H 14.003033084356128
11186.69089185582
size 18036
H 12.295361504919546
15436.110597982075
size 18036
H 11.102372707076283
13100.412411840496
size 18036
H 10.344768615468789
4754.058171308414
size 18036
H 9.501508456646045


In [None]:
# Rebuild coreset weights from the module-level W and B.  seqCoreSets creates
# and updates its own local W/B lists, so these are still the anchor
# parameters initialised before the first coresetConstr call.
Weights = coresetConstr(X_train,Y_train,W,B,1,N,0.5,1,0.8,100)

In [None]:
# Display the per-sample coreset weights (0 for samples not selected).
Weights

array([615.25 ,   0.   ,   0.   , ...,   0.   , 547.375,   0.   ])

In [None]:
# Number of training samples that received a non-zero coreset weight.
size = sum(1 for k in range(X_train.shape[0]) if Weights[k] != 0)

In [None]:
# Display the number of samples with a non-zero weight.
size

7070

In [None]:
# Display the total number of training samples, for comparison with `size`.
X_train.shape[0]

18036