In [2]:
'''
Functions from in-class exercises
'''
# Load the data and libraries
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

def laplace_mech(v, sensitivity, epsilon):
    return v + np.random.laplace(loc=0, scale=sensitivity / epsilon)

def laplace_mech_vec(vec, sensitivity, epsilon):
    return [v + np.random.laplace(loc=0, scale=sensitivity / epsilon) for v in vec]

def gaussian_mech(v, sensitivity, epsilon, delta):
    return v + np.random.normal(loc=0, scale=sensitivity * np.sqrt(2*np.log(1.25/delta)) / epsilon)

def gaussian_mech_vec(vec, sensitivity, epsilon, delta):
    return [v + np.random.normal(loc=0, scale=sensitivity * np.sqrt(2*np.log(1.25/delta)) / epsilon)
            for v in vec]

def gaussian_mech_RDP_vec(vec, sensitivity, alpha, epsilon):
    sigma = np.sqrt((sensitivity**2 * alpha) / (2 * epsilon))
    return [v + np.random.normal(loc=0, scale=sigma) for v in vec]

def gaussian_mech_zCDP_vec(vec, sensitivity, rho):
    sigma = np.sqrt((sensitivity**2) / (2 * rho))
    return [v + np.random.normal(loc=0, scale=sigma) for v in vec]
    
def pct_error(orig, priv):
    return np.abs(orig - priv)/orig * 100.0

In [3]:
# Load data files
import numpy as np
import urllib.request
import io

url_x = 'https://github.com/jnear/cs211-data-privacy/raw/master/slides/adult_processed_x.npy'
url_y = 'https://github.com/jnear/cs211-data-privacy/raw/master/slides/adult_processed_y.npy'

with urllib.request.urlopen(url_x) as url:
    f = io.BytesIO(url.read())
X = np.load(f)

with urllib.request.urlopen(url_y) as url:
    f = io.BytesIO(url.read())
y = np.load(f)

In [4]:
# Split data into training and test sets
training_size = int(X.shape[0] * 0.8)

X_train = X[:training_size]
X_test = X[training_size:]

y_train = y[:training_size]
y_test = y[training_size:]

print('Train and test set sizes:', len(y_train), len(y_test))

Train and test set sizes: 36176 9044


### IMPLEMENTING MINI-BATCH GRADIENT DESCENT (WITHOUT DP FOR NOW)
#### Steps to follow to implement the mini-batch gradient descent (without dp):
1. Define a function that splits data into mini-batches (subsets of the whole dataset)
2. Define loss function that measures how good our model is.
3. Define gradient function. The gradient is a vector that indicates the rate of change of the loss in each direction.
4. Define avg_grad function that computes the average gradient over each mini-batch.
5. Define gradient_descent function that computes gradient using mini-batches for each iteration

In [5]:
def split_to_mini_batches(X, y, batch_size):
    # shuffling the data before creating mini_batches to prevent the model 
    # from learning possible patterns + each batch might contain more "diversified"
    # information. 
    
    shuffled_data = np.random.permutation(X.shape[0])
    randomized_X = X[shuffled_data]
    randomized_Y = y[shuffled_data]

    mini_batches = []
    for i in range(0,X.shape[0],batch_size):
        mini_batches.append((randomized_X[i:i+batch_size], randomized_Y[i:i+batch_size]))
        
    return mini_batches

In [6]:
'''
Functions taken from in-class-exercise 10.28.24
'''

# The loss function measures how good our model is. The training goal is to minimize the loss.
# This is the logistic loss function.
def loss(theta, xi, yi):
    exponent = - yi * (xi.dot(theta))
    return np.log(1 + np.exp(exponent))

# This is the gradient of the logistic loss
# The gradient is a vector that indicates the rate of change of the loss in each direction
def gradient(theta, xi, yi):
    exponent = yi * (xi.dot(theta))
    return - (yi*xi) / (1+np.exp(exponent))

#Vectorized version of gradient calculation by wanglun1996. 
#Github: https://github.com/sunblaze-ucb/dpml-benchmark/blob/master/lossfunctions/logistic_regression.py#L12

def gradient_vectorized(theta, x, y, lambda_param=None):
    """
    Gradient function for logistic regression without regularization.
    Based on the above logistic_regression
    """

    m, n = X.shape
    
    exponent = y * (x.dot(theta))
    gradient_loss =  - (X.T @ (y / (1 + np.exp(exponent)))) / m

    # Reshape to handle case where x is csr_matrix
    gradient_loss.reshape(theta.shape)

    return gradient_loss

def avg_grad(theta, X, y):

    #All_grads is a list of vectors, with each vector of length 104
    all_grads = [gradient(theta,X[i],y[i]) for i in range(len(X))] #one gradient per example in the data

    #Compute the column-wise average
    avg_grad = np.mean(all_grads,axis=0)
    
    return avg_grad

In [7]:
'''
Original function gradient_descent() taken from in-class-exercise 10.28.24 and modifying it
iterate over the mini-batches instead of over the entire dataset (full-batch)
'''
def mini_batch_gradient_descent(epochs, batch_size):
    #Step 1: initalize all thetas
    theta = [0 for _ in range(X_train.shape[1])] #Initial model

    #Step 2: split data into mini_batches
    for _ in range(epochs): #epochs = iterations
        mini_batches = split_to_mini_batches(X_train, y_train, batch_size)

    #Step 3: iterate for each num samples in training set (training_set = mini batch)
        for X_train_batch, y_train_batch in mini_batches:
            theta = theta - avg_grad(theta, X_train_batch, y_train_batch)
            
    return theta

theta = mini_batch_gradient_descent(50 , 64)
theta

array([ 3.18333295e-02, -7.24772591e-01, -6.18566005e-01, -3.94976018e-01,
       -1.07397378e+00, -8.58378802e-01, -1.53725404e+00, -1.01144330e+00,
       -9.48915075e-01, -7.35441122e-01, -1.41681288e+00, -8.69407314e-01,
       -1.18777173e+00, -1.23182706e+00, -4.11682124e-02,  9.89539293e-02,
        5.35657550e-01,  1.27113170e+00, -3.53622683e-01,  8.32468932e-01,
       -1.47271301e+00,  1.34477683e+00,  1.00455231e-02, -1.57555629e+00,
        1.39413902e+00,  8.48817751e-01, -1.17347103e+00, -1.92510572e+00,
       -1.45620815e+00, -1.28870349e+00, -3.79294646e-01, -9.34284934e-02,
       -1.35088113e-01,  4.76118543e-01, -1.22083233e+00, -9.53941577e-01,
       -5.15858100e-01, -1.11066380e+00, -1.74509141e+00,  2.97435666e-01,
        2.59958338e-01,  5.40176079e-03,  3.51595426e-01, -4.12399185e-01,
       -9.18759091e-01, -5.17565340e-01, -1.39303274e+00, -1.59614174e+00,
       -7.57287234e-01,  6.69822896e-03, -1.26099067e+00, -7.38852592e-01,
       -1.17816769e+00, -

In [8]:
'''
Functions taken from in-class-exercise 10.28.24
'''
# Prediction: take a model (theta) and a single example (xi) and return its predicted label
def predict(xi, theta, bias=0):
    label = np.sign(xi @ theta + bias) #this is the dot product and take the sign. 
    return label

def accuracy(theta):
    return np.sum(predict(X_test, theta) == y_test)/X_test.shape[0]

def L2_clip(v, b):
    norm = np.linalg.norm(v, ord=2) #computing L2 norm 
    
    if norm > b:
        return b * (v / norm)
    else:
        return v

#theta = [-.1 for _ in range(104)]
accuracy(theta)

0.8447589562140646

### IMPLEMENTING MINI-BATCH GRADIENT DESCENT WITH (EPSILON)- DP

In [22]:
def epsilon_noisy_gradient_descent(epochs, epsilon, batch_size):
    #Step 1: initalize all thetas 
    theta = [0 for _ in range(X_train.shape[1])]

    #Step 2: splitting the epsilon and choosing sensitivity
    epsilon_i = epsilon/epochs
    sensitivity = 1 #?
    
    #Step 3: split data into mini_batches
    for _ in range(epochs): #epochs = iterations
        mini_batches = split_to_mini_batches(X_train, y_train, batch_size)

        for X_train_batch, y_train_batch in mini_batches:
        
            all_grads = [gradient(theta,X_train_batch[i],y_train_batch[i]) for i in range(len(X_train_batch))]
            
            # 3. Take the sum of the clipped gradients and add noise
            grad_sum = np.sum(all_grads, axis=0)
    
            #Sensitivity is correct, by clipping
            noisy_grad_sum = laplace_mech_vec(grad_sum,sensitivity=sensitivity,epsilon=epsilon_i)
    
            noisy_grad = np.array(noisy_grad_sum )/ len(X_train_batch) #Danger: reveals the size of the training data (probably not a big deal but
            # does violate DP) 
            
            theta = theta - noisy_grad
    
    return theta

theta = epsilon_noisy_gradient_descent(10, 1.0, 64) #a smaller epsilon, accuracy is not as good. Noise can make the model worse. 
                                                    # If we increase iterations, it will make up for it. 


theta1 = epsilon_noisy_gradient_descent(10, 0.5, 64)
theta2 = epsilon_noisy_gradient_descent(10, 1.0, 55)
theta3 = epsilon_noisy_gradient_descent(10, 0.5, 55)
theta4 = epsilon_noisy_gradient_descent(10, 1.0, 70)
theta5 = epsilon_noisy_gradient_descent(10, 0.5, 70)

print('Final accuracy with epsilon = 1.0, epochs = 10, batch size = 64:', accuracy(theta))
print('Final accuracy with epsilon = 0.5, epochs = 10, batch size = 64: ', accuracy(theta1))
print('Final accuracy with epsilon = 1.0, epochs = 20, batch size = 55: ', accuracy(theta2))
print('Final accuracy with epsilon = 0.5, epochs = 20, batch size = 55: ', accuracy(theta3))
print('Final accuracy with epsilon = 1.0, epochs = 20, batch size = 70: ', accuracy(theta4))
print('Final accuracy with epsilon = 1.0, epochs = 20, batch size = 75: ', accuracy(theta5))


Final accuracy with epsilon = 1.0, epochs = 10, batch size = 64: 0.7588456435205662
Final accuracy with epsilon = 0.5, epochs = 10, batch size = 64:  0.7799646174259177
Final accuracy with epsilon = 1.0, epochs = 20, batch size = 55:  0.7904688191065901
Final accuracy with epsilon = 0.5, epochs = 20, batch size = 55:  0.7602830605926582
Final accuracy with epsilon = 1.0, epochs = 20, batch size = 70:  0.7829500221141088
Final accuracy with epsilon = 1.0, epochs = 20, batch size = 75:  0.754312251216276


### IMPLEMENTING MINI-BATCH GRADIENT DESCENT WITH (EPSILON,DELTA)- DP

In [23]:
def epsilon_delta_noisy_gradient_descent(epochs, epsilon, delta, batch_size):
    #Step 1: initalize all thetas 
    theta = [0 for _ in range(X_train.shape[1])]

    #Step 2: splitting the epsilons and delta over the num of iterations/epochs.
    epsilon_i = epsilon/epochs
    delta_i = delta/epochs

    #Step 3: split data into mini_batches
    for _ in range(epochs): #epochs = iterations
        mini_batches = split_to_mini_batches(X_train, y_train, batch_size)

        for X_train_batch, y_train_batch in mini_batches:
        
            all_grads = [gradient(theta,X_train_batch[i],y_train_batch[i]) for i in range(len(X_train_batch))]
            
            # 2. Call L2_clip on each gradient
            b = 3
            clipped_grads = [L2_clip(g, b) for g in all_grads]
            
            # 3. Take the sum of the clipped gradients and add noise
            grad_sum = np.sum(clipped_grads, axis=0)
    
            #Sensitivity is correct, by clipping
            noisy_grad_sum = gaussian_mech_vec(grad_sum,sensitivity=b,epsilon=epsilon_i,delta=delta_i)
    
            noisy_grad = np.array(noisy_grad_sum )/ len(X_train_batch) #Danger: reveals the size of the training data (probably not a big deal but
            # does violate DP) 
            
            theta = theta - noisy_grad
    
    return theta

theta = epsilon_delta_noisy_gradient_descent(10, 1.0, 1e-5,64)
theta1 = epsilon_delta_noisy_gradient_descent(10, 0.5, 1e-5, 64)
theta2 = epsilon_delta_noisy_gradient_descent(10, 1.0, 1e-5, 55)
theta3 = epsilon_delta_noisy_gradient_descent(10, 0.5, 1e-5, 55)
theta4 = epsilon_delta_noisy_gradient_descent(10, 1.0, 1e-5, 70)
theta5 = epsilon_delta_noisy_gradient_descent(10, 0.5, 1e-5, 70)

print('Final accuracy with epsilon = 1.0, epochs = 10, batch size = 64:', accuracy(theta))
print('Final accuracy with epsilon = 0.5, epochs = 10, batch size = 64: ', accuracy(theta1))
print('Final accuracy with epsilon = 1.0, epochs = 20, batch size = 55: ', accuracy(theta2))
print('Final accuracy with epsilon = 0.5, epochs = 20, batch size = 55: ', accuracy(theta3))
print('Final accuracy with epsilon = 1.0, epochs = 20, batch size = 70: ', accuracy(theta4))
print('Final accuracy with epsilon = 1.0, epochs = 20, batch size = 75: ', accuracy(theta5))

  return - (yi*xi) / (1+np.exp(exponent))


Final accuracy with epsilon = 1.0, epochs = 10, batch size = 64: 0.7610570544007077
Final accuracy with epsilon = 0.5, epochs = 10, batch size = 64:  0.7224679345422379
Final accuracy with epsilon = 1.0, epochs = 20, batch size = 55:  0.7367315347191509
Final accuracy with epsilon = 0.5, epochs = 20, batch size = 55:  0.7203670942061035
Final accuracy with epsilon = 1.0, epochs = 20, batch size = 70:  0.7304290137107474
Final accuracy with epsilon = 1.0, epochs = 20, batch size = 75:  0.6640866873065016
Final accuracy: 0.7610570544007077


In [24]:
def vectorized_epsilon_delta_noisy_gradient_descent(epochs, epsilon, delta, batch_size):
    #Step 1: initalize all thetas 
    theta = [0 for _ in range(X_train.shape[1])]

    #Step 2: splitting the epsilons and delta over the num of iterations/epochs.
    epsilon_i = epsilon/epochs
    delta_i = delta/epochs

    #Step 3: split data into mini_batches
    for _ in range(epochs): #epochs = iterations
        mini_batches = split_to_mini_batches(X_train, y_train, batch_size)

        for X_train_batch, y_train_batch in mini_batches:
        
            all_grads = [gradient_vectorized(theta,X_train_batch[i],y_train_batch[i]) for i in range(len(X_train_batch))]
            
            # 2. Call L2_clip on each gradient
            b = 3
            clipped_grads = [L2_clip(g, b) for g in all_grads]
            
            # 3. Take the sum of the clipped gradients and add noise
            grad_sum = np.sum(clipped_grads, axis=0)
    
            #Sensitivity is correct, by clipping
            noisy_grad_sum = gaussian_mech_vec(grad_sum,sensitivity=b,epsilon=epsilon_i,delta=delta_i)
    
            noisy_grad = np.array(noisy_grad_sum )/ len(X_train_batch) #Danger: reveals the size of the training data (probably not a big deal but
            # does violate DP) 
            
            theta = theta - noisy_grad
    
    return theta

theta = vectorized_epsilon_delta_noisy_gradient_descent(10, 1.0, 1e-5,64) #a smaller epsilon, accuracy is not as good. Noise can make the model worse. 
                                                # If we increase iterations, it will make up for it. 
print('Final accuracy:', accuracy(theta))

ValueError: matmul: Input operand 1 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)

### IMPLEMENTING MINI-BATCH GRADIENT DESCENT WITH RÉNYI DP

In [26]:
'''
Original functions taken from homework assignment 9
'''
def mini_batch_noisy_gradient_descent_RDP(epochs, epsilon_bar, alpha, batch_size):
    #Step 1: initalize all thetas 
    theta = [0 for _ in range(X_train.shape[1])]


    #Step 3: split data into mini_batches
    for _ in range(epochs): #epochs = iterations
        mini_batches = split_to_mini_batches(X_train, y_train, batch_size)

        for X_train_batch, y_train_batch in mini_batches:
        
            all_grads = [gradient(theta,X_train_batch[i],y_train_batch[i]) for i in range(len(X_train_batch))]
            
            # 2. Call L2_clip on each gradient
            b = 3
            clipped_grads = [L2_clip(g, b) for g in all_grads]
            
            # 3. Take the sum of the clipped gradients and add noise
            grad_sum = np.sum(clipped_grads, axis=0)
    
            #Sensitivity is correct, by clipping
            noisy_grad_sum = gaussian_mech_RDP_vec(grad_sum,sensitivity=b,alpha=alpha,epsilon=epsilon_bar)
    
            noisy_grad = np.array(noisy_grad_sum )/ len(X_train_batch) #Danger: reveals the size of the training data (probably not a big deal but
            # does violate DP) #MAYBE DO LEN(MINI_BATCH)
            
            theta = theta - noisy_grad
    
    return theta

theta = mini_batch_noisy_gradient_descent_RDP(10, 0.1, 20, 64)
theta1 = mini_batch_noisy_gradient_descent_RDP(10, 0.3, 20, 64)
theta2 = mini_batch_noisy_gradient_descent_RDP(10, 0.3, 20, 55)
theta3 = mini_batch_noisy_gradient_descent_RDP(10, 0.1, 15, 55)
theta4 = mini_batch_noisy_gradient_descent_RDP(10, 0.1, 15, 70)
theta5 = mini_batch_noisy_gradient_descent_RDP(10, 0.1, 25, 70)

print('Final accuracy with epsilon_bar = 0.1, epochs = 10, alpha = 20, batch size = 64:', accuracy(theta))
print('Final accuracy with epsilon_bar = 0.2, epochs = 10, alpha = 20, batch size = 64: ', accuracy(theta1))
print('Final accuracy with epsilon_bar = 0.3, epochs = 10, alpha = 20, batch size = 55: ', accuracy(theta2))
print('Final accuracy with epsilon_bar = 0.3, epochs = 10, alpha = 15, batch size = 55: ', accuracy(theta3))
print('Final accuracy with epsilon_bar = 0.1, epochs = 10, alpha = 15, batch size = 70: ', accuracy(theta4))
print('Final accuracy with epsilon_bar = 0.1, epochs = 10, alpha = 25, batch size = 70: ', accuracy(theta5))

Final accuracy with epsilon_bar = 0.1, epochs = 10, alpha = 20, batch size = 64: 0.770234409553295
Final accuracy with epsilon_bar = 0.2, epochs = 10, alpha = 20, batch size = 64:  0.7767580716497126
Final accuracy with epsilon_bar = 0.3, epochs = 10, alpha = 20, batch size = 55:  0.7705661211853162
Final accuracy with epsilon_bar = 0.3, epochs = 10, alpha = 15, batch size = 55:  0.7655904467049978
Final accuracy with epsilon_bar = 0.1, epochs = 10, alpha = 15, batch size = 70:  0.7535382574082264
Final accuracy with epsilon_bar = 0.1, epochs = 10, alpha = 25, batch size = 70:  0.7806280406899602


In [None]:
'''
Original functions taken from homework assignment 9
'''
def vectorized_mini_batch_noisy_gradient_descent_RDP(epochs, epsilon_bar, alpha, batch_size):
    #Step 1: initalize all thetas 
    theta = [0 for _ in range(X_train.shape[1])]


    #Step 3: split data into mini_batches
    for _ in range(epochs): #epochs = iterations
        mini_batches = split_to_mini_batches(X_train, y_train, batch_size)

        for X_train_batch, y_train_batch in mini_batches:
        
            all_grads = [gradient_vectorized(theta,X_train_batch[i],y_train_batch[i]) for i in range(len(X_train_batch))]
            
            # 2. Call L2_clip on each gradient
            b = 3
            clipped_grads = [L2_clip(g, b) for g in all_grads]
            
            # 3. Take the sum of the clipped gradients and add noise
            grad_sum = np.sum(clipped_grads, axis=0)
    
            #Sensitivity is correct, by clipping
            noisy_grad_sum = gaussian_mech_RDP_vec(grad_sum,sensitivity=b,alpha=alpha,epsilon=epsilon_bar)
    
            noisy_grad = np.array(noisy_grad_sum )/ len(X_train_batch) #Danger: reveals the size of the training data (probably not a big deal but
            # does violate DP) #MAYBE DO LEN(MINI_BATCH)
            
            theta = theta - noisy_grad
    
    return theta

theta = vectorized_mini_batch_noisy_gradient_descent_RDP(10, 0.1, 20,64) 

                                                
print('Final accuracy:', accuracy(theta))

### IMPLEMENTING MINI-BATCH GRADIENT DESCENT WITH zCDP

In [27]:
'''
Original functions taken from homework assignment 9
'''
def mini_batch_noisy_gradient_descent_zCDP(epochs, rho, batch_size):
    #IDEA: copy noisy_gradient_descent but use gaussian_mech_zCDP_vec to compute the noisy_gradient_sum
    
    #from the noisy_gradient_descent function provided above: 
    theta = np.zeros(X_train.shape[1])
    for _ in range(epochs): #epochs = iterations
        mini_batches = split_to_mini_batches(X_train, y_train, batch_size)

        for X_train_batch, y_train_batch in mini_batches:
        
            all_grads = [gradient(theta,X_train_batch[i],y_train_batch[i]) for i in range(len(X_train_batch))]
            
            # 2. Call L2_clip on each gradient
            b = 3
            clipped_grads = [L2_clip(g, b) for g in all_grads]
            
            # 3. Take the sum of the clipped gradients and add noise
            grad_sum = np.sum(clipped_grads, axis=0)
    
            #Sensitivity is correct, by clipping
            noisy_grad_sum = gaussian_mech_zCDP_vec(grad_sum,sensitivity=b,rho=rho)
    
            noisy_grad = np.array(noisy_grad_sum )/ len(X_train_batch) #Danger: reveals the size of the training data (probably not a big deal but
            # does violate DP) #MAYBE DO LEN(MINI_BATCH)
            
            theta = theta - noisy_grad
    
    return theta

theta = mini_batch_noisy_gradient_descent_zCDP(10, 0.1, 64)

theta1 = mini_batch_noisy_gradient_descent_zCDP(10, 0.2, 64)
theta2 = mini_batch_noisy_gradient_descent_zCDP(10, 0.1, 55)
theta3 = mini_batch_noisy_gradient_descent_zCDP(10, 0.2, 55)
theta4 = mini_batch_noisy_gradient_descent_zCDP(10, 0.1, 70)
theta5 = mini_batch_noisy_gradient_descent_zCDP(10, 0.2, 70)

print('Final accuracy with rho = 1.0, epochs = 10, batch size = 64:', accuracy(theta))
print('Final accuracy with rho = 0.5, epochs = 10, batch size = 64: ', accuracy(theta1))
print('Final accuracy with rho = 1.0, epochs = 20, batch size = 55: ', accuracy(theta2))
print('Final accuracy with rho = 0.5, epochs = 20, batch size = 55: ', accuracy(theta3))
print('Final accuracy with rho = 1.0, epochs = 20, batch size = 70: ', accuracy(theta4))
print('Final accuracy with rho = 1.0, epochs = 20, batch size = 75: ', accuracy(theta5))

Final accuracy with rho = 1.0, epochs = 10, batch size = 64: 0.7470145953118089
Final accuracy with rho = 0.5, epochs = 10, batch size = 64:  0.7943387881468377
Final accuracy with rho = 1.0, epochs = 20, batch size = 55:  0.804953560371517
Final accuracy with rho = 0.5, epochs = 20, batch size = 55:  0.7919062361786819
Final accuracy with rho = 1.0, epochs = 20, batch size = 70:  0.8108137992038921
Final accuracy with rho = 1.0, epochs = 20, batch size = 75:  0.8147943387881469


In [20]:
'''
Original functions taken from homework assignment 9
'''
def vectorized_mini_batch_noisy_gradient_descent_zCDP(epochs, rho, batch_size):
    #IDEA: copy noisy_gradient_descent but use gaussian_mech_zCDP_vec to compute the noisy_gradient_sum
    
    #from the noisy_gradient_descent function provided above: 
    theta = np.zeros(X_train.shape[1])
    for _ in range(epochs): #epochs = iterations
        mini_batches = split_to_mini_batches(X_train, y_train, batch_size)

        for X_train_batch, y_train_batch in mini_batches:
        
            all_grads = [gradient_vectorized(theta,X_train_batch[i],y_train_batch[i]) for i in range(len(X_train_batch))]
            
            # 2. Call L2_clip on each gradient
            b = 3
            clipped_grads = [L2_clip(g, b) for g in all_grads]
            
            # 3. Take the sum of the clipped gradients and add noise
            grad_sum = np.sum(clipped_grads, axis=0)
    
            #Sensitivity is correct, by clipping
            noisy_grad_sum = gaussian_mech_zCDP_vec(grad_sum,sensitivity=b,rho=rho)
    
            noisy_grad = np.array(noisy_grad_sum )/ len(X_train_batch) #Danger: reveals the size of the training data (probably not a big deal but
            # does violate DP) #MAYBE DO LEN(MINI_BATCH)
            
            theta = theta - noisy_grad
    
    return theta

theta = vectorized_mini_batch_noisy_gradient_descent_zCDP(10, 0.1, 64)
print('Final accuracy:', accuracy(theta))

ValueError: matmul: Input operand 1 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)