In [1]:
import numpy as np
import cvxpy as cp
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
def gen_synthetic_multinomial(n0,n1,q,r,j):
    # n0,n1 is the number of samples from class 0, 1 resp.
    # q,r is are the theta^+/-
    
    X_train = []
    y_train = []
    
    X_train.extend(np.random.multinomial(j, q, size=n0))
    y_train.extend([1]*n0)
    
    X_train.extend(np.random.multinomial(j, r, size=n1))
    y_train.extend([-1]*n1)
    
    X_train = np.array(X_train)
    y_train = np.array(y_train)
    
    return X_train, y_train

def gen_synthetic_normal(n,d,sigma):
    
    X = np.random.normal(0, 1, n*d).reshape(n, d)
    w = np.random.normal(0, 1, d)
    y = X@w + np.random.normal(0, sigma, n)
    
    return X, np.array([y]).T, np.array([w]).T

## Create dataset

In [10]:
n = 100 # number of data points
m = 10 # number of features
sigma = 0.1
lamb = 10
rho = 1
X, y, w = gen_synthetic_normal(n,m,sigma)

In [11]:
print("Shape of X: ", X.shape)
print("Shape of y: ", y.shape)
print("Shape of w: ", w.shape)

Shape of X:  (100, 10)
Shape of y:  (100, 1)
Shape of w:  (10, 1)


## Nominal Problem

In [12]:
def solve_nominal(X, y, lamb=10):
    w = cp.Variable((m,1))
    nominal_obj = cp.Minimize(cp.square(cp.norm(X@w - y, 2)) + lamb*cp.square(cp.norm(w,2)))
    constraints = []
    prob = cp.Problem(nominal_obj, constraints)
    prob.solve()
    return prob.value, w.value

In [13]:
loss, w_hat = solve_nominal(X,y,lamb=10)

In [14]:
print("Loss: ", loss)

Loss:  69.2878808448585


## Robust Problem

In [15]:
def solve_robust(X,y, rho, lambda_):
    # data, labels = X,y
    n,d = X.shape
    w_rob = cp.Variable((d,1))

    abs_resids = cp.abs(X@w_rob - y)
    robper = rho*cp.norm(w_rob, p=2)*np.ones((n,1))
    ridge_pen = lambda_*cp.norm(w_rob, p=2)**2
    obj = cp.norm(abs_resids + robper, p=2)**2 + ridge_pen

    prob = cp.Problem(cp.Minimize(obj))
    prob.solve()
    return prob.value, w_rob.value

In [16]:
loss, w_hat = solve_robust(X,y,rho=rho,lambda_=lamb)

In [17]:
print("Loss: ", loss)

Loss:  700.7135848079981


## Poisoned Problem

In [18]:
def solve_poisoned(X_nominal, y, lamb=10, rho=10, mu=10):
    X = cp.Variable((n,m))
    M = cp.Variable((m+1,m+1), PSD=True)
    N = cp.Variable((m+n,m+n), PSD=True)
    t = cp.Variable(1)
    U = cp.Variable((m,m), PSD=True)
    poisoned_obj = cp.Minimize(t - cp.square(cp.norm(y)) + mu*cp.trace(U))
    constraints = [
        M[0:m,0:m] == U - lamb*np.eye(m),
        M[m,m] == t,
        M[m:m+1,0:m] == y.T@X,
        M[0:m,m:m+1] == X.T@y,
        N[0:m,0:m] == U,
        N[m:,m:] == np.eye(n),
        N[m:,0:m] == X,
        N[0:m,m:] == X.T,
        
    ]
    for i in range(n):
        constraints.append(cp.norm(X[i]-X_nominal[i],2) <= rho)
    prob = cp.Problem(poisoned_obj, constraints)
    prob.solve()
    return -1*prob.value, X.value, U.value

In [19]:
print("Shape of X: ", X.shape)
print("Shape of y: ", y.shape)
print("Shape of w: ", w.shape)

Shape of X:  (100, 10)
Shape of y:  (100, 1)
Shape of w:  (10, 1)


In [20]:
print((y.T@X).shape)
print((np.ones((m+1,m+1))[m:m+1,0:m]).shape)

(1, 10)
(1, 10)


In [31]:
loss, X_hat, U_hat = solve_poisoned(X,y,lamb=lamb,rho=rho,mu=1)

In [32]:
print(loss)

4.3902298928443315


In [33]:
l = lambda X: np.linalg.norm(X@np.linalg.pinv(X)@y-y)**2

In [34]:
pd.DataFrame(X_hat)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.251309,-0.851148,0.799268,-0.087367,-0.486561,-0.654214,-0.201775,-0.855384,0.362957,-0.684572
1,-1.504286,-0.178425,0.467271,-0.171103,-0.602601,0.065808,-0.095025,0.071877,-1.331579,0.853530
2,-1.374239,-0.151907,0.397709,0.942305,-0.278633,-0.501040,-0.524141,0.206357,-0.927245,-0.472649
3,-0.472159,-0.144830,1.026161,0.899073,-0.784845,0.343206,0.153681,0.387949,-0.337169,-0.734067
4,-0.513562,-0.036443,0.322771,-0.845816,0.536518,-0.435454,0.005359,-0.762053,0.807828,0.227202
...,...,...,...,...,...,...,...,...,...,...
95,0.120560,0.025145,0.199197,0.481084,1.750914,-0.072872,0.673532,-1.127001,-0.615970,-1.070265
96,-0.549453,-1.040857,0.054505,0.333833,-0.174297,-0.650741,0.505431,-0.754658,-0.801609,0.962855
97,0.024130,0.607219,-0.093679,0.605852,0.278895,0.157330,-0.028907,0.252459,0.965624,0.085231
98,0.244982,0.252970,-0.305252,0.836656,-0.384649,-0.112389,-0.109219,0.068244,0.072439,0.463222


In [35]:
pd.DataFrame(X)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.390441,-1.369103,1.326134,-0.118549,-0.607819,-1.023481,-0.312262,-1.244183,0.583228,-0.950753
1,-2.130814,-0.149100,0.503479,-0.262783,-1.046106,0.129415,-0.127629,0.012040,-1.920033,1.066188
2,-1.854152,0.005374,0.215293,1.213497,-0.771032,-0.594108,-0.686484,0.093981,-1.324145,-0.909102
3,-0.630027,-0.439550,1.782877,1.294929,-0.627730,0.389596,0.197959,0.745420,-0.366855,-0.696237
4,-0.773900,0.229688,0.040482,-1.288151,0.237013,-0.531459,0.025264,-1.353327,1.055371,-0.048211
...,...,...,...,...,...,...,...,...,...,...
95,0.170207,-0.215123,0.613401,0.634197,2.591246,-0.176070,0.800328,-1.150528,-0.643207,-0.967228
96,-0.671136,-1.548279,0.422477,0.470868,0.227491,-0.905570,0.624114,-0.750050,-0.915235,1.528668
97,0.045512,0.999555,-0.097239,1.047864,0.558609,0.254859,-0.052165,0.469448,1.673560,0.202174
98,0.442095,0.212584,-0.188106,1.481832,-0.235045,-0.271977,-0.201048,0.310025,0.216765,1.090893


In [36]:
np.linalg.norm(y)**2

810.9130934104778

In [37]:
l(X_hat)

190.94426653161295

In [38]:
l(X)

1.0672579012105181