In [1]:
import numpy as np
import matplotlib.pyplot as plt

from pyxu.abc import Func, ProxFunc, DiffFunc, LinOp
import pyxu.operator as pxop

In [2]:
%matplotlib qt

### Playing around with pyxu stuff

In [3]:
# Implement l2 norm (already exists in pyxu)
class SquaredL2(Func):
    def __init__(self, dim):
        super().__init__(shape=(1, dim))

    def apply(self, arr):
        return (arr**2).sum(axis=-1, keepdims=True)

In [4]:
x = np.ones(10)
squaredl2func = SquaredL2(dim=x.size)

print(squaredl2func(x))  # no need to explicitly call apply

[10.]


In [5]:
# Implement l2 norm with its gradient
class SquaredL2(DiffFunc):
    def __init__(self, dim):
        super().__init__(shape=(1, dim))

    def apply(self, arr):
        return (arr**2).sum(axis=-1, keepdims=True)

    def grad(self, arr):
        return 2 * arr

In [6]:
# Implement l1 norm and its proximal operator
class L1Norm(ProxFunc):
    def __init__(self, dim):
        super().__init__(shape=(1, dim))

    def apply(self, arr):
        return np.abs(arr).sum(axis=-1, keepdims=True)

    def prox(self, arr, tau):
        return np.sign(arr) * np.clip(np.abs(arr) - tau, 0, None)

In [7]:
# Smooth out l1 norm to obtain Huber loss
dim = 1
huber = L1Norm(dim).moreau_envelope(mu=1)

In [8]:
t = np.linspace(-5, 5, 100)
t = t.reshape(-1, 1)  # reshape for technical reasons

plt.plot(t, huber(t))

[<matplotlib.lines.Line2D at 0x22be7ddcfd0>]

In [16]:
# Turn l2 norm into a loss function (computes distance to given vector y)
dim = 10
b = np.ones(dim)
l2_loss = pxop.SquaredL2Norm(b.size).asloss(b)  # computes ||arr - b||^2
#print(l2_loss(x)) where x is some array with dimension dim

In [22]:
l1_norm = pxop.L1Norm(dim=dim)
print(l1_norm(x))

[10.]


### Try to implement likelihood (univariate HP)

In [129]:
# Toy dimensions
M = 1  # total number of processes
K = 10  # total number of arrivals across all processes

# Define random matrix A of size (M+K) x M
A = np.random.normal(0, 1, (K+M, M*(1+M)))

In [124]:
# Define E: R^{K+1} -> R, E(x) = -\sum_{i=1}^{k} ln(x_i) + x_{K+1} as a DiffFunc
class LikelihoodE(DiffFunc):
    def __init__(self, dim):
        super().__init__(shape=(1, dim))

    def apply(self, arr):
        return -np.sum(np.log(arr[:-1])) + arr[-1]

    def grad(self, arr):
        # current gradient is wrong, need to return [-1/x1, ..., -1/xK, 1]
        if np.any(arr[:-1] == 0):
            print("Warning: divison by zero in the gradient.")
        return np.hstack([-1/arr[:-1], 1])

# Define linear operator. TODO: maybe implement this in a matrix-free way ?
opA = LinOp.from_array(A)

In [125]:
log = pxop.log(pxop.IdentityOp(dim=1))

x = np.array([2.718])
print(log(x))
print(np.log(2.718281828))

[0.99989632]
0.9999999998311266


In [126]:
dim = 10
E = LikelihoodE(dim=dim)

In [127]:
x = np.e*np.ones(dim)
x[-1] = 0
print(E(x))
print(E.grad(x))

-9.0
[-0.36787944 -0.36787944 -0.36787944 -0.36787944 -0.36787944 -0.36787944
 -0.36787944 -0.36787944 -0.36787944  1.        ]


### Try to implement likelihood (multivariate HP)

In [91]:
# Toy dimensions
# M = 8  # total number of processes
# k = [100, 165, 244, 116, 255, 342, 226, 353]  # [numberOfArrivalOfN1, numberOfArrivalOfN2, ...], toy numbers
# K = sum(k)  # total number of arrivals across all processes
M = 2
k = [4, 6]
K = sum(k)

# Define random matrix A of size (M+K) x M with positive entries
A = np.random.normal(0, 1, (K+M, M*(1+M)))
A[np.where(A<=0)] = 0.1

# Define subset S of {1, ..., M+K} of row indices of A which contribute to the compensator term
S = np.zeros(M)  # we know that |S| = M
for i in range(M):
    S[i] = sum(k[:(i+1)]) + i + 1
S = [int(si)-1 for si in S]  # convert to int, decaler de -1 pour avoir tout entre 0 et M+K-1

# Define the complement of S
allIndices = list(range(0, M+K))  # or range(1, M+K+1)
notS = list(set(allIndices) - set(S))

In [92]:
# Define E: R^{K+M} -> R, E(x) = -\sum_{i \not\in S} ln(x_i) + \sum_{i \in S} as a DiffFunc
class LikelihoodE(DiffFunc):
    def __init__(self, dim):
        super().__init__(shape=(1, dim))

    def apply(self, arr):
        return sum(-np.log(arr[notS])) + sum(arr[S])

    def grad(self, arr):
        grad = np.ones(arr.shape)
        grad[notS] = -1/arr[notS]
        return grad

# Define linear operator. TODO: maybe implement this in a matrix-free way ?
opA = LinOp.from_array(A)

In [93]:
dim = M+K
E = LikelihoodE(dim=dim)

In [94]:
x = np.ones(dim)
print(E(x))
print(E.grad(x))

2.0
[-1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1.  1.]


In [95]:
# Define -log(likelihood) (to minimize) as -log(L(theta)) = E(A*theta)
L = E * opA

In [96]:
theta = np.ones(M*(1+M))
print(L(theta))
print(L.grad(theta))
#print(opA.T(E.grad(opA(theta))))  # true gradient

-5.450670427977835
[-1.38300152 -1.21361301 -0.757177   -0.33146924 -1.03682223 -1.62176489]
