In [1]:
# This program computes the dimension of the image of the weight map
# for a DPNN (deep polynomial neural network) p with architecture (d, sigma),
# where d = (d0, d1, d2, ..., dL) and sigma is a monomial activation function.
#
# The output of the network is given as a dL-tuple of homogeneous polynomials Pi.
# The parameter map Psi is the map from the space of all weights w_lij
# to a product of symmetric spaces, given as (P1, P2, ..., PdL),
# where l indexes the weight matrix W_l and ij indexes the entry within W_l.
# 
# We compute dim(im(Psi)) as the rank of Jacobian(Psi) with coefficients in
# a finite field GF(prime) (or the rational field QQ).

In [4]:
# Import libraries
from sage.all import *
from itertools import chain

In [11]:
# Define the architecture
prime = 100271

# Layer widths d = (d0, d1, ..., dL) of the DPNN
d = [3, 2, 1]

# Define the field (swap the two lines below to work over the rationals instead)
#field_k = QQ
field_k = GF(prime)

# Weight names have the form w_<layer><row><col>. Plain digit concatenation is
# only unambiguous while every index is a single digit (e.g. "w_1112" could be
# (1,1,12) or (1,11,2)), so fall back to underscore-separated indices as soon
# as some index can reach 10. Small architectures keep the compact names.
index_sep = '' if (max(d) < 10 and len(d) <= 10) else '_'

# Define the neural network p
ww_vars = []
for k in range(len(d)-1):
    # Symbolic entries of W_{k+1} (d[k+1] rows, d[k] columns) as a flat list
    ww_k = matrix(
        d[k+1], d[k],
        lambda i, j: var('w_' + index_sep.join(str(n) for n in (k+1, i+1, j+1)))
    ).list()
    ww_vars += ww_k
xx_vars = [var('x_{}'.format(i)) for i in range(d[0])]
C = PolynomialRing(field_k, ww_vars)  # polynomials in the weights over field_k
FF = PolynomialRing(C, xx_vars)       # polynomials in the inputs x with coefficients in C

ww = C.gens()
xx = FF.gens()

In [12]:
# Perform the feedforward pass through the DPNN

# Degree r of the monomial activation sigma(u) = u^r
activation_degree = 2

if len(d) < 2:
    raise ValueError("architecture d must contain at least an input and an output width")

# Offset into the flat weight tuple ww at which the current layer's weights start
shift = 0

# Running activation vector; starts as the input column vector x
sigma_Wi_x = Matrix(xx).transpose()

for i in range(len(d)-1):
    n_weights = d[i]*d[i+1]
    # W_{i+1} has d[i+1] rows and is filled with the next n_weights generators of C
    Wi = matrix(FF, d[i+1], list(ww[shift:shift+n_weights]))
    print(f"W_{i+1}:\n{Wi}")
    print()
    shift += n_weights

    Wi_x = Wi*sigma_Wi_x
    # apply the activation function sigma entrywise
    sigma_Wi_x = Wi_x.apply_map(lambda u: u^activation_degree)

# The network output is the last linear layer, without a trailing activation
p = Wi_x
p # print the neural network output

W_1:
[w_111 w_112 w_113]
[w_121 w_122 w_123]

W_2:
[w_211 w_212]



[(w_111^2*w_211 + w_121^2*w_212)*x_0^2 + (2*w_111*w_112*w_211 + 2*w_121*w_122*w_212)*x_0*x_1 + (w_112^2*w_211 + w_122^2*w_212)*x_1^2 + (2*w_111*w_113*w_211 + 2*w_121*w_123*w_212)*x_0*x_2 + (2*w_112*w_113*w_211 + 2*w_122*w_123*w_212)*x_1*x_2 + (w_113^2*w_211 + w_123^2*w_212)*x_2^2]

In [13]:
# Collect, for every output polynomial Pi, its list of coefficients.
# p is a column with d[-1] entries; each entry is a polynomial in x whose
# coefficients are polynomials in the weights.
coefficients = [p[row, 0].coefficients() for row in range(d[-1])]

In [14]:
# Combine all coefficients to get the parameter map Psi = (P1, P2, ..., PdL).
# The per-output coefficient lists are flattened in a single pass instead of
# rebuilding the vector once per output.
Psi = vector(list(chain.from_iterable(coefficients)))
#Psi # Print the parameter map

In [15]:
# Compute the dimension of the image of Psi as the rank of its Jacobian
# evaluated at a random point of the weight space.
# Printed pair: (rank of the evaluated Jacobian, number of rows of the Jacobian,
# i.e. the number of coefficients collected in Psi).
# NOTE: the rank at a single point is a lower bound for the generic rank; the
# two agree unless the random point happens to be degenerate.

# Fix the seed so the evaluation point, and hence the printed result, is reproducible
set_random_seed(0)
v = random_vector(field_k, len(ww))
J = jacobian(Psi, ww)
J_num = J(list(v)) # numerical value of the Jacobian
print(f"(rank,dim) = {rank(J_num), J.nrows()}")

(rank,dim) = (5, 6)


In [16]:
# Repeat the rank computation for each output Pi separately, i.e. for the
# weight map restricted to a single coordinate polynomial. The Jacobians are
# evaluated at the same random point v as the full map.
for P_coeffs in coefficients:
    J = jacobian(P_coeffs, ww)
    J_num = J(list(v)) # numerical value of the Jacobian
    print(f"(rank,dim) = {rank(J_num), J.nrows()}")

(rank,dim) = (5, 6)
