## Define Operator Algebra

In [1]:
using LinearAlgebra

struct Operator  # Linear Matrix Operators from Matrices to Matrices (and the operator adjoint)
    op
    adj
    sym
end

## Operators
‚Ñí(A::Matrix)  = Operator(X->A*X   , X->A'*X, "‚Ñí$(size(A))"  )   # left multiply by A (X ‚Üí AX)
‚Ñõ(A::Matrix)  = Operator(X->X*A   , X->X*A', "‚Ñõ$(size(A))")     # right multiply by A (X ‚Üí XA)
‚Ñã(A::Matrix)  = Operator(X->X.*A  , X->X.*A, "‚Ñã$(size(A))")    # Hadamard product (elementwise product)
‚Ñê()  =          Operator(X->X      ,    X->X,    "I")     # identity operator
ùí™()  =           Operator(X->zero(X) , X->zero(X),"ùí™")# zero operator

import Base:  zero, show, adjoint, *, \, ‚àò, +, -
show(io::IO, M::Operator) = print(io, M.sym)  # pretty printing
zero(::Any) = ùí™() # Let's make any undefined zero the ùí™ operator
adjoint(A::Operator) = Operator(A.adj, A.op,  "("*A.sym*")'")
adjoint(B::Bidiagonal) = Bidiagonal(adjoint.(B.dv),adjoint.(B.ev),(B.uplo == 'U') ? :L : :U) # lower to upper
-(A::Operator) = Operator(X->-A.op(X), X->-A.adj(X),"-"*A.sym)
-(::typeof(ùí™), X::Matrix) = -X # ùí™ - X should be -X
*(A::Operator, X::Matrix) = A.op(X)
\(‚Ñê::typeof(‚Ñê()), A::Matrix) = A
‚àò(A::Operator, B::Operator) = Operator(A.op ‚àò B.op, B.adj ‚àò A.adj, A.sym*"‚àò"*B.sym)
# We need [A;B]*C to somehow magically be [AC;BC]
*(M::Adjoint{Operator, Matrix{Operator}},v::Array) = M .* [v]
+(A::Array,x::Number)=A.+x

+ (generic function with 192 methods)

## Example

In [2]:
# Basic Test
B = [ 1 2; 3 4]
M = [10 1;1 10]
C = [ 2 5;4 6]
‚Ñí(M)

‚Ñí(2, 2)

In [3]:
typeof(‚Ñí(M))

Operator

In [4]:
‚Ñí(M) * [ 1 0 ;0 1]

2√ó2 Matrix{Int64}:
 10   1
  1  10

In [5]:
‚Ñí(M) * B 

2√ó2 Matrix{Int64}:
 13  24
 31  42

In [6]:
‚Ñí(M).op(B)

2√ó2 Matrix{Int64}:
 13  24
 31  42

In [7]:
M * B

2√ó2 Matrix{Int64}:
 13  24
 31  42

In [8]:
‚Ñõ(M) * B 

2√ó2 Matrix{Int64}:
 12  21
 34  43

In [9]:
B * M # right multiply by M

2√ó2 Matrix{Int64}:
 12  21
 34  43

In [10]:
[‚Ñã(M) * B M .* B]

2√ó4 Matrix{Int64}:
 10   2  10   2
  3  40   3  40

In [11]:
# tr( B'*(‚Ñí(M)*C) ), tr( (‚Ñí(M)'*B) * C)     # This is not correct
tr( B'*(‚Ñí(M)*C) ), tr( (‚Ñí(M)'*B)' * C)    # <B,‚ÑíC>=<‚Ñí'B,C>

(522, 522)

In [12]:
tr(B' * ‚Ñí(M).op(C)), tr( ‚Ñí(M).adj(B)' * C)

(522, 522)

In [13]:
B = [ 1 2; 3 4]
M = Bidiagonal( [‚Ñê(),‚Ñê(),‚Ñê()] , [‚Ñí(B),‚Ñí(B)], :L)
display(Matrix(M))

3√ó3 Matrix{Operator}:
 I        ùí™        ùí™
 ‚Ñí(2, 2)  I        ùí™
 ùí™        ‚Ñí(2, 2)  I

In [14]:
display(Matrix(M'))

3√ó3 Matrix{Operator}:
 (I)'  (‚Ñí(2, 2))'  ùí™
 ùí™     (I)'        (‚Ñí(2, 2))'
 ùí™     ùí™           (I)'

In [15]:
display(M')

3√ó3 Bidiagonal{Operator, Vector{Operator}}:
 (I)'  (‚Ñí(2, 2))'  ‚ãÖ
 ‚ãÖ     (I)'        (‚Ñí(2, 2))'
 ‚ãÖ     ‚ãÖ           (I)'

In [16]:
b = [ rand(2,2) for i=1:3]
x = M'\b
display(M'*x .- b)

3-element Vector{Matrix{Float64}}:
 [1.1102230246251565e-16 1.1102230246251565e-16; 5.551115123125783e-16 2.220446049250313e-16]
 [1.1102230246251565e-16 0.0; -1.1102230246251565e-16 1.1102230246251565e-16]
 [0.0 0.0; 0.0 0.0]

In [17]:
x = M \ b;
display(M * x - b)

3-element Vector{Matrix{Float64}}:
 [0.0 0.0; 0.0 0.0]
 [1.1102230246251565e-16 1.1102230246251565e-16; -1.1102230246251565e-16 -3.3306690738754696e-16]
 [7.771561172376096e-16 2.220446049250313e-16; -1.1102230246251565e-16 9.992007221626409e-16]

## Simple neural net

In [18]:
using OffsetArrays

h(x) =   exp(-x) # sample activation function
h‚Ä≤(x) = -exp(-x)

function neural_net(params,X‚ÇÄ;h=h,h‚Ä≤= h‚Ä≤)
    T = Matrix{Float64}
    N = length(params)
    X = OffsetArray(Vector{T}(undef,N+1),0:N)   
    Œî = Vector{T}(undef, N)
    X[0] = X‚ÇÄ
    W = first.(params)
    B = last.(params)
    
    for i=1:N         
          X[i] =  h.(W[i]*X[i-1] .+ B[i])
          Œî[i] =  h‚Ä≤.(W[i]*X[i-1] .+ B[i])        
    end 
    X,Œî
end

neural_net (generic function with 1 method)

## Initialization

In [19]:
n = [5,4,3,1]  ## this contains [n‚ÇÄ...n_N]
k = 10 # batchsize
N = length(n)-1 #should be positive
init(sizes...) = 0.01randn(sizes...)
Ws_and_bs =[ [init(n[i+1],n[i]) , init(n[i+1])]  for i=1:N] # The second part of the pair is a vector here
X‚ÇÄ = init(n[1],k)
y  =  init(n[end],k); #  y is what we will compare X_N against
X,Œ¥ = neural_net(Ws_and_bs,X‚ÇÄ) # This has all the X's and Œ¥'s

ùìÅ(x,y) = sum(abs2,x-y)/2 #loss
ùìÅ‚Ä≤(x,y) = x.-y;

X,Œ¥ = neural_net(Ws_and_bs,X‚ÇÄ) # Run the neural net

([[-0.035046585930585517 0.012982298073197633 ‚Ä¶ -0.0030924105743382856 0.005974802943575541; 0.00025141268114752164 -0.009914938897641928 ‚Ä¶ -0.0028492943487333005 -0.015611701717866393; ‚Ä¶ ; 0.0011209758801586 -0.01890446410264968 ‚Ä¶ 0.0066983491167069776 -0.007621109994910292; -0.0025355978426120874 0.003244059034906147 ‚Ä¶ -0.009292682626710189 0.0029745126722565923], [1.0067531321786765 1.0067656174577868 ‚Ä¶ 1.0069405824586666 1.0066829416285565; 1.0074065750718526 1.0071494685805025 ‚Ä¶ 1.0068925885520748 1.0069094341142268; 1.0055769603858578 1.0047587348195102 ‚Ä¶ 1.0053606878399655 1.0049888601341679; 1.0085566916227873 1.009323521581503 ‚Ä¶ 1.0093958702467327 1.0092805990678846], [1.0017893092314496 1.0017805430396876 ‚Ä¶ 1.0017822104742728 1.0017808934862515; 1.016596541450369 1.0166095468093477 ‚Ä¶ 1.0166026722344796 1.0166068698280382; 1.0017119796796596 1.0017240762688813 ‚Ä¶ 1.001736355471557 1.001729229882473], [1.013088829367144 1.0130887863343414 ‚Ä¶ 1.0130888458

In [20]:
# params: `W_i` and `b_i`s: x_{i+1} <- Wi*x_i .+ b_i
Ws_and_bs =[ [init(n[i+1],n[i]) , init(n[i+1],k)]  for i=1:N] # The second part of the pair is a vector here
X‚ÇÄ = init(n[1],k)
y  =  init(n[end],k); #  y is what we will compare X_N against
Ws_and_bs

3-element Vector{Vector{Matrix{Float64}}}:
 [[-0.0007425002343979739 0.01305051600523518 ‚Ä¶ 0.012663924191971722 -0.01593985959093624; 0.011395144366858671 -0.006086426328814495 ‚Ä¶ -0.014284322434702363 -0.002878267383783074; -0.0033208846620722005 0.003345859867818409 ‚Ä¶ 0.0019678023524151386 0.0013575897827500538; -0.017917798134837383 -0.004312213446321821 ‚Ä¶ -0.023435827251455776 0.002211033437777918], [-0.007716241358324973 0.004577874270209617 ‚Ä¶ -0.005131978802187087 0.012730134589888357; -0.005332104771768327 0.003963386477407432 ‚Ä¶ -0.00469900103415049 0.006604486903523769; -0.0060269627387580516 -0.003641375847949044 ‚Ä¶ 0.012053566566836964 -7.362984114200484e-5; -0.005845449849569563 0.0032734460614097746 ‚Ä¶ 0.011331395661874483 0.010861891571573369]]
 [[-0.007747100866522991 0.005688797516456197 -0.011949714157427571 -0.011666473803561134; 0.00770985891805216 0.0028942860334598714 0.001073826716296088 0.007012877275370005; 0.000589680633809492 -0.0028160147374555957

## Backward diff a neural net with operators

In [21]:
X,Œ¥ = neural_net(Ws_and_bs,X‚ÇÄ) # This has all the X's and Œ¥'s

## The diagonal matrix
M = Diagonal([ [‚Ñã(Œ¥[i]) ‚àò ‚Ñõ(X[i-1])  ‚Ñã(Œ¥[i])] for i=1:N])

## The lower triangular matrix (I-L)
ImL = Bidiagonal([‚Ñê() for i in 1:N], -[‚Ñã(Œ¥[i]) ‚àò ‚Ñí(Ws_and_bs[i][1]) for i=2:N] , :L)

## gradient of the loss function
g = [ fill(ùí™,N-1) ; [ùìÅ‚Ä≤(X[N],y)] ] 
‚àáJ = M' * (ImL' \ g)

3-element Vector{Matrix{Matrix{Float64}}}:
 [[5.837004276064882e-6 -3.817333149416575e-6 ‚Ä¶ -2.932603222639201e-6 -6.862320093669955e-7; 5.645463408326703e-7 -3.045564816450895e-7 ‚Ä¶ -2.868908532653854e-7 -8.740195101968993e-8; 2.399074531278846e-6 -1.564966572691779e-6 ‚Ä¶ -1.1801580185748418e-6 -2.7070589980740855e-7; 6.482051367532866e-6 -4.2278364607387255e-6 ‚Ä¶ -3.217045454585083e-6 -7.026802568212954e-7]; [-0.00020257679215507262 -0.00020006720499472351 ‚Ä¶ -0.00019664442136812127 -0.00019522548168917953; -1.851857550698091e-5 -1.668198545888503e-5 ‚Ä¶ -1.7493658849967452e-5 -1.916110364478841e-5; -8.177414914314553e-5 -8.305196629430666e-5 ‚Ä¶ -7.845241907506488e-5 -7.885357412621056e-5; -0.00022263035259689637 -0.00022156975956636702 ‚Ä¶ -0.00021320546325461905 -0.00021441502131992683];;]
 [[-0.02317497496830907 -0.02329871405058458 -0.023172851964494513 -0.023314952508587602; 0.22401380280779432 0.2252302998350849 0.22402059336430674 0.22538703705022006; 0.11928683674409789

In [22]:
*(A::Operator, B::Operator) = Operator(A.op ‚àò B.op, B.adj ‚àò A.adj, A.sym*"‚àò"*B.sym)

* (generic function with 191 methods)

In [23]:
[‚Ñê() ùí™(); ùí™() ‚Ñê()] * [‚Ñí([1 2; 3 4]); ùí™()]

MethodError: MethodError: no method matching +(::Operator, ::Operator)
The function `+` exists, but no method is defined for this combination of argument types.

Closest candidates are:
  +(::Any, ::Any, !Matched::Any, !Matched::Any...)
   @ Base operators.jl:642
  +(!Matched::BitMatrix, !Matched::UniformScaling)
   @ LinearAlgebra ~/.julia/juliaup/julia-1.12.4+0.x64.linux.gnu/share/julia/stdlib/v1.12/LinearAlgebra/src/uniformscaling.jl:154
  +(!Matched::Bool, !Matched::Complex{Bool})
   @ Base complex.jl:308
  ...


In [24]:
[‚Ñê() ùí™(); ùí™() ‚Ñê()] * Diagonal([‚Ñí([1 2; 3 4]), -‚Ñí([1 2; 3 4])]) 

ArgumentError: ArgumentError: cannot convert a value to Union{} for assignment

In [25]:
[‚Ñê() ùí™(); ùí™() ‚Ñê()] * [‚Ñí([1 2; 3 4]) -‚Ñí([1 2; 3 4]); ‚Ñí([1 3; 2 4]) -‚Ñí([1 3; 2 4])]

MethodError: MethodError: no method matching +(::Operator, ::Operator)
The function `+` exists, but no method is defined for this combination of argument types.

Closest candidates are:
  +(::Any, ::Any, !Matched::Any, !Matched::Any...)
   @ Base operators.jl:642
  +(!Matched::BitMatrix, !Matched::UniformScaling)
   @ LinearAlgebra ~/.julia/juliaup/julia-1.12.4+0.x64.linux.gnu/share/julia/stdlib/v1.12/LinearAlgebra/src/uniformscaling.jl:154
  +(!Matched::Bool, !Matched::Complex{Bool})
   @ Base complex.jl:308
  ...


In [26]:
+(A::Operator, B::Operator) = Operator(X->(A.op(X) + B.op(X)), X->(A.adj(X) + B.adj(X)), "("*A.sym*" + "*B.sym*")")

+ (generic function with 193 methods)

somma di operatori lineari

In [27]:
‚Ñí([1 2; 3 4]) + ‚Ñí([1 -2; -3 -2])

(‚Ñí(2, 2) + ‚Ñí(2, 2))

In [28]:
(‚Ñí([1 2; 3 4]) + ‚Ñí([1 -2; -3 -2])) * [1 2; 3 4]

2√ó2 Matrix{Int64}:
 2  4
 6  8

In [29]:
([1 2; 3 4] + [1 -2; -3 -2]) * [1 2; 3 4]

2√ó2 Matrix{Int64}:
 2  4
 6  8

Serve definire il prodotto tra una matrice di operatori e un operatore. Non sembra facile...

In [30]:
*(A::AbstractMatrix{Operator}, Œ±::Operator) = A .* [Œ±]

* (generic function with 192 methods)

In [31]:
[‚Ñê() ùí™(); ùí™() ‚Ñê()] * ‚Ñí([1 2; 3 4])

2√ó2 Matrix{Operator}:
 I‚àò‚Ñí(2, 2)  ùí™‚àò‚Ñí(2, 2)
 ùí™‚àò‚Ñí(2, 2)  I‚àò‚Ñí(2, 2)

In [32]:
ImL * ‚Ñí([1 2; 3 4]) 

3√ó3 Matrix{Operator}:
 I‚àò‚Ñí(2, 2)                  ùí™‚àò‚Ñí(2, 2)                  ùí™‚àò‚Ñí(2, 2)
 -‚Ñã(3, 10)‚àò‚Ñí(3, 4)‚àò‚Ñí(2, 2)  I‚àò‚Ñí(2, 2)                  ùí™‚àò‚Ñí(2, 2)
 ùí™‚àò‚Ñí(2, 2)                  -‚Ñã(1, 10)‚àò‚Ñí(1, 3)‚àò‚Ñí(2, 2)  I‚àò‚Ñí(2, 2)

In [33]:
M * ImL

3√ó3 Bidiagonal{Matrix{Operator}, Vector{Matrix{Operator}}}:
 [‚Ñã(4, 10)‚àò‚Ñõ(5, 10)‚àòI ‚Ñã(4, 10)‚àòI]                                  ‚Ä¶    ‚ãÖ  
 [‚Ñã(3, 10)‚àò‚Ñõ(4, 10)‚àò-‚Ñã(3, 10)‚àò‚Ñí(3, 4) ‚Ñã(3, 10)‚àò-‚Ñã(3, 10)‚àò‚Ñí(3, 4)]       ‚ãÖ  
   ‚ãÖ                                                                  [‚Ñã(1, 10)‚àò‚Ñõ(3, 10)‚àòI ‚Ñã(1, 10)‚àòI]

In [34]:
size(ImL), size(M)

((3, 3), (3, 3))

In [35]:
ImL * M

DimensionMismatch: DimensionMismatch: incompatible dimensions for matrix multiplication: tried to multiply a matrix of size (3, 4) with a matrix of size (1, 2). The second dimension of the first matrix: 4, does not match the first dimension of the second matrix: 1.

In [36]:
typeof(ImL) <: AbstractMatrix{Operator}

true

In [37]:
adjoint(::ComposedFunction{outer::Operator,inner::Operator}) = ComposedFunction{adjoint{inner}, adjoint{outer}}

UndefVarError: UndefVarError: `outer` not defined in `Main`
Suggestion: check for spelling errors or missing imports.

In [38]:
typeof(ImL ‚àò M)

ComposedFunction{Bidiagonal{Operator, Vector{Operator}}, Diagonal{Matrix{Operator}, Vector{Matrix{Operator}}}}

In [39]:
size(ImL * M)

DimensionMismatch: DimensionMismatch: incompatible dimensions for matrix multiplication: tried to multiply a matrix of size (3, 4) with a matrix of size (1, 2). The second dimension of the first matrix: 4, does not match the first dimension of the second matrix: 1.

In [40]:
pppp = (ImL * M)' \ g

DimensionMismatch: DimensionMismatch: incompatible dimensions for matrix multiplication: tried to multiply a matrix of size (3, 4) with a matrix of size (1, 2). The second dimension of the first matrix: 4, does not match the first dimension of the second matrix: 1.

In [41]:
typeof(M)

Diagonal{Matrix{Operator}, Vector{Matrix{Operator}}}

In [42]:
#‚àáJfd is the gradient calculated with finite differences method
‚àáJfd = Ws_and_bs*0
œµ = Ws_and_bs*0
ùúÄ = .0001
for i=1:length(Ws_and_bs), wb=1:2
    for j=1:length(œµ[i][wb])
        œµ[i][wb][j] = ùúÄ
        ‚àáJfd[i][wb][j] = (ùìÅ(neural_net(Ws_and_bs+œµ,X‚ÇÄ)[1][N],y).-ùìÅ(neural_net(Ws_and_bs-œµ,X‚ÇÄ)[1][N],y))/2ùúÄ
        œµ[i][wb][j] = .0
  end
 end

In [43]:
flatten(J) = vcat((x->x[:]).(vcat(J...))...)

flatten (generic function with 1 method)

In [44]:
norm(flatten(‚àáJ)-flatten(‚àáJfd))

1.1040649739623269e-7