In [2]:
import cmath
import math
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms

# Compute device shared by the cells below: CUDA when PyTorch reports it as
# available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
from Utilities import createFourierMatrix,zeroPad2DMatrix,deZeroPad2DMatrix,computeLayerLipschitzFourier

In [5]:
def SGDminimize(X0,Z,U,F,rho,lr=0.01,steps=1000):
    """Approximately solve the ADMM H-subproblem with Adam.

    Minimises, over a real matrix X with the shape of ``X0``::

        ||X - X0||_F^2 + (rho / 2) * ||X @ F.T - Z + U||_F^2

    where the second term is the squared magnitude of a complex residual,
    evaluated through its real and imaginary parts.

    Args:
        X0: Real tensor the solution is pulled towards. The iterate is
            created with its shape and on its device.
        Z: Complex tensor shaped like ``X0 @ F.T``.
        U: Complex tensor with the same shape as ``Z`` (scaled dual variable).
        F: Complex matrix whose second dimension matches ``X0.shape[-1]``.
        rho: Penalty weight on the coupling term.
        lr: Adam learning rate.
        steps: Number of Adam iterations.

    Returns:
        A detached float32 tensor with the shape of ``X0``.
    """
    # Create the iterate directly on X0's device. Calling `.to(device)` on a
    # tensor that already has requires_grad=True returns a *non-leaf* copy
    # whenever a real transfer happens, and the optimizer refuses non-leaves.
    X = torch.rand_like(X0, dtype=torch.float32, requires_grad=True)

    def _const(t):
        # Inputs are constants of the sub-problem: cut them out of any outer
        # autograd graph so backward() never writes gradients into them.
        return t.detach().to(device=X.device, dtype=X.dtype)

    # Loop-invariant pieces, built once instead of on every step.
    anchor = _const(X0)
    F_real_T = _const(torch.real(F)).T
    F_imag_T = _const(torch.imag(F)).T
    target_real = _const(Z.real) - _const(U.real)
    target_imag = _const(Z.imag) - _const(U.imag)

    optimizer = torch.optim.Adam([X], lr=lr)
    for _ in range(steps):
        optimizer.zero_grad()
        res_real = X @ F_real_T - target_real
        res_imag = X @ F_imag_T - target_imag
        loss = torch.sum((X - anchor)**2) + (rho/2)*torch.sum(res_real**2 + res_imag**2)
        loss.backward()
        optimizer.step()

    return X.detach()

In [3]:
import torchvision.models as models
# AlexNet built with `pretrained=True`; later cells read the weight of
# `alexnet_model.features[3]` as the kernel to normalise.
# NOTE(review): recent torchvision releases prefer the `weights=` argument
# over `pretrained=` — confirm against the installed version.
alexnet_model = models.alexnet(pretrained=True)



In [11]:
# Toy input: a single 3x3 kernel (one output channel, one input channel)
# whose only non-zero entries sit in the middle column.
kernel_rows = [
    [0, 0.3, 0],
    [0, 0.4, 0],
    [0, 0.3, 0],
]
layer = torch.tensor([[kernel_rows]])
print(layer)

tensor([[[[0.0000, 0.3000, 0.0000],
          [0.0000, 0.4000, 0.0000],
          [0.0000, 0.3000, 0.0000]]]])


In [None]:
# Alternative toy input: a uniformly random 1x1x3x3 kernel. Running this cell
# rebinds `layer`, so whichever of the `layer = ...` cells ran last decides
# what the cells below operate on. No seed is set in the visible cells, so the
# values differ from run to run.
layer = torch.rand((1,1,3,3))

In [12]:
# ADMM Frobenius normalisation, H-update solved numerically by SGDminimize.
#
# Each iteration:
#   1. H  <- SGDminimize(H0, Hf, U, F, rho)
#   2. Hf <- (H @ F.T + U) with every column rescaled so that its 2-norm
#            taken over dim 0 is at most 1
#   3. U  <- U + H @ F.T - Hf
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Uncomment to run on the AlexNet conv weight instead of the current `layer`:
#layer = alexnet_model.features[3].weight.to(device)
layer = layer.to(device)
s = layer.shape
k = s[3]
n = 40

F = createFourierMatrix(k, n)

# Problem data: the kernel flattened to (out*in channels, kernel pixels).
all_one = torch.ones(n*n, device=device)
H0 = layer.reshape(s[0]*s[1], s[2]*s[3]).to(device)
print(H0.shape)
F = F.to(device)
print(F.shape)
F_real = torch.real(F)
F_imag = torch.imag(F)

# ADMM state: primal H, Fourier-side copy Hf and scaled dual U.
H = torch.rand_like(H0, dtype=torch.float32).to(device)
print(H.shape)
fourier_shape = (s[0]*s[1], n*n)
U = torch.zeros(fourier_shape, dtype=torch.complex64).to(device)
Hf = torch.zeros(fourier_shape, dtype=torch.complex64).to(device)

pri_res = 100
rho = 0.1

# A residual-driven stopping rule (`while pri_res > 0.5`) was tried here;
# the loop currently runs a fixed 100 iterations.
for i in range(100):
    # H-update
    H = SGDminimize(H0, Hf, U, F, rho)

    # Complex image of H under F, filled in part by part.
    H_fourier = torch.zeros_like(Hf)
    H_fourier.real = H @ F_real.T
    H_fourier.imag = H @ F_imag.T

    # Hf-update: divide each column by max(1, column norm).
    Hf = H_fourier + U
    H_frob = Hf.abs().square().sum(dim=0).sqrt().clamp(min=1)
    s_f = H_frob.shape
    Hf = Hf / H_frob.reshape(1, s_f[0])

    # Dual update and primal residual.
    U = U + H_fourier - Hf
    pri_res = (H_fourier - Hf).norm(p='fro')
    if i % 10 == 0:
        print(f'Step {i}: Primal Residual:{pri_res}')

# Results
print(f'Optimized H:\n{H}')

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# Output test: fold H back into the 4-D kernel layout, then report the
# Lipschitz estimate from the helper and the squared distance to H0.
layer_wt = H.reshape(s[0], s[1], s[2], s[3])
lipschitz_const = computeLayerLipschitzFourier(layer_wt, n)
print(f'Lipschitz Constant: {lipschitz_const}')
sq_distance = torch.norm(H - H0, p="fro")**2
print(f'Difference:{sq_distance}')
print(f'Original H:\n{H0}')

In [None]:
# ADMM Frobenius Normalize - Heuristic
#
# Works on kernels zero-padded to n x n and uses the orthonormal 2-D FFT in
# place of an explicit Fourier matrix. After the closed-form H-update the
# iterate is cropped back to k x k and re-padded via the Utilities helpers.
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Detach the weight: it is a trainable parameter, and without detach every
# tensor derived from it (H, Hf, U, ...) would carry an autograd graph that
# keeps growing across the 100 iterations below and pins large intermediate
# tensors in memory.
layer = alexnet_model.features[3].weight.detach().to(device)
s = layer.shape
k = s[3]
n = 40

# Padded kernels, one n x n slice per (out channel, in channel) pair.
H0 = zeroPad2DMatrix(layer,n)
H0 = torch.reshape(H0,(s[0]*s[1],n,n)).to(device)

print(H0.shape)

# ADMM state: primal H, Fourier-side copy Hf and scaled dual U.
# The random H is only a placeholder; the first iteration overwrites it.
H = torch.rand_like(H0,dtype = torch.float32).to(device)
print(H.shape)
U = torch.zeros((s[0]*s[1],n,n),dtype=torch.complex64).to(device)
Hf = torch.zeros((s[0]*s[1],n,n),dtype=torch.complex64).to(device)

pri_res = 100
rho = 0.1

# No gradients are needed anywhere in the iteration.
with torch.no_grad():
    for i in range(100):
        # H-update: (2*H0 + rho*Re(x)) / (2 + rho) is the minimiser of
        # ||H - H0||^2 + (rho/2) * ||H - Re(x)||^2.
        x = torch.fft.ifft2(Hf-U,norm='ortho')
        H = (2*H0 + (rho) * torch.real(x))/(2+rho)
        # Heuristic step: crop to k x k and pad back to n x n.
        H = torch.reshape(H,(s[0],s[1],n,n))
        H = deZeroPad2DMatrix(H,k)
        H = zeroPad2DMatrix(H,n)
        H = torch.reshape(H,(s[0]*s[1],n,n))

        H_fourier = torch.fft.fft2(H,norm='ortho')

        # Hf-update: at every frequency, divide by max(1, 2-norm over dim 0).
        Hf = H_fourier + U
        H_frob = torch.clamp(torch.sqrt(torch.sum(torch.square(torch.abs(Hf)),dim = 0)),min=1)
        Hf = torch.div(Hf,torch.reshape(H_frob,(1,n,n)))

        # Dual update and primal residual.
        U = U + H_fourier - Hf
        pri_res = torch.norm(H_fourier - Hf, p='fro')
        if(i%10 == 0):
            print(f'Step {i}: Primal Residual:{pri_res}')
# Results
print(f'Optimized H:\n{H}')

In [None]:
# Output test: crop the padded result back to k x k kernels, restore the 4-D
# kernel layout, then report the helper's Lipschitz estimate and the squared
# distance between the padded H and H0.
A = H.reshape(s[0], s[1], n, n)
A = deZeroPad2DMatrix(A, k)
layer_wt = A.reshape(s[0], s[1], s[2], s[3])
lipschitz_const = computeLayerLipschitzFourier(layer_wt, n)
print(f'Lipschitz Constant: {lipschitz_const}')
sq_distance = torch.norm(H - H0, p="fro")**2
print(f'Difference:{sq_distance}')
print(f'Original H:\n{H0}')

In [13]:
# ADMM Frobenius Normalize - Exact
#
# Same splitting as the SGD variant, but the H-update is solved in closed
# form: setting the gradient of
#     ||H - H0||^2 + (rho/2) * ||H @ F.T - (Hf - U)||^2
# to zero gives
#     H @ (2*I + rho*Re(F^H F)) = 2*H0 + rho*Re(conj(Hf - U) @ F).
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Detach the weight: it is a trainable parameter, and without detach every
# tensor derived from it (H, H_fourier, Hf, U) would carry an autograd graph
# that grows across all 100 iterations and keeps large intermediates alive.
layer = alexnet_model.features[3].weight.detach().to(device)
s = layer.shape
k = s[3]
n = 40

F = createFourierMatrix(k,n)

# Kernel flattened to (out*in channels, k*k).
H0 = torch.reshape(layer,(s[0]*s[1],k*k)).to(device)

print(H0.shape)
F = F.to(device)
F_real, F_imag = torch.real(F), torch.imag(F)

# ADMM state: primal H, Fourier-side copy Hf and scaled dual U.
# The random H is only a placeholder; the first iteration overwrites it.
H = torch.rand_like(H0,dtype = torch.float32).to(device)
print(H.shape)
U = torch.zeros((s[0]*s[1],n*n),dtype=torch.complex64).to(device)
Hf = torch.zeros((s[0]*s[1],n*n),dtype=torch.complex64).to(device)

pri_res = 100
rho = 0.1

# System matrix of the H-update; factorised once on the CPU because it is
# only k*k x k*k. Its eigenvalues are printed as a conditioning check.
temp = 2*torch.eye(k*k).to(device) + rho*torch.real(F.H@F)
temp = temp.to('cpu')
l,v = torch.linalg.eig(temp)
print(l)
inv_mat = torch.linalg.inv(temp)
inv_mat = inv_mat.to(device)

# No gradients are needed anywhere in the iteration.
with torch.no_grad():
    for i in range(100):
        # H-update (closed form).
        x = Hf - U
        H = (2*H0 + rho*torch.real(torch.conj(x)@F))@inv_mat

        # Complex image of H under F, built directly from its two parts
        # instead of filling a freshly allocated zero tensor each iteration.
        H_fourier = torch.complex(H@F_real.T, H@F_imag.T)

        # Hf-update: divide each column by max(1, 2-norm over dim 0).
        Hf_prev = Hf
        Hf = H_fourier + U
        H_frob = torch.clamp(torch.sqrt(torch.sum(torch.square(torch.abs(Hf)),dim = 0)),min=1)
        Hf = Hf / H_frob.unsqueeze(0)
        # Change in Hf between consecutive iterations.
        dual_res = torch.norm(Hf - Hf_prev, p='fro')

        # Dual update and primal residual.
        U = U + H_fourier - Hf
        pri_res = torch.norm(H_fourier - Hf, p='fro')

        if(i%10 == 0):
            print(f'Step {i}: Primal Residual:{pri_res} --- Dual Residual:{dual_res}')
# Results
print(f'Optimized H:\n{H}')

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# Output test: restore the 4-D kernel layout of H, then print the helper's
# Lipschitz estimate, the squared Frobenius distance to H0, and H0 itself.
kernel_shape = (s[0], s[1], s[2], s[3])
layer_wt = H.reshape(kernel_shape)
lipschitz_const = computeLayerLipschitzFourier(layer_wt, n)
print(f'Lipschitz Constant: {lipschitz_const}')
sq_distance = torch.norm(H - H0, p="fro")**2
print(f'Difference:{sq_distance}')
print(f'Original H:\n{H0}')