In [1]:
## Training codes for LRPCA 
## Implemented by Jialin Liu @ Alibaba DAMO
## Date: Dec. 07, 2021

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"

import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
import numpy as np
import scipy.io as sio
import scipy.sparse.linalg as lina
import time
# Classic RPCA implementation (loaded from the robust-pca-master directory added to sys.path below)
import sys
sys.path.append("/mnt/MineSafe-2024/models/robust-pca-master")
from r_pca import R_pca

## ================Preparations====================
# Use the first visible GPU when CUDA is available; otherwise fall back to the
# CPU so the notebook still runs on a machine without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
datatype = torch.float64                # working precision of the learnable scalars
Y0_t = np.load("./raw_batch.npy")       # observation (NumPy array read from disk)

## =============Generate RPCA problems=============
# According to Appendix C, run classic RPCA to obtain X.
# `bg` is later used as the training target X0_t; `fg` is presumably the
# sparse component returned by R_pca.fit -- TODO confirm against r_pca.py.
rpca = R_pca(Y0_t)
bg, fg = rpca.fit(max_iter=100, iter_print=1)

iteration: 1, error: 16.320908213046007
iteration: 2, error: 7.34465539868624
iteration: 3, error: 4.485678942986565
iteration: 4, error: 2.9793070397594628
iteration: 5, error: 2.2528348579069952
iteration: 6, error: 1.7025888310159714
iteration: 7, error: 1.293094556967247
iteration: 8, error: 1.0354972425311202
iteration: 9, error: 0.8416237194625962
iteration: 10, error: 0.7036348017465519
iteration: 11, error: 0.6069763587327122
iteration: 12, error: 0.5316106520152405
iteration: 13, error: 0.4693515164089797
iteration: 14, error: 0.4167852580215523
iteration: 15, error: 0.3711894223881126
iteration: 16, error: 0.33057362312763006
iteration: 17, error: 0.2952373796441481
iteration: 18, error: 0.26587585059644714
iteration: 19, error: 0.24158035179223314
iteration: 20, error: 0.22136892121588367
iteration: 21, error: 0.2047793476561704
iteration: 22, error: 0.19082802245594227
iteration: 23, error: 0.17859210791655905
iteration: 24, error: 0.16788449622852994
iteration: 25, error: 

In [2]:
# Move the observation and the classic-RPCA background onto the training device.
# torch.as_tensor accepts both NumPy arrays and tensors, so this cell can be
# re-run safely even though it rebinds Y0_t (torch.from_numpy would raise a
# TypeError on the second run because Y0_t is already a tensor by then).
# Casting to `datatype` makes the working precision explicit instead of
# relying on implicit type promotion inside the model.
Y0_t = torch.as_tensor(Y0_t, dtype=datatype, device=device)
X0_t = torch.as_tensor(bg, dtype=datatype, device=device)

In [3]:
## ================Parameters======================
# Problem size is taken from the observation matrix itself.
d1, d2 = Y0_t.shape[0], Y0_t.shape[1]   # num. of rows, num. of columns
r = 2                                   # underlying rank
# alpha = 0.1                           # fraction of outliers (disabled)

# Initial values of the learnable scalars:
#   step_initial -> step size (eta in the paper)
#   ths_initial  -> threshold (zeta in the paper); the original note also mentions 1e-3
step_initial, ths_initial = 0.5, 0.25

maxIt = 5                               # num. of layers you want to train

In [4]:
## ===================LRPCA model===================
class MatNet(nn.Module):
    """Unrolled LRPCA network with one learnable threshold and step size per layer.

    The class reads the module-level settings ``maxIt`` (number of layers),
    ``ths_initial``, ``step_initial``, ``datatype`` and ``device``.

    Attributes:
        ths_v: per-layer soft-threshold parameters (zeta), length ``maxIt``.
        step: per-layer step-size parameters (eta), length ``maxIt``.
            ``step[0]`` is never read by ``forward``.
        ths_backup: plain tensors holding the last accepted thresholds; used by
            ``CheckNegative`` / ``CheckNa`` to roll back.
    """

    def __init__(self):
        # Zero-argument super(): `super(type(self), self)` recurses forever as
        # soon as this class is subclassed.
        super().__init__()
        # nn.ParameterList registers the scalars with the module, so they show
        # up in parameters() / state_dict(); a plain Python list hides them.
        self.ths_v = nn.ParameterList(
            [nn.Parameter(torch.tensor(ths_initial, dtype=datatype, device=device))
             for _ in range(maxIt)]
        )
        self.step = nn.ParameterList(
            [nn.Parameter(torch.tensor(step_initial, dtype=datatype, device=device))
             for _ in range(maxIt)]
        )
        self.ths_backup = [torch.tensor(ths_initial, dtype=datatype, device=device)
                           for _ in range(maxIt)]

    def thre(self, inputs, threshold):
        """Soft-threshold ``inputs``: sign(x) * max(|x| - threshold, 0)."""
        floor = torch.zeros([1, 1], dtype=datatype, device=device)
        return torch.sign(inputs) * torch.max(torch.abs(inputs) - threshold, floor)

    def forward(self, Y0_t, r, X0_t, num_l):
        """Run the first ``num_l`` layers and return the training loss.

        Args:
            Y0_t: observed matrix.
            r: rank used for the low-rank factorisation.
            X0_t: target matrix the product U V^T is compared against.
            num_l: number of layers to unroll (layer 0 is the initialisation).

        Returns:
            Scalar tensor: norm of ``U V^T - X0_t``.
        """
        ## Initialization: threshold the observation, then take a rank-r
        ## factorisation of the remainder and split the singular values evenly.
        S_t = self.thre(Y0_t, self.ths_v[0])
        L, Sigma, R = torch.svd_lowrank(Y0_t - S_t, q=r, niter=4)
        Sigsqrt = torch.diag(torch.sqrt(Sigma))
        U_t = torch.mm(L, Sigsqrt)
        V_t = torch.mm(R, Sigsqrt)

        ## Main Loop in LRPCA
        for t in range(1, num_l):
            YmUV = Y0_t - torch.mm(U_t, V_t.t())
            S_t = self.thre(YmUV, self.ths_v[t])
            E_t = YmUV - S_t
            Vkernel = torch.inverse(V_t.t() @ V_t)
            Ukernel = torch.inverse(U_t.t() @ U_t)
            # Both factors are updated from the same (old) U_t, V_t.
            Unew = U_t + self.step[t] * (torch.mm(E_t, V_t) @ Vkernel)
            Vnew = V_t + self.step[t] * (torch.mm(U_t.t(), E_t).t() @ Ukernel)
            U_t = Unew
            V_t = Vnew

        ## loss function in training
        loss = (torch.mm(U_t, V_t.t()) - X0_t).norm()
        return loss

    def InitializeThs(self, en_l):
        """Set layer ``en_l``'s threshold to 0.1 x the previous layer's threshold."""
        self.ths_v[en_l].data = torch.clone(self.ths_v[en_l - 1].data * 0.1)

    def _restore_or_backup(self, corrupted):
        """Roll every threshold back to its backup if ``corrupted``, else refresh the backups."""
        for t in range(maxIt):
            if corrupted:
                self.ths_v[t].data = torch.clone(self.ths_backup[t])
            else:
                self.ths_backup[t] = torch.clone(self.ths_v[t].data)

    def CheckNegative(self):
        """Return True (and restore all thresholds) if any threshold is negative.

        When no threshold is negative the current values become the new backup.
        """
        isNegative = any(bool(self.ths_v[t].data < 0) for t in range(maxIt))
        self._restore_or_backup(isNegative)
        return isNegative

    def CheckNa(self):
        """Return True (and restore all thresholds) if any threshold is NaN.

        When no threshold is NaN the current values become the new backup.
        """
        isNa = any(bool(torch.isnan(self.ths_v[t].data)) for t in range(maxIt))
        self._restore_or_backup(isNa)
        return isNa

    def EnableSingleLayer(self, en_l):
        """Freeze every layer except ``en_l``."""
        for t in range(maxIt):
            trainable = (t == en_l)
            self.ths_v[t].requires_grad = trainable
            self.step[t].requires_grad = trainable

    def EnableLayers(self, num_l):
        """Make layers ``0 .. num_l-1`` trainable and freeze the remaining ones."""
        for t in range(maxIt):
            trainable = t < num_l
            self.ths_v[t].requires_grad = trainable
            self.step[t].requires_grad = trainable

In [5]:
## =================Training Scripts======================
Nepoches_pre = 500      # epochs of single-layer pre-training per stage
Nepoches_full = 1000    # epochs of joint training per stage
lr_fac = 1.0            # basic learning rate

net = MatNet()

# One SGD optimizer per layer, each with two parameter groups:
#   group 0 -> the layer's threshold, group 1 -> the layer's step size.
# The parameters are passed as a list: torch.optim requires an ordered
# collection, and a set has no deterministic ordering.
optimizers = []
for i in range(maxIt):
    optimizer = optim.SGD([net.ths_v[i]], lr=lr_fac * ths_initial / 5000.0)     # optimizer for each layer
    optimizer.add_param_group({'params': [net.step[i]], 'lr': lr_fac * 0.1})    # learning rate for each layer
    optimizers.append(optimizer)

In [6]:
## =================Layerwise Training======================
# In stage k the k-th layer is first pre-trained on its own; for k > 0 layers
# 0..k are then trained jointly. Every epoch uses the same (Y0_t, X0_t) pair.
start = time.time()
for stage in range(maxIt):                                                      # in k-th stage, we train the k-th layer

    ## Pre-training: only train the k-th layer
    print('Layer ',stage,', Pre-training ======================')
    if stage > 6:
        Nepoches_full = 500
    if stage > 0:
        # Tie the threshold learning rate to the previous layer's threshold.
        # .item() stores it as a plain Python float rather than a 0-dim
        # device tensor inside the optimizer's param group.
        optimizers[stage].param_groups[0]['lr'] = net.ths_v[stage-1].item() * lr_fac / 5000.0
    for epoch in range(Nepoches_pre):
        for layer_optimizer in optimizers:
            layer_optimizer.zero_grad()

        net.EnableSingleLayer(stage)
        if stage > 0:
            # NOTE(review): this runs every epoch, so the threshold of the
            # current layer is reset to 0.1 x the previous layer's threshold
            # before each forward pass -- confirm this is intended.
            net.InitializeThs(stage)
        loss = net(Y0_t, r, X0_t, stage+1)
        loss.backward()
        optimizers[stage].step()

        if epoch % 10 == 0:
            if net.CheckNegative():
                print("Negative detected, restored")

        if epoch % 20 == 0:
            print("epoch: " + str(epoch), "\t loss: " + str(loss.item()))

    # Full-training: train 0~k th layers
    print('Layer ',stage,', Full-training =====================')
    if stage == 0:
        continue

    for epoch in range(Nepoches_full):
        for layer_optimizer in optimizers:
            layer_optimizer.zero_grad()

        net.EnableLayers(stage+1)
        loss = net(Y0_t, r, X0_t, stage+1)
        loss.backward()

        for layer_optimizer in optimizers[:stage+1]:
            layer_optimizer.step()

        if epoch % 20 == 0:
            print("epoch: " + str(epoch), "\t loss: " + str(loss.item()))


end = time.time()
print("Training end. Time: " + str(end - start))

## =====================Save model to .mat file ========================
# Gather the learned per-layer scalars into float64 vectors of length maxIt.
result_ths = np.array(
    [net.ths_v[k].data.cpu().numpy() for k in range(maxIt)], dtype=np.float64
)
result_stp1 = np.array(
    [net.step[k].data.cpu().numpy() for k in range(maxIt)], dtype=np.float64
)
result_stp2 = np.zeros((maxIt,))    # allocated but neither filled nor saved in this cell

# Thresholds are stored under 'ths', step sizes under 'step'.
spath = 'LRPCA' + '.mat'
sio.savemat(spath, {'ths': result_ths, 'step': result_stp1})

epoch: 0 	 loss: 1715.2716108542072
epoch: 20 	 loss: 8.043741010227187
epoch: 40 	 loss: 8.05927368912403
epoch: 60 	 loss: 8.0715079170919
epoch: 80 	 loss: 8.05557473603394
epoch: 100 	 loss: 8.060268404114792
epoch: 120 	 loss: 8.060948836586247
epoch: 140 	 loss: 8.065619608440155
epoch: 160 	 loss: 8.055761015182192
epoch: 180 	 loss: 8.06553627171656
epoch: 200 	 loss: 8.036135614394958
epoch: 220 	 loss: 8.054975392923534
epoch: 240 	 loss: 8.069779225933932
epoch: 260 	 loss: 8.06703276730379
epoch: 280 	 loss: 8.066606532697948
epoch: 300 	 loss: 8.07282334565985
epoch: 320 	 loss: 8.0681741772168
epoch: 340 	 loss: 8.065878918405904
epoch: 360 	 loss: 8.064734027005109
epoch: 380 	 loss: 8.06275681190323
epoch: 400 	 loss: 8.065829780036035
epoch: 420 	 loss: 8.068415189247478
epoch: 440 	 loss: 8.065061928148056
epoch: 460 	 loss: 8.112580810276807
epoch: 480 	 loss: 8.060910422428272
epoch: 0 	 loss: 8.065239681649796
epoch: 20 	 loss: 8.06431513817433
epoch: 40 	 loss: 8.

In [None]:
## =================Layerwise Training======================
# Variant of the layer-wise schedule with a larger threshold learning rate
# (divisor 500 instead of 5000) and an additional NaN guard.
# NOTE(review): this cell reuses `net` and `optimizers` created in earlier
# cells; re-create them first if a fresh training run is wanted.
start = time.time()
for stage in range(maxIt):                                                      # in k-th stage, we train the k-th layer

    ## Pre-training: only train the k-th layer
    print('Layer ', stage, ', Pre-training ======================')
    if stage > 6:
        Nepoches_full = 500
    if stage > 0:
        # Tie the threshold learning rate to the previous layer's threshold.
        # .item() stores it as a plain Python float rather than a 0-dim
        # device tensor inside the optimizer's param group.
        optimizers[stage].param_groups[0]['lr'] = net.ths_v[stage-1].item() * lr_fac / 500.0

    for epoch in range(Nepoches_pre):
        for layer_optimizer in optimizers:
            layer_optimizer.zero_grad()

        net.EnableSingleLayer(stage)
        if stage > 0:
            # NOTE(review): this runs every epoch, so the threshold of the
            # current layer is reset to 0.1 x the previous layer's threshold
            # before each forward pass -- confirm this is intended.
            net.InitializeThs(stage)

        loss = net(Y0_t, r, X0_t, stage+1)
        loss.backward()
        if epoch % 20 == 0:
            # Debug aid, printed at the logging cadence instead of every
            # epoch. Layer 0 is frozen by EnableSingleLayer when stage > 0,
            # so this is only informative during stage 0.
            print(net.ths_v[0].grad)
        optimizers[stage].step()

        if epoch % 10 == 0:
            if net.CheckNegative():
                print("Negative detected, restored")
            if net.CheckNa():
                print("Na detected, restored")

        if epoch % 20 == 0:
            print("epoch: " + str(epoch), "\t loss: " + str(loss.item()))

    # Full-training: train 0~k th layers
    print('Layer ', stage,', Full-training =====================')
    if stage == 0:
        continue
    for epoch in range(Nepoches_full):
        for layer_optimizer in optimizers:
            layer_optimizer.zero_grad()

        net.EnableLayers(stage+1)
        loss = net(Y0_t, r, X0_t, stage+1)
        loss.backward()

        for layer_optimizer in optimizers[:stage+1]:
            layer_optimizer.step()

        if epoch % 20 == 0:
            print("epoch: " + str(epoch), "\t loss: " + str(loss.item()))

end = time.time()
print("Training end. Time: " + str(end - start))

## =====================Save model to .mat file ========================
# Gather the learned per-layer scalars into float64 vectors of length maxIt.
result_ths = np.array(
    [net.ths_v[k].data.cpu().numpy() for k in range(maxIt)], dtype=np.float64
)
result_stp1 = np.array(
    [net.step[k].data.cpu().numpy() for k in range(maxIt)], dtype=np.float64
)
result_stp2 = np.zeros((maxIt,))    # allocated but neither filled nor saved in this cell

# Thresholds are stored under 'ths', step sizes under 'step'.
spath = 'LRPCA' + '.mat'
sio.savemat(spath, {'ths': result_ths, 'step': result_stp1})