# Tucker分解参数选择

## 目标函数
$$
\begin{aligned}
&\arg\min_{C,O,D,T} \Vert \mathit{CF} - C \times_1 O \times_2 D \times_3 T \Vert_F^2 + 
\lambda_1 \Vert O \Vert_1 + \lambda_2 \Vert D \Vert_1 + \lambda_3 \Vert T \Vert_1 + 
\lambda_4 \Vert C \Vert_1 \\
&\text{where } C\geq0,\ O\geq0,\ D\geq0,\ T\geq0,\ \lambda_i>0\ (i=1,2,3,4)
\end{aligned}
$$

## 评价函数

### 重构误差
$$
\begin{aligned}
&\mathrm{RMSE} = \sqrt{\frac{\sum_{x=1}^M \sum_{y=1}^M \sum_{z=1}^N (r_{xyz} - \hat{r}_{xyz})^2}{M \times M \times N}} \\
&\text{where } \mathit{CF} = (r_{xyz})_{M \times M \times N},\ 
C \times_1 O \times_2 D \times_3 T = (\hat{r}_{xyz})_{M \times M \times N}
\end{aligned}
$$

### 稀疏性
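$$
\begin{aligned}
&\mathrm{sparseness}(X) = \frac{\sqrt{n} - (\sum_i \vert x_i \vert) / \sqrt{\sum_i x_i^2}}{\sqrt{n} - 1} \\
&\text{where } x_i \text{ are the elements of } X \text{ and } n \text{ is the number of elements}
\end{aligned}
$$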

## 参数范围

### I=J
5, 6, ..., 14, 15

### K
3, 4, ..., 9, 10

### lam_O=lam_D
0.0001, 0.0002, ..., 0.0009, 0.001

### lam_T
0.0001, 0.0002, ..., 0.0009, 0.001

### lam_C
0.0001, 0.0002, ..., 0.0009, 0.001


In [None]:
%matplotlib inline
import os
import math
import matplotlib.pyplot as plt
from threading import Thread
import numpy as np
import pandas as pd
import scipy.io as scio
from tempfile import mkstemp


# 加载待分解矩阵
CF_file = 'whole_month_log'
CF = scio.loadmat('./NTF/%s.mat' % CF_file)['M']
M = CF.shape[0]
N = CF.shape[2]

def decompose(coreNway, lam, no=0):
    """Run one MATLAB decomposition and return the result it saved.

    The template ``./NTF/format.decompose`` is filled in with ``no``, the
    module-level ``CF_file`` and the output name ``out<no>``, written to
    ``./NTF/decompose<no>.m`` and invoked through the ``matlab`` command
    line. The result is then read back from ``./NTF/out<no>.mat``. Both
    generated files are deleted before returning.

    Args:
        coreNway: sequence of three integers, formatted with ``%d`` into the
            first array argument of the MATLAB call.
        lam: sequence of four numbers, formatted with ``%f`` into the second
            array argument of the MATLAB call.
        no: integer tag used to give the generated script and its output
            file unique names, so several calls can run side by side.

    Returns:
        The dict produced by ``scipy.io.loadmat`` for the output file, or
        ``None`` when that file could not be loaded (most likely because the
        decomposition failed).
    """
    outfile = 'out%d' % no
    funcfile = 'decompose%d' % no
    script_path = './NTF/%s.m' % funcfile
    result_path = './NTF/%s.mat' % outfile

    with open('./NTF/format.decompose', 'r') as f:
        content = f.read() % (no, CF_file, outfile)

    # A result left behind by an earlier run with the same `no` would be
    # mistaken for this run's output if MATLAB fails, so drop it up front.
    if os.path.exists(result_path):
        os.remove(result_path)

    with open(script_path, 'w') as f:
        f.write(content)

    try:
        # NOTE(review): the trailing `exit;` sits outside the quoted -r
        # argument, so the shell runs it, not MATLAB. Presumably the
        # template itself makes MATLAB quit -- confirm.
        os.system('matlab -r "cd NTF;decompose%d([%d,%d,%d],[%f,%f,%f,%f])";exit;' %
                  tuple([no] + list(coreNway) + list(lam)))
        try:
            data = scio.loadmat(result_path)
            os.remove(result_path)
        except Exception:
            # Deliberate best effort: a missing or unreadable output file is
            # reported to the caller as None. Catching Exception rather than
            # everything lets KeyboardInterrupt / SystemExit propagate.
            data = None
    finally:
        # Always clean up the generated script, even if something above raised.
        if os.path.exists(script_path):
            os.remove(script_path)

    return data


def RMSE(data):
    """Return the root-mean-square reconstruction error of a decomposition.

    Args:
        data: mapping with the entries ``'O'``, ``'D'``, ``'T'`` (2-D factor
            matrices) and ``'C'`` (3-D core tensor).

    Returns:
        The square root of the mean squared element-wise difference between
        the module-level tensor ``CF`` and the tensor rebuilt from the core
        and the three factors, as a Python float.
    """
    mode1, mode2, mode3, core = (data[key] for key in ('O', 'D', 'T', 'C'))
    # Multiply the core by one factor matrix along each of its three modes.
    rebuilt = np.einsum('ijk,pi,qj,rk->pqr', core, mode1, mode2, mode3)
    residual = CF - rebuilt
    return math.sqrt(np.mean(residual ** 2))
    

def sparseness(mat):
    """Return the sparseness measure of an array.

    Computes ``(sqrt(n) - L1/L2) / (sqrt(n) - 1)`` where ``n`` is the number
    of elements, ``L1`` the sum of absolute values and ``L2`` the square root
    of the sum of squares. The value is 1 when a single element is non-zero
    and 0 when all elements have the same magnitude.

    Args:
        mat: numpy array of any shape.

    Returns:
        The sparseness value as a floating-point scalar.
    """
    root_n = math.sqrt(mat.size)
    abs_total = np.sum(np.abs(mat))
    euclid = math.sqrt(np.sum(mat * mat))
    norm_ratio = abs_total / euclid
    return (root_n - norm_ratio) / (root_n - 1)


class Evaluator(Thread):
    """Worker thread that runs one decomposition and scores its output.

    After the thread has finished, ``get_result()`` returns
    ``[RMSE, sparseness(C), sparseness(O), sparseness(D), sparseness(T)]``.
    The list holds five ``None`` values when no decomposition result is
    available (the thread has not run, the decomposition failed, or the
    worker raised).
    """

    def __init__(self, coreNway, lam, no):
        """Store the parameters for one decomposition run.

        Args:
            coreNway: sequence of three values passed on to ``decompose``.
            lam: sequence of four values passed on to ``decompose``.
            no: tag passed on to ``decompose``.

        Raises:
            ValueError: if ``coreNway`` does not have 3 entries or ``lam``
                does not have 4.
        """
        Thread.__init__(self)
        # Explicit check instead of `assert`, which is stripped under -O.
        if len(coreNway) != 3 or len(lam) != 4:
            raise ValueError('coreNway must have 3 entries and lam must have 4')
        self.coreNway = coreNway
        self.lam = lam
        self.no = no
        # Set up front so get_result() works even if run() never completes.
        self.result = [None] * 5

    def run(self):
        """Decompose with the stored parameters and record the five scores."""
        data = decompose(self.coreNway, self.lam, self.no)
        if data is not None:
            self.result = [RMSE(data), sparseness(data['C']), sparseness(data['O']),
                           sparseness(data['D']), sparseness(data['T'])]
        else:
            self.result = [None] * 5

    def get_result(self):
        """Return the list of five scores (all ``None`` if unavailable)."""
        return self.result

    def get_coreNway(self):
        """Return the ``coreNway`` value given to the constructor."""
        return self.coreNway

    def get_lam(self):
        """Return the ``lam`` value given to the constructor."""
        return self.lam


def draw(df):
    """Show the RMSE curve and the four sparseness curves side by side.

    Args:
        df: object exposing the attributes ``RMSE``, ``SPC``, ``SPO``,
            ``SPD`` and ``SPT``, each of which is passed to ``plt.plot``.
    """
    plt.figure(figsize=(12, 7))

    # Left panel: reconstruction error.
    plt.subplot(121)
    plt.title('RMSE')
    plt.plot(df.RMSE)

    # Right panel: one sparseness curve per decomposition component.
    plt.subplot(122)
    plt.title('sparseness')
    for column, label in (('SPC', 'C'), ('SPO', 'O'), ('SPD', 'D'), ('SPT', 'T')):
        plt.plot(getattr(df, column), label=label)

    plt.legend()
    plt.show()


In [None]:
# Grid search over core sizes and regularisation weights. Each row of
# `results` is the core size, the four weights and the five scores of one run.
columns = ['I', 'J', 'K', 'lam_O', 'lam_D', 'lam_T', 'lam_C',
           'RMSE', 'SPC', 'SPO', 'SPD', 'SPT']

# NOTE(review): this grid is 0.0001 ... 0.0008, while the parameter ranges in
# the notes at the top of the file list 0.0001 ... 0.001 -- confirm which one
# is intended.
lam_grid = np.arange(1e-4, 8.1e-4, 1e-4)

results = []
i = 0  # running tag that gives every run its own script / output file names
for I in range(5, 16):
    for K in range(3, 11):
        for lam_O in lam_grid:
            # One batch: every (lam_T, lam_C) pair for the current I, K and
            # lam_O is started at once.
            thds = []
            for lam_T in lam_grid:
                for lam_C in lam_grid:
                    i += 1
                    thd = Evaluator([I, I, K], [lam_O, lam_O, lam_T, lam_C], i)
                    thd.start()
                    thds.append(thd)
            # Wait for the whole batch, then collect rows in start order.
            for thd in thds:
                thd.join()
            results.extend(worker.get_coreNway() + worker.get_lam() + worker.get_result()
                           for worker in thds)
            print(i)

df = pd.DataFrame(results, columns=columns)
