In [1]:
%matplotlib inline
import os
import math
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as scio
import pandas as pd

# Column identifiers, in the order used to index the rows of the factor
# matrices O and D in the table-building cells of this notebook.
# Those cells treat a trailing 'm' as a "margin" column and a trailing 'p' as a
# "profit" column of the variety code formed by the remaining prefix (looked up
# in vari2name); 'fund' is the only entry with neither suffix.
cols = [
    'am', 'agm', 'alm', 'apm', 'aum', 'bm', 'bbm', 'bum', 'cm', 'cfm', 'csm', 'cum', 'cym', 'erm', 'fbm', 'fgm', 'fum', 'hcm', 
    'im', 'jm', 'jdm', 'jmm', 'jrm', 'lm', 'lrm', 'mm', 'mam', 'mem', 'nim', 'oim', 'pm', 'pbm', 'pmm', 'ppm', 'rbm', 'rim', 
    'rmm', 'rom', 'rsm', 'rum', 'sfm', 'smm', 'snm', 'srm', 'tam', 'tcm', 'vm', 'whm', 'wrm', 'wsm', 'ym', 'zcm', 'znm', 'ap', 
    'agp', 'alp', 'app', 'aup', 'bp', 'bbp', 'bup', 'cp', 'cfp', 'csp', 'cup', 'cyp', 'erp', 'fbp', 'fgp', 'fup', 'hcp', 'ip', 
    'jp', 'jdp', 'jmp', 'jrp', 'lp', 'lrp', 'mp', 'map', 'mep', 'nip', 'oip', 'pp', 'pbp', 'pmp', 'ppp', 'rbp', 'rip', 'rmp', 
    'rop', 'rsp', 'rup', 'sfp', 'smp', 'snp', 'srp', 'tap', 'tcp', 'vp', 'whp', 'wrp', 'wsp', 'yp', 'zcp', 'znp', 'fund'
]

# Maps a variety code (a `cols` entry with its final 'm'/'p' character removed)
# to the Chinese display name shown in the pattern tables.
# NOTE(review): the codes look like futures contract symbols — confirm.
vari2name = {
    'a': '豆一', 'ag': '沪银', 'al': '沪铝', 'ap': '苹果', 'au': '沪金', 'b': '豆二', 'bb': '胶合板', 'bu': '沥青', 'c': '玉米', 
    'cf': '郑棉', 'cs': '玉米淀粉', 'cu': '沪铜', 'cy': '棉纱', 'er': '早籼稻(ER)', 'fb': '纤维板', 'fg': '玻璃', 'fu': '燃油', 
    'hc': '热轧卷板', 'i': '铁矿石', 'j': '焦炭', 'jd': '鸡蛋', 'jm': '焦煤', 'jr': '粳稻', 'l': '塑料', 'lr': '晚籼稻', 'm': '豆粕', 
    'ma': '甲醇(MA)', 'me': '甲醇(ME)', 'ni': '沪镍', 'oi': '菜油(OI)', 'p': '棕榈油', 'pb': '沪铅', 'pm': '普麦', 'pp': '聚丙烯', 
    'rb': '螺纹钢', 'ri': '早籼稻(RI)', 'rm': '菜粕', 'ro': '菜油(RO)', 'rs': '菜籽', 'ru': '橡胶', 'sf': '硅铁', 'sm': '锰硅', 
    'sn': '沪锡', 'sr': '白糖', 'ta': 'PTA', 'tc': '动力煤(TC)', 'v': 'PVC', 'wh': '强麦(WH)', 'wr': '线材', 
    'ws': '强麦(WS)', 'y': '豆油', 'zc': '动力煤(ZC)', 'zn': '沪锌'
}

# Sizes handed to decompose() as coreNway. I and J are also the number of rows
# in the two pattern tables, and K the number of plotted patterns / core slices.
(I, J, K) = (10, 10, 4)

# Base name (no extension) of the .mat file under ./NTF; the variable 'M'
# stored in it becomes the reference tensor CF.
CF_file = 'whole_month_log'
cf_path = './NTF/%s.mat' % CF_file
CF = scio.loadmat(cf_path)['M']

def decompose(coreNway, lam, no=0):
    """Run the MATLAB decomposition on ``CF_file`` and load its result.

    A MATLAB function file ``./NTF/decompose<no>.m`` is generated from the
    template ``./NTF/format.decompose`` (filled with ``no``, ``CF_file`` and the
    output name), executed through the ``matlab`` command line, and removed
    afterwards. The result is read from ``./NTF/out<no>.mat``, which is deleted
    as well.

    Parameters
    ----------
    coreNway : sequence of int
        Passed to the MATLAB function as its first (vector) argument.
        Presumably the core tensor size per mode -- confirm against the template.
    lam : sequence of float
        Passed to the MATLAB function as its second (vector) argument.
        Presumably regularisation weights -- confirm against the template.
    no : int, optional
        Suffix used for the generated function and output file names, so that
        several runs can use distinct files.

    Returns
    -------
    dict or None
        The dictionary returned by ``scipy.io.loadmat`` for the output file, or
        ``None`` when the output could not be loaded (for example because the
        decomposition failed and MATLAB wrote nothing).

    Raises
    ------
    OSError
        If the template cannot be read or the function file cannot be written.
    """
    outfile = 'out%d' % no
    funcfile = 'decompose%d' % no
    out_path = './NTF/%s.mat' % outfile
    func_path = './NTF/%s.m' % funcfile

    with open('./NTF/format.decompose', 'r') as f:
        content = f.read() % (no, CF_file, outfile)

    # Build the vectors with join so any length works, not only 3 and 4 entries.
    sizes = ','.join('%d' % int(n) for n in coreNway)
    # repr(float) round-trips the value; '%f' would print anything below 1e-6
    # as 0.000000 and silently change the argument.
    weights = ','.join(repr(float(v)) for v in lam)
    # `exit` must be part of the quoted -r statement: outside the quotes it is
    # run by the shell after MATLAB returns instead of telling MATLAB to quit.
    command = 'matlab -r "cd NTF;%s([%s],[%s]);exit;"' % (funcfile, sizes, weights)

    with open(func_path, 'w') as f:
        f.write(content)

    try:
        # A result left over from an earlier run must not be mistaken for the
        # output of this one.
        if os.path.exists(out_path):
            os.remove(out_path)

        os.system(command)

        try:
            data = scio.loadmat(out_path)
        except Exception:
            # Most likely the decomposition failed and no output was written.
            # Exception (not a bare except) lets KeyboardInterrupt through.
            data = None
    finally:
        # Best-effort cleanup of the generated files, even on errors.
        for path in (out_path, func_path):
            try:
                os.remove(path)
            except OSError:
                pass

    return data

def RMSE(data, reference=None):
    """Root-mean-square error between a reference tensor and its reconstruction.

    The reconstruction is the contraction of the core ``data['C']`` with the
    three factor matrices ``data['O']``, ``data['D']`` and ``data['T']``
    (``einsum('ijk,pi,qj,rk->pqr', C, O, D, T)``).

    Parameters
    ----------
    data : mapping
        Must provide the arrays ``'O'``, ``'D'``, ``'T'`` and ``'C'``.
    reference : array-like, optional
        Tensor to compare the reconstruction with. Defaults to the
        module-level ``CF`` so existing ``RMSE(data)`` calls are unchanged.

    Returns
    -------
    float
        ``sqrt(mean((reference - reconstruction) ** 2))``.
    """
    if reference is None:
        # Looked up at call time, so the notebook-level CF is used by default.
        reference = CF
    reconstruction = np.einsum(
        'ijk,pi,qj,rk->pqr', data['C'], data['O'], data['D'], data['T']
    )
    return math.sqrt(np.mean((reference - reconstruction) ** 2))

# Decompose with core size (I, J, K) and 0.0001 for each of the four lam entries.
data = decompose([I,J,K],[0.0001]*4)
if data is None:
    # decompose() returns None when the MATLAB output could not be loaded;
    # stop here with a clear message instead of a TypeError inside RMSE().
    raise RuntimeError('decomposition failed: no MATLAB output could be loaded')

# Mean of the reference tensor, printed next to the error for scale.
print (np.mean(CF))
print (RMSE(data))

# Factor matrices and core tensor used by the cells below.
O = data['O']
D = data['D']
T = data['T']
C = data['C']

FileNotFoundError: [Errno 2] No such file or directory: './NTF/format.decompose'

# 月份模式

In [None]:
# Draw each of the first K columns of T as its own line on one shared axes.
fig, ax = plt.subplots(figsize=(18,10))
for pattern in range(K):
    ax.plot(T[:,pattern], label='pattern %d' % pattern)
ax.legend()
plt.show()

# 源模式

In [None]:
# Source patterns: put every column of `cols` into the pattern (row of the
# table) where its row of O has the largest weight.
data = [{'margin':[], 'profit':[], 'fund':[]} for _ in range(I)]
# Iterate over cols itself rather than a hard-coded 107, so the loop stays
# correct if the column list changes.
for i, col in enumerate(cols):
    # argmax returns the first index of the maximum, matching the earlier
    # max()/tolist().index() pair without the extra pass and list copy.
    t = int(np.argmax(O[i]))
    if col[-1]=='m':
        data[t]['margin'].append(vari2name[col[:-1]])
    elif col[-1]=='p':
        data[t]['profit'].append(vari2name[col[:-1]])
    else:
        data[t]['fund'].append('fund')
df = pd.DataFrame(data)
df

# 目的模式

In [None]:
# Destination patterns: put every column of `cols` into the pattern (row of the
# table) where its row of D has the largest weight.
data = [{'margin':[], 'profit':[], 'fund':[]} for _ in range(J)]
# Iterate over cols itself rather than a hard-coded 107, so the loop stays
# correct if the column list changes.
for i, col in enumerate(cols):
    # argmax returns the first index of the maximum, matching the earlier
    # max()/tolist().index() pair without the extra pass and list copy.
    t = int(np.argmax(D[i]))
    if col[-1]=='m':
        data[t]['margin'].append(vari2name[col[:-1]])
    elif col[-1]=='p':
        data[t]['profit'].append(vari2name[col[:-1]])
    else:
        data[t]['fund'].append('fund')
df = pd.DataFrame(data)
df

# 核张量

In [None]:
# One grayscale image per slice C[:, :, i] of the core tensor, stacked vertically.
# squeeze=False always returns a 2-D array of axes; without it plt.subplots
# returns a bare Axes object when K == 1 and ax[i] would fail.
fig,ax = plt.subplots(nrows=K,ncols=1,figsize=(20,20),squeeze=False)
ax = ax[:,0]  # back to the 1-D layout, one entry per slice
for i in range(K):
    ax[i].matshow(C[:,:,i], cmap='gray')

TODO：加入同大类、同市场的约束（类似 POI 的约束）。

In [None]:
# NOTE(review): exploratory cell. It imports pymatbridge only to print the help
# text of Matlab.run_func; nothing else visible in this notebook uses it.
# Presumably a candidate replacement for the command-line MATLAB call — confirm
# or remove before sharing.
from pymatbridge import Matlab
help(Matlab.run_func)