# 实验

In [40]:
import scipy.io as sio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch import Tensor
from torch.utils.data import DataLoader, Dataset
from Progbar import Progbar
from scipy.stats.stats import pearsonr
import os
from sklearn.metrics import matthews_corrcoef
from mat2csv import MatCsvPathPair, DataManager

p = MatCsvPathPair()
dm = DataManager()

MatCsvPathPair avaliable path:

        ../../project/main/
          objects/
            - objectA.mat
            - objectB.mat
            - objectC.mat
          matrices/
            - R1-2.mat
            - T1-1.mat
          output_s/
            - new_found_relations.mat
          output_d/
            - new_found_relations_row.mat
            - new_found_relations_col.mat

DataManager avaliable method :

        load:
            object -> (obj_mat_path)
            relation -> (key, path)
            new_found_relation -> (idx_obj_path, col_obj_path, relation_path)

        save:
            object -> (in_path, out_path)
            R_relation, T_relation  -> (in_path, out_path)
            SAME_new_found_relation -> (idx_obj_path, in_path, out_path)
            DIFF_new_found_relation -> (idx_obj_path, col_obj_path, in_path, out_path)

        save_all
        


## 数据准备

In [16]:
# objectA - objectC 关系矩阵
R = sio.loadmat('../../project/main/matrices/R1-3.mat')
R

{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Fri Jan 25 19:42:45 2019',
 '__version__': '1.0',
 '__globals__': [],
 'R_matr': <50x40 sparse matrix of type '<class 'numpy.float64'>'
 	with 209 stored elements in Compressed Sparse Column format>}

In [17]:
data = R['R_matr']
data

<50x40 sparse matrix of type '<class 'numpy.float64'>'
	with 209 stored elements in Compressed Sparse Column format>

In [18]:
data.shape

(50, 40)

In [19]:
matrix = data.todense()
for i in matrix:
    print(i)

[[0.       0.       0.       0.       0.       0.       0.       0.
  0.       0.       0.       0.       0.       0.       0.912053 0.
  0.       0.       0.114144 0.       0.       0.       0.       0.
  0.       0.       0.       0.       0.       0.       0.       0.
  0.       0.       0.       0.       0.711648 0.       0.       0.      ]]
[[0.       0.       0.       0.       0.       0.       0.       0.
  0.       0.       0.       0.       0.       0.       0.       0.11607
  0.       0.       0.       0.       0.       0.       0.       0.
  0.90975  0.       0.199099 0.       0.       0.528818 0.       0.
  0.978274 0.806647 0.       0.       0.154204 0.984133 0.       0.      ]]
[[0.       0.       0.       0.       0.       0.       0.426262 0.
  0.151101 0.       0.       0.       0.       0.       0.       0.
  0.       0.       0.       0.       0.       0.871504 0.       0.
  0.       0.       0.       0.       0.       0.       0.       0.
  0.       0.       0.     

In [20]:
class LinearDataset(Dataset):

    def __init__(self, data, transform=None):
        self.matrix = data.todense()
        self.transform = transform

    def __len__(self):
        return len(self.matrix)

    def __getitem__(self, index):
        it = matrix[index]

        if self.transform is not None:
            it = self.transform(np.asarray(it))
            
        return it

## 模型建立

### LinearAutoEncoder

In [21]:
class LinearAutoEncoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinearAutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, 32),
            nn.ReLU(True),
            nn.Linear(32, 24),
            nn.ReLU(True),
            nn.Linear(24, 16),
            nn.ReLU(True))
        self.decoder = nn.Sequential(
            nn.Linear(16, 24),
            nn.ReLU(True),
            nn.Linear(24, 32),
            nn.ReLU(True),
            nn.Linear(32, output_size),
            nn.Sigmoid())

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [22]:
def calculate_pcc_mse(output, noisy_data, MSE_loss):
    mse = MSE_loss(output, noisy_data).data
    np1 = output.cpu().detach().numpy().reshape(-1)
    np2 = noisy_data.cpu().detach().numpy().reshape(-1)
    PCC, _ = pearsonr(np1, np2)

    return PCC, mse

def predict(matrix, device="cpu", num_epochs=20):
    shape = matrix.shape
    dataset = LinearDataset(matrix, Tensor)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=3)
    model = LinearAutoEncoder(shape[1], shape[1]).to(device)
    MSE_loss = nn.MSELoss()
    BCE_Loss = nn.BCELoss()
    criterion = MSE_loss
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

    # 训练
    if os.path.exists("./model.pth"):
        model.load_state_dict(torch.load("./model.pth", "cpu"))
    else:
        model.train()
        for epoch in range(num_epochs):
            print('epoch [{}/{}]'.format(epoch + 1, num_epochs))
            prog = Progbar(len(dataloader))
            for i, data in enumerate(dataloader):
                noisy_data = data
                # ===================forward=====================
                output = model(noisy_data)
                loss = criterion(output, noisy_data)
                pcc, mse = calculate_pcc_mse(output, noisy_data, MSE_loss)
                # ===================backward====================
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # =====================log=======================
                prog.update(i + 1, [("loss", loss.item()), ("MSE", mse), ("PCC", pcc)])
        torch.save(model.state_dict(), "./model.pth")

    # 预测、评价
    model.eval()
    dataloader2 = DataLoader(dataset, batch_size=shape[0], shuffle=True, num_workers=3)
    for data in dataloader2:
        noisy_data = data
        # ===================forward=====================
        output = model(noisy_data)
        loss = criterion(output, noisy_data)
        # =====================log and save==============
        return output, loss
        break  # 只有一个 batch, 一次全拿出来了，不会有第二个


In [47]:
objectA_idx = list(dm.load("object")(p.getObjectPair()[0])[0])
objectB_idx = list(dm.load("object")(p.getObjectPair("objectB")[0])[0])
objectC_idx = list(dm.load("object")(p.getObjectPair("objectC")[0])[0])

In [52]:
def save_output(output, output_path):
    output_data_to_save = []
    for i in output.data:
        it_list = []
        for j in i[0].data:
            it = j.item()
            it_list.append(it)
#             if it < 0.1:
#                 it_list.append(int(0))
#             else:
#                 it_list.append(it)
        output_data_to_save.append(it_list)
    df_output = pd.DataFrame(output_data_to_save, index=objectA_idx, columns=objectC_idx)
    df_output.to_csv(output_path)

In [53]:
output, loss = predict(data)
print("output.size = {}\nloss = {}".format(output.size(), loss))
output_path = "./output_"+str(loss.item())+".csv"
save_output(output, output_path)

output.size = torch.Size([50, 1, 40])
loss = 0.028659239411354065


In [25]:
from visualize_csv import bar3d_base

In [54]:
bar3d_base(pd.read_csv(output_path,index_col =0)).render_notebook()

In [39]:
pd.read_csv(output_path,index_col =0)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,0.045387,0.008085,0.035075,0.03715,0.002769,0.002148,0.013184,0.032441,0.004342,0.005319,...,0.023898,0.013098,0.022996,0.019087,0.012872,0.002623,0.026693,0.043399,0.012331,0.055791
1,0.112204,0.038926,0.095591,0.098561,0.018898,0.017171,0.05515,0.093687,0.027533,0.027613,...,0.075383,0.053678,0.074335,0.067149,0.052104,0.018807,0.083318,0.109113,0.051486,0.132535
2,0.053213,0.01062,0.041811,0.044111,0.003855,0.003076,0.01693,0.039092,0.005978,0.007067,...,0.02922,0.016746,0.028232,0.02379,0.016426,0.003685,0.0326,0.051028,0.015819,0.065101
3,0.081779,0.022322,0.067276,0.070074,0.009543,0.008198,0.033314,0.064651,0.014299,0.015386,...,0.050362,0.032638,0.049237,0.043157,0.031817,0.009335,0.055911,0.079059,0.031092,0.098152
4,0.081481,0.022151,0.066976,0.069781,0.009441,0.008118,0.033087,0.064336,0.014166,0.015226,...,0.050063,0.032418,0.048992,0.042923,0.031638,0.009239,0.055597,0.07873,0.03086,0.097748
5,0.137339,0.056108,0.119748,0.123006,0.029693,0.028033,0.076769,0.119199,0.042441,0.04064,...,0.098062,0.074299,0.097177,0.089637,0.0719,0.029913,0.10799,0.134215,0.071628,0.161322
6,0.04556,0.008125,0.035225,0.03727,0.002785,0.002162,0.013244,0.03255,0.004366,0.005343,...,0.023981,0.013163,0.023082,0.019166,0.01294,0.002638,0.026792,0.043543,0.012393,0.055932
7,0.043836,0.007602,0.033753,0.03575,0.002568,0.001982,0.01246,0.031102,0.004038,0.00498,...,0.022831,0.012395,0.021963,0.018167,0.012197,0.002428,0.025514,0.041859,0.011658,0.053843
8,0.046393,0.008382,0.035929,0.038017,0.002891,0.002253,0.013629,0.033257,0.004528,0.005515,...,0.024538,0.013535,0.023637,0.019659,0.013307,0.002742,0.027411,0.044356,0.012745,0.056938
9,0.093408,0.028163,0.077935,0.080917,0.012669,0.011166,0.041163,0.075569,0.018795,0.019586,...,0.059623,0.040185,0.058561,0.051988,0.039152,0.012496,0.066095,0.090503,0.038369,0.111438
