In [None]:
import scipy.io as sio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Relation matrix between objectA and objectC.
# The loaded .mat contents are indexed by variable name in the next cell.
R = sio.loadmat('../../project/main/matrices/R1-3.mat')
R

In [None]:
# Pull the 'R_matr' variable out of the loaded .mat contents and display it.
data = R['R_matr']
data

In [None]:
def getNumpyDataFromMatFile(filename = '../../project/main/matrices/R1-3.mat'):
    """Load the .mat file at ``filename`` and return its 'R_matr' variable."""
    mat_contents = sio.loadmat(filename)
    return mat_contents['R_matr']
# Call the helper with its default path and display the result.
getNumpyDataFromMatFile()

In [None]:
from sklearn.metrics import matthews_corrcoef

In [None]:
# Dense version of the relation matrix held in `data`.
matrix = data.todense()

In [None]:
# Print the dense matrix one row at a time.
for i in matrix:
    print(i)

# AE (AutoEncoder)

In [None]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch import Tensor

In [None]:
class LinearAutoEncoder(nn.Module):
    """Fully connected autoencoder with a 16-unit bottleneck.

    The encoder maps ``input_size`` -> 32 -> 24 -> 16 with an in-place ReLU
    after every linear layer. The decoder mirrors it
    (16 -> 24 -> 32 -> ``output_size``) and ends in a sigmoid, so every output
    element lies in [0, 1].
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        # Layer positions inside each Sequential are significant: state_dict
        # keys such as "encoder.0.weight" are derived from them.
        encoder_layers = [
            nn.Linear(input_size, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, 24),
            nn.ReLU(inplace=True),
            nn.Linear(24, 16),
            nn.ReLU(inplace=True),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.Linear(16, 24),
            nn.ReLU(inplace=True),
            nn.Linear(24, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, output_size),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the bottleneck and decode it back."""
        latent = self.encoder(x)
        return self.decoder(latent)

In [None]:
from torch.utils.data import DataLoader, Dataset

In [None]:
class LinearDataset(Dataset):
    '''
    Dataset over the rows of a relation matrix.

    Each item is one row of the densified matrix, a (1, input_size) vector
    when ``todense()`` yields an ``np.matrix``; elements are expected to be
    in [0, 1].

    Args:
        data: object exposing ``.todense()`` (e.g. a scipy sparse matrix).
        transform: optional callable applied to each row after it has been
            converted with ``np.asarray``; when ``None`` the raw row is
            returned.
    '''

    def __init__(self, data, transform=None):
        self.matrix = data.todense()
        self.transform = transform

    def __len__(self):
        return len(self.matrix)

    def __getitem__(self, index):
        # Read from this dataset's own matrix rather than a notebook-level
        # global, so the dataset works on a fresh kernel and is unaffected by
        # later rebinding of unrelated module-level names.
        it = self.matrix[index]

        if self.transform is not None:
            it = self.transform(np.asarray(it))

        return it

In [None]:
# Shape of the relation matrix held in `data`.
data.shape

In [None]:
from Progbar import Progbar
from scipy.stats.stats import pearsonr
import os

In [None]:
def calculate_pcc_mse(output, noisy_data, MSE_loss):
    """Return ``(PCC, mse)`` between a reconstruction and its target.

    ``PCC`` is the Pearson correlation of the two tensors after flattening
    them to 1-D NumPy arrays. ``mse`` is ``MSE_loss(output, noisy_data)``
    taken through ``.data``, i.e. a tensor without gradient history.
    """
    flat_output = output.detach().cpu().numpy().ravel()
    flat_target = noisy_data.detach().cpu().numpy().ravel()
    correlation = pearsonr(flat_output, flat_target)[0]

    error = MSE_loss(output, noisy_data).data
    return correlation, error
def predict(matrix, device="cpu", num_epochs=20, model_path="./model.pth", num_workers=3):
    """Train (or load) a LinearAutoEncoder on ``matrix`` and reconstruct it.

    Args:
        matrix: relation matrix exposing ``.shape`` and ``.todense()``; each
            row is one training sample.
        device: torch device the model and every batch are placed on.
        num_epochs: number of training epochs; only used when no checkpoint
            exists at ``model_path``.
        model_path: checkpoint file. If it exists, its weights are loaded and
            training is skipped; otherwise the model is trained and its
            weights are written there.
        num_workers: worker processes used by both DataLoaders.

    Returns:
        ``(output, loss)``: the reconstruction of every row, in the same row
        order as ``matrix``, and the MSE between reconstruction and input.
        Both are computed under ``torch.no_grad()``.
    """
    shape = matrix.shape
    dataset = LinearDataset(matrix, Tensor)
    model = LinearAutoEncoder(shape[1], shape[1]).to(device)
    MSE_loss = nn.MSELoss()
    criterion = MSE_loss

    # Training (skipped when a checkpoint is already available).
    if os.path.exists(model_path):
        # Load onto CPU first; load_state_dict copies into the model's device.
        model.load_state_dict(torch.load(model_path, map_location="cpu"))
    else:
        train_loader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=num_workers)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
        model.train()
        for epoch in range(num_epochs):
            print('epoch [{}/{}]'.format(epoch + 1, num_epochs))
            prog = Progbar(len(train_loader))
            for i, batch in enumerate(train_loader):
                # The batch must live on the same device as the model.
                noisy_data = batch.to(device)
                # ===================forward=====================
                output = model(noisy_data)
                loss = criterion(output, noisy_data)
                pcc, mse = calculate_pcc_mse(output, noisy_data, MSE_loss)
                # ===================backward====================
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # =====================log=======================
                prog.update(i + 1, [("loss", loss.item()), ("MSE", mse.item()), ("PCC", pcc)])
        torch.save(model.state_dict(), model_path)

    # Prediction / evaluation.
    model.eval()
    # One batch holding every row. shuffle=False keeps output row k aligned
    # with input row k, which matters because the output is saved as a matrix.
    eval_loader = DataLoader(dataset, batch_size=shape[0], shuffle=False, num_workers=num_workers)
    with torch.no_grad():
        noisy_data = next(iter(eval_loader)).to(device)
        output = model(noisy_data)
        loss = criterion(output, noisy_data)
    return output, loss


In [None]:
def save_output(output, loss):
    """Write the reconstructed matrix to ``./output_<loss>.csv``.

    Every entry of ``output`` is indexed with ``[0]`` to obtain its row of
    values, so ``output`` is expected to be shaped like (rows, 1, cols).
    Values below 0.1 are written as 0; all others are kept unchanged.
    """
    threshold = 0.1
    rows_to_save = [
        [0 if value < threshold else value for value in sample[0].data.tolist()]
        for sample in output.data
    ]

    csv_path = "./output_" + str(loss.item()) + ".csv"
    pd.DataFrame(rows_to_save).to_csv(csv_path)

In [None]:
# Reconstruct the relation matrix with the autoencoder, then dump the result to CSV.
output, loss = predict(data)
save_output(output, loss)

In [None]:
# Dimensions of the reconstructed tensor.
output.size()

In [None]:
# Loss value returned by predict() for the full reconstruction.
loss

## 看他重构的矩阵长什么样

他的是压缩成csv，需要把0补上去

In [None]:
def load_R(matPath):
    """Load 'R_matr' from a .mat file and return it as a list of row lists.

    The variable is densified with ``.todense()`` first; each inner list holds
    the elements of one row.
    """
    dense = sio.loadmat(matPath)["R_matr"].todense()
    return [list(row) for row in np.asarray(dense)]

def save_matrix(matrix, path):
    """Write a 2-D matrix to ``path`` as CSV (one CSV row per matrix row)."""
    rows = [list(row) for row in np.asarray(matrix)]
    pd.DataFrame(rows).to_csv(path)

In [None]:
def save_mat(path = '../../project/main/matrices/R1-3.mat', output_path = "./R.csv"):
    """Densify the 'R_matr' variable of the .mat file at ``path`` and save it as CSV."""
    mat_contents = sio.loadmat(path)
    dense = mat_contents["R_matr"].todense()
    save_matrix(dense, output_path)

In [None]:
# Export the relation matrix to the default output path of save_mat ("./R.csv").
save_mat('../../project/main/matrices/R1-3.mat')

In [None]:
def load_Object(matPath='../../project/main/objects/objectA.mat'):
    """Return the entries of the 'vett' variable of a .mat file as a flat list.

    Each element of 'vett' is unwrapped with ``[0][0]``.
    """
    vett = sio.loadmat(matPath)["vett"]
    return [entry[0][0] for entry in vett]

def save_Object_mat_to_csv(matPath='../../project/main/objects/objectA.mat', output_path="./objectA.csv"):
    """Load the object list from ``matPath`` via load_Object and write it to ``output_path`` as CSV."""
    pd.DataFrame(load_Object(matPath)).to_csv(output_path)

# Export the objectA, objectB and objectC lists to CSV, in that order.
for _mat_path, _csv_path in (
    ('../../project/main/objects/objectA.mat', "./objectA.csv"),
    ('../../project/main/objects/objectB.mat', "./objectB.csv"),
    ('../../project/main/objects/objectC.mat', "./objectC.csv"),
):
    save_Object_mat_to_csv(matPath=_mat_path, output_path=_csv_path)

In [None]:
# Raw 'vett' variable of objectA.
# NOTE(review): this rebinds the notebook-level name `matrix`, which an
# earlier cell assigned to the dense relation matrix.
matrix = sio.loadmat('../../project/main/objects/objectA.mat')["vett"]

In [None]:
# Unwrap every 'vett' entry with [0][0] into a flat list, then display it.
new_matrix = [entry[0][0] for entry in matrix]
new_matrix

In [None]:
def R_csv_to_complete_csv(path):
    """Read the CSV at ``path`` and return it as a DataFrame.

    NOTE(review): the name suggests the frame should also be expanded into a
    "complete" matrix, but no such step is implemented; only the read is
    performed. Returning the frame keeps the loaded data available to the
    caller instead of discarding it.
    """
    return pd.read_csv(path)


In [None]:
def to_complete_csv(path = '../../project/main/output_s/new_found_relations.csv', output_path="./new_found_relations.csv", object_path='../../project/main/objects/objectA.mat'):
    """Expand a sparse ``x,y,value`` relations file into a full square matrix.

    The matrix is indexed on both axes by the object names loaded from
    ``object_path`` and starts as all zeros. Each non-blank line of ``path``
    is split on commas: field 0 selects the column, field 1 the row, and
    field 2 (parsed as float) is the value stored there.

    Args:
        path: text file with one ``x,y,value`` record per line.
        output_path: where the completed matrix is written as CSV.
        object_path: .mat file providing the object names for both axes.

    Returns:
        The completed DataFrame (also written to ``output_path``).

    Raises:
        KeyError: if a record names an object absent from ``object_path``.
    """
    labels = load_Object(matPath=object_path)
    known_labels = set(labels)
    # Size the frame from the labels themselves instead of a fixed 50x50.
    pd_complete = pd.DataFrame(0.0, index=labels, columns=labels)

    with open(path, "r") as relations_file:
        for line in relations_file:
            if not line.strip():
                continue  # tolerate blank lines, e.g. a trailing newline
            ap = line.split(',')
            key_x = ap[0]
            key_y = ap[1]
            value = float(ap[2])
            for key in (key_x, key_y):
                if key not in known_labels:
                    # .loc would otherwise silently grow the frame.
                    raise KeyError("unknown object name in {}: {!r}".format(path, key))
            # A single .loc write targets the frame itself; chained
            # `frame[col][row] = v` may only modify a temporary copy.
            pd_complete.loc[key_y, key_x] = value

    pd_complete.to_csv(output_path)
    return pd_complete
# Build the completed relations matrix with the default paths and display it.
pd_complete = to_complete_csv()
pd_complete

In [None]:
# Flatten the completed matrix into [row label, column label, value] triples.
# Values are scaled by 10 (presumably to fit the chart's 0-10 visual map).
npdata = pd_complete.to_numpy()
data = [
    [row_label, col_label, 10*npdata[row_pos][col_pos]]
    for row_pos, row_label in enumerate(pd_complete.index)
    for col_pos, col_label in enumerate(pd_complete.columns)
]

In [None]:
# Display the flattened [row label, column label, value] triples.
data

In [None]:
from pyecharts import options as opts
from pyecharts.charts import Bar3D

def bar3d_base() -> Bar3D:
    """Build a 3-D bar chart from the notebook-level ``data`` list.

    The x and y axes are categorical and the z axis is numeric; the visual
    map is capped at 10.
    """
    chart = Bar3D()
    chart.add(
        "",
        data,
        xaxis3d_opts=opts.Axis3DOpts(type_="category"),
        yaxis3d_opts=opts.Axis3DOpts(type_="category"),
        zaxis3d_opts=opts.Axis3DOpts(type_="value"),
    )
    chart.set_global_opts(
        visualmap_opts=opts.VisualMapOpts(max_=10),
        title_opts=opts.TitleOpts(title="Bar3D-基本示例"),
    )
    return chart
# Build the chart and render it inline in the notebook.
c = bar3d_base()
c.render_notebook()

In [None]:
# Expand the sparse "x,y,value" relations file into a full objectA x objectA matrix.
path = '../../project/main/output_s/new_found_relations.csv'
output_path="./new_found_relations.csv"
pd_idx = load_Object(matPath='../../project/main/objects/objectA.mat')
pd_col = load_Object(matPath='../../project/main/objects/objectA.mat')
# Float zeros sized from the label lists, so float values can be stored
# without changing column dtypes and no fixed 50x50 size is assumed.
pd_complete = pd.DataFrame(0.0, index=pd_idx, columns=pd_col)
# The file handle is deliberately not named `F`: at notebook level that name
# is the `torch.nn.functional` alias and must not be overwritten.
with open(path, "r") as relations_file:
    for i in relations_file:
        if not i.strip():
            continue  # tolerate blank lines
        ap = i.split(',')

        key_x = ap[0]
        key_y = ap[1]
        value = float(ap[2])
        if key_x not in pd_complete.columns or key_y not in pd_complete.index:
            # .loc would otherwise silently grow the frame.
            raise KeyError("unknown object name in {}: {!r}, {!r}".format(path, key_x, key_y))
        # Field 0 selects the column, field 1 the row. A single .loc write
        # targets the frame itself, unlike chained `frame[col][row] = v`.
        pd_complete.loc[key_y, key_x] = value
pd_complete

In [None]:
# All-zero integer frame with objectA names as rows and objectC names as columns.
pd_idx = load_Object(matPath='../../project/main/objects/objectA.mat')
pd_col = load_Object(matPath='../../project/main/objects/objectC.mat')
# The shape follows the label lists, so no fixed 50x40 size is assumed.
pd_complete = pd.DataFrame(0, index=pd_idx, columns=pd_col)
pd_complete