# 转换`.mat`文件为`.csv`文件

In [None]:
import scipy.io as sio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
class MatCsvPair:
    """Build ``(mat_path, csv_path)`` tuples for the data files used in this notebook.

    Every getter returns a 2-tuple: the path of a ``.mat`` file inside one
    of the project folders, and the path of a ``.csv`` file with the same
    base name in the current directory.
    """

    def __init__(self):
        print("init")

    def getOriginalRelationPair(self, name="R1-3"):
        """Return the path pair for a file in the ``matrices`` folder."""
        mat_file = '../../project/main/matrices/{}.mat'.format(name)
        csv_file = "./{}.csv".format(name)
        return (mat_file, csv_file)

    def getObjectPair(self, name="objectA"):
        """Return the path pair for a file in the ``objects`` folder."""
        mat_file = '../../project/main/objects/{}.mat'.format(name)
        csv_file = "./{}.csv".format(name)
        return (mat_file, csv_file)

    def getSAME_NewFoundRelationPair(self, name="new_found_relations"):
        """Return the path pair for a file in the ``output_s`` folder."""
        mat_file = '../../project/main/output_s/{}.mat'.format(name)
        csv_file = "./{}.csv".format(name)
        return (mat_file, csv_file)

    def getDIFF_NewFoundRelationPair(self, name="new_found_relations_row"):
        """Return the path pair for a file in the ``output_d`` folder."""
        mat_file = '../../project/main/output_d/{}.mat'.format(name)
        csv_file = "./{}.csv".format(name)
        return (mat_file, csv_file)

# Notebook-level helper instance; constructing it prints "init".
pathPair = MatCsvPair()

## Object

包括 ObjectA, ObjectB, ObjectC 等

In [None]:
def load_Object(mat_path, key = "vett"):
    """Load the matrix stored under ``key`` in ``mat_path`` and return it as a list.

    For every row ``r`` of the loaded matrix the element ``r[0][0]`` is
    collected, so the result has one entry per row.
    NOTE(review): presumably the variable is a MATLAB cell column of
    strings, which makes each entry a label — confirm against the data.
    """
    rows = sio.loadmat(mat_path)[key]
    return [row[0][0] for row in rows]

In [None]:
def save_Object_mat_to_csv(mat_path, output_path):
    """Read an object list from ``mat_path`` and write it to ``output_path`` as CSV.

    The list comes from ``load_Object`` (default key) and becomes a
    one-column DataFrame, which is both written to disk and returned.
    """
    frame = pd.DataFrame(load_Object(mat_path))
    frame.to_csv(output_path)
    return frame

In [None]:
# Export objectA.mat to ./objectA.csv.
save_Object_mat_to_csv('../../project/main/objects/objectA.mat', "./objectA.csv")

In [None]:
# Export objectB.mat to ./objectB.csv.
save_Object_mat_to_csv('../../project/main/objects/objectB.mat', "./objectB.csv")

In [None]:
# Export objectC.mat to ./objectC.csv.
save_Object_mat_to_csv('../../project/main/objects/objectC.mat', "./objectC.csv")

## R 矩阵和 $\theta$ 矩阵

R 矩阵，包括 R1-2.mat, R1-3.mat, R2-3.mat, R2-1.mat, R2-3.mat, R3-1.mat

R 矩阵是不同数据类型的关系矩阵。R 矩阵不一定是方阵。

$\theta$ 矩阵，包括 T1-1.mat, T2-1.mat, T3-1.mat

$\theta$ 矩阵是相同数据类型的关系矩阵。$\theta$ 矩阵一定是方阵。

In [None]:
def save_mat_to_csv(key, path, output_path):
    """Export the matrix stored under ``key`` in a ``.mat`` file to a CSV file.

    The variable may be stored as a sparse matrix (expanded to dense
    before export) or as an ordinary dense array (used as is).

    Args:
        key: Name of the variable to read from the ``.mat`` file.
        path: Path of the ``.mat`` file to load.
        output_path: Path of the CSV file to write.

    Returns:
        The ``pandas.DataFrame`` that was written to ``output_path``.

    Raises:
        KeyError: If ``key`` is not a variable in the ``.mat`` file.
    """
    matrix = sio.loadmat(path)[key]
    # Only sparse containers expose ``toarray``; a dense ndarray is kept
    # untouched instead of failing on a missing ``todense`` attribute.
    if hasattr(matrix, "toarray"):
        matrix = matrix.toarray()
    dense = np.asarray(matrix)
    # Row-wise lists keep the DataFrame construction the same as before.
    df_matrix = pd.DataFrame([list(row) for row in dense])
    df_matrix.to_csv(output_path)
    return df_matrix

In [None]:
def save_OriginalR_mat_to_csv(
        path='../../project/main/matrices/R1-3.mat',
        output_path="./R.csv"):
    """Export the ``R_matr`` variable of an R-matrix ``.mat`` file to CSV.

    Delegates to ``save_mat_to_csv`` with the key ``"R_matr"`` and
    returns the DataFrame it produces.
    """
    exported = save_mat_to_csv(key="R_matr", path=path, output_path=output_path)
    return exported

In [None]:
# Export R1-3.mat to ./R1-3.csv.
save_OriginalR_mat_to_csv('../../project/main/matrices/R1-3.mat', "./R1-3.csv")

In [None]:
def save_OriginalTheta_mat_to_csv(
        path='../../project/main/matrices/T1-1.mat',
        output_path="./T1-1.csv"):
    """Export the ``teta_matr`` variable of a theta-matrix ``.mat`` file to CSV.

    Delegates to ``save_mat_to_csv`` with the key ``"teta_matr"`` and
    returns the DataFrame it produces.
    """
    exported = save_mat_to_csv(key="teta_matr", path=path, output_path=output_path)
    return exported

In [None]:
# Export T1-1.mat to ./T1-1.csv.
save_OriginalTheta_mat_to_csv('../../project/main/matrices/T1-1.mat', "./T1-1.csv")

## new_found_relations : SAME MODE and DIFFERENT MODE

`new_found_relations.mat` in SAME MODE is generated from `R1-3.mat`, etc.

`new_found_relations_row.mat` and `new_found_relations_col.mat` in DIFFERENT MODE are generated from `Theta.mat`, etc.

输出的 csv 是压缩（稀疏）形式，每行为“行键,列键,值”，只记录部分元素；需要把缺失的 0 补上去，还原成完整矩阵。

In [None]:
def load_relation(pd_idx, pd_col, relation_path):
    """Expand a ``row_key,col_key,value`` relation file into a full matrix.

    A DataFrame of zeros with index ``pd_idx`` and columns ``pd_col`` is
    created, then every line of ``relation_path`` overwrites one cell.

    Args:
        pd_idx: Row labels of the resulting DataFrame.
        pd_col: Column labels of the resulting DataFrame.
        relation_path: Path of a comma-separated text file whose first
            three fields per line are row key, column key and a float.

    Returns:
        A ``pandas.DataFrame`` of floats, 0.0 wherever the file has no entry.

    Raises:
        KeyError: If a line names a column key that is not in ``pd_col``.
        ValueError: If the third field of a line is not a valid float.
    """
    pd_complete = pd.DataFrame(0.0, index=pd_idx, columns=pd_col)
    with open(relation_path, "r") as F:
        for line in F:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            ap = line.split(',')
            key_x = ap[0]
            key_y = ap[1]
            value = float(ap[2])
            if key_y not in pd_complete.columns:
                raise KeyError(key_y)
            # Entries for unknown rows never reached the frame with the
            # previous chained assignment either; skip them rather than
            # letting ``.loc`` enlarge the frame.
            if key_x not in pd_complete.index:
                continue
            # Single-step ``.loc`` write: chained ``df[col][row] = v`` does
            # not update the frame under pandas Copy-on-Write.
            pd_complete.loc[key_x, key_y] = value
    return pd_complete

In [None]:
def save_SAME_MODE_new_found_relations_mat_to_csv(
    idx_obj_path='../../project/main/objects/objectA.mat',
    relation_path = '../../project/main/output_s/new_found_relations.csv', 
    output_path="./new_found_relations.csv"):
    """Write the SAME MODE relation file as a full square matrix CSV.

    The object list loaded from ``idx_obj_path`` serves as both the row
    and the column labels; ``load_relation`` fills the matrix from
    ``relation_path``. The result is written to ``output_path`` and returned.
    """
    labels = load_Object(idx_obj_path)
    full_matrix = load_relation(pd_idx=labels, pd_col=labels, relation_path=relation_path)
    full_matrix.to_csv(output_path)
    return full_matrix

In [None]:
# Expand the SAME MODE relation file, labelled by objectA on both axes,
# into ./new_found_relations.csv.
save_SAME_MODE_new_found_relations_mat_to_csv(
    idx_obj_path='../../project/main/objects/objectA.mat',
    relation_path='../../project/main/output_s/new_found_relations.csv', 
    output_path="./new_found_relations.csv")

In [None]:
def save_DIFF_MODE_new_found_relations_mat_to_csv(
    idx_obj_path='../../project/main/objects/objectB.mat',
    col_obj_path='../../project/main/objects/objectA.mat',
    relation_path = '../../project/main/output_d/new_found_relations_row.csv',
    output_path = "./new_found_relations.csv"):
    """Write a DIFFERENT MODE relation file as a full matrix CSV.

    Row labels come from ``idx_obj_path`` and column labels from
    ``col_obj_path``; ``load_relation`` fills the matrix from
    ``relation_path``. The result is written to ``output_path`` and returned.
    """
    row_labels = load_Object(idx_obj_path)
    col_labels = load_Object(col_obj_path)
    full_matrix = load_relation(
        pd_idx=row_labels, pd_col=col_labels, relation_path=relation_path)
    full_matrix.to_csv(output_path)
    return full_matrix

In [None]:
# Expand the DIFFERENT MODE "row" relation file (rows: objectB, columns:
# objectA) into ./new_found_relations_row.csv.
save_DIFF_MODE_new_found_relations_mat_to_csv(
    idx_obj_path='../../project/main/objects/objectB.mat',
    col_obj_path='../../project/main/objects/objectA.mat',
    relation_path='../../project/main/output_d/new_found_relations_row.csv', 
    output_path="./new_found_relations_row.csv")

In [None]:
# Expand the DIFFERENT MODE "col" relation file into
# ./new_found_relations_col.csv.
# NOTE(review): uses the same row/column objects (objectB / objectA) as the
# "row" export — confirm the "col" file has that orientation.
save_DIFF_MODE_new_found_relations_mat_to_csv(
    idx_obj_path='../../project/main/objects/objectB.mat',
    col_obj_path='../../project/main/objects/objectA.mat',
    relation_path='../../project/main/output_d/new_found_relations_col.csv',
    output_path="./new_found_relations_col.csv")

看下shape

In [None]:
# Load the objectC list and display how many entries it has.
pd_col = load_Object(mat_path='../../project/main/objects/objectC.mat')
len(pd_col)

In [None]:
def get_matrix_shape_likely(path = '../../project/main/output_d/new_found_relations_row.csv'):
    """Collect the distinct row keys and column keys used in a relation file.

    Each non-blank line of ``path`` is split on commas; the first field
    is treated as a row key and the second as a column key. Both lists
    are printed and returned; their lengths indicate how many distinct
    rows and columns the file references.

    Args:
        path: Path of the comma-separated relation file.

    Returns:
        A tuple ``(key_x, key_y)`` of lists holding the distinct first and
        second fields, in order of first appearance.
    """
    # Dicts preserve insertion order, so they act as ordered sets here and
    # make the result reproducible between runs (a plain ``set`` of strings
    # does not have a stable order).
    seen_x = {}
    seen_y = {}

    with open(path, "r") as F:
        for line in F:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            ap = line.split(',')
            seen_x[ap[0]] = None
            seen_y[ap[1]] = None

    key_x = list(seen_x)
    key_y = list(seen_y)

    print(key_x, key_y)

    return (key_x, key_y)