# 数据整合

整合目标：
1. 文件描述
2. 命名空间描述
3. 类描述
4. 方法描述



In [2]:
import os
import tinydb
import time
import pandas as pd

In [3]:
import json

# 1. 读取数据

In [4]:
def _load_all_records(db_path):
    """Return every document stored in the TinyDB file at ``db_path``.

    The database is opened in a ``with`` block so that its underlying file
    handle is closed as soon as the documents have been read, instead of
    staying open for the lifetime of the notebook kernel.
    """
    with tinydb.TinyDB(db_path) as db:
        return db.all()


# Previously generated description records, one list per granularity
# (source file, namespace, class, method).
file_db_lst = _load_all_records('../data/results/amc-rough_csfile_db-dscoderv2lite.json')
namespace_db_lst = _load_all_records('../data/results/amc-rough_namespace_db-dscoderv2lite.json')
class_db_lst = _load_all_records('../data/results/amc-rough_class_db-dscoderv2lite.json')
method_db_lst = _load_all_records('../data/results/amc-rough_method_db-full.json')

In [9]:
# Lookup tables built from the raw record lists.

# filepath -> file description
file_db_map = {n['filepath']: n['desc'] for n in file_db_lst}

# namespace -> namespace description
namespace_db_map = {n['namespace']: n['desc'] for n in namespace_db_lst}

# filepath -> {classname -> class description}
# If the same (filepath, classname) pair occurs more than once, the last
# record wins.
class_db_map = {}
for cnode in class_db_lst:
    class_db_map.setdefault(cnode['filepath'], {})[cnode['classname']] = cnode['desc']

# Project structure metadata; the integration step iterates over its entries.
# Opened in a ``with`` block so the handle is closed deterministically, and
# decoded explicitly as UTF-8 (the encoding the output file is written with)
# rather than the platform default.
with open('../data/processed/amc-by-tree-sitter.json', 'r', encoding='utf-8') as meta_file:
    project_meta = json.load(meta_file)

# filepath -> {classname -> {methodname -> method description}}
# If the same (filepath, classname, methodname) triple occurs more than once,
# the first record wins: setdefault never overwrites an existing entry.
method_db_map = {}
for node in method_db_lst:
    fpath = node['filepath']
    class_name = node['classname']
    method_name = node['methodname']
    desc = node['desc']
    class_methods = method_db_map.setdefault(fpath, {}).setdefault(class_name, {})
    class_methods.setdefault(method_name, desc)

# 2. 整合数据

In [15]:
# Assemble the nested documentation tree:
#   namespace -> file_list -> class_list -> method_list
#
# Namespace, file and class descriptions are looked up strictly, so a missing
# description raises KeyError. Method descriptions are optional: every method
# found in the project metadata is listed, with an empty 'method_doc' when no
# description is available. This also holds for files that have no method
# descriptions at all, so 'method_list' has the same shape for every class.
amc_doc = {}
for k in project_meta:
    node = project_meta[k]
    fname = node['filename']  # not used below; kept in case later cells read it
    fpath = node['filepath']
    namespace = node['namespace_key']

    # The first file seen for a namespace creates that namespace's entry.
    if namespace not in amc_doc:
        amc_doc[namespace] = {
            'namespace_doc': namespace_db_map[namespace],
            'file_list': [],
        }

    csfile_doc_node = {
        'filepath': fpath,
        'file_doc': file_db_map[fpath],
        'class_list': [],
    }

    # Method descriptions recorded for this file; empty when there are none.
    file_method_docs = method_db_map.get(fpath, {})

    for class_node in node['source_class']:
        class_name = class_node['class_name']
        class_doc_node = {
            'classname': class_name,
            'class_doc': class_db_map[fpath][class_name],
            'method_list': [],
        }
        class_method_docs = file_method_docs.get(class_name, {})
        for method_node in class_node['methods']:
            method_name = method_node['method_name']
            method_doc_node = {
                'methodname': method_name,
                'method_doc': class_method_docs.get(method_name, ''),
            }
            class_doc_node['method_list'].append(method_doc_node)
        csfile_doc_node['class_list'].append(class_doc_node)

    amc_doc[namespace]['file_list'].append(csfile_doc_node)

## 2.1 输出json

In [16]:
# Write the integrated documentation tree to disk. The ``with`` block
# guarantees the file is flushed and closed when the dump finishes, instead
# of relying on garbage collection of an anonymous file object.
# Note: json.dump keeps its default ensure_ascii=True here, so any non-ASCII
# characters are written as \uXXXX escapes.
with open('../data/results/doc_amc_241108.json', 'w', encoding='utf-8') as out_file:
    json.dump(amc_doc, out_file)

# 3. 生成md文件

In [63]:
# import mistletoe

# with open('foo.md', 'r') as fin:
#     rendered = mistletoe.markdown(fin)