# txt

## txt to dict in json

In [None]:
# read .txt file
import re
import json
# Plain-text dump to parse; txt_to_json opens this path for reading.
input_dir = 'resources/txt/input.txt'
# Marker that introduces every message block in the dump.
file_block_delim = 'Leon424_new_message\n'
# Separator between the lines inside one block.
file_line_delim = '\n'
# Output path for papers whose merged list has at most 10 entries.
this_turn_dir = 'resources/json/this_turn.json'
# Output path for papers whose merged list has more than 10 entries.
next_turn_dir = 'resources/json/next_turn.json'


"""_txt_structure_
Leon424_new_message
Integrated_Generative_Model_for_Industrial_Anomaly_Detection_via_Bidirectional_LSTM_and_Attention_Mechanism.pdf
1. Hidden Markov model (HMM)
2. Local Outlier Factor (LOF)
3. Generative Adversarial Network (GAN)
4. Bidirectional Long Short-Term Memory (LSTM)
5. Attention Mechanism (AM)

Leon424_new_message
Integrated_Generative_Model_for_Industrial_Anomaly_Detection_via_Bidirectional_LSTM_and_Attention_Mechanism.pdf
- LSTM
- GAN
- Attention mechanism
"""

def delete_ordered_list_number(input_text):
    """Strip a leading ordered-list marker such as ``"12. "`` from *input_text*.

    The marker is one or more digits, a dot and a single space at the very
    start of the string. Text without such a prefix is returned unchanged.
    """
    marker = re.match(r'^\d+\. ', input_text)
    if marker is None:
        return input_text
    # Keep everything after the matched "<digits>. " prefix.
    return input_text[marker.end():]

def delete_unordered_list_symbol(input_text):
    """Strip a leading ``"- "`` bullet marker from *input_text*.

    Only a dash immediately followed by one space at the start of the string
    counts as a bullet; any other text is returned unchanged.
    """
    bullet = '- '
    if input_text.startswith(bullet):
        return input_text[len(bullet):]
    return input_text

def delete_space_at_the_beginning(input_text):
    """Return *input_text* with any run of leading whitespace removed.

    Trailing and inner whitespace is left untouched.
    """
    # '^' only anchors at the start of the string (no MULTILINE flag), so at
    # most one leading run of whitespace is replaced by the empty string.
    return re.sub(r'^\s+', '', input_text, count=1)

def convert_comma_to_list(input_text):
    """Split *input_text* on every comma and return the pieces as a list.

    The pieces are returned exactly as they appear between the commas; no
    whitespace is stripped. A string without a comma yields a one-item list.
    """
    separator = ','
    pieces = input_text.split(separator)
    return pieces

def delete_included_string_in_set(input_set):
    """Remove every string that is a proper substring of another entry.

    *input_set* (a list or a set of strings) is modified in place and the same
    object is returned. Two equal strings do not eliminate each other.
    """
    # Compare against a snapshot so removals do not disturb the iteration.
    snapshot = input_set.copy()
    for candidate in snapshot:
        contained_elsewhere = any(
            candidate != other and candidate in other for other in snapshot
        )
        # The membership test guards against removing an entry that an
        # earlier pass already took out.
        if contained_elsewhere and candidate in input_set:
            input_set.remove(candidate)
    return input_set

def delete_same_string_lower_in_set(input_set):
    """Collapse strings that differ only by letter case to a single spelling.

    For every group of entries that are equal after ``str.lower()``, the
    spelling encountered first is kept and every other spelling is removed,
    however many variants the group has. Exact repeats of the kept spelling
    are left alone.

    Args:
        input_set: A list or set of strings. It is modified in place.

    Returns:
        The same ``input_set`` object, after the removals.
    """
    kept_spelling = {}
    # Iterate over a snapshot: the container is mutated inside the loop.
    for item in list(input_set):
        first_seen = kept_spelling.setdefault(item.lower(), item)
        if first_seen != item:
            # One removal per occurrence in the snapshot, so every copy of a
            # non-kept spelling disappears from a list as well.
            input_set.remove(item)
    return input_set

def txt_to_json(input_dir, file_block_delim, file_line_delim, this_turn_dir, next_turn_dir,
                max_items_this_turn=10):
    """Parse the message dump and write per-paper candidate lists to two JSON files.

    The dump is split into blocks on ``file_block_delim``; the first line of a
    block is the paper key and the remaining lines are list items (ordered or
    unordered markers are stripped, comma-separated lines are split). For each
    paper the first block (``message_0``) is discarded, the remaining blocks
    are merged, and the merged list is cleaned of exact duplicates, of strings
    contained in a longer entry, and of case-only variants.

    Args:
        input_dir: Path of the text dump to read.
        file_block_delim: Marker that starts every block.
        file_line_delim: Separator between lines inside a block.
        this_turn_dir: JSON output path for papers with at most
            ``max_items_this_turn`` entries.
        next_turn_dir: JSON output path for papers with more entries.
        max_items_this_turn: Largest list size still written to
            ``this_turn_dir`` (default 10).
    """
    with open(input_dir, 'r', encoding='utf-8') as f:
        input_txt = f.read()

    # Whatever precedes the first delimiter is not a block, hence the [1:].
    blocks = [block.split(file_line_delim) for block in input_txt.split(file_block_delim)[1:]]

    messages_by_paper = {}
    for lines in blocks:
        paper, body = lines[0], lines[1:]
        messages = messages_by_paper.setdefault(paper, {})
        # For a paper that already has a message, skip blocks whose first body
        # line mentions both 'for references' and 'break'. `body` may be empty
        # when a block consists of the paper line only.
        if messages and body:
            first_line = body[0].lower()
            if 'for references' in first_line and 'break' in first_line:
                continue
        # Number messages per paper, so blocks of different papers may be
        # interleaved without overwriting each other's entries.
        message_index = len(messages)
        items = []
        for line in body:
            if line == '':
                continue
            line = delete_ordered_list_number(line)
            line = delete_unordered_list_symbol(line)
            # A line without a comma comes back as a one-item list.
            items += [delete_space_at_the_beginning(part) for part in convert_comma_to_list(line)]
        messages[f'message_{message_index}'] = items

    candidates_by_paper = {}
    for paper, messages in messages_by_paper.items():
        merged = [
            item
            for name, message_items in messages.items()
            if name != 'message_0'
            for item in message_items
        ]
        # Drop exact repeats (keeping first-seen order) before the substring
        # and case-variant clean-up, so repeats do not inflate the count.
        unique = list(dict.fromkeys(merged))
        candidates_by_paper[paper] = delete_same_string_lower_in_set(
            delete_included_string_in_set(unique)
        )

    this_turn_dict = {}
    next_turn_dict = {}
    for paper, candidates in candidates_by_paper.items():
        if len(candidates) > max_items_this_turn:
            next_turn_dict[paper] = candidates
        else:
            this_turn_dict[paper] = candidates

    for key, value in this_turn_dict.items():
        print(f'{key}: {value}')

    for key, value in next_turn_dict.items():
        print(f'{key}: {value}')

    with open(this_turn_dir, 'w', encoding='utf-8') as f:
        json.dump(this_turn_dict, f, indent=4)

    with open(next_turn_dir, 'w', encoding='utf-8') as f:
        json.dump(next_turn_dict, f, indent=4)

# Run the txt -> json conversion with the module-level paths and delimiters.
txt_to_json(input_dir, file_block_delim, file_line_delim, this_turn_dir, next_turn_dir)

# json

## divide json

In [None]:
import json 
import os 

"""_json_structure_
{
"Masked_Swin_Transformer_Unet_for_Industrial_Anomaly_Detection.pdf": {
    "CNN-based anomaly detection algorithms": "Yes.",
    "CutPaste (data enhancement-based strategy)": "Uncertain"
}, 
"Multivariate_Time-Series_Prediction_in_Industrial_Processes_via_a_Deep_Hybrid_Network_Under_Data_Uncertainty.pdf": {
    "DCGNet": "Uncertain. Please provide more context or information.",
"""

# JSON file read by get_yes_uncertain_json: paper -> {model name -> answer text}.
input_dir = 'resources/json/message_output.json'
# Folder that receives yes_dict.json.
output_folder = 'resources/json/'

def get_yes_uncertain_json(input_dir, output_folder):
    """Keep the model names answered with "yes" and save them as ``yes_dict.json``.

    The input JSON maps each paper to a ``{model name: answer}`` dict. A model
    name goes to the "yes" group when its answer contains ``'yes'``
    (case-insensitive) and to the "uncertain" group otherwise. The size of both
    groups is printed per paper; only the "yes" group is written to disk.

    Args:
        input_dir: Path of the JSON file to read.
        output_folder: Folder that receives ``yes_dict.json``; works with or
            without a trailing path separator.
    """
    with open(input_dir, 'r', encoding='utf-8') as f:
        result_dict = json.load(f)

    yes_dict = {}
    uncertain_dict = {}

    for paper, answers in result_dict.items():
        confirmed = yes_dict[paper] = []
        unconfirmed = uncertain_dict[paper] = []
        for model_name, answer in answers.items():
            if 'yes' in answer.lower():
                confirmed.append(model_name)
            else:
                unconfirmed.append(model_name)

    for key, value in yes_dict.items():
        print(f'yes_{key}: {len(value)}')
        print(f'uncertain_{key}: {len(uncertain_dict[key])}')

    # os.path.join instead of string concatenation, so a folder passed without
    # a trailing slash no longer yields '<folder>yes_dict.json'.
    with open(os.path.join(output_folder, 'yes_dict.json'), 'w', encoding='utf-8') as f:
        json.dump(yes_dict, f, indent=4)

# Extract the "yes" answers from message_output.json into yes_dict.json.
get_yes_uncertain_json(input_dir, output_folder)

In [None]:
import json 
import csv
import os 


# Folder containing the JSON files to merge.
input_folder = 'resources/json/'
# File names inside input_folder, merged in this order; on a duplicate
# top-level key the later file replaces the earlier file's value.
json_dir_list = ['this_turn.json', 'yes_dict.json']
# Destination of the merged JSON.
output_dir = os.path.join(input_folder, 'final_output.json')
def combine_json(input_folder, json_dir_list, output_dir):
    """Merge several JSON object files into one and write the result.

    The files are loaded in the given order and merged with ``dict.update``,
    so on a duplicate top-level key the value from the later file wins. An
    empty ``json_dir_list`` produces an empty JSON object.

    Args:
        input_folder: Folder that contains the files to merge.
        json_dir_list: File names relative to ``input_folder``.
        output_dir: Path of the merged JSON file to write.
    """
    # Start from an empty dict so the result is defined even when there is
    # nothing to merge.
    result_dict = {}
    for json_dir in json_dir_list:
        file_dir = os.path.join(input_folder, json_dir)
        with open(file_dir, 'r', encoding='utf-8') as f:
            result_dict.update(json.load(f))

    with open(output_dir, 'w', encoding='utf-8') as f:
        json.dump(result_dict, f, indent=4)

# Merge this_turn.json and yes_dict.json into final_output.json.
combine_json(input_folder, json_dir_list, output_dir)

# # write json to csv
# with open('final_output.csv', 'w', newline='') as f:
#     writer = csv.writer(f)
#     writer.writerow(['name', 'message'])
#     for key, value in result_dict.items():
#         writer.writerow([key, ', '.join(value)])


In [None]:
# write json to csv

# import csv
# import json

# with open('final_output.json', 'r') as f:
#     result_dict = json.load(f)
# with open('final_output.csv', 'w', newline='') as f:
#     writer = csv.writer(f)
#     writer.writerow(['name', 'message'])
#     for key, value in result_dict.items():
#         writer.writerow([key, ', '.join(value)])

In [None]:
import json
import csv
# Markdown review whose headings and paper lines define the CSV layout.
input_md_dir = 'resources/md/review.md'
# JSON mapping '<title with underscores>.pdf' to a list of model names.
input_json_dir = 'resources/json/final_output.json'
# CSV file written by write_models_to_csv_hierarchy.
output_dir = 'resources/csv/final_output.csv'

"""_md_structure_
# Detection

## Transfer learning

Process Monitoring Using Domain-Adversarial Probabilistic Principal Component Analysis: A Transfer Learning Framework, IEEE Transactions on Industrial Informatics, 2023

综述：文章提出了一种新的基于PPCA的迁移学习方法DAPPCA，利用PPCA进行特征提取，再通过logistic对提取到的特征进行所属域分类。PPCA特征提取器和域分类器之间的对抗能够使模型提取出各个域之间的共同特征，实现知识的迁移。变分推断用于训练模型，得到模型的参数。DAPPCA主要用于解决新的过程模式缺少故障数据的问题，在数值数据和工业数据上的测试结果证明了其能够提高对新过程模式的故障探测能力。

模型图：![image.png](review_files\attach_2_image.png)

案例数据：
- Simulated example
- Electrical Submersible Pump (ESP) 


Safety Poka Yoke in Zero-Defect Manufacturing Based on Digital Twins, IEEE Transactions on Industrial Informatics, 2023

综述：在这篇文章中，作者提出了基于主动学习-深度神经网络（AL-DNN）和领域对抗神经网络（DANN）的设备故障探测和诊断算法。此外，还为智能制造管理设计了一个数字孪生车间管理和控制系统。AL-DNN首先通过SDAE-based DNN以无监督的方式进行特征提取，再通过主动学习对提取到的特征进行故障探测。DANN通过域分类器和故障分类器之间的对抗提取域之间的通用特征。实验探索表明，AL-DNN算法的准确率高达99.248%，DANN的准确率可以提高20.256%，与传统算法相比具有更高的准确性。

模型图：![image.png](review_files\attach_3_image.png)
![image-2.png](review_files\attach_3_image-2.png)

案例数据：
- Case Western Reserve University bearing dataset

"""

"""paper name difference
md: Safety Poka Yoke in Zero-Defect Manufacturing Based on Digital Twins
json: Masked_Swin_Transformer_Unet_for_Industrial_Anomaly_Detection.pdf
"""
def if_delete_from_list(string):
    """Tell whether a markdown line should be dropped before parsing.

    Returns True for empty lines, for lines containing one of the section
    labels ('综述', '模型图', '案例数据'), and for lines starting with an
    image link or a dash; returns False for everything else.
    """
    if string == '':
        return True
    section_labels = ('综述', '模型图', '案例数据')
    if any(label in string for label in section_labels):
        return True
    # Image links and bullet items are recognised by their prefix.
    return string.startswith(('![image', '-'))
    
def write_models_to_csv_hierarchy(input_md_dir, input_json_dir, output_dir):
    """Write a CSV that lists, per first-level heading, each paper and its models.

    The markdown file is filtered with ``if_delete_from_list``; of the lines
    that remain, ``# `` lines set the first-level heading, ``## `` lines set
    the second-level heading, and every other line is treated as a paper
    entry whose title is the text before the first comma. For each first-level
    heading a ``[heading, 'relevant model']`` row is written, followed by one
    ``[title, 'model a, model b, ...']`` row per paper. Model names are looked
    up in the JSON file under the key ``title.replace(' ', '_') + '.pdf'``;
    titles without such a key are printed and skipped.

    Args:
        input_md_dir: Path of the markdown review to read.
        input_json_dir: Path of the JSON file mapping paper keys to model lists.
        output_dir: Path of the CSV file to write.
    """
    with open(input_md_dir, 'r', encoding='utf-8') as f:
        input_text = f.read()
    input_list = [x for x in input_text.split('\n') if not if_delete_from_list(x)]

    # Defined up front so a paper line that appears before any heading is
    # filed under empty headings instead of raising UnboundLocalError.
    present_first_level = ''
    present_second_level = ''
    output_dict = {}
    for x in input_list:
        if x.startswith('# '):
            present_first_level = x[2:]
            # A new first-level section starts without a sub-section.
            present_second_level = ''
        elif x.startswith('## '):
            present_second_level = x[3:]
        else:
            title = x.split(',')[0]
            output_dict[title] = [present_first_level, present_second_level]

    # first-level heading -> second-level heading -> [titles]
    hierarchy_dict = {}
    for title, (first_level, second_level) in output_dict.items():
        hierarchy_dict.setdefault(first_level, {}).setdefault(second_level, []).append(title)

    with open(input_json_dir, 'r', encoding='utf-8') as f:
        model_name_dict = json.load(f)

    with open(output_dir, 'w', newline='') as f:
        writer = csv.writer(f)
        for first_level, sections in hierarchy_dict.items():
            writer.writerow([first_level, 'relevant model'])
            for paper_list in sections.values():
                for paper in paper_list:
                    json_key = paper.replace(' ', '_') + '.pdf'
                    try:
                        models = model_name_dict[json_key]
                    except KeyError:
                        # No entry for this paper: report the title and go on.
                        print(paper)
                        continue
                    writer.writerow([paper, ', '.join(models)])

# Build final_output.csv from the markdown review and final_output.json.
write_models_to_csv_hierarchy(input_md_dir, input_json_dir, output_dir)