# Read FHX file

In [1]:
import pandas as pd
import re
import numpy as np

In [2]:
# Location of the FHX export to parse.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs elsewhere after PATH is edited.
PATH = r"/Users/Vesal/Desktop/AltaML/Project/Use Case 1/Data/OneDrive_1_1-13-2021/"#AltaML_Sample_FHX_SAMA_Data/"
# FILE_NAME = 'AltaML_FHX_Sample.txt'
FILE_NAME = 'Reduced_FHX_file.txt'

# Read the whole file into one string; the context manager closes the file
# handle as soon as the content has been read.
with open(PATH + FILE_NAME, 'r') as fhx_file:
    data = fhx_file.read().strip()

# Define Module Extraction Functions

## 1- MODULE_CLASS

In [3]:
def module_class_variables(data):
    """Split FHX text into its MODULE_CLASS sections and extract each name.

    Parameters
    ----------
    data : str
        Text to scan. A section starts wherever ``MODULE_CLASS`` directly
        follows a newline.

    Returns
    -------
    tuple of (list of str, list of list of str)
        ``(sections, names)``. ``sections[i]`` is the text of the i-th section,
        starting with ``MODULE_CLASS``; ``names[i]`` is a one-element list with
        the name found in that section (one DataFrame row). Both lists are
        empty when no section is found, so the result can always be unpacked
        into two values.

    Raises
    ------
    IndexError
        If a section contains no ``NAME=... CATEGORY`` text.
    """
    # Compiled once instead of once per section.
    name_pattern = re.compile(r'NAME=.* CATEGORY')

    actual_class_module = []
    module_class_lists = []

    for section_body in data.split('\nMODULE_CLASS')[1:]:
        # Put back the keyword that the split consumed.
        module = 'MODULE_CLASS' + section_body

        # The slice drops the first 6 characters (`NAME=` plus the opening
        # quote character) and the last 10 (closing quote plus ` CATEGORY`).
        module_name = name_pattern.findall(module)[0][6:-10]

        module_class_lists.append([module_name])
        actual_class_module.append(module)

    return actual_class_module, module_class_lists

## 2- MODULE_INSTANCE

In [4]:
def module_instance_variables(data):
    """Split FHX text into its MODULE_INSTANCE sections and extract each tag.

    Parameters
    ----------
    data : str
        Text to scan. A section starts at every occurrence of
        ``MODULE_INSTANCE``.

    Returns
    -------
    tuple of (list of str, list of list of str)
        ``(sections, tags)``. ``sections[i]`` is the text of the i-th section,
        starting with ``MODULE_INSTANCE``; ``tags[i]`` is a one-element list
        with the tag found in that section (one DataFrame row). Both lists are
        empty when no section is found, so the result can always be unpacked
        into two values.

    Raises
    ------
    IndexError
        If a section contains no ``TAG=... PLANT_AREA`` text.
    """
    # Compiled once instead of once per section.
    tag_pattern = re.compile(r'TAG=.* PLANT_AREA')

    actual_instance_module = []
    module_instance_lists = []

    for section_body in data.split('MODULE_INSTANCE')[1:]:
        # Put back the keyword that the split consumed.
        inst = 'MODULE_INSTANCE' + section_body

        # The slice drops the first 5 characters (`TAG=` plus the opening
        # quote character) and the last 12 (closing quote plus ` PLANT_AREA`).
        tag = tag_pattern.findall(inst)[0][5:-12]

        module_instance_lists.append([tag])
        actual_instance_module.append(inst)

    return actual_instance_module, module_instance_lists

## 3- MODULE (INDEPENDENT)

In [5]:
def independent_module_variables(data):
    """Split FHX text into its ``MODULE TAG`` sections and extract each tag.

    Parameters
    ----------
    data : str
        Text to scan. A section starts at every occurrence of ``MODULE TAG``.

    Returns
    -------
    tuple of (list of str, list of list of str)
        ``(sections, tags)``. ``sections[i]`` is the text of the i-th section,
        starting with ``MODULE TAG``; ``tags[i]`` is a one-element list with
        the tag found in that section (one DataFrame row). Both lists are
        empty when no section is found, so the result can always be unpacked
        into two values.

    Raises
    ------
    IndexError
        If a section contains no ``TAG=... PLANT_AREA`` text.
    """
    # Compiled once instead of once per section.
    tag_pattern = re.compile(r'TAG=.* PLANT_AREA')

    actual_independent_module = []
    independent_modul_lists = []

    for section_body in data.split('MODULE TAG')[1:]:
        # Put back the keyword that the split consumed.
        ind = 'MODULE TAG' + section_body

        # The slice drops the first 5 characters (`TAG=` plus the opening
        # quote character) and the last 12 (closing quote plus ` PLANT_AREA`).
        tag = tag_pattern.findall(ind)[0][5:-12]

        independent_modul_lists.append([tag])
        actual_independent_module.append(ind)

    return actual_independent_module, independent_modul_lists

# Create module dataframe

## 1- Class module

In [6]:
# Extract the MODULE_CLASS sections and tabulate their names, tagging each row
# with the kind of module it came from.
actual_class_module, class_module_name = module_class_variables(data)
class_module = (
    pd.DataFrame(class_module_name, columns=['module_name'])
    .assign(module_type='MODULE_CLASS')
)
class_module

Unnamed: 0,module_name,module_type
0,KS_AI_SIS_HH,MODULE_CLASS
1,KS_AI_STD,MODULE_CLASS


## 2- Instance module

In [7]:
# Extract the MODULE_INSTANCE sections and tabulate their tags, tagging each
# row with the kind of module it came from.
actual_inst_module, inst_module_name = module_instance_variables(data)
instance_module = (
    pd.DataFrame(inst_module_name, columns=['module_name'])
    .assign(module_type='MODULE_INSTANCE')
)
instance_module

Unnamed: 0,module_name,module_type
0,TI_580920,MODULE_INSTANCE
1,PI_580911B,MODULE_INSTANCE


## 3- Independent module

In [8]:
# Extract the `MODULE TAG` sections and tabulate their tags, tagging each row
# with the kind of module it came from.
actual_ind_module, ind_module_name = independent_module_variables(data)
independent_modules = (
    pd.DataFrame(ind_module_name, columns=['module_name'])
    .assign(module_type='MODULE')
)
independent_modules

Unnamed: 0,module_name,module_type
0,FI_580931,MODULE
1,FIC_580931,MODULE
2,FI_580921,MODULE
3,FIC_580921,MODULE
4,PY-580959,MODULE


# Combine all modules and select the desired ones

In [9]:
# Stack the three module tables (independent, class, instance - in that order)
# under a fresh 0..n-1 index. `pd.concat` replaces `DataFrame.append`, which
# was deprecated in pandas 1.4 and removed in pandas 2.0.
all_modules = pd.concat(
    [independent_modules, class_module, instance_module],
    ignore_index=True,
)
all_modules

Unnamed: 0,module_name,module_type
0,FI_580931,MODULE
1,FIC_580931,MODULE
2,FI_580921,MODULE
3,FIC_580921,MODULE
4,PY-580959,MODULE
5,KS_AI_SIS_HH,MODULE_CLASS
6,KS_AI_STD,MODULE_CLASS
7,TI_580920,MODULE_INSTANCE
8,PI_580911B,MODULE_INSTANCE


In [10]:
# Module texts in the same order as the rows of `all_modules`
# (independent, class, instance), so one position addresses both.
all_actual_modules = [*actual_ind_module, *actual_class_module, *actual_inst_module]

In [11]:
# Positions (into `all_modules` / `all_actual_modules`) of the modules to process: 0 through 8.
selected_modules_id = list(range(9))

# Create function blocks dataframes from selected modules

## 1- Define function_block extractor function

In [12]:
def function_block_variables(module, module_name):
    """List the function blocks found in one module's text.

    Parameters
    ----------
    module : str
        Text of a single module; it is cut at every ``FUNCTION_BLOCK``.
    module_name : str
        Name copied into the first field of every returned row.

    Returns
    -------
    list of list
        One ``[module_name, block_name, block_definition]`` row per
        ``FUNCTION_BLOCK`` occurrence, or the single placeholder row
        ``[module_name, nan, nan]`` when there is none.

    Raises
    ------
    IndexError
        If an occurrence has no ``NAME=... DEFINITION`` / ``DEFINITION=...`` text.
    """
    pieces = module.split('FUNCTION_BLOCK')[1:]

    # Nothing to parse: hand back one placeholder row for this module.
    if not pieces:
        return [[module_name, np.nan, np.nan]]

    rows = []
    for piece in pieces:
        block_text = 'FUNCTION_BLOCK' + piece

        # Slice off the first 6 characters (`NAME=` + quote) and the last 12
        # (quote + ` DEFINITION`) of the match.
        name_match = re.compile(r'NAME=.* DEFINITION').findall(block_text)[0]
        block_name = name_match[6:-12]

        # Slice off the first 12 characters (`DEFINITION=` + quote) and the
        # final character of the match.
        definition_match = re.compile(r'DEFINITION=.*').findall(block_text)[0]
        block_definition = definition_match[12:-1]

        rows.append([module_name, block_name, block_definition])

    return rows

## 2- Find function blocks for each selected module

In [13]:
# NOTE(review): `modules` is rebuilt here but is not read anywhere in this cell.
modules = ['MODULE_CLASS' + section for section in data.split('\nMODULE_CLASS')[1:]]

# Collect the function-block rows of every selected module into one flat list.
module_function_block_list = []
for module_id in selected_modules_id:
    module_function_block_list.extend(
        function_block_variables(all_actual_modules[module_id],
                                 all_modules.iloc[module_id]['module_name'])
    )

In [14]:
# Tabulate the function-block rows, then drop exact duplicates and the
# placeholder rows that carry NaN.
# NOTE(review): 'functon_block_definition' is misspelled, but the `fb_dict`
# cell reads the column under this exact name, so it is kept as is.
function_blocks = (
    pd.DataFrame(
        module_function_block_list,
        columns=['module_name', 'function_block_name', 'functon_block_definition'],
    )
    .drop_duplicates()
    .dropna()
)

function_blocks

Unnamed: 0,module_name,function_block_name,functon_block_definition
0,FI_580931,AI2,AI
1,FI_580931,PV-COMPARE,CALC
2,FI_580931,HI_HI_ALARM,CND
3,FI_580931,HI_ALARM,CND
4,FI_580931,LO_ALARM,CND
...,...,...,...
253,PI_580911B,VIV_CURVE,SGCR
254,PI_580911B,FGR_CURVE,SGCR
255,PI_580911B,AIR_CURVE,SGCR
257,PI_580911B,PDE2,PDE


In [15]:
# function_blocks['functon_block_definition'].value_counts()

In [16]:
# Lookup: function block name -> function block definition.
# When a name occurs more than once, the last row wins.
fb_dict = (
    function_blocks
    .set_index('function_block_name')['functon_block_definition']
    .to_dict()
)

# Create attribute instance dataframes from selected modules

## 1- Define instance attribute extractor function

In [17]:
def attribute_instance_variables(module, module_name):
    """List the attribute instances found in one module's text.

    Parameters
    ----------
    module : str
        Text of a single module; it is cut at every ``ATTRIBUTE_INSTANCE``.
    module_name : str
        Name copied into the first field of every returned row.

    Returns
    -------
    list of list
        One ``[module_name, attribute_name, reference]`` row per
        ``ATTRIBUTE_INSTANCE`` occurrence. ``reference`` is NaN when the
        occurrence has no ``VALUE { REF="..." }`` text. When the module has no
        occurrence at all, the single placeholder row
        ``[module_name, nan, nan]`` is returned; it has the same three fields
        as a regular row so all rows fit one three-column table.

    Raises
    ------
    IndexError
        If an occurrence has no ``NAME=...`` text.
    """
    chunks = module.split('ATTRIBUTE_INSTANCE')[1:]

    if not chunks:
        return [[module_name, np.nan, np.nan]]

    # Compiled once instead of once per attribute instance.
    name_pattern = re.compile(r'NAME=.*')
    ref_pattern = re.compile(r'VALUE { REF=".*" }')

    attr_instance_lists = []
    for chunk in chunks:
        # Put back the keyword that the split consumed.
        attr_inst = 'ATTRIBUTE_INSTANCE' + chunk

        # Drop the first 6 characters (`NAME=` + quote) and the final
        # character of the match.
        attr_instance_name = name_pattern.findall(attr_inst)[0][6:-1]

        # A missing reference is expected, so test for it explicitly instead
        # of swallowing every exception. The slice drops `VALUE { REF="`
        # (13 characters) and the closing `" }` (3 characters).
        ref_matches = ref_pattern.findall(attr_inst)
        attr_ref = ref_matches[0][13:-3] if ref_matches else np.nan

        attr_instance_lists.append([module_name, attr_instance_name, attr_ref])

    return attr_instance_lists

In [18]:
def clean_ref_instance_attribute(df, column):
    """Remove ``//`` markers from the reference strings in ``df[column]``.

    Each value is cleaned on its own, so one unusual reference can no longer
    shift the cleaned values of the references that follow it.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the reference column. It is modified in place.
    column : str
        Name of the column to clean.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``df[column]`` cleaned:

        * a value that splits on ``//`` into exactly one non-empty part
          becomes that part (``'//A/B/C'`` -> ``'A/B/C'``);
        * a value with several non-empty parts has them re-joined with a
          single ``/`` so that no part is lost (``'A//B'`` -> ``'A/B'``);
        * a value with no non-empty part, or one that is not a string
          (e.g. NaN), is left unchanged.
    """
    def _strip_double_slashes(ref):
        # Non-string cells (such as NaN) cannot be split; pass them through.
        if not isinstance(ref, str):
            return ref
        parts = [part for part in ref.split('//') if part]
        return '/'.join(parts) if parts else ref

    df[column] = df[column].map(_strip_double_slashes)

    return df

## 2- Find instance attributes for each selected module

In [19]:
# NOTE(review): `modules` is rebuilt here but is not read anywhere in this cell.
modules = ['MODULE_CLASS' + section for section in data.split('\nMODULE_CLASS')[1:]]

# Collect the attribute-instance rows of every selected module into one flat list.
attr_inst_list = []
for module_id in selected_modules_id:
    rows_for_module = attribute_instance_variables(
        all_actual_modules[module_id],
        all_modules.iloc[module_id]['module_name'],
    )
    attr_inst_list.extend(rows_for_module)

In [20]:
# Tabulate the attribute-instance rows; empty strings are treated as missing,
# and every row with a missing field is discarded before re-indexing.
instance_attribute = (
    pd.DataFrame(
        attr_inst_list,
        columns=['module_name', 'attribute_instance_name', 'attr_instance_ref'],
    )
    .replace('', np.nan)
    .dropna()
    .reset_index(drop=True)
)

# Clean the `//` markers in the reference column.
instance_attribute = clean_ref_instance_attribute(instance_attribute, 'attr_instance_ref')
instance_attribute.head()

Unnamed: 0,module_name,attribute_instance_name,attr_instance_ref
0,FI_580931,TEMP_IN,TI_580920/AI1/OUT
1,FI_580931,PRESS_IN,AUX_BMS_IO/PIT911B/OUT
2,FIC_580931,IN,FI_580931/AI1/OUT
3,FI_580921,TEMP_IN,TI_580920/AI1/OUT
4,FI_580921,PRESS_IN,AUX_BMS_IO/PIT911B/OUT


## 3- Create Reference df for instance attributes

In [21]:
def instance_attribute_reference_df(df, column):
    """Break each reference in ``df[column]`` into module / block / attribute.

    Every distinct reference is split on ``/`` and mapped to exactly three
    fields:

    * three parts            -> ``[module, function_block, attribute]``
    * fewer than three parts -> right-aligned, missing leading fields are
      ``''`` (``'B/P'`` -> ``['', 'B', 'P']``, ``'P'`` -> ``['', '', 'P']``)
    * more than three parts  -> the first three parts are kept

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least ``column`` and ``module_name``.
    column : str
        Name of the column holding the reference strings. Values must be
        strings.

    Returns
    -------
    pandas.DataFrame
        ``df`` merged with the columns ``ref_module_name``,
        ``ref_function_block`` and ``ref_attribute``. Where the reference
        names no module (``ref_module_name == ''``), the row's own
        ``module_name`` is filled in.
    """
    unique_refs = df[column].unique().tolist()

    rows = []
    for ref in unique_refs:
        parts = ref.split('/')
        if len(parts) >= 3:
            # Same as dropping the last part for a four-part reference, but
            # still three fields wide for longer ones.
            fields = parts[:3]
        else:
            # Right-align short references; a one-part reference must still
            # give three fields, otherwise the row is too short for the table.
            fields = [''] * (3 - len(parts)) + parts
        rows.append(fields + [ref])

    # The last column carries the original reference and is named after
    # `column`, so the merge below joins on it whatever the column is called.
    ref_table = pd.DataFrame(rows, columns=['ref_module_name', 'ref_function_block',
                                            'ref_attribute', column])

    final_ref_df = df.merge(ref_table)

    # A reference without a module part points inside the row's own module.
    missing_module = final_ref_df['ref_module_name'] == ''
    final_ref_df.loc[missing_module, 'ref_module_name'] = final_ref_df.loc[missing_module, 'module_name']

    return final_ref_df

In [40]:
# Split every attribute reference into module / function block / attribute columns.
ia_ref_df = instance_attribute_reference_df(df=instance_attribute,
                                            column='attr_instance_ref')
ia_ref_df.head()

Unnamed: 0,module_name,attribute_instance_name,attr_instance_ref,ref_module_name,ref_function_block,ref_attribute
0,FI_580931,TEMP_IN,TI_580920/AI1/OUT,TI_580920,AI1,OUT
1,FI_580921,TEMP_IN,TI_580920/AI1/OUT,TI_580920,AI1,OUT
2,KS_AI_SIS_HH,TEMP_IN,TI_580920/AI1/OUT,TI_580920,AI1,OUT
3,KS_AI_SIS_HH,TEMP_IN,TI_580920/AI1/OUT,TI_580920,AI1,OUT
4,PI_580911B,TEMP_IN,TI_580920/AI1/OUT,TI_580920,AI1,OUT


In [23]:
# Lookup: attribute instance name -> function block named in its reference.
# When a name occurs more than once, the last row wins.
ia_dict = (
    ia_ref_df
    .set_index('attribute_instance_name')['ref_function_block']
    .to_dict()
)
ia_dict

{'TEMP_IN': 'AI1',
 'PRESS_IN': 'PIT911B',
 'IO_IN': 'PIT911B',
 'IN': 'AI1',
 'INPUT': 'PIC_580919',
 'ALM_HYS': 'AI1',
 'HI_ALM_HYS': 'AI1',
 'HI_HI_ALM_HYS': 'AI1',
 'LO_ALM_HYS': 'AI1',
 'LO_LO_ALM_HYS': 'AI1',
 'AVTR1_BYPASS1': 'PIT911B',
 'HI_HI_LIM': 'PIT911B',
 'HI_LIM': 'PIT911B',
 'SCALE': 'PIT911B',
 'XMTR_FROZEN': 'PIT911B_ALM'}

# Find wire connections for selected modules

## 1- Define wire_connections extractor function

In [24]:
def wire_connections_variables(module, module_name):
    """List the wire connections found in one module's text.

    Parameters
    ----------
    module : str
        Text of a single module; it is cut at every ``WIRE SOURCE``.
    module_name : str
        Name copied into the first field of every returned row.

    Returns
    -------
    list of list
        One ``[module_name, source, destination]`` row per ``WIRE SOURCE``
        occurrence, or the single placeholder row ``[module_name, nan, nan]``
        when there is none.

    Raises
    ------
    IndexError
        If an occurrence has no ``SOURCE=... D`` / ``DESTINATION=...`` text.
    """
    pieces = module.split('WIRE SOURCE')[1:]

    # No wire in this module: hand back one placeholder row.
    if not pieces:
        return [[module_name, np.nan, np.nan]]

    def _endpoints(wire_text):
        # Source: drop the first 8 characters (`SOURCE=` + quote) and the
        # last 3 (quote + ` D`) of the match.
        src = re.compile(r'SOURCE=.* D').findall(wire_text)[0][8:-3]
        # Destination: drop the first 13 characters (`DESTINATION=` + quote)
        # and the final character of the match.
        dst = re.compile(r'DESTINATION=.*').findall(wire_text)[0][13:-1]
        return [module_name, src, dst]

    return [_endpoints('WIRE SOURCE' + piece) for piece in pieces]

## 2- Find wire connections for each selected module

In [25]:
# NOTE(review): `modules` is rebuilt here but is not read anywhere in this cell.
modules = ['MODULE_CLASS' + section for section in data.split('\nMODULE_CLASS')[1:]]

# Collect the wire rows of every selected module into one flat list.
wire_connection_list = []
for module_id in selected_modules_id:
    wire_connection_list += wire_connections_variables(
        all_actual_modules[module_id],
        all_modules.iloc[module_id]['module_name'],
    )

In [26]:
# Tabulate the wire rows; placeholder rows (NaN source/destination) are dropped.
wire_connections = pd.DataFrame(
    wire_connection_list,
    columns=['module_name', 'source', 'destination'],
).dropna()

wire_connections

Unnamed: 0,module_name,source,destination
0,FI_580931,HI_HI_ALARM/OUT_D,AI1/HI_HI_ENABLE
1,FI_580931,HI_ALARM/OUT_D,AI1/HI_ENABLE
2,FI_580931,LO_ALARM/OUT_D,AI1/LO_ENABLE
3,FI_580931,LO_LO_ALARM/OUT_D,AI1/LO_LO_ENABLE
4,FI_580931,AI2/OUT,AGA_US1/IN
...,...,...,...
377,PI_580911B,ALM_HYS,LO_LO_ALM_HYS
378,PI_580911B,HI_HI_ALARM/OUT_D,AI1/HI_HI_ENAB
379,PI_580911B,HI_ALARM/OUT_D,AI1/HI_ENAB
380,PI_580911B,LO_ALARM/OUT_D,AI1/LO_ENAB


## 3- Find instance modules and replace them with their function block

In [27]:
# Wherever a whole source/destination value equals an attribute instance name,
# substitute the function block recorded for that name in `ia_dict`.
wire_connections.loc[:, 'source'] = wire_connections['source'].replace(ia_dict)
wire_connections.loc[:, 'destination'] = wire_connections['destination'].replace(ia_dict)

wire_connections

Unnamed: 0,module_name,source,destination
0,FI_580931,HI_HI_ALARM/OUT_D,AI1/HI_HI_ENABLE
1,FI_580931,HI_ALARM/OUT_D,AI1/HI_ENABLE
2,FI_580931,LO_ALARM/OUT_D,AI1/LO_ENABLE
3,FI_580931,LO_LO_ALARM/OUT_D,AI1/LO_LO_ENABLE
4,FI_580931,AI2/OUT,AGA_US1/IN
...,...,...,...
377,PI_580911B,AI1,AI1
378,PI_580911B,HI_HI_ALARM/OUT_D,AI1/HI_HI_ENAB
379,PI_580911B,HI_ALARM/OUT_D,AI1/HI_ENAB
380,PI_580911B,LO_ALARM/OUT_D,AI1/LO_ENAB


## 4- Replace the source and destination function block names

In [28]:
# Keep only the text before the first "/" of each endpoint.
# The vectorised `.str` accessor replaces the row-wise `apply(axis=1)`: it
# gives the same result on string columns and also works when the frame is
# empty, where the row-wise apply does not return a column.
wire_connections["destination"] = wire_connections["destination"].str.split("/").str[0]
wire_connections["source"] = wire_connections["source"].str.split("/").str[0]

wire_connections

Unnamed: 0,module_name,source,destination
0,FI_580931,HI_HI_ALARM,AI1
1,FI_580931,HI_ALARM,AI1
2,FI_580931,LO_ALARM,AI1
3,FI_580931,LO_LO_ALARM,AI1
4,FI_580931,AI2,AGA_US1
...,...,...,...
377,PI_580911B,AI1,AI1
378,PI_580911B,HI_HI_ALARM,AI1
379,PI_580911B,HI_ALARM,AI1
380,PI_580911B,LO_ALARM,AI1


In [36]:
# Swap every cell that equals a function block name for that block's definition.
# NOTE(review): the replacement runs over all columns, `module_name` included.
cleaned_wire_connections = wire_connections.replace(to_replace=fb_dict)

cleaned_wire_connections

Unnamed: 0,module_name,source,destination
0,FI_580931,CND,AIWCALARM
1,FI_580931,CND,AIWCALARM
2,FI_580931,CND,AIWCALARM
3,FI_580931,CND,AIWCALARM
4,FI_580931,AI,AGA_US
...,...,...,...
377,PI_580911B,AIWCALARM,AIWCALARM
378,PI_580911B,CND,AIWCALARM
379,PI_580911B,CND,AIWCALARM
380,PI_580911B,CND,AIWCALARM


In [38]:
# Export the distinct (source, destination) pairs as the graph definition.
# `columns=` replaces the positional axis argument of `drop`, which was
# deprecated in pandas 1.3 and removed in pandas 2.0.
(
    cleaned_wire_connections
    .drop(columns='module_name')
    .drop_duplicates()
    .to_csv('graph_def.csv', index=False)
)