In [53]:
import numpy as np
from utils import equipment_dict, string_integer_dict, equipment_characteristics,Trial_strings,string_to_equipment,string_to_simplestring,equipment_to_string

# The three lookup tables imported from utils must all describe the same
# number of tokens; collapsing their sizes into a set leaves exactly one
# value when they agree.
table_sizes = {len(equipment_dict), len(string_integer_dict), len(equipment_characteristics)}
if len(table_sizes) != 1:
    raise ValueError("Equipment dictionary, string-integer mapping, and equipment characteristics must have the same length.")

# Dump the token id -> equipment name mapping for reference.
for token_id in equipment_dict:
    print(token_id, equipment_dict[token_id])

0 Feed
1 Mixer
2 Splitter
3 Heater
4 Cooler
5 HeatExchanger
6 Pump
7 CSTR
8 PFR
9 Flash
10 DistillationColumn
11 DistillationColumnwithRecycle
12 Compressor
13 Turbine
14 Product
15 Subbranch 1 starter
16 Subbranch 1 end point
17 Subbranch 1 connection point
18 Subbranch 2 starter
19 Subbranch 2 end point
20 Subbranch 2 connection point
21 Subbranch 3 starter
22 Subbranch 3 end point
23 Subbranch 3 connection point
24 HeatExchanger A
25 HeatExchanger B
26 Minorfeed
27 Minorproduct
28 End


In [54]:
# Split the token ids into the three categories flagged in
# equipment_characteristics: "B", "M" and "S". Entries carrying any other
# flag are ignored.
basic_equipment = []
multiple_io_equipment = []
special_equipment = []
for token_id, category in equipment_characteristics.items():
    if category == "B":
        basic_equipment.append(token_id)
    elif category == "M":
        multiple_io_equipment.append(token_id)
    elif category == "S":
        special_equipment.append(token_id)

print("Basic equipment:", basic_equipment)
print("Multiple input/output equipment:", multiple_io_equipment)
print("Special equipment:", special_equipment,"\n")


# Convert every trial string with the two helpers imported from utils.
Trial_eqs = {
    trial_name: string_to_equipment(trial_string)
    for trial_name, trial_string in Trial_strings.items()
}

Trial_simplestrings = {
    trial_name: string_to_simplestring(trial_string)
    for trial_name, trial_string in Trial_strings.items()
}

for trial_name, trial_tokens in Trial_eqs.items():
    print(f"{trial_name}: {trial_tokens} -> {Trial_simplestrings[trial_name]}")

Basic equipment: [3, 4, 6, 7, 8, 12, 13]
Multiple input/output equipment: [2, 9, 10, 11]
Special equipment: [0, 1, 5, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] 

CstrD: [0, 17, 1, 3, 7, 7, 4, 11, 14, 15, 6, 16, 28] -> F1MHCstrCstrCDP-1Pump1
PfrD: [0, 17, 1, 3, 8, 4, 11, 14, 15, 6, 16, 28] -> F1MHPfrCDP-1Pump1
Ned1: [0, 13, 24, 4, 12, 24, 3, 14, 28] -> FTHxCCompHxHP
Ned2: [0, 13, 24, 4, 12, 2, 3, 17, 1, 3, 14, 15, 24, 16, 28] -> FTHxCCompSH1MHP-1Hx1
Ned3: [0, 13, 24, 4, 12, 3, 2, 3, 17, 1, 3, 14, 15, 24, 16, 28] -> FTHxCCompHSH1MHP-1Hx1
Ned4: [0, 13, 24, 20, 1, 25, 4, 12, 2, 3, 17, 1, 2, 3, 14, 15, 24, 16, 18, 25, 13, 19, 28] -> FTHx2MHxCCompSH1MSHP-1Hx1-2HxT2
Ned5: [0, 13, 24, 20, 1, 25, 4, 12, 2, 3, 17, 1, 2, 3, 23, 1, 3, 14, 15, 25, 16, 18, 24, 2, 13, 19, 21, 22, 28] -> FTHx2MHxCCompSH1MSH3MHP-1Hx1-2HxST2-33


In [58]:

def connectivity(equipment,equipment_dict,process="generic",multi_io_tokens=None):
    """
    Validate a flowsheet token sequence against the connectivity rules.

    Returns None when every rule holds and raises ValueError, with a message
    naming the first violated rule, otherwise.

    Parameters:
    equipment        -- list of integer equipment tokens (uses list.count and indexing).
    equipment_dict   -- token-id -> name mapping; only its length is used, the End
                        token being len(equipment_dict) - 1.
    process          -- "generic" (default) or "SCO2"; "SCO2" adds the process
                        specific rules listed at the bottom.
    multi_io_tokens  -- collection of token ids that count as multiple input/output
                        equipment. When None, the notebook-level list
                        `multiple_io_equipment` is used, as before.

    Rules enforced, in order:
    - Exactly one feed (0), which is first; exactly one End token, which is last.
    - Exactly one product (14); it is not the second token and is immediately
      followed by the subbranch 1 starter (15) or the End token.
    - The token before End is the product or a subbranch end (16, 19, 22).
    - Subbranch starters (15, 18, 21) come after the product.
    - There are as many subbranch starters as multiple input/output tokens, and
      the k-th starter is not placed before the k-th multiple input/output token.
    - Starters, ends (16, 19, 22) and connectors (17, 20, 23) occur in equal
      numbers and each token is used at most once.
    - Starters appear in increasing token order, the k-th end token is the one
      paired with the k-th starter (starter + 1), every end comes after its
      starter, and every end comes before the next starter.
    - Every connector is followed by a mixer (1) or a subbranch end, and every
      mixer is preceded by a connector.
    - HeatExchanger A (24) and HeatExchanger B (25) tokens each occur zero or
      two times, and the two occurrences are not adjacent.
    - SCO2 only: at least one heater (3), cooler (4), compressor (12) and
      turbine (13), and no two adjacent tokens are identical.

    Not enforced:
    - A maximum sequence length.
    - Generic Hx token (5) rules.
    """
    if multi_io_tokens is None:
        # Backward-compatible default: the list built in an earlier notebook cell.
        multi_io_tokens = multiple_io_equipment
    end_token = len(equipment_dict) - 1
    multi_io_equipment_tokens = [(i,e) for i, e in enumerate(equipment) if e in multi_io_tokens]

    if equipment.count(0) != 1:
        raise ValueError("There must be exactly one feed in the equipment list.")
    if equipment.count(end_token) != 1:
        raise ValueError("There must be exactly one end in the equipment list.")
    if equipment[0] != 0:
        raise ValueError("The first equipment must be a feed.")
    if equipment[-1] != end_token:
        raise ValueError("The last equipment must be an end.")
    product_positions = [i for i, e in enumerate(equipment) if e == 14]
    if len(product_positions) != 1:
        raise ValueError("There must be exactly one product equipment in the equipment list.")
    product_index = product_positions[0]
    if product_index == 1:
        raise ValueError("The product equipment must not be the second equipment in the list.")
    # The product cannot be the last token (End is), so product_index + 1 is in range.
    if equipment[product_index + 1] not in (15, end_token):
        raise ValueError("The product equipment must be followed by a subbranch 1 starter or end token.")

    subbranch_starters_and_positions = [(i,e) for i, e in enumerate(equipment) if e in [15, 18, 21]]
    subbranch_ends_and_positions = [(i,e) for i, e in enumerate(equipment) if e in [16, 19, 22]]
    subbranch_connectors_and_positions = [(i,e) for i, e in enumerate(equipment) if e in [17, 20, 23]]
    # The token right before End must be the product or a subbranch end.
    if equipment[-2] not in [14, 16, 19, 22]:
        raise ValueError("The last equipment before the end token must be a product or subbranch end.")
    for (starter_index, _) in subbranch_starters_and_positions:
        if starter_index < product_index:
            raise ValueError("Subbranch starters must come after the product equipment.")
    if len(subbranch_starters_and_positions) != len(multi_io_equipment_tokens):
        raise ValueError("The number of subbranch starter tokens must match the number of multiple input/output equipment tokens.")
    for (starter_index, _), (multi_io_index, _) in zip(subbranch_starters_and_positions, multi_io_equipment_tokens):
        if starter_index < multi_io_index:
            raise ValueError("Subbranch starter tokens must come after the corresponding multiple input/output equipment tokens.")

    if len(subbranch_starters_and_positions) != len(subbranch_ends_and_positions):
        raise ValueError("The number of subbranch starters must match the number of subbranch ends.")
    if len(subbranch_starters_and_positions) != len(subbranch_connectors_and_positions):
        raise ValueError("The number of subbranch starters must match the number of subbranch connectors.")

    if len(subbranch_starters_and_positions) != len(set(e for i, e in subbranch_starters_and_positions)):
        raise ValueError("Subbranch starter tokens must be unique.")
    if len(subbranch_ends_and_positions) != len(set(e for i, e in subbranch_ends_and_positions)):
        raise ValueError("Subbranch end tokens must be unique.")
    if len(subbranch_connectors_and_positions) != len(set(e for i, e in subbranch_connectors_and_positions)):
        raise ValueError("Subbranch connector tokens must be unique.")

    if len(subbranch_starters_and_positions) == 1:
        if subbranch_starters_and_positions[0][1] != 15:
            raise ValueError("If there is only one subbranch starter, it must be Subbranch 1 starter.")
        if subbranch_ends_and_positions[0][1] != 16:
            raise ValueError("If there is only one subbranch starter, the corresponding end must be Subbranch 1 end point.")
        if subbranch_connectors_and_positions[0][1] != 17:
            raise ValueError("If there is only one subbranch starter, the corresponding connector must be Subbranch 1 connection point.")

    if len(subbranch_starters_and_positions) == 2:
        if subbranch_starters_and_positions[0][1] != 15 or subbranch_starters_and_positions[1][1] != 18:
            raise ValueError("If there are two subbranch starters, they must be Subbranch 1 starter and Subbranch 2 starter.")
        if subbranch_ends_and_positions[0][1] != 16 or subbranch_ends_and_positions[1][1] != 19:
            raise ValueError("If there are two subbranch starters, first end must be Subbranch 1 end point and second end must be Subbranch 2 end point.")
        if subbranch_ends_and_positions[0][0] >= subbranch_starters_and_positions[1][0]:
            raise ValueError("Subbranch 1 end must come before Subbranch 2 starter.")

    # Pair the k-th starter with the k-th end. For one or two subbranches the
    # token match is already guaranteed by the checks above; for three it is not.
    for (starter_index, starter_token), (end_index, end_token_id) in zip(subbranch_starters_and_positions, subbranch_ends_and_positions):
        if end_token_id != starter_token + 1:
            raise ValueError("Subbranch end token must match the corresponding subbranch starter token.")
        if starter_index >= end_index:
            raise ValueError("Subbranch end must come after the corresponding subbranch starter.")
    for k in range(len(subbranch_starters_and_positions) - 1):
        # Positions are ascending by construction (enumerate), so the ordering
        # rule has to be checked on the token ids, not on the positions.
        if subbranch_starters_and_positions[k][1] >= subbranch_starters_and_positions[k + 1][1]:
            raise ValueError("Subbranch starters must be used in the correct order.")
        if subbranch_ends_and_positions[k][0] >= subbranch_starters_and_positions[k + 1][0]:
            raise ValueError("Subbranch end must come before the next subbranch starter.")
    for (connector_index, _) in subbranch_connectors_and_positions:
        # A connector can never be last (End is), so connector_index + 1 is in range.
        if equipment[connector_index + 1] != 1 and equipment[connector_index + 1] not in [16, 19, 22]:
            raise ValueError("Subbranch connectors must be followed by a mixer or subbranch end.")

    for mixer_index in (i for i, e in enumerate(equipment) if e == 1):
        if equipment[mixer_index - 1] not in [17, 20, 23]:
            raise ValueError("Mixer must be preceded by a subbranch connector.")

    # HeatExchanger A / B tokens: none, or exactly two that are not adjacent.
    for hx_token, hx_label in ((24, "A"), (25, "B")):
        hx_positions = [i for i, e in enumerate(equipment) if e == hx_token]
        if len(hx_positions) not in (0, 2):
            raise ValueError(f"There must be exactly two HeatExchanger {hx_label} tokens (Hx{hx_label.lower()}) or none.")
        if len(hx_positions) == 2 and hx_positions[1] == hx_positions[0] + 1:
            raise ValueError(f"HeatExchanger {hx_label} tokens (Hx{hx_label.lower()}) must not be consecutive.")

    if process == "SCO2":
        # SCO2 processes need at least one heater, cooler, compressor and turbine.
        if equipment.count(3) < 1 or equipment.count(4) < 1 or equipment.count(12) < 1 or equipment.count(13) < 1:
            raise ValueError("SCO2 process must have at least one heater, one cooler, one compressor, and one turbine.")
        # The trailing (..., End) pair can never match because End is unique.
        for previous_token, current_token in zip(equipment, equipment[1:]):
            if previous_token == current_token:
                raise ValueError("The same equipment cannot be used twice consecutively.")

# Run the validator over every decoded trial design and report the outcome.
for trial_name, trial_tokens in Trial_eqs.items():
    try:
        connectivity(trial_tokens, equipment_dict)
    except ValueError as violation:
        print(f"{trial_name} is invalid: {violation}")
    else:
        print(f"{trial_name} is valid.")

CstrD is valid.
PfrD is valid.
Ned1 is valid.
Ned2 is valid.
Ned3 is valid.
Ned4 is valid.
Ned5 is valid.


In [59]:
# Synthetic data generator
# Builds N random token sequences and keeps those accepted by connectivity()
# under the "SCO2" rules; rejected sequences are stored with their error message.
min_length = 8
max_length = 20
N = 1000
# Generate synthetic data
synthetic_data = []
failed_attempts = []
np.random.seed(42)  # For reproducibility
nabil_eq = [3,4,12,13]
eg_eq = [3,4,6,7,8]
main_branch_eq = nabil_eq
for _ in range(N):
    # Three slots are reserved (feed, product and end are added separately below).
    total_length = np.random.randint(min_length, max_length) - 3
    mixer_splitter_count = np.random.randint(0,4)
    hx_count = np.random.randint(0, 3)
    equipment = np.random.choice(main_branch_eq, size=total_length, replace=True).tolist()
    if hx_count > 0:
        # One heat exchanger = a pair of HeatExchanger A tokens.
        hx_positions = np.random.choice(range(total_length + 1), size=2, replace=False)
        for pos in hx_positions:
            equipment.insert(pos, 24)
    if hx_count > 1:
        # Second heat exchanger = a pair of HeatExchanger B tokens.
        hx_positions = np.random.choice(range(total_length + 1), size=2, replace=False)
        for pos in hx_positions:
            equipment.insert(pos, 25)
    for _splitter in range(mixer_splitter_count):
        equipment.insert(np.random.randint(0, total_length + 1),2) # Splitter
    if mixer_splitter_count > 0:
        connection_token = 17
        for _mixer in range(mixer_splitter_count):
            position = np.random.randint(0, total_length + 1)
            # Insert the mixer first, then the connector at the same index, so
            # the connector ends up immediately before the mixer.
            equipment.insert(position, 1)
            equipment.insert(position, connection_token)
            connection_token += 3
    # Always draw, even when the value is discarded below, so the random
    # stream (and therefore the seeded output) is unchanged.
    main_branch_end = np.random.randint(4,len(equipment))
    if mixer_splitter_count == 0:
        main_branch_end = len(equipment)
    elif main_branch_end == position or main_branch_end == position + 1:
        # `position` is only read here, where it was set in this iteration;
        # keep the product from splitting the last connector/mixer pair.
        main_branch_end = position + 2
    equipment.insert(main_branch_end, 14)  # Product
    if mixer_splitter_count > 0:
        equipment.insert(main_branch_end + 1, 15)
        s1end = np.random.randint(min(main_branch_end + 2, len(equipment)), len(equipment) + 1)
        if mixer_splitter_count == 1:
            equipment.append(16)  # Subbranch 1 end
        else:
            equipment.insert(s1end, 16)  # Subbranch 1 end
    if mixer_splitter_count > 1:
        equipment.insert(s1end + 1, 18)  # Subbranch 2 starter
        s2end = np.random.randint(min(s1end + 2, len(equipment)), len(equipment) + 1)
        if mixer_splitter_count == 2:
            equipment.append(19)
        else:
            equipment.insert(s2end, 19)  # Subbranch 2 end
    if mixer_splitter_count > 2:
        equipment.insert(s2end + 1, 21)  # Subbranch 3 starter
        s3end = np.random.randint(min(s2end + 2, len(equipment)), len(equipment) + 1)
        if mixer_splitter_count == 3:
            equipment.append(22)
        else:
            equipment.insert(s3end, 22)  # Subbranch 3 end
    equipment.insert(0, 0)  # Feed
    equipment.append(len(equipment_dict) - 1)  # End
    try:
        connectivity(equipment, equipment_dict, "SCO2")
        synthetic_data.append(equipment)
    except ValueError as e:
        failed_attempts.append((equipment, str(e)))
print(f"Generated {len(synthetic_data)} valid synthetic configurations.")
for i in range(len(synthetic_data)):
    print(f"{i+1}: {(equipment_to_string(synthetic_data[i]))}")

Generated 80 valid synthetic configurations.
1: FCHTHHxbSHxaTCComp1cMHHxbTP1sHHxaT1eEnd
2: FTHxaHxbTHxbCompCTHxaHPEnd
3: FCHCompCTCH1cMSHP1sC1eEnd
4: F3cMSCompC1cM2cMSHSP1sHxbHHxbHxaCHxaCT1e2s2e3s3eEnd
5: FTSHxa1cMCTHxaHTP1sComp1eEnd
6: F1cMCompHCompHxbSHCHxbTP1sTHxaTHxa1eEnd
7: FTHCompCTHPEnd
8: FCompTCompSHCompHxaHTCompHxaCompTH1cMCompHCompP1sC1eEnd
9: FHxaHCompHxaTCTPEnd
10: FCTSHHxbSHxaCHP1sHxbT1cMC2cMCHxaTCompC1e2sCHComp2eEnd
11: F1cMCSHxb2cMHxaTCompSP1s1e2sTCompHxbHxaHComp2eEnd
12: FHxb2cMSCompSComp3cMP1sComp1cMHxaTCSHxbHComp1e2sHxa2e3s3eEnd
13: FCompHTS1cMP1sCCompC1eEnd
14: F2cMSCHC1cMCompSHxaTCHCompHCHxaCHCompCP1s1e2sTC2eEnd
15: F3cMT2cMSCS1cMSHxaP1sCompHHxaHC1e2s2e3s3eEnd
16: FHxb1cM3cMHHxaSC2cMHxbSHxaCP1sC1e2sHSComp2e3sCHTC3eEnd
17: FTHHxaHHxaCompHCPEnd
18: FT2cMS3cMHSHxa1cMHxbHHxaCompHCompSTHxbCP1sHTC1e2s2e3s3eEnd
19: FCS1cMSComp3cMS2cMCCompHP1s1e2sTC2e3s3eEnd
20: FHxaHS2cMCompSTHxa1cMCHCompP1s1e2sTCT2eEnd
21: FCompHHxaHTCompHxaCTCompHTHPEnd
22: FS2cMHxa1cMHCTHxbHxaP1sHHxbHS

In [60]:
# Group the rejected sequences by validator message, most frequent message first,
# and show up to five example sequences per message.
error_messages = {}
for equipment, error in failed_attempts:
    error_messages.setdefault(error, []).append(equipment)
error_messages = dict(sorted(error_messages.items(), key=lambda item: len(item[1]), reverse=True))
for error, equipments in error_messages.items():
    # The count belongs inside the f-string; as a separate print argument
    # `{len(equipments)}` is a set literal and prints as e.g. "{362}".
    print(f"Error: {error} ({len(equipments)})")
    for failed_equipment in equipments[:5]:
        print(f"  Equipment: {(equipment_to_string(failed_equipment))}")

Error: The same equipment cannot be used twice consecutively. {362}
  Equipment: FComp1cM2cMCompHHSCompCTHTSP1sCCC1e2sH2eEnd
  Equipment: FHxaHxbCompCHxaHTCTTHxbCCCCCTCHCompPEnd
  Equipment: FCompHxaCHTCompHHxaTTCHTCompCompCTPEnd
  Equipment: FCompCompCS1cM2cMSCP1sTHCompComp1e2sTCompH2eEnd
  Equipment: FTCompCompCompCTCCHCHHCTTPEnd
Error: Subbranch starter tokens must come after the corresponding multiple input/output equipment tokens. {273}
  Equipment: FCompComp3cMTSHSHP1s2cM1cM1e2sComp2e3sCSCompCompCompComp3eEnd
  Equipment: FHT1cMCCHTHHP1sCompSComp1eEnd
  Equipment: F1cM2cMS3cMP1sCompHCC1e2sSTS2e3s3eEnd
  Equipment: FTCompS2cP1sMSTComp3cM1c1e2sMHxaCompTCompH2e3sCompCHxaTHSTC3eEnd
  Equipment: F2cMTT3cMS1cMCHP1s1e2s2e3sSSC3eEnd
Error: SCO2 process must have at least one heater, one cooler, one compressor, and one turbine. {161}
  Equipment: FSHxbTHT2cMHxaCompHP1s1cMSHTHxbHxaCompH1e2s2eEnd
  Equipment: FTT3cMT1cM2cMSP1sSHS1e2sComp2e3s3eEnd
  Equipment: FCompHxaHxbHTTHHHxbHxaCompTHCom

In [64]:
# Analysis of generated synthetic data:
# tally the accepted designs by number of subbranches and by heat-exchanger usage.
tris = 0
twos = 0
ones = 0
zeros = 0
hx2 = 0
hx1 = 0
hx0 = 0
for equipment in synthetic_data:
    # Subbranch count is read off the highest-numbered starter token present
    # (21 = subbranch 3, 18 = subbranch 2, 15 = subbranch 1).
    if equipment.count(21) == 1:
        tris += 1
    elif equipment.count(18) == 1:
        twos += 1
    elif equipment.count(15) == 1:
        ones += 1
    else:
        zeros += 1
    # hx2: a HeatExchanger B pair is present; hx1: only a HeatExchanger A pair;
    # hx0: neither token appears.
    if equipment.count(25) == 2:
        hx2 += 1
    elif equipment.count(24) == 2:
        hx1 += 1
    elif equipment.count(24) == 0 and equipment.count(25) == 0:
        hx0 += 1
print(f"Number of designs with 1 subbranch: {ones}")
print(f"Number of designs with 2 subbranches: {twos}")
print(f"Number of designs with 3 subbranches: {tris}")
print(f"Number of designs with no subbranches: {zeros}")
print(f"Number of designs with HeatExchanger A only: {hx1}")
print(f"Number of designs with HeatExchanger B: {hx2}")
print(f"Number of designs with no heat exchangers: {hx0}")

# Count distinct designs. np.unique cannot be applied to a plain list of
# variable-length lists, so compare the designs as hashable tuples; this works
# whether synthetic_data is still a list or already an object array.
print(len({tuple(design) for design in synthetic_data}))

Number of designs with 1 subbranch: 20
Number of designs with 2 subbranches: 19
Number of designs with 3 subbranches: 17
Number of designs with no subbranches: 24
Number of designs with 1 HeatExchanger A: 30
Number of designs with 2 HeatExchanger A: 30
Number of designs with no HeatExchanger A: 20
80


In [62]:
# Wrap both result collections as object arrays (rebinding the original names)
# and persist them as pickled .npy files.
synthetic_data, failed_attempts = (
    np.array(records, dtype=object) for records in (synthetic_data, failed_attempts)
)
for target_file, payload in (('sco2_sd.npy', synthetic_data), ('sco2_fa.npy', failed_attempts)):
    np.save(target_file, payload, allow_pickle=True)