# Sensor placement

## Imports

In [25]:
#imports
import pandas as pd
import re
import os

In [26]:
# Load the LayoutID occurrence table that was extracted from the database.
filename = 'new_LayoutID_occurence_2025-07-14.csv'
filepath = os.path.join('OUTs', filename)
df = pd.read_csv(filepath)

# Preview the first rows to inspect the columns.
df.head()

Unnamed: 0,new_layoutID,Occurrence,KBn,buildingIDs,LayoutID
0,gas:1--heating-circuit_mix+ctl:1--warm-water_i...,128,KB1,"[768, 769, 770, 778, 787, 1924, 1926, 1927, 46...",{'gas:1--heating-circuit:1--warm-water:1'}
1,gas:1--heating-circuit_mix+ctl:1,77,KB3,"[758, 759, 499, 1525, 1649, 1654, 1657, 1791, ...",{'gas:1--heating-circuit:1'}
2,gas:1--heat-exchanger:1--heating-circuit_mix+c...,46,KB4,"[1925, 1931, 1781, 1509, 1526, 1527, 1531, 153...",{'gas:1--heat-exchanger:1--heating-circuit:1--...
3,local-heating-station_noHX+ctlprim+ctlsec:1--h...,43,KB5,"[509, 790, 1005, 1628, 1629, 1630, 1631, 672, ...",{'local-heating-station:1--heating-circuit:1--...
4,gas:1--heat-exchanger:1--heating-circuit_mix+c...,40,KB6,"[1524, 1795, 1560, 1638, 1639, 604, 617, 623, ...",{'gas:1--heat-exchanger:1--heating-circuit:1'}


## Detection of invalid LayoutIDs

In [27]:
# Find invalid LayoutIDs: every '--'-separated part must look like 'system_type:index'.
def find_invalid_parts(layout_column):
    """Print every LayoutID part that is not of the form ``system_type:index``.

    Parameters
    ----------
    layout_column : iterable
        LayoutID values (e.g. a DataFrame column). Each string is split on
        '--' and every resulting part is checked.

    Returns
    -------
    None
        Findings are printed as ``Row <position> - Invalid part: <part>``.
        ``<position>`` is the 0-based position within ``layout_column``
        (from ``enumerate``), which is not necessarily the index label.
    """
    for i, layout in enumerate(layout_column):
        # Missing values (None/NaN) are not strings and have no parts to
        # check; skip them instead of crashing on ``.split``.
        if not isinstance(layout, str):
            continue
        for part in layout.split('--'):
            # A well-formed part contains exactly one ':' separator.
            if len(part.split(':')) != 2:
                print(f"Row {i} - Invalid part: {part}")
# Scan the LayoutID column; malformed parts are printed below the cell.
find_invalid_parts(layout_column=df['LayoutID'])

Row 295 - Invalid part: heat-exchanger:1-heating-circuit:1


In [28]:
# Tokenise a layout string into its component names and indices.
def extract_words(layout_str):
    """Return all tokens of ``layout_str``, split on '--' and then on ':'.

    Tokens keep their original order,
    e.g. 'gas:1--chp:2' -> ['gas', '1', 'chp', '2'].
    """
    return [
        token
        for segment in layout_str.split('--')
        for token in segment.split(':')
    ]

# Tokenise every non-missing layout and flatten the per-row lists into one Series.
all_words = df['new_layoutID'].dropna().apply(extract_words).explode()

# Distinct tokens, sorted.
unique_words = sorted(all_words.unique().tolist())

# Drop pure numbers and every token that mentions pipes or valves.
cleaned_words = [
    word for word in unique_words
    if not (
        word.isdigit()
        or 'pipe' in word.lower()
        or 'valve' in word.lower()
    )
]

In [29]:
# Show the tokens that remain after dropping numbers, pipes and valves.
cleaned_words

['1-heating-circuit',
 'buffer-tank',
 'chp',
 'district-heating_HX+ctl',
 'district-heating_HX+unctl',
 'district-heating_noHX+ctl',
 'district-heating_noHX+unctl',
 'gas',
 'global-separation-circuit',
 'heat-exchanger',
 'heat-pump',
 'heating-circuit_mix+ctl',
 'heating-circuit_mix+unctl',
 'heating-circuit_unmix+ctl',
 'heating-circuit_unmix+unctl',
 'hydraulic-separator',
 'local-heating-station_HX+ctlprim+ctlsec',
 'local-heating-station_noHX+ctlprim+ctlsec',
 'solar-thermal',
 'warm-water_external',
 'warm-water_internal']

## Matching acronyms

In [6]:
# Sensor labels to place for each system type. The '(n)' placeholder in a
# label is replaced by generate_sensor_placement with the system's index
# taken from the LayoutID. Labels without '(n)' are used unchanged.
sensor_rules_indexed = {
    'buffer-tank': ['t_buffer(n)_tank_1'],
    'chp': ['t_chp(n)_flow', 't_chp(n)_return'],
    'district-heating': ['t_dh_primflow', 't_dh_primreturn'],
    # Flow/return pair like the other systems (not the flow label twice).
    'gas': ['t_b(n)_flow', 't_b(n)_return'],
    'heating-circuit': ['t_hc(n)_flow', 't_hc(n)_return'],
    'heat-pump': ['t_hp(n)_primflow', 't_hp(n)_primreturn', 't_hp(n)_secflow', 't_hp(n)_secreturn'],
    'local-heating-station': ['t_lh(n)_subsflow', 't_lh(n)_subsreturn'],
    'solar-thermal': ['t_st(n)_flow', 't_st(n)_return'],
    'warm-water': ['t_hw(n)_return', 't_hw(n)_tank', 't_hw(n)_pwh', 't_hw(n)_circ']
}

## Generate sensor placement

In [7]:
def generate_sensor_placement(layout, rules=None):
    """Build the sensor placement string for one LayoutID.

    A LayoutID is a '--'-separated sequence of ``system_type:index`` parts,
    e.g. ``'gas:1--heating-circuit:2'``. For every part whose system type has
    an entry in ``rules``, each sensor label is emitted with its '(n)'
    placeholder replaced by ``(hli=<index>)``. Parts with an unknown system
    type are valid but contribute no sensors.

    Parameters
    ----------
    layout : str or missing value
        The LayoutID to translate.
    rules : dict, optional
        Mapping of system type to a list of sensor labels. Defaults to the
        module-level ``sensor_rules_indexed``.

    Returns
    -------
    str
        * ``''`` if ``layout`` is missing (``pd.isna``),
        * ``"wrong LayoutID format"`` if any part does not contain exactly one
          ':' or its index is not an integer,
        * otherwise the comma-joined entries ``(<k>: <label>)``, where ``<k>``
          is a running number starting at 1 across the whole layout.

    NOTE(review): a value wrapped like ``"{'gas:1--warm-water:1'}"`` fails the
    integer check on its last part and yields the error string. Confirm that
    the column passed in holds plain LayoutID strings.
    """
    if pd.isna(layout):
        return ''
    if rules is None:
        rules = sensor_rules_indexed

    sensor_list = []
    idx = 1  # running sensor number, shared by all parts of this layout

    for part in layout.split('--'):
        # Each part must be exactly "system_type:index".
        if part.count(':') != 1:
            return "wrong LayoutID format"
        system_type, system_index = part.split(':')

        # The index must be an integer.
        try:
            i = int(system_index)
        except ValueError:
            return "wrong LayoutID format"

        # System types without a rule (e.g. pipes) produce no sensors.
        for label in rules.get(system_type, ()):
            label_with_hli = label.replace('(n)', f'(hli={i})')
            sensor_list.append(f"({idx}: {label_with_hli})")
            idx += 1

    return ','.join(sensor_list)
    
# Build the placement string for every LayoutID, then count its "(k:" entries
# to get the number of sensors per layout.
df['sensor_placement'] = df['LayoutID'].map(generate_sensor_placement)
df['sensor_count'] = df['sensor_placement'].str.count(r'\(\d+:')

In [8]:
# Inspect positional rows 156-160 (the slice end is exclusive).
df.iloc[156:161]

Unnamed: 0,LayoutID,Occurrence,buildingIDs,KBn,sensor_placement,sensor_count
156,district-heating:1--down-right-pipes:1--heat-e...,1,[1879],157,"(1: t_dh_primflow),(2: t_dh_primreturn),(3: t_...",10
157,district-heating:1--buffer-tank:1--heating-cir...,1,[711],158,"(1: t_dh_primflow),(2: t_dh_primreturn),(3: t_...",9
158,district-heating:1--buffer-tank:1--heating-cir...,1,[335],159,"(1: t_dh_primflow),(2: t_dh_primreturn),(3: t_...",7
159,district-heating:1--heat-exchanger:1-heating-c...,1,[1801],160,wrong LayoutID format,0
160,district-heating:1--heating-circuit:1--heating...,1,[629],161,"(1: t_dh_primflow),(2: t_dh_primreturn),(3: t_...",20


## Exports

In [9]:
# Export the table, including the sensor columns, without the index.
# The constant condition is a manual switch: set it to False to skip the export.
if True:
    df.to_csv(path_or_buf='sensor_placement_for_BKn.csv', index=False)

In [10]:
# Show the full LayoutID string at positional row 158.
df['LayoutID'].iloc[158]

'district-heating:1--buffer-tank:1--heating-circuit:1--heating-circuit:2'