# Sensor placement

## imports

In [1]:
#imports
import pandas as pd
import re

In [2]:
# Build df from the data extracted from the database
df = pd.read_csv('LayoutID_occurence.csv')
# Inspect the columns and the first rows
df.head()

Unnamed: 0,LayoutID,Occurrence,buildingIDs,KBn
0,gas:1--heating-circuit:1--warm-water:1,119,"[768, 769, 770, 778, 787, 1924, 1926, 1927, 46...",1
1,district-heating:1--heating-circuit:1--warm-wa...,82,"[774, 1909, 1910, 1911, 1912, 1913, 1314, 1315...",2
2,gas:1--heating-circuit:1,71,"[758, 759, 499, 1525, 1649, 1654, 1657, 1791, ...",3
3,gas:1--heat-exchanger:1--heating-circuit:1--wa...,46,"[1925, 1931, 1781, 1509, 1526, 1527, 1531, 153...",4
4,local-heating-station:1--heating-circuit:1--wa...,41,"[509, 790, 1005, 1628, 1629, 1630, 1631, 672, ...",5


## Extract words to build `sensor_rules_indexed`

In [3]:
# Tokenizer for layout strings such as 'gas:1--heating-circuit:1'
def extract_words(layout_str):
    """Return all tokens of *layout_str* as a flat list.

    The string is first cut at every '--' and each resulting piece is then
    cut at every ':'; the tokens are returned in their original order,
    e.g. 'gas:1--heating-circuit:1' -> ['gas', '1', 'heating-circuit', '1'].
    """
    return [
        token
        for segment in layout_str.split('--')
        for token in segment.split(':')
    ]

# Tokenize every non-null LayoutID and flatten the per-row token lists
all_words = df['LayoutID'].dropna().apply(extract_words).explode()

# Distinct tokens, in sorted order
unique_words = sorted(all_words.unique().tolist())

# Drop purely numeric tokens and anything that names a pipe or a valve
cleaned_words = [
    word for word in unique_words
    if not (
        word.isdigit()
        or 'pipe' in word.lower()
        or 'valve' in word.lower()
    )
]

In [4]:
# Show the filtered list of component names as the cell output
cleaned_words

['1-heating-circuit',
 'buffer-tank',
 'chp',
 'district-heating',
 'gas',
 'global-separation-circuit',
 'heat-exchanger',
 'heat-pump',
 'heating-circuit',
 'hydraulic-separator',
 'local-heating-station',
 'solar-thermal',
 'warm-water']

## Clean up if need be

In [5]:
# Find invalid LayoutIDs -> also cleans row 158
def find_invalid_parts(layout_column):
    """Print every layout part that is not of the form ``<type>:<index>``.

    Each layout is split on ``'--'``; a part is reported (with its row
    position) when splitting it on ``':'`` does not yield exactly two fields.

    Side effect: for the layout at position 158 the fragment
    ``heat-exchanger:1-heating-circuit:1`` (single dash) is rewritten with
    the ``'--'`` separator, and the corrected string is written back to the
    module-level ``df`` before that row is checked.

    Missing values (NaN/None) are skipped instead of raising
    ``AttributeError``, in line with how the rest of this file treats
    missing LayoutIDs.

    Args:
        layout_column: iterable of layout strings, e.g. ``df['LayoutID']``.

    Returns:
        None. Findings are printed.
    """
    for i, layout in enumerate(layout_column):
        if pd.isna(layout):
            continue  # no layout recorded for this row; nothing to validate
        if i == 158:
            layout = layout.replace('heat-exchanger:1-heating-circuit:1', 'heat-exchanger:1--heating-circuit:1')
            # NOTE(review): uses the enumerate position as the index label,
            # so this assumes df has the default 0..n-1 index -- confirm.
            df.at[i, 'LayoutID'] = layout  # update corrected layout back in DataFrame
        for part in layout.split('--'):
            if len(part.split(':')) != 2:
                print(f"Row {i} - Invalid part: {part}")
# Run the check on the LayoutID column; any invalid parts are printed
find_invalid_parts(df['LayoutID'])

## Matching acronyms

In [6]:
# Maps a LayoutID component type to the sensor label templates placed for it.
# '(n)' in a template is the placeholder that generate_sensor_placement
# replaces with the component's index.
# Component types without an entry here (the data also contains e.g.
# 'heat-exchanger', 'hydraulic-separator', 'global-separation-circuit')
# produce no sensors -- presumably intentional; confirm.
sensor_rules_indexed = {
    'buffer-tank': ['t_buffer(n)_tank_1'],
    'chp': ['t_chp(n)_flow', 't_chp(n)_return'],
    'district-heating': ['t_dh_primflow', 't_dh_primreturn'],
    # Was ['t_b(n)_flow', 't_b(n)_flow']: the duplicated flow sensor is
    # replaced by the return sensor, matching the flow/return pairs below.
    'gas': ['t_b(n)_flow', 't_b(n)_return'],
    'heating-circuit': ['t_hc(n)_flow', 't_hc(n)_return'],
    'heat-pump': ['t_hp(n)_primflow', 't_hp(n)_primreturn', 't_hp(n)_secflow', 't_hp(n)_secreturn'],
    'local-heating-station': ['t_lh(n)_subsflow', 't_lh(n)_subsreturn'],
    'solar-thermal': ['t_st(n)_flow', 't_st(n)_return'],
    'warm-water': ['t_hw(n)_return', 't_hw(n)_tank', 't_hw(n)_pwh', 't_hw(n)_circ']
}

## generate sensor placement

In [7]:
def generate_sensor_placement(layout):
    """Build the numbered sensor list for one layout string.

    The layout is cut at '--' into parts of the form '<type>:<index>'.
    Parts that do not consist of exactly two ':'-separated fields, or whose
    index is not an integer, are ignored, as are types that have no entry
    in ``sensor_rules_indexed``.

    For every remaining part, each label template of its type is emitted as
    '(<k>: <label>)', where '(n)' in the template is replaced by
    '(hli=<index>)' and <k> is a running number starting at 1 across the
    whole layout.

    Returns:
        The entries joined by ',' -- or '' when *layout* is missing or
        yields no sensors.
    """
    if pd.isna(layout):
        return ''

    placements = []
    for segment in layout.split('--'):
        fields = segment.split(':')
        if len(fields) != 2:
            # covers both "no colon at all" and "more than one colon"
            continue
        component, raw_index = fields

        try:
            hli = int(raw_index)
        except ValueError:
            continue  # index is not numeric

        for template in sensor_rules_indexed.get(component, ()):
            resolved = re.sub(r'\(n\)', f'(hli={hli})', template)
            # running number = entries collected so far + 1
            placements.append(f"({len(placements) + 1}: {resolved})")

    return ','.join(placements)
    
# Compute the placement string for every row, then derive the number of
# sensors by counting the '(<k>:' openers in it
placements = df['LayoutID'].apply(generate_sensor_placement)
df['sensor_placement'] = placements
df['sensor_count'] = placements.str.count(r'\(\d+:')

In [8]:
# Preview the first rows, now including sensor_placement and sensor_count
df.head()

Unnamed: 0,LayoutID,Occurrence,buildingIDs,KBn,sensor_placement,sensor_count
0,gas:1--heating-circuit:1--warm-water:1,119,"[768, 769, 770, 778, 787, 1924, 1926, 1927, 46...",1,,
1,district-heating:1--heating-circuit:1--warm-wa...,82,"[774, 1909, 1910, 1911, 1912, 1913, 1314, 1315...",2,,
2,gas:1--heating-circuit:1,71,"[758, 759, 499, 1525, 1649, 1654, 1657, 1791, ...",3,,
3,gas:1--heat-exchanger:1--heating-circuit:1--wa...,46,"[1925, 1931, 1781, 1509, 1526, 1527, 1531, 153...",4,,
4,local-heating-station:1--heating-circuit:1--wa...,41,"[509, 790, 1005, 1628, 1629, 1630, 1631, 672, ...",5,,


## Exports

In [9]:
# Export the table, including the sensor columns, to CSV without the index.
# NOTE(review): `if True` presumably serves as a manual on/off switch for
# the export -- confirm.
if True:
    df.to_csv('sensor_placement_for_BKn.csv', index=False)