# Sensor placement

## imports

In [1]:
#imports
import pandas as pd
import re
import os

In [2]:
#build df from database exctracted data
filename='LayoutID_occurence_new_std_2025-07-15.csv'
extraced_date=filename[-14:-4]
filepath = os.path.join('OUTs', filename)
df = pd.read_csv(filepath)
#inspest column
df.head()

Unnamed: 0,LayoutID_Tops,Occurrence,most_common_count,new_sensor_count,Variations_KBn,buildingIDs,Variations,sensor_count_set
0,gas:1--heating-circuit:1--warm-water:1,187,8,{8},"KB1, KB4, KB15","[768, 769, 770, 778, 787, 304, 1924, 1925, 192...",{'gas:1--global-separation-circuit:1--heating-...,"{0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 22}"
1,gas:1--heating-circuit:1,132,4,{4},"KB3, KB6, KB14","[758, 759, 499, 507, 1524, 1525, 1649, 1654, 1...","{'gas:1--heating-circuit:1', 'gas:1--heat-exch...","{0, 2, 3, 4, 5, 6, 7, 8}"
2,district-heating:1--heating-circuit:1--warm-wa...,84,8,{8},KB2,"[774, 1909, 1910, 1911, 1912, 1913, 1314, 1315...",{'district-heating:1--heating-circuit:1--warm-...,"{0, 6, 7, 8, 9}"
3,gas:1--heating-circuit:1--heating-circuit:2--w...,51,10,{10},"KB7, KB23, KB37, KB197","[1775, 1779, 2219, 1511, 1515, 1516, 1517, 151...",{'gas:1--heat-exchanger:1--heating-circuit:1--...,"{0, 8, 9, 10, 11, 12, 14}"
4,local-heating-station:1--heating-circuit:1--wa...,45,8,{8},KB5,"[509, 790, 1005, 1628, 1629, 1630, 1631, 672, ...",{'local-heating-station:1--heating-circuit:1--...,"{0, 5, 6, 7, 8}"


## detection of Invalid LayoutIDs

In [3]:
# gind invalid LayoutIDs -> clean row 158
def find_invalid_parts(layout_column):
    for i, layout in enumerate(layout_column):
        for part in layout.split('--'):
            if len(part.split(':')) != 2:
                print(f"Row {i} - Invalid part: {part}")
find_invalid_parts(df['LayoutID_Tops'])

In [4]:
# Sample: df['LayoutID_Tops'] = your actual column
def extract_words(layout_str):
    # Split by '--', then split each part by ':'
    parts = layout_str.split('--')
    words = []
    for part in parts:
        words.extend(part.split(':'))
    return words

# Apply and flatten the list
all_words = df['LayoutID_Tops'].dropna().apply(extract_words).explode()

# Get unique values
unique_words = all_words.unique().tolist()

# Optional: sort alphabetically or numerically
unique_words.sort()

#get rid of pipes and numbers
cleaned_words = [
    word for word in unique_words
    if not word.isdigit() and
    'pipe' not in word.lower() and
    'valve' not in word.lower()
]

In [5]:
cleaned_words

['buffer-tank',
 'chp',
 'district-heating',
 'gas',
 'heat-pump',
 'heating-circuit',
 'local-heating-station',
 'solar-thermal',
 'warm-water',
 'warm-water_external']

##  Matching acronym

In [6]:
sensor_rules_indexed = {
    'buffer-tank': ['t_buffer(n)_tank_1'],
    'chp': ['t_chp(n)_flow', 't_chp(n)_return'],
    'district-heating': ['t_dh_primflow', 't_dh_primreturn'],
    'gas': ['t_b(n)_flow', 't_b(n)_flow'],
    'heating-circuit': ['t_hc(n)_flow', 't_hc(n)_return'],
    'heat-pump': ['t_hp(n)_primflow', 't_hp(n)_primreturn', 't_hp(n)_secflow', 't_hp(n)_secreturn'],
    'local-heating-station': ['t_lh(n)_subsflow', 't_lh(n)_subsreturn'],
    'solar-thermal': ['t_st(n)_flow', 't_st(n)_return'],
    'warm-water': ['t_hw(n)_return', 't_hw(n)_tank', 't_hw(n)_pwh', 't_hw(n)_circ'],
    'warm-water_external': ['t_hw(n)_return', 't_hw(n)_tank', 't_hw(n)_pwh', 't_hw(n)_circ']
}

## generate sensor placement

In [7]:
def generate_sensor_placement(layout):
    if pd.isna(layout):
        return ''
        
    parts = layout.split('--')
    sensor_list = []
    idx = 1  # global sensor index in parentheses

    for part in parts:
        # Validate part has exactly one colon (system_type:index)
        if part.count(':') != 1:
            return "wrong LayoutID format"

        try:
            system_type, system_index = part.split(':')
        except ValueError:
            return "wrong LayoutID format"

        # Check if index is an integer
        try:
            i = int(system_index)
        except ValueError:
            return "wrong Layout ID format"

        if system_type in sensor_rules_indexed:
            sensors = sensor_rules_indexed[system_type]
            for label in sensors:
                label_with_hli = re.sub(r'\(n\)', f'(hli={i})', label)
                sensor_list.append(f"({idx}: {label_with_hli})")
                idx += 1

    return ','.join(sensor_list)
    
df['sensor_placement'] = df['LayoutID_Tops'].apply(generate_sensor_placement)
#df['sensor_count'] = df['sensor_placement'].str.count(r'\(\d+:')  # counts sensor entries

In [8]:
df.iloc[156:161]

Unnamed: 0,LayoutID_Tops,Occurrence,most_common_count,new_sensor_count,Variations_KBn,buildingIDs,Variations,sensor_count_set,sensor_placement
156,gas:1--gas:2--gas:3--heating-circuit:1--heatin...,1,14,{18},KB130,[1422],{'gas:1--gas:2--gas:3--global-separation-circu...,{14},"(1: t_b(hli=1)_flow),(2: t_b(hli=1)_flow),(3: ..."
157,gas:1--gas:2--heating-circuit:1--gas:3--warm-w...,1,11,{12},KB170,[673],{'gas:1--gas:2--heat-exchanger:1--heating-circ...,{11},"(1: t_b(hli=1)_flow),(2: t_b(hli=1)_flow),(3: ..."
158,gas:1--gas:2--heating-circuit:1--heating-circu...,1,16,{16},KB120,[1880],{'gas:1--gas:2--global-separation-circuit:1--h...,{16},"(1: t_b(hli=1)_flow),(2: t_b(hli=1)_flow),(3: ..."
159,gas:1--gas:2--heating-circuit:1--heating-circu...,1,16,{18},KB121,[571],{'gas:1--gas:2--global-separation-circuit:1--h...,{16},"(1: t_b(hli=1)_flow),(2: t_b(hli=1)_flow),(3: ..."
160,gas:1--gas:2--heating-circuit:1--heating-circu...,1,19,{20},KB188,[496],{'gas:1--gas:2--heating-circuit:1--heating-cir...,{19},"(1: t_b(hli=1)_flow),(2: t_b(hli=1)_flow),(3: ..."


## Exports

In [9]:
#export
if True:
    filename='sensor_placement_for_BKn'+extraced_date+'.csv'
    filepath = os.path.join('OUTs', filename)
    df.to_csv(filepath, index=False)