# Analysis of Layout ID

## Relevant documents

- [Python Client Repo](https://github.com/Green-Fusion/energy-management-backend/tree/main/python_client)
- [Klemmenbelegung](https://docs.google.com/spreadsheets/d/1nkdkx2rI6nVKgoKBgkCUtfwEwuv8kptrRUXcXtfv0NM/edit?gid=247168398#gid=247168398)
- [Hypothesis for Klemmenbelegung](https://docs.google.com/spreadsheets/d/1TSTxMCgEvuoayzOfx1MUqlV0tiqsVTBRN8aldlnFXxA/edit?gid=0#gid=0)

# Import data

In [1]:
#imports
import pandas as pd


In [4]:
# Build the DataFrame from the data extracted from the database.
df = pd.read_csv(filepath_or_buffer='data_from_db_2025-07-02.csv')
# Inspect the available columns.
df.columns

Index(['building_id', 'customer_name', 'address', 'postal_code', 'city',
       'heatingType', 'heatingSurface', 'coordinates', 'gfid', 'LayoutID',
       'device_type', 'device_id'],
      dtype='object')

In [5]:
# Drop the columns that are not needed for the steps below.
columns_to_keep = ['building_id', 'device_type', 'customer_name', 'address', 'LayoutID']
df = df.loc[:, columns_to_keep]

In [6]:
# Preview the first five rows of the reduced frame.
df.head(n=5)

Unnamed: 0,building_id,device_type,customer_name,address,LayoutID
0,756,RUT956,HwS,Wolzogenstr.28,gas:1--gas:2--heating-circuit:1--heating-circu...
1,757,RUT956,HwS,Hagelberger Str. 26,gas:1--gas:2--heating-circuit:1
2,758,RUT956,HwS,Hochstr. 8,gas:1--heating-circuit:1
3,759,RUT956,HwS,Planufer 82a,gas:1--heating-circuit:1
4,761,RUT956,HwS,Gabainstr. 13,gas:1--gas:2--global-separation-circuit:1--hea...


In [7]:
# Number of non-missing entries per column.
df.notna().sum()

building_id      1893
device_type      1051
customer_name    1893
address          1893
LayoutID         1096
dtype: int64

# Group by LayoutID

In [None]:
# Consider only the counts per LayoutID

In [9]:
# Count the distinct LayoutID values; missing entries are not counted.
unique_count = df['LayoutID'].nunique(dropna=True)
print(f"Number of unique LayoutID values: {unique_count}")

Number of unique LayoutID values: 233


In [10]:
# One row per LayoutID: how many rows carry it and which building IDs they
# belong to, ordered from most to least frequent with a fresh 0-based index.
grouped = (
    df.groupby('LayoutID')
    .agg(
        Occurrence=pd.NamedAgg(column='LayoutID', aggfunc='count'),
        buildingIDs=pd.NamedAgg(column='building_id', aggfunc=list),
    )
    .reset_index()
    .sort_values(by='Occurrence', ascending=False)
    .reset_index(drop=True)
)



In [11]:
# Display the first 20 rows of the grouped table.
grouped.head(n=20)

Unnamed: 0,LayoutID,Occurrence,buildingIDs
0,gas:1--heating-circuit:1--warm-water:1,119,"[768, 769, 770, 778, 787, 1924, 1926, 1927, 46..."
1,district-heating:1--heating-circuit:1--warm-wa...,82,"[774, 1909, 1910, 1911, 1912, 1913, 1314, 1315..."
2,gas:1--heating-circuit:1,71,"[758, 759, 499, 1525, 1649, 1654, 1657, 1791, ..."
3,gas:1--heat-exchanger:1--heating-circuit:1--wa...,46,"[1925, 1931, 1781, 1509, 1526, 1527, 1531, 153..."
4,local-heating-station:1--heating-circuit:1--wa...,41,"[509, 790, 1005, 1628, 1629, 1630, 1631, 672, ..."
5,gas:1--heat-exchanger:1--heating-circuit:1,39,"[1524, 1795, 1560, 1638, 1639, 604, 617, 623, ..."
6,gas:1--heating-circuit:1--heating-circuit:2--w...,34,"[2219, 1511, 1515, 1516, 1517, 1518, 1537, 179..."
7,gas:1--gas:2--heat-exchanger:1--heating-circui...,25,"[510, 1773, 1777, 1778, 1528, 1529, 1530, 1539..."
8,district-heating:1--heating-circuit:1,22,"[762, 766, 777, 712, 450, 389, 391, 1725, 1726..."
9,district-heating:1--heating-circuit:1--heating...,22,"[600, 1488, 1724, 796, 1772, 1385, 1386, 1389,..."


In [18]:
# Occurrences accumulated by the first 10 and first 20 rows of `grouped`.
occurrences = grouped['Occurrence']
top_10_sum = occurrences.head(10).sum()
top_20_sum = occurrences.head(20).sum()

# Number of rows that carry a LayoutID (count() skips missing values).
layout_total = df['LayoutID'].count()

print(f"total LayoutID: {layout_total}")
print(f"top 10 LayoutIDs count: {top_10_sum}")
print(f"top 20 LayoutIDs count: {top_20_sum}")

# Share of all LayoutID rows covered by the top 10 / top 20, in percent.
# The ' .1f' format spec reserves a leading space for the sign.
coverage_top_10 = 100 * top_10_sum / layout_total
coverage_top_20 = 100 * top_20_sum / layout_total
print(f"Coverage top 10 LayoutIDs: {coverage_top_10: .1f} %")
print(f"Coverage top 20 LayoutIDs: {coverage_top_20: .1f} %")

total LayoutID: 1096
top 10 LayoutIDs count: 501
top 20 LayoutIDs count: 631
Coverage top 10 LayoutIDs:  45.7 %
Coverage top 20 LayoutIDs:  57.6 %


# Exports

In [19]:
# Write both tables to CSV without the index column.
# Set the flag to False to skip the export when re-running the notebook.
EXPORT_TO_CSV = True
if EXPORT_TO_CSV:
    for frame, csv_name in (
        (df, 'Building_ID_VS_LayoutID.csv'),
        (grouped, 'LayoutID_occurence.csv'),
    ):
        frame.to_csv(csv_name, index=False)