# Analysis of Layout ID

## Relevant documents

- (Python Client Repo)[https://github.com/Green-Fusion/energy-management-backend/tree/main/python_client]
- (Klemmenbelegung)[https://docs.google.com/spreadsheets/d/1nkdkx2rI6nVKgoKBgkCUtfwEwuv8kptrRUXcXtfv0NM/edit?gid=247168398#gid=247168398]
- (Hypothesis for Klemmenbelegung)[https://docs.google.com/spreadsheets/d/1TSTxMCgEvuoayzOfx1MUqlV0tiqsVTBRN8aldlnFXxA/edit?gid=0#gid=0]

# import data

In [1]:
#imports
import pandas as pd


In [2]:
#build df from database exctracted data
df = pd.read_csv('data_from_db.csv')
#inspest column
df.columns

Index(['building_id', 'customerID', 'address', 'postal_code', 'city',
       'atvise_display_name', 'base', 'heatingType', 'heatingSurface',
       'nickname', 'gui_name', 'coordinates', 'CustomerAlias', 'gfid',
       'LayoutID'],
      dtype='object')

In [3]:
#filter out uncesseray infos
df=df[['building_id', 'gfid','LayoutID']]

In [4]:
df.head()

Unnamed: 0,building_id,gfid,LayoutID
0,571,GFP-7066,gas:1--gas:2--global-separation-circuit:1--hea...
1,574,GFP-7141,gas:1--gas:2--global-separation-circuit:1--hea...
2,575,GFP-7166,gas:1--gas:2--global-separation-circuit:1--hea...
3,738,GFP-12968,gas:1--gas:2--chp:1--buffer-tank:1--global-sep...
4,739,GFP-12991,gas:1--gas:2--chp:1--buffer-tank:1--global-sep...


In [5]:
df.count()

building_id    1699
gfid           1135
LayoutID        439
dtype: int64

In [6]:
type(df.LayoutID.count())

numpy.int64

# Group by LayoutID

In [7]:
unique_count = df['LayoutID'].nunique()
print(f"Number of unique LayoutID values: {unique_count}")

Number of unique LayoutID values: 153


In [8]:
# Group by LayoutID
grouped = df.groupby('LayoutID').agg(
    Occurrence=('LayoutID', 'count'),
    buildingIDs=('building_id', list)
).reset_index()

# Add a simple index column starting from 0
grouped = grouped.sort_values(by='Occurrence', ascending=False).reset_index(drop=True)


In [9]:
# Show result
grouped.head(20)

Unnamed: 0,LayoutID,Occurrence,buildingIDs
0,gas:1--heating-circuit:1--warm-water:1,38,"[463, 508, 512, 1461, 1467, 768, 769, 770, 778..."
1,district-heating:1--heating-circuit:1--warm-wa...,36,"[509, 790, 1005, 1314, 1315, 1316, 1317, 1318,..."
2,gas:1--heating-circuit:1,24,"[758, 759, 599, 1486, 1345, 1354, 439, 1373, 1..."
3,district-heating:1--heating-circuit:1,16,"[712, 762, 766, 450, 327, 389, 391, 353, 517, ..."
4,gas:1--heat-exchanger:1--heating-circuit:1--wa...,13,"[666, 799, 555, 541, 542, 579, 330, 331, 1454,..."
5,district-heating:1--heating-circuit:1--heating...,11,"[460, 461, 462, 556, 557, 1455, 1456, 600, 148..."
6,gas:1--heating-circuit:1--heating-circuit:2--w...,10,"[433, 547, 1490, 1450, 464, 1552, 1570, 1578, ..."
7,district-heating:1--heating-circuit:1--heating...,9,"[459, 444, 524, 1429, 576, 1472, 1589, 2236, 2..."
8,gas:1--warm-water:1--heat-exchanger:1--heating...,9,"[1566, 480, 481, 482, 483, 487, 488, 489, 1452]"
9,gas:1--heat-exchanger:1--heating-circuit:1,9,"[1560, 351, 337, 1377, 707, 514, 515, 617, 458]"


In [12]:
# Sum the number of occurrences for top 10 and top 20
top_10_sum = grouped.head(10)['Occurrence'].sum()
top_20_sum = grouped.head(20)['Occurrence'].sum()
print(f"total LayoutID: {df.LayoutID.count()}")
print(f"top 10 LayoutIDs count: {top_10_sum}")
print(f"top 20 LayoutIDs count: {top_20_sum}")

print(f"Coverage top 10 LayoutIDs: {top_10_sum/df.LayoutID.count(): .3f}")
print(f"Coverage top 20 LayoutIDs: {top_20_sum/df.LayoutID.count(): .3f}")

total LayoutID: 439
top 10 LayoutIDs count: 175
top 20 LayoutIDs count: 236
Coverage top 10 LayoutIDs:  0.399
Coverage top 20 LayoutIDs:  0.538


# exports

In [11]:
if True:
    df.to_csv('Building_ID_VS_LayoutID.csv', index=False)
    grouped.to_csv('LayoutID_occurence.csv', index=False)