In [3]:
# Importy
import pandas as pd
import json
import os
from sentence_transformers import SentenceTransformer, util
from tqdm import tqdm

In [4]:
# Load the multilingual sentence-embedding model used for both bSDD and IFC texts
model = SentenceTransformer(
    model_name_or_path="paraphrase-multilingual-MiniLM-L12-v2",
)

In [9]:
# Load the bSDD classes
with open("../data/bsdd_cleared.json", "r", encoding="utf-8") as f:
    raw_data = json.load(f)

# Flatten the per-dictionary structure into one record per class.
# Each record is a shallow copy tagged with its dictionary name, so the
# dicts inside raw_data are left untouched (re-running the cell, or
# inspecting raw_data later, still shows the data exactly as loaded).
flattened_classes = [
    {**cls, "dictionary_name": entry["dictionary_name"]}
    for entry in raw_data
    for cls in entry["classes"]
]

bsdd_df = pd.DataFrame(flattened_classes)

# Preview
bsdd_df.head()

Unnamed: 0,class_name,class_code,class_description,dictionary_name
0,Busbar terminal,EC000001,"Terminal for the array of conductors, which ar...",ETIM
1,Residual current circuit breaker (RCCB),EC000003,A safety device that disconnects a circuit whe...,ETIM
2,Cable end sleeve,EC000005,A protective sleeve used to terminate and secu...,ETIM
3,Cover plate for installation units,EC000006,A protective plate that covers electrical inst...,ETIM
4,Cover frame for domestic switching devices,EC000007,A decorative or protective frame surrounding s...,ETIM


In [12]:
# Build the text to embed for each class: code, then name, then description,
# joined with " — "; missing names/descriptions contribute an empty string.
class_text = bsdd_df["class_code"].astype(str)
for text_col in ("class_name", "class_description"):
    class_text = class_text + " — " + bsdd_df[text_col].fillna("")
bsdd_df["full_text"] = class_text

In [13]:
# Embeddings for the bSDD classes (one tensor holding a vector per class text)
bsdd_embeddings = model.encode(
    list(bsdd_df["full_text"]),
    convert_to_tensor=True,
)

In [21]:
# Load the IFC sample elements (ifc_sample.csv)
ifc_df = pd.read_csv(
    filepath_or_buffer="../data/ifc_sample.csv",
)

# Preview
ifc_df.head()

Unnamed: 0,GlobalId,Name,ObjectType,Description,Text,Prawdziwy_Kod_CCI
0,1X2Y3Z,Ściana zewnętrzna,IfcWall,"Ściana zewnętrzna, betonowa, grubość 25cm","Ściana zewnętrzna, IfcWall, Ściana zewnętrzna,...",21.11.10
1,4A5B6C,Drzwi jednoskrzydłowe,IfcDoor,"Drzwi z drewna, malowane, w otworze 90cm","Drzwi jednoskrzydłowe, IfcDoor, Drzwi z drewna...",23.32.10
2,7D8E9F,Okno PCV,IfcWindow,"Okno dwuszybowe PCV, rozwierno-uchylne, szerok...","Okno PCV, IfcWindow, Okno dwuszybowe PCV, rozw...",23.21.10
3,0A1B2C,Strop żelbetowy,IfcSlab,"Strop monolityczny z betonu B25, grubość 20cm","Strop żelbetowy, IfcSlab, Strop monolityczny z...",22.31.10
4,3E4F5G,Słup stalowy,IfcColumn,"Słup stalowy HEA, wysokość 3m","Słup stalowy, IfcColumn, Słup stalowy HEA, wys...",21.41.10


In [28]:
# IFC embeddings
def _is_missing(val):
    """Return True when ``val`` is None or a NaN/NA/NaT scalar."""
    if val is None:
        return True
    try:
        return bool(pd.isna(val))
    except (TypeError, ValueError):
        # pd.isna returns an array for list-like values, which has no single
        # truth value; such a value is not a missing scalar.
        return False


def concat_ifc_info(row):
    """Join the available IFC fields of one element into a single string.

    Parameters
    ----------
    row : mapping-like
        Anything exposing ``.get(key)`` (a DataFrame row or a plain dict).
        Columns that are absent are simply skipped.

    Returns
    -------
    str
        The collected parts joined with ``', '``; an empty string when no
        field carries a value.

    Notes
    -----
    Missing values (None, NaN, pd.NA) are skipped for every field. A plain
    truthiness / ``in [...]`` check does not catch NaN (NaN is truthy and
    never equal to itself), which would put the literal text "nan" into the
    string that gets embedded.
    """
    parts = []
    # Basic text fields: kept only when present and non-empty.
    # NOTE(review): "Prawdziwy_Kod_CCI" looks like the ground-truth label
    # ("true CCI code"); embedding it may leak the expected answer into the
    # query text — confirm this is intended.
    for col in ["Name", "ObjectType", "Description", "Prawdziwy_Kod_CCI"]:
        val = row.get(col)
        if not _is_missing(val) and val:
            parts.append(str(val))
    # BaseQuantities: a value of 0 is legitimate and is kept.
    for col in ["Dlugosc_m", "Pole_powierzchni_m2", "Objetosc_m3"]:
        val = row.get(col)
        if not _is_missing(val) and val != '':
            parts.append(f"{col}: {val}")
    # P-Set properties: kept only when present and non-empty.
    for col in ["FireRating", "Material"]:
        val = row.get(col)
        if not _is_missing(val) and val:
            parts.append(f"{col}: {val}")
    # Merge everything into one string
    return ', '.join(parts)

# One merged description string per IFC element (row-wise application)
ifc_df["full_text"] = ifc_df.apply(func=concat_ifc_info, axis=1)

# Encode the merged descriptions into a single tensor of embeddings
ifc_embeddings = model.encode(
    list(ifc_df["full_text"]),
    convert_to_tensor=True,
)

In [29]:
# Classification: for every IFC element pick the bSDD class whose embedding
# has the highest cosine similarity.
results = []

for pos, ifc_emb in enumerate(tqdm(ifc_embeddings)):
    # cos_sim returns a 1 x n_classes matrix for a single query vector
    similarities = util.cos_sim(ifc_emb, bsdd_embeddings)[0]
    best_pos = similarities.argmax().item()
    best_score = similarities[best_pos].item()

    # `pos` and `best_pos` are positions in the embedding tensors, so the
    # frames are read positionally (.iat). Label-based .loc only matches
    # when the index is the default 0..n-1 range and would pick the wrong
    # row (or raise) after any filtering/sorting of the frames.
    results.append({
        "GlobalId": ifc_df["GlobalId"].iat[pos],
        "Opis_IFC": ifc_df["Text"].iat[pos],
        "Kod_bSDD": bsdd_df["class_code"].iat[best_pos],
        "Nazwa_klasy_bSDD": bsdd_df["class_name"].iat[best_pos],
        "Słownik": bsdd_df["dictionary_name"].iat[best_pos],
        "Podobieństwo": round(best_score, 4)
    })

results_df = pd.DataFrame(results)

100%|█████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 908.78it/s]


In [30]:
# Results: the ten matches with the highest similarity score
results_df.sort_values(by="Podobieństwo", ascending=False).head(n=10)

Unnamed: 0,GlobalId,Opis_IFC,Kod_bSDD,Nazwa_klasy_bSDD,Słownik,Podobieństwo
0,1X2Y3Z,"Ściana zewnętrzna, IfcWall, Ściana zewnętrzna,...",Ac_15_50_17,Concrete cover surveying,Uniclass,0.6279
9,8T9U0V,"Drzwi przeciwpożarowe, IfcDoor, Drzwi EI60 sta...",L-NBB,Leaf of large door,CCI,0.6241
3,0A1B2C,"Strop żelbetowy, IfcSlab, Strop monolityczny z...",Ac_15_50_17,Concrete cover surveying,Uniclass,0.6196
1,4A5B6C,"Drzwi jednoskrzydłowe, IfcDoor, Drzwi z drewna...",L-NBB,Leaf of large door,CCI,0.619
4,3E4F5G,"Słup stalowy, IfcColumn, Słup stalowy HEA, wys...",Ac_15_50_73,Roof surveying,Uniclass,0.609
8,5Q6R7S,"Schody prefabrykowane, IfcStair, Schody żelbet...",Ac_30_60_42,Ironing,Uniclass,0.6003
7,2N3O4P,"Balustrada stalowa, IfcRailing, Balustrada z w...",Ac_15_45_35,Historic building surveying,Uniclass,0.5913
5,6H7I8J,"Płyta fundamentowa, IfcSlab, Płyta żelbetowa, ...",Ac_15_50_52,Metalwork surveying,Uniclass,0.5895
6,9K0L1M,"Dach dwuspadowy, IfcRoof, Konstrukcja drewnian...",Ac_25_90_12,Ceremonial worship activities,Uniclass,0.5372
2,7D8E9F,"Okno PCV, IfcWindow, Okno dwuszybowe PCV, rozw...",Ac_15_50_25,"Doors, windows and glazing surveying",Uniclass,0.4143


In [25]:
# Export the results
output_path = "../results/full_bsdd.csv"
# to_csv does not create parent directories and raises OSError when the
# target folder is missing, so make sure it exists first.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
results_df.to_csv(output_path, index=False)