In [None]:
# Imports + config
import json
from pathlib import Path

import pandas as pd
from sentence_transformers import SentenceTransformer, util
from tqdm import tqdm

In [None]:
# Load the sentence-embedding model.
# The same `model` instance is used further down to embed both the bSDD class
# texts and the IFC object texts, so the two sets of vectors are comparable.
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

In [9]:
# Load the bSDD dictionaries and flatten them to one row per class.
with open("../data/bsdd_cleared.json", "r", encoding="utf-8") as f:
    raw_data = json.load(f)

# Every entry carries a "dictionary_name" and a list of "classes"; each class is
# tagged with the dictionary it came from. A new dict is built per class so the
# parsed `raw_data` is left unmodified and the cell can be re-run safely.
flattened_classes = [
    {**cls, "dictionary_name": entry["dictionary_name"]}
    for entry in raw_data
    for cls in entry["classes"]
]

bsdd_df = pd.DataFrame(flattened_classes)

# Text that gets embedded: "<code> — <name> — <description>".
# fillna("") runs before astype(str) so a missing code contributes an empty
# string (like the other two fields) instead of the literal "nan"/"None".
bsdd_df["full_text"] = (
    bsdd_df["class_code"].fillna("").astype(str) + " — " +
    bsdd_df["class_name"].fillna("") + " — " +
    bsdd_df["class_description"].fillna("")
)

# One embedding per bSDD class, in the same row order as bsdd_df.
bsdd_embeddings = model.encode(bsdd_df["full_text"].tolist(), convert_to_tensor=True)

In [11]:
# Load the IFC objects exported by the parser.
# Later cells read the GlobalId, IfcType, Name and Text columns of this frame.
ifc_df = pd.read_csv("../data/ifc_objects.csv")
ifc_df.head()  # preview the first rows

Unnamed: 0,GlobalId,IfcType,Name,Text
0,3zR0BOEcLADRKln4HYporH,IFCSLAB,floor,"A solid, site-cast concrete floor, providing a..."
1,1AQAupaRP1txwK1AGiN61V,IFCWALL,house - outer wall - house right front,"A solid outer wall, forming the right front si..."
2,3wdauVJT5Fx9drrREiDqA$,IFCWALL,house - outer wall - house right back,"A solid outer wall, forming the right back sid..."
3,0OfZwWc8j9QP5uX8xPTxDH,IFCWALL,house - outer wall - house left,"A solid outer wall, forming the left side of t..."
4,1uS5vfZPn9R8PlAaVd73on,IFCWALL,plumbing wall,A wall designed to house and protect plumbing ...


In [14]:
# Prepare the text to classify for each IFC object, then embed it.
# Missing values are replaced with empty strings before the parts are joined
# as "<IfcType> — <Name> — <Text>".
ifc_type_text, ifc_name_text, ifc_desc_text = (
    ifc_df[column].fillna('') for column in ("IfcType", "Name", "Text")
)
ifc_df["full_text"] = ifc_type_text + " — " + ifc_name_text + " — " + ifc_desc_text

# One embedding per IFC object, in the same row order as ifc_df.
ifc_texts = ifc_df["full_text"].tolist()
ifc_embeddings = model.encode(ifc_texts, convert_to_tensor=True)

In [15]:
# Cosine similarity: match every IFC object to its most similar bSDD class.
results = []

# Rows are paired with embeddings purely by position, so a frame that was
# reloaded or filtered without re-running its embedding cell would silently
# attach matches to the wrong objects. Fail loudly instead.
if len(ifc_embeddings) != len(ifc_df) or len(bsdd_embeddings) != len(bsdd_df):
    raise ValueError(
        "Embeddings are out of sync with their DataFrames; re-run the embedding cells."
    )

if len(ifc_df) > 0:
    # A single batched call returns the full (n_ifc, n_bsdd) score matrix,
    # rather than one cos_sim call per IFC object against all bSDD classes.
    similarity_matrix = util.cos_sim(ifc_embeddings, bsdd_embeddings)
    best_scores, best_positions = similarity_matrix.max(dim=1)

    ifc_records = ifc_df[["GlobalId", "IfcType", "Name", "Text"]].to_dict("records")
    for ifc_row, best_idx, best_score in tqdm(
        zip(ifc_records, best_positions.tolist(), best_scores.tolist()),
        total=len(ifc_records),
    ):
        # best_idx is a row position in bsdd_embeddings, not an index label,
        # hence .iloc rather than .loc.
        bsdd_row = bsdd_df.iloc[best_idx]
        results.append({
            "GlobalId": ifc_row["GlobalId"],
            "IfcType": ifc_row["IfcType"],
            "Name": ifc_row["Name"],
            "Opis_IFC": ifc_row["Text"],
            "Kod_bSDD": bsdd_row["class_code"],
            "Nazwa_klasy_bSDD": bsdd_row["class_name"],
            "Słownik": bsdd_row["dictionary_name"],
            "Podobieństwo": round(best_score, 4)
        })

# Explicit columns keep the schema stable even when there are no IFC objects,
# so downstream sorting on "Podobieństwo" still works on an empty frame.
results_df = pd.DataFrame(
    results,
    columns=[
        "GlobalId", "IfcType", "Name", "Opis_IFC",
        "Kod_bSDD", "Nazwa_klasy_bSDD", "Słownik", "Podobieństwo",
    ],
)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 304.39it/s]


In [16]:
# Results: persist all matches, then preview the 10 most similar ones.
results_path = Path("../results/parsed_results.csv")
# to_csv does not create missing directories, so on a fresh checkout without
# ../results the save would fail; create the folder first.
results_path.parent.mkdir(parents=True, exist_ok=True)
results_df.to_csv(results_path, index=False)
results_df.sort_values("Podobieństwo", ascending=False).head(10)

Unnamed: 0,GlobalId,IfcType,Name,Opis_IFC,Kod_bSDD,Nazwa_klasy_bSDD,Słownik,Podobieństwo
6,12UVOn4wvAJPMUExKdZLb8,IFCSLAB,house - roof - slab right,A roof slab that's got it all covered. IsExter...,Ac_15_50_73,Roof surveying,Uniclass,0.6207
5,0ZTBBPo6f6bxqV2K7Oelrq,IFCSLAB,house - roof - slab left,A roof slab that's got it all covered. IsExter...,Ac_15_50_73,Roof surveying,Uniclass,0.6142
4,1uS5vfZPn9R8PlAaVd73on,IFCWALL,plumbing wall,A wall designed to house and protect plumbing ...,EC000073,Junction box for wall duct,ETIM,0.5653
0,3zR0BOEcLADRKln4HYporH,IFCSLAB,floor,"A solid, site-cast concrete floor, providing a...",L-BA,Groundworks structure,CCI,0.5432
1,1AQAupaRP1txwK1AGiN61V,IFCWALL,house - outer wall - house right front,"A solid outer wall, forming the right front si...",EC000073,Junction box for wall duct,ETIM,0.5261
3,0OfZwWc8j9QP5uX8xPTxDH,IFCWALL,house - outer wall - house left,"A solid outer wall, forming the left side of t...",L-BE,Roof structure,CCI,0.5236
2,3wdauVJT5Fx9drrREiDqA$,IFCWALL,house - outer wall - house right back,"A solid outer wall, forming the right back sid...",EC000350,Mounting frame for door station,ETIM,0.5217
