# Compare room IDs extracted from CAD data with the movements (NBEW) database table of the IDP



In [None]:
import sys
import os
import re

import numpy as np
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 500)
pd.set_option('display.max_colwidth', None)
import matplotlib.pyplot as plt

# Move the working directory to the project root exactly once per kernel session.
# The flag is looked up in the notebook's globals instead of being reset to True,
# so re-running this cell no longer climbs one more directory level each time.
if globals().get("first_time", True):
    sys.path.append('../')
    os.chdir("../")
    first_time = False

print(os.getcwd())


----------
# SAP Data

## Movement/Stay Data

In [None]:
# Find out which stay locations are requested and how often they are used

base_folder = "./data/interim/model_data/"
movement_df = pd.read_csv(base_folder + "LA_ISH_NBEW.csv", encoding="ISO-8859-1")

# Replace missing room IDs with "-" BEFORE grouping: groupby drops rows whose key is
# NaN by default, so filling afterwards would never see (or count) stays without a room.
# Rows with a missing Department or Ward are still dropped by the groupby, as before.
sap_nbew_rooms_df = movement_df[["Department", "Ward", "Room ID"]].fillna(value={"Room ID": "-"})  # , "Bed ID"

# One row per distinct (Department, Ward, Room ID) with the number of movement records
sap_nbew_rooms_df = (
    sap_nbew_rooms_df
    .groupby(sap_nbew_rooms_df.columns.tolist())
    .size()
    .reset_index(name="count")
    .rename(columns={"Room ID": "SAP Room ID"})
)

# Most frequently used locations first
sap_nbew_rooms_count_df = sap_nbew_rooms_df.sort_values(by="count", ascending=False)
sap_nbew_rooms_count_df.head()

## Room Data

In [None]:
# NOT SURE WHAT THOSE ROOM IDS ARE (CHECK VRE CODE)
base_folder = "./data/interim/model_data/"

# Read everything as strings, label rooms without a common name as "-" and order by name
sap_dim_room_df = (
    pd.read_csv(base_folder + "DIM_RAUM.csv", dtype=str, index_col=0)
    .fillna(value={"Room Common Name": "-"})
    .sort_values(by=["Room Common Name"])
)
# To look at "BH" rooms only: sap_dim_room_df[sap_dim_room_df["Room Common Name"].str.contains("BH")]
sap_dim_room_df.head()

In [None]:
# NOT SURE WHAT THOSE ROOM IDS ARE (CHECK VRE CODE)
base_folder = "./data/interim/model_data/"
fact_appointment_room_df = pd.read_csv(base_folder + "FAKT_TERMIN_RAUM.csv", encoding="ISO-8859-1", dtype=str)

# Distinct (Room ID, Room Common Name) pairs; rooms without a name are labelled "-"
appointment_rooms = fact_appointment_room_df[["Room ID", "Room Common Name"]]
distinct_rooms = appointment_rooms.drop_duplicates().fillna(value={"Room Common Name": "-"})

# Subset of rooms whose common name contains "BH"
bh_rooms = distinct_rooms[distinct_rooms["Room Common Name"].str.contains("BH")]

# Name-ordered table of all distinct rooms, indexed by room ID
sap_fact_app_room_df = distinct_rooms.sort_values(by="Room Common Name").set_index("Room ID")

sap_fact_app_room_df.head()

## SAP Building Unit Data

In [None]:
# Prepare the SAP building unit data for further processing

base_folder = "./data/raw/model_data/"
sap_building_unit_df = pd.read_csv(base_folder + "LA_ISH_NBAU.csv", dtype=str)
# SAP klingon translation: https://www.tcodesearch.com/sap-tables/detail?id=NBAU
# Drop the columns that are not used in this notebook
sap_building_unit_df.drop(["MANDT", "TELNR", "TELFX", "TELTX", "LOEKZ", "LOUSR", "LODAT",
                           "ERDAT", "ERUSR", "UPDAT", "UPUSR", "BEGDT", "ENDDT", "FREIG",
                           "TALST", "ADDIN","XKOOR", "YKOOR", "BREIT", "LAENG", "ARCHV",
                           "MIGRATED_OBJID", "BATCH_RUN_ID", "ZZBEMK", "ZZVERLEGUNG", "ZZVORHALTE",
                           "ZZPRIVAT", "EANNR", "BETTST_TYP"], axis=1, inplace=True)

# The remaining 13 columns are renamed by position, so this list must follow the CSV column order
sap_building_unit_df.columns = ["SAP Room ID", "Unit Type", "Unit Name", "SAP Room ID 1", "SAP Room ID 2",
                                "Short Text", "Long Text", "Address Information", "Address Object",
                                "Waveware Campus", "Waveware Building ID", "Waveware Floor ID", "Waveware Room ID"]


# Spell out the unit type codes
sap_building_unit_df.loc[sap_building_unit_df["Unit Type"] == "Z", "Unit Type"] = "Room"
sap_building_unit_df.loc[sap_building_unit_df["Unit Type"] == "B", "Unit Type"] = "Bettstellplatz"

# Keep rooms only. The explicit copy makes the result an independent frame: later cells
# assign new columns to it, which on a filtered slice triggers pandas' SettingWithCopyWarning.
sap_building_unit_df = sap_building_unit_df[sap_building_unit_df["Unit Type"] == "Room"].copy()

sap_building_unit_df

## Raumbuch Building Data

In [None]:
# Show data we have about buildings from the Raumbuch (apparently this is Waveware)

base_folder = "./data/raw/model_data/"
sap_building_df = pd.read_csv(base_folder + "LA_ISH_ZHC_RB_BUILDING.csv", dtype=str)

# Remove the columns that are not needed, then name the three remaining ones by position
sap_building_df = sap_building_df.drop(columns=["MANDT", "ERDAT", "ERNAM", "AEDAT", "AENAM", "BATCH_RUN_ID"])
sap_building_df.columns = ["Waveware Campus", "Waveware Building ID", "Building Common Name"]

# Buildings are looked up by their Waveware building ID
sap_building_df = sap_building_df.set_index("Waveware Building ID")
sap_building_df

## Raumbuch Room Data

In [None]:
# Show data we have about rooms from the Raumbuch (apparently this is Waveware)

base_folder = "./data/raw/model_data/"
rb_room_df = pd.read_csv(base_folder + "LA_ISH_ZHC_RB_RAUM.csv", dtype=str)
rb_room_df.drop(["MANDT", "ERDAT", "ERNAM", "AEDAT", "AENAM", "BATCH_RUN_ID"], axis=1, inplace=True)
# The remaining six columns are renamed by position
rb_room_df.columns = ["Waveware Campus", "Waveware Building ID", "Waveware Floor ID", "Waveware Room ID", "Room Common Name", "Waveware Room Full ID"]

# Attach the building information first and set the index afterwards: a merge on columns
# returns a fresh integer index, so indexing by "Waveware Room Full ID" before the merge
# would throw the full room ID away.
# Both inputs carry a "Waveware Campus" column, so the result has "_x"/"_y" variants of it.
rb_room_df = pd.merge(rb_room_df, sap_building_df, on="Waveware Building ID")
rb_room_df.set_index("Waveware Room Full ID", inplace=True)
rb_room_df.head()

## Raumbuch Campus Data

In [None]:
# Show data we have about the campuses from the Raumbuch (apparently this is Waveware)

base_folder = "./data/raw/model_data/"
rb_campus_df = pd.read_csv(base_folder + "LA_ISH_ZHC_RB_STANDORT.csv", dtype=str)

# Remove the columns that are not needed, then name the two remaining ones by position
rb_campus_df = rb_campus_df.drop(columns=["MANDT", "ERDAT", "ERNAM", "AEDAT", "AENAM", "BATCH_RUN_ID"])
rb_campus_df.columns = ["Waveware Campus", "Common Name"]
rb_campus_df

## Raumbuch Floor Data

In [None]:
# Show data we have about the floors from the Raumbuch (apparently this is Waveware)

base_folder = "./data/raw/model_data/"
rb_floor_df = pd.read_csv(base_folder + "LA_ISH_ZHC_RB_STOCKWERK.csv", dtype=str)

# Remove the columns that are not needed, then name the four remaining ones by position
rb_floor_df = rb_floor_df.drop(columns=["MANDT", "ERDAT", "ERNAM", "AEDAT", "AENAM", "BATCH_RUN_ID"])
rb_floor_df.columns = ["Waveware Campus", "Waveware Building ID", "Waveware Floor ID", "Floor Common Name"]

# Add the building information for every floor
rb_floor_df = rb_floor_df.merge(sap_building_df, on="Waveware Building ID")
rb_floor_df.head()

----------
# Waveware Data

## Room Data

In [None]:
# Show room data we have directly from Waveware

base_folder = "./data/raw/model_data/"
waveware_rooms_df = pd.read_csv(base_folder + "Waveware_Auszug Flaechenmanagement IDSC (Stand 02.07.20).csv", encoding="ISO-8859-1", dtype=str)

# Name all ten columns by position ...
waveware_rooms_df.columns = [
    "Waveware Building ID", "Building Common Name", "Waveware Floor ID", "Waveware Room ID",
    "Waveware Room Full ID", "Room Common Name", "Room Area", "PC Group ID", "Sub-EC(PC) Nr", "Profitcenter",
]
# ... and keep only the identifying ones
waveware_rooms_df.drop(columns=["Room Area", "PC Group ID", "Sub-EC(PC) Nr", "Profitcenter"], inplace=True)
# waveware_rooms_df.set_index("Waveware Room Full ID", inplace=True)
waveware_rooms_df

## Building Data

In [None]:
# Show building data we have directly from Waveware

base_folder = "./data/raw/model_data/"
waveware_buildings_df = pd.read_csv(base_folder + "Waveware_Auszug Gebaeudeinformation Stand 03.12.2020.csv", encoding="ISO-8859-1", dtype=str)

# First pass: remove the source columns that are not needed
unused_source_columns = [
    "Standort", "Parzellennummer", "Zonenplan", "Denkmalpflege", "Anlage-ID", "Bemerkung",
    "Eigentümer (SAP)", "Vermietung (SAP)", "Portfolio (SAP)", "Baujahr", "Gebäudetyp", "GVB-Nummer",
    "Amtlicher Wert", "Gebäudeversicherungswert", "Gebäudezustand", "Technologiestand HLKSE",
    "Techn. Ausb.standard", "Zustand Technik", "Klimatisierung", "Aufzug", "Gebäudezustand Bem.", "Status",
]
waveware_buildings_df.drop(columns=unused_source_columns, inplace=True)

# Second pass: name the nine remaining columns by position and drop four more of them
waveware_buildings_df.columns = ["Waveware Building Full ID", "Building Code", "Waveware Building ID", "Building abbreviation", "Building Common Name", "Street", "Zip Code", "Location", "SAP-Anlage Nr."]
waveware_buildings_df = waveware_buildings_df.drop(columns=["Zip Code", "Location","SAP-Anlage Nr.", "Building Code"])

# Leave out the "Grundstück Inselareal" entry and every building without an abbreviation
is_not_plot = waveware_buildings_df["Building Common Name"] != "Grundstück Inselareal"
has_abbreviation = waveware_buildings_df["Building abbreviation"].notna()
waveware_buildings_df = waveware_buildings_df[is_not_plot & has_abbreviation]

waveware_buildings_df = waveware_buildings_df.sort_values(by=["Building abbreviation"])
# waveware_buildings_df.set_index("Waveware Building ID", inplace=True)
waveware_buildings_df

## CAD Data

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from tqdm import tqdm
import re

# Collect the "Room ID" labels of every processed CAD floor plan into one table

# get all label csv files
label_csv_files = list(Path("./data/processed/cad_maps/").glob("**/*_labels.csv"))

pbar = tqdm(label_csv_files)
floor_dfs = []
for csv_file in pbar:
    csv_file_path = str(csv_file)
    pbar.set_description(f"Processing {csv_file_path}...")

    floor_labels = pd.read_csv(csv_file_path, index_col=0)
    # Only the labels that were classified as room IDs are of interest here
    floor_dfs.append(floor_labels[floor_labels["Label Type"] == "Room ID"])

# Every file brings its own index, so the concatenated frame would contain duplicate
# index values. The original called reset_index() but discarded its result;
# ignore_index=True produces the intended unique running index.
# Note: pd.concat raises ValueError when no label file was found.
cad_rooms_df = pd.concat(floor_dfs, ignore_index=True)

cad_rooms_df = cad_rooms_df[["Building ID", "Floor ID", "Label Text"]]

cad_rooms_df = cad_rooms_df.rename(columns={"Building ID": "Building Nr", "Label Text": "Room ID"})

cad_rooms_df = cad_rooms_df.sort_values(by=["Building Nr"])

cad_rooms_df.head()

## Preparing intermediate tables

### SAP ISH NBEW table

In [None]:
# Split NBEW SAP Room IDs into Waveware Building, Floor, Room ID
# TODO: Understand and split KKENO rooms

# Patterns for NBEW room IDs, tried in this order. Each entry pairs a compiled regex
# (three groups: building token, floor token, room token) with the row key that
# receives the building token.
_NBEW_ROOM_ID_PATTERNS = (
    (re.compile(r'([A-Za-z]{2,3}[0-9]*)[\.\s]+([A-Za-z]*[0-9]*)[\.N\s-]+([0-9]+[A-Za-z]*)$'), 'Waveware Building ID'),
    (re.compile(r'([A-Za-z]+)[\.\s]*([0-9]*)[\.N\s-]+([0-9]+[A-Za-z]*)$'), 'Waveware Building ID'),
    (re.compile(r'([A-Za-z]+[0-9]*)[\s]+([A-Za-z]*)[\.N\s-]*([0-9]+[A-Za-z]*)'), 'SAP Building Abbreviation'),
)


def extract_ids(row):
    """
    Extract waveware tokens from SAP Room IDs for Insel Hospital rooms.

    The value of row['SAP Room ID'] is matched against three patterns in turn and the
    first one that matches wins. Its three groups are stored as building, floor and
    room token.

    Args:
        row: mapping-like row (e.g. a pandas Series) with a 'SAP Room ID' entry.

    Returns:
        The same row object with 'SAP Building Abbreviation', 'Waveware Floor ID' and
        'Waveware Room ID' set (pd.NA when nothing matched). The first two patterns
        store the building token under 'Waveware Building ID' instead of
        'SAP Building Abbreviation'.
    """
    # NOTE(review): the extract_waveware_ids wrapper defined next to this function
    # does not return 'Waveware Building ID', so the building token found by the first
    # two patterns is dropped there. Confirm which key is intended.
    row['SAP Building Abbreviation'] = pd.NA
    row['Waveware Floor ID'] = pd.NA
    row['Waveware Room ID'] = pd.NA

    room_id = row['SAP Room ID']
    if not isinstance(room_id, str):
        # Missing or numeric IDs cannot be parsed; leave all tokens as pd.NA
        return row

    for pattern, building_key in _NBEW_ROOM_ID_PATTERNS:
        match = pattern.search(room_id)
        if match is not None:
            row[building_key], row['Waveware Floor ID'], row['Waveware Room ID'] = match.groups()
            break
    return row

def extract_waveware_ids(row):
    """
    Parse one row with extract_ids and return only the extracted tokens.

    The result is a Series holding 'SAP Building Abbreviation', 'Waveware Floor ID'
    and 'Waveware Room ID', which makes it suitable for DataFrame.apply(axis=1).
    """
    parsed = extract_ids(row)
    returned_fields = ('SAP Building Abbreviation', 'Waveware Floor ID', 'Waveware Room ID')
    return pd.Series({field: parsed[field] for field in returned_fields})

# Parse every room ID and append the resulting token columns to the stay counts
nbew_tokens_df = sap_nbew_rooms_count_df.apply(extract_waveware_ids, axis=1)
sap_nbew_rooms_count_fix_df = pd.concat([sap_nbew_rooms_count_df, nbew_tokens_df], axis=1)
sap_nbew_rooms_count_fix_df.head()

In [None]:
# Check which ones failed to parse
# (purely numeric IDs and IDs starting with "7" or "6" are left out of this check)
nbew_sap_room_ids = sap_nbew_rooms_count_fix_df["SAP Room ID"]
room_token_missing = sap_nbew_rooms_count_fix_df["Waveware Room ID"].isna()
is_decimal_id = nbew_sap_room_ids.str.isdecimal()
starts_with_7 = nbew_sap_room_ids.str.startswith("7")
starts_with_6 = nbew_sap_room_ids.str.startswith("6")
sap_nbew_rooms_count_fix_df[room_token_missing & ~is_decimal_id & ~starts_with_7 & ~starts_with_6].head()

### SAP Building unit table

In [None]:
# improve the SAP building unit table's waveware data

# Start with empty abbreviation columns; they are filled in by later cells
for abbreviation_column in ('SAP Building Abbreviation 1', 'SAP Building Abbreviation 2'):
    sap_building_unit_df[abbreviation_column] = pd.NA


In [None]:
def extract_campi(row):
    """
    Extract the campus of the building and room based on hints in the Unit Name.

    Only rows whose 'Waveware Campus' is missing are touched. Every hint is checked
    in order and a later hit overrides an earlier one.

    Args:
        row: mapping-like row (e.g. a pandas Series) with 'Waveware Campus' and
            'Unit Name' entries.

    Returns:
        The same row object, with 'Waveware Campus' filled in when a hint matched.
    """
    unit_name = row['Unit Name']
    # Nothing to do when the campus is already known; a missing (non-string) unit
    # name carries no hint and must not crash the substring search.
    if not pd.isna(row['Waveware Campus']) or not isinstance(unit_name, str):
        return row

    campus_hints = (
        ("Aarberg", 'AARB'),
        ("Riggisberg", 'RIGG'),
        ("R_", 'RIGG'),
        ("Tiefenau", 'TIEF'),
        # The original hint was misspelled and never matched the correct town name.
        # The old spelling is kept in case the data contains it.
        ("Münsigen", 'MUEN'),
        ("Münsingen", 'MUEN'),
        ("Belp", 'BELP'),
    )
    for hint, campus_code in campus_hints:
        if hint in unit_name:
            row['Waveware Campus'] = campus_code

    return row

def extract_ids(row):
    """
    Extract waveware tokens from SAP Room IDs for Insel Hospital rooms.

    When the row is not on one of the other campuses, has a 'SAP Room ID 2' and has no
    'Waveware Room ID' yet, 'SAP Room ID 2' is parsed and its three tokens are stored
    as 'Waveware Building ID', 'Waveware Floor ID' and 'Waveware Room ID'.

    Args:
        row: mapping-like row (e.g. a pandas Series) with 'Waveware Campus',
            'SAP Room ID 2' and 'Waveware Room ID' entries.

    Returns:
        The same row object, updated when the room ID could be parsed.
    """
    # 'Waveware Campus' holds campus codes, so the town names the original compared
    # against could never match and the check was always true. The names stay in the
    # set in case a source ever provides them.
    other_campuses = {'AARB', 'RIGG', 'TIEF', 'MUEN', 'Aarberg', 'Riggisberg', 'Tiefenau', 'Münsigen'}

    room_id_2 = row['SAP Room ID 2']
    if row['Waveware Campus'] in other_campuses or not isinstance(room_id_2, str):
        return row
    if not pd.isna(row["Waveware Room ID"]):
        # Already resolved; keep the existing Waveware IDs
        return row

    match = re.search(r'([A-Za-z]+[0-9]*)[\s]+([A-Za-z]*[0-9]*)[\.N\s-]+([0-9]+[A-Za-z]*)', room_id_2)
    if match is not None:
        row['Waveware Building ID'], row['Waveware Floor ID'], row['Waveware Room ID'] = match.groups()
    return row

def extract_waveware_ids(row):
    """
    Apply extract_campi and then extract_ids to one row.

    Returns a Series with the campus, the second SAP building abbreviation and the
    Waveware building, floor and room IDs of the processed row.
    """
    enriched = extract_ids(extract_campi(row))
    returned_fields = (
        'Waveware Campus',
        'SAP Building Abbreviation 2',
        'Waveware Building ID',
        'Waveware Floor ID',
        'Waveware Room ID',
    )
    return pd.Series({field: enriched[field] for field in returned_fields})

# Replace the raw Waveware columns with the values computed row by row
recomputed_columns = ["Waveware Campus", "Waveware Building ID", "SAP Building Abbreviation 2", "Waveware Floor ID", "Waveware Room ID"]
sap_building_unit_fix_df = pd.concat(
    [
        sap_building_unit_df.drop(recomputed_columns, axis=1),
        sap_building_unit_df.apply(extract_waveware_ids, axis=1),
    ],
    axis=1,
)

In [None]:
def fix_SAP_ID_1(row):
    """
    Fix SAP ID 1 for Aarberg or unknown rooms.

    For rows whose campus is missing or not "ISB", 'SAP Room ID 1' is overwritten with
    'SAP Room ID' when the latter contains at least one digit.

    Args:
        row: mapping-like row (e.g. a pandas Series) with 'SAP Room ID',
            'SAP Room ID 1' and 'Waveware Campus' entries.

    Returns:
        A Series with the single entry 'SAP Room ID 1'.
    """
    campus = row["Waveware Campus"]
    # Test for a missing campus first: comparing pd.NA with a string yields pd.NA,
    # whose truth value is undefined and raises a TypeError.
    outside_isb = pd.isna(campus) or campus != "ISB"
    if outside_isb and re.search(r'[0-9]+', row["SAP Room ID"]) is not None:
        row["SAP Room ID 1"] = row['SAP Room ID']

    return pd.Series({'SAP Room ID 1': row["SAP Room ID 1"]})

# fix the Aarberg SAP Room IDs to make it possible to identify them in the SAP NBEW table data
fixed_sap_room_id_1 = sap_building_unit_fix_df.apply(fix_SAP_ID_1, axis=1)
sap_building_unit_fix_df = pd.concat([sap_building_unit_fix_df.drop(["SAP Room ID 1"], axis=1), fixed_sap_room_id_1], axis=1)

# show number of unidentified rooms
for label, column in (
    ("Campus:", "Waveware Campus"),
    ("Building", "Waveware Building ID"),
    ("Floor", "Waveware Floor ID"),
    ("Room", "Waveware Room ID"),
):
    print(label, len(sap_building_unit_fix_df[pd.isna(sap_building_unit_fix_df[column])]))

In [None]:
def extract_sap_1_ids(row):
    """
    Extract SAP abbreviations from SAP Room IDs for Insel Hospital rooms.

    The abbreviation is the leading run of letters (optionally followed by digits)
    that precedes whitespace in 'SAP Room ID 1'.

    Args:
        row: mapping-like row (e.g. a pandas Series) with 'Waveware Campus',
            'SAP Room ID 1' and 'SAP Building Abbreviation 1' entries.

    Returns:
        A Series with the single entry 'SAP Building Abbreviation 1' (unchanged when
        the row is skipped or nothing matched).
    """
    # 'Waveware Campus' holds campus codes, so the town names the original compared
    # against could never match and the check was always true. The names stay in the
    # set in case a source ever provides them.
    other_campuses = {'AARB', 'RIGG', 'TIEF', 'MUEN', 'Aarberg', 'Riggisberg', 'Tiefenau', 'Münsigen'}

    room_id_1 = row['SAP Room ID 1']
    if row['Waveware Campus'] not in other_campuses and isinstance(room_id_1, str):
        match = re.search(r'([A-Za-z]+[0-9]*)[\s]+', room_id_1)
        if match is not None:
            row['SAP Building Abbreviation 1'] = match.group(1)
    return pd.Series({'SAP Building Abbreviation 1': row['SAP Building Abbreviation 1']})

# extract building abbreviation 1 from SAP Room ID 1
sap_abbreviation_1 = sap_building_unit_fix_df.apply(extract_sap_1_ids, axis=1)
sap_building_unit_fix_df = pd.concat(
    [sap_building_unit_fix_df.drop(['SAP Building Abbreviation 1'], axis=1), sap_abbreviation_1],
    axis=1,
)

In [None]:
def extract_sap_2_ids(row):
    """
    Extract SAP abbreviations from SAP Room IDs for Insel Hospital rooms.

    The abbreviation is the leading run of letters (optionally followed by digits)
    that precedes whitespace in 'SAP Room ID 2'.

    Args:
        row: mapping-like row (e.g. a pandas Series) with 'Waveware Campus',
            'SAP Room ID 2' and 'SAP Building Abbreviation 2' entries.

    Returns:
        A Series with the single entry 'SAP Building Abbreviation 2' (unchanged when
        the row is skipped or nothing matched).
    """
    # 'Waveware Campus' holds campus codes, so the town names the original compared
    # against could never match and the check was always true. The names stay in the
    # set in case a source ever provides them.
    other_campuses = {'AARB', 'RIGG', 'TIEF', 'MUEN', 'Aarberg', 'Riggisberg', 'Tiefenau', 'Münsigen'}

    # Guard on the field that is actually parsed. The original checked 'SAP Room ID 1'
    # here, which crashed on a missing 'SAP Room ID 2' and skipped rows that only
    # have a 'SAP Room ID 2'.
    room_id_2 = row['SAP Room ID 2']
    if row['Waveware Campus'] not in other_campuses and isinstance(room_id_2, str):
        match = re.search(r'([A-Za-z]+[0-9]*)[\s]+', room_id_2)
        if match is not None:
            row['SAP Building Abbreviation 2'] = match.group(1)
    return pd.Series({'SAP Building Abbreviation 2': row['SAP Building Abbreviation 2']})

# extract building abbreviation 2 from SAP Room ID 2
sap_abbreviation_2 = sap_building_unit_fix_df.apply(extract_sap_2_ids, axis=1)
sap_building_unit_fix_df = pd.concat(
    [sap_building_unit_fix_df.drop(['SAP Building Abbreviation 2'], axis=1), sap_abbreviation_2],
    axis=1,
)

In [None]:
#sap_building_unit_fix_df#[~sap_building_unit_fix_df["Waveware Campus"].isin(["AARB", "TIEF", "RIGG", "MUEN"])][["SAP Room ID 1", "SAP Building Abbreviation 1", "SAP Room ID 2", "SAP Building Abbreviation 2"]]

In [None]:
# Show what we extracted
extracted_overview_columns = [
    'Unit Type', 'Unit Name', 'SAP Room ID',
    'SAP Room ID 1', 'SAP Building Abbreviation 1',
    'SAP Room ID 2', 'SAP Building Abbreviation 2',
    'Waveware Campus', 'Waveware Building ID', 'Waveware Floor ID', 'Waveware Room ID',
]
sap_building_unit_fix_df[extracted_overview_columns]

In [None]:
# How many rooms are from Insel Hospital?

int((sap_building_unit_fix_df["Waveware Campus"] == "ISB").sum())

### Building Abbreviations Table

In [None]:
# Collect every known abbreviation (Waveware and the two SAP variants) per Waveware building

# Outer-join the Waveware buildings with the abbreviations found in the SAP building units
waveware_building_columns = ["Building Common Name", "Waveware Building ID", "Building abbreviation", "Waveware Building Full ID"]
sap_abbreviation_columns = ["SAP Building Abbreviation 1", "SAP Building Abbreviation 2", "Waveware Building ID"]
building_abbreviation_df = (
    pd.merge(
        waveware_buildings_df[waveware_building_columns],
        sap_building_unit_fix_df[sap_abbreviation_columns],
        how="outer",
        on="Waveware Building ID",
        indicator=True,
    )
    .drop_duplicates()
    .sort_values(by="Waveware Building ID")
)

# Discard building IDs that only occur on the SAP side
known_to_waveware = building_abbreviation_df["_merge"] != "right_only"
building_abbreviation_df = building_abbreviation_df[known_to_waveware].drop(["_merge"], axis=1)

# Long format: one row per (building, abbreviation) pair
building_abbreviation_melt_df = pd.melt(
    building_abbreviation_df,
    id_vars=['Building Common Name', "Waveware Building ID", "Waveware Building Full ID"],
    value_vars=['Building abbreviation', 'SAP Building Abbreviation 1','SAP Building Abbreviation 2'],
)
building_abbreviation_melt_df = (
    building_abbreviation_melt_df
    .drop(["variable"], axis=1)
    .drop_duplicates()
    .sort_values(by="Waveware Building ID")
)

# Keep only the rows that actually carry an abbreviation
building_abbreviation_df = (
    building_abbreviation_melt_df
    .dropna(subset=['value'])
    .rename(columns={"value": "Building Abbreviation"})
)
building_abbreviation_df

## Finding movements/stays of patients

In [None]:
# Merge SAP building unit data with the nbew stays
# As the SAP building unit data contains Waveware {Building, Floor, Room} IDs,
# this allows us to merge with the Waveware data
# Columns present on both sides keep their plain name for the NBEW side and get "_x" for the SAP side
nbew_sap_merge = sap_building_unit_fix_df.merge(
    sap_nbew_rooms_count_fix_df,
    how="outer",
    left_on="SAP Room ID 1",
    right_on="SAP Room ID",
    suffixes=("_x",""),
    indicator=True,
)
# Number of rows that were found on both sides
int((nbew_sap_merge["_merge"] == "both").sum())

In [None]:
# Just continue with the mergable data (but allow to analyse the unmerged data)
# Columns of interest: "count", "SAP Room ID", "Unit Name", "Waveware Campus", "Waveware Building ID",
# "Waveware Floor ID", "Waveware Room ID", "SAP Room ID 1", "SAP Room ID 2", "Department", "Ward"
found_on_both_sides = nbew_sap_merge["_merge"] == "both"
nbew_sap_merge_both = nbew_sap_merge[found_on_both_sides]
nbew_sap_merge_both

In [None]:
# Reduce data to rooms on the Insel Hospital Campus
on_isb_campus = nbew_sap_merge_both["Waveware Campus"] == "ISB"
nbew_sap_merge_isb = nbew_sap_merge_both[on_isb_campus]
nbew_sap_merge_isb

In [None]:
# Analyse the unmergable SAP NBEW locations
only_in_nbew = nbew_sap_merge["_merge"] == "right_only"
nbew_sap_merge_fails = nbew_sap_merge[only_in_nbew]

fail_overview_columns = ["_merge", "count", "SAP Room ID", "Department", "Ward", "Waveware Building ID", "Waveware Floor ID", "Waveware Room ID"]
nbew_sap_merge_fails_show = nbew_sap_merge_fails[fail_overview_columns]

# Most frequently used locations first
nbew_sap_merge_fails_show.sort_values(by="count", ascending=False)
# nbew_sap_merge_fails_show.sort_values(by="SAP Room ID", ascending=True)

In [None]:
# Number of NBEW locations that could not be merged
nbew_sap_merge_fails_show.shape[0]

In [None]:
# Try to match the building token of the failed locations against the known building abbreviations
merge_fails = nbew_sap_merge_fails_show.drop(["_merge"], axis=1).merge(
    building_abbreviation_df,
    how="outer",
    left_on="Waveware Building ID",
    right_on="Building Abbreviation",
    indicator=True,
)
matched_abbreviation = merge_fails["_merge"] == "both"
merge_fails[matched_abbreviation].sort_values(by="count", ascending=False)

In [None]:
#pd.merge(nbew_sap_merge_fails_show, , on=["Waveware Building ID", "Waveware Floor ID", "Waveware Room ID"])

Observations:
* [X] Most room IDs that could not be matched probably belong to campuses other than ISB (check by looking at the `both` and `right_only` rows, sorted by count, then by SAP Room ID). Alternatively, SAP Room ID 1 may simply be missing for them!
* [X] What is located in BH/BHH A, C,D, I? (larger amount of BH A-184 or BH C111 or BHHA408 not found) \
=> You can find the answers to this question in waveware_rooms_df
* [ ] Furthermore, there are some unusual naming schemes such as H5 G-19B. \
=> Possibly not a patient room. So we might have to take the leap to waveware_rooms_df
* [X] What building is IN D 123 (INO D 123)? It seems to be located in many different departments \
=> Try to make the leap function to waveware_rooms_df
* [X] INO C 09 and friends seem to be in NOTA INO and NOTB INO wards, but their name seems wrong. \
=> they are not in waveware_rooms_df but they seem to be in INO
* [X] What is located in KK/KKL D, E, F? (KKB211 or KK C-826 or KKLG322 or KKL G322 are alternate patterns) \
=> You can find the answers to this question in waveware_rooms_df
* [X] Frequently used: AS1.131B (ASH 1 131B Besprechung), BHH B118, TK B 179, IN D 123, INO C 10 \
=> Try to make the leap function to waveware_rooms_df
* [ ] Where is AKUT NEPH? \
=> NO IDEA, but there seem to be a remaining number of rooms that are not in waveware somehow
* [X] SH1 37B not found (Sahli-Haus 1) \
=> Try to make the leap function to waveware_rooms_df
* [X] TK B 179 not found (Theodor Kocher Haus) \
=> Try to make the leap function to waveware_rooms_df

In [None]:
# Analyse merge fails
# Rows of interest: merged rows on the ISB campus, plus NBEW-only rows without any campus
merged_on_isb = (nbew_sap_merge["_merge"] == "both") & (nbew_sap_merge["Waveware Campus"] == "ISB")
nbew_only_without_campus = (nbew_sap_merge["_merge"] == "right_only") & nbew_sap_merge["Waveware Campus"].isna()

fail_analysis_columns = ["_merge", "count", "SAP Room ID", "Unit Name","Waveware Campus", "Waveware Building ID", "Waveware Floor ID", "Waveware Room ID", "SAP Room ID 1", "SAP Room ID 2", "Department", "Ward"]
nbew_sap_merge_fail_both_right = (
    nbew_sap_merge.loc[merged_on_isb | nbew_only_without_campus, fail_analysis_columns]
    .sort_values(by="count", ascending=False)
)
# Display ordered by room ID so that similar IDs end up next to each other
nbew_sap_merge_fail_both_right.sort_values(by="SAP Room ID", ascending=True)

In [None]:
# Merge waveware data with SAP NBEW Insel hospital rooms
waveware_key_columns = ["Waveware Building ID", "Waveware Floor ID", "Waveware Room ID"]
waveware_nbew_merge = waveware_rooms_df.merge(nbew_sap_merge_isb, how='outer', on=waveware_key_columns, indicator=True)

# Rooms that are known to both Waveware and SAP NBEW, most frequently used first
matched_overview_columns = ["count", "Waveware Building ID", "SAP Room ID", "Building Common Name", "Waveware Floor ID", "Waveware Room ID", "Waveware Room Full ID", "Room Common Name", "SAP Room ID 1", "SAP Room ID 2"]
in_both_sources = waveware_nbew_merge["_merge"] == "both"
waveware_nbew_merge_show = waveware_nbew_merge.loc[in_both_sources, matched_overview_columns]
waveware_nbew_merge_show.sort_values(by="count", ascending=False)

In [None]:
# Analyse the unmergable rooms from SAP NBEW
unmatched_overview_columns = ["count", "Waveware Building ID", "SAP Room ID", "Building Common Name", "Waveware Floor ID", "Waveware Room ID", "Waveware Room Full ID", "Room Common Name", "SAP Room ID 1", "SAP Room ID 2"]
only_in_sap_nbew = waveware_nbew_merge["_merge"] == "right_only"
waveware_nbew_merge.loc[only_in_sap_nbew, unmatched_overview_columns]

In [None]:
# Merge waveware buildings and rooms
# Columns that exist in both tables keep the room table's version; the building table's get "_y"
waveware_room_building_df = waveware_rooms_df.merge(waveware_buildings_df, on="Waveware Building ID", suffixes=("","_y"))

room_building_columns = ["Waveware Building ID", "Building abbreviation", "Waveware Floor ID", "Waveware Room ID", "Waveware Room Full ID", "Room Common Name"]
waveware_room_building_df = waveware_room_building_df[room_building_columns]
waveware_room_building_df

# Prepare location data

## Get location data (Open Street Map)

In [None]:
import requests

def get_long_lat(street_string, timeout=10.0, headers=None):
    """
    Look up a street in Bern with the Nominatim (OpenStreetMap) search API.

    The first result of type "hospital", "childcare" or "clinic" is preferred; when
    there is none, the first result of the response is used.

    Args:
        street_string: street name (and number) to search for; " Bern" is appended.
        timeout: seconds to wait for the HTTP response before giving up.
        headers: optional HTTP headers for the request, e.g. a custom User-Agent.
            NOTE(review): the Nominatim usage policy asks for an identifying
            User-Agent - confirm what this project should send.

    Returns:
        A Series with 'Type' ("<type>: <first 15 chars of the display name>") and
        'Long/Lat' (a (lon, lat) tuple exactly as delivered by the API).

    Raises:
        TypeError: if street_string is not a string (e.g. NaN for a missing street).
        IndexError: if the search returned no result.
        requests.HTTPError: if the service answered with an error status.
    """
    if not isinstance(street_string, str):
        raise TypeError(f"street_string must be a string, got {street_string!r}")

    # Let requests build and encode the query string instead of patching spaces by hand
    response = requests.get(
        "https://nominatim.openstreetmap.org/search",
        params={"q": f"{street_string} Bern", "format": "json"},
        timeout=timeout,
        headers=headers,
    )
    response.raise_for_status()

    # Parse the body once; the original re-parsed it for every access
    locations = response.json()
    if not locations:
        raise IndexError(f"No geocoding result for street {street_string!r}")

    preferred_types = ("hospital", "childcare", "clinic")
    best = next((loc for loc in locations if loc["type"] in preferred_types), locations[0])

    id_string = best["type"] + ": " + best["display_name"][:15]
    long_lat = (best["lon"], best["lat"])
    return pd.Series({'Type': id_string, 'Long/Lat': long_lat})


# Geocode every building by its street and append the result columns
geocoded_df = waveware_buildings_df["Street"].apply(get_long_lat)
waveware_buildings_coords_df = pd.concat([waveware_buildings_df, geocoded_df], axis=1)

# Split the (lon, lat) string pair into two numeric columns
long_lat_pairs = waveware_buildings_coords_df["Long/Lat"]
waveware_buildings_coords_df["Longitude"] = long_lat_pairs.map(lambda pair: float(pair[0]))
waveware_buildings_coords_df["Latitude"] = long_lat_pairs.map(lambda pair: float(pair[1]))
waveware_buildings_coords_df = waveware_buildings_coords_df.drop(columns=["Long/Lat"])
waveware_buildings_coords_df

In [None]:
import pandas as pd
import geopandas
import matplotlib.pyplot as plt
import contextily as ctx
# Turn the longitude/latitude columns into point geometries
building_points = geopandas.points_from_xy(
    waveware_buildings_coords_df.Longitude,
    waveware_buildings_coords_df.Latitude,
)
gdf = geopandas.GeoDataFrame(waveware_buildings_coords_df, geometry=building_points)

# Declare the coordinates as EPSG:4326 and reproject them to EPSG:3857
gdf = gdf.set_crs(epsg=4326).to_crs(epsg=3857)

gdf

In [None]:
# Plot all buildings on top of a basemap and label each with its abbreviation
ax = gdf.plot(figsize=(16, 12), alpha=0.5, edgecolor='k', label="Building abbreviation")
ctx.add_basemap(ax, zoom=15, source=ctx.providers.CartoDB.Positron, alpha=0.5) # OpenStreetMap.Mapnik, CartoDB.Positron, CartoDB.Voyager
# providers: https://contextily.readthedocs.io/en/latest/providers_deepdive.html

# One (x, y) tuple per building, used as the anchor of its label
gdf['coords'] = [geometry.representative_point().coords[0] for geometry in gdf['geometry']]

for idx, row in gdf.iterrows():
    # Pass the label positionally: the keyword was `s` in older Matplotlib releases
    # and `text` in newer ones, so `s=...` fails on current versions.
    plt.annotate(row['Building abbreviation'], xy=row['coords'], horizontalalignment='center', verticalalignment='bottom')
plt.show()