# UJIIndoorLoc

In [None]:

import os


requirements_path = os.path.join(os.getcwd(), 'requirements.txt')

os.system(f'pip install -r {requirements_path}')
print("All dependencies have been installed.")

1

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
#feature encoding
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler


## Read Data into Dataframe

In [7]:
datafolder = "../../datasets/UJIIndoorLoc"

trainfile = os.path.join(datafolder, "trainingData.csv")
validfile = os.path.join(datafolder, "validationData.csv")

#read data into pandas dataframes
train_data = pd.read_csv(trainfile)
valid_data = pd.read_csv(validfile)
print("Training data shape:", train_data.shape)
train_data.head()


Training data shape: (19937, 529)


Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID,SPACEID,RELATIVEPOSITION,USERID,PHONEID,TIMESTAMP
0,100,100,100,100,100,100,100,100,100,100,...,100,-7541.2643,4864921.0,2,1,106,2,2,23,1371713733
1,100,100,100,100,100,100,100,100,100,100,...,100,-7536.6212,4864934.0,2,1,106,2,2,23,1371713691
2,100,100,100,100,100,100,100,-97,100,100,...,100,-7519.1524,4864950.0,2,1,103,2,2,23,1371714095
3,100,100,100,100,100,100,100,100,100,100,...,100,-7524.5704,4864934.0,2,1,102,2,2,23,1371713807
4,100,100,100,100,100,100,100,100,100,100,...,100,-7632.1436,4864982.0,0,0,122,2,11,13,1369909710


## Split into Feature and Tagret

In [22]:
# Feature and Target split
# Features: WAP columns
train_data_features = train_data.drop(columns=['LONGITUDE', 'LATITUDE', 'BUILDINGID', 'FLOOR', 'SPACEID', 'RELATIVEPOSITION', 'USERID', 'PHONEID', 'TIMESTAMP'])


train_data_target = train_data[['SPACEID']]

print(train_data_features.head())
print(train_data_target.head())


   WAP001  WAP002  WAP003  WAP004  WAP005  WAP006  WAP007  WAP008  WAP009  \
0     100     100     100     100     100     100     100     100     100   
1     100     100     100     100     100     100     100     100     100   
2     100     100     100     100     100     100     100     -97     100   
3     100     100     100     100     100     100     100     100     100   
4     100     100     100     100     100     100     100     100     100   

   WAP010  ...  WAP511  WAP512  WAP513  WAP514  WAP515  WAP516  WAP517  \
0     100  ...     100     100     100     100     100     100     100   
1     100  ...     100     100     100     100     100     100     100   
2     100  ...     100     100     100     100     100     100     100   
3     100  ...     100     100     100     100     100     100     100   
4     100  ...     100     100     100     100     100     100     100   

   WAP518  WAP519  WAP520  
0     100     100     100  
1     100     100     100  
2     10

## Target Values Feature Encoding

In [19]:
cat_cols = ["BUILDINGID", "FLOOR", "RELATIVEPOSITION", "SPACEID"]

enc = OneHotEncoder(handle_unknown="ignore", sparse_output=False)  

X_cat_enc = enc.fit_transform(train_data_target[cat_cols])
enc_cols = enc.get_feature_names_out(cat_cols)

X_cat_enc_df = pd.DataFrame(X_cat_enc, columns=enc_cols, index=train_data_target.index)

# Combine encoded categorical features with numerical target features
train_data_target_enc = pd.concat([X_cat_enc_df, train_data_target.drop(columns=cat_cols)], axis=1)
print(train_data_target_enc.head())

# min max

   BUILDINGID_0  BUILDINGID_1  BUILDINGID_2  FLOOR_0  FLOOR_1  FLOOR_2  \
0           0.0           1.0           0.0      0.0      0.0      1.0   
1           0.0           1.0           0.0      0.0      0.0      1.0   
2           0.0           1.0           0.0      0.0      0.0      1.0   
3           0.0           1.0           0.0      0.0      0.0      1.0   
4           1.0           0.0           0.0      1.0      0.0      0.0   

   FLOOR_3  FLOOR_4  RELATIVEPOSITION_1  RELATIVEPOSITION_2  ...  SPACEID_243  \
0      0.0      0.0                 0.0                 1.0  ...          0.0   
1      0.0      0.0                 0.0                 1.0  ...          0.0   
2      0.0      0.0                 0.0                 1.0  ...          0.0   
3      0.0      0.0                 0.0                 1.0  ...          0.0   
4      0.0      0.0                 0.0                 1.0  ...          0.0   

   SPACEID_244  SPACEID_245  SPACEID_246  SPACEID_247  SPACEID_248  

## Scaling of Features

In [21]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.impute import MissingIndicator
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer

# Beispiel: eine Spalte 'WAP001' mit Werten [-104..0] und 100 für "nicht gesehen"
def map_100_to_nan(x):
    x = x.copy()
    x[x == 100] = np.nan
    return x

pipe_wap = Pipeline([
    # 1) 100 -> NaN (nicht gesehen)
    ("map_missing", FunctionTransformer(map_100_to_nan)),
    # 2) Missing-Indicator (liefert zusätzliches Flag-Feature)
    ("features", ColumnTransformer([
        # a) Stärke: NaN -> -110, danach StandardScaler
        ("strength_scaled", Pipeline([
            ("imp", SimpleImputer(strategy="constant", fill_value=-110.0)),
            ("scaler", StandardScaler())
        ]), [0]),
        # b) Seen-Flag: 1 wenn NaN (nicht gesehen), sonst 0
        ("not_seen_flag", MissingIndicator(features="all"), [0]),
    ], remainder="drop"))
])

# Beispiel-Daten
s = pd.Series([-75, -92, 100, -40, 0, -104], name="WAP001")
X = s.to_frame()

X_trans = pipe_wap.fit_transform(X)
# Ergebnis hat 2 Spalten: [scaled_strength, not_seen_flag
print(X_trans)

[[-0.12434905  0.        ]
 [-0.56171466  0.        ]
 [-1.02480766  1.        ]
 [ 0.77610957  0.        ]
 [ 1.80520513  0.        ]
 [-0.87044333  0.        ]]
