# KNN and Bayesian Model Fusion For Indoor localization

In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, hamming_loss, mean_squared_error, mean_absolute_error


In [3]:
# Step 1: Load the dataset
file_path = r'C:\Users\Hp\OneDrive\Indoor Localization using RSSI Dataset\archive (1)\TrainingData.csv'
data = pd.read_csv(file_path)

# Step 2: Examine data structure
display(data.head())
print(data.info())

# Step 3: Define features and target variable(s)
features = data.iloc[:, :520]  # Wi-Fi Access Points as features
target = data[['LONGITUDE', 'LATITUDE', 'FLOOR', 'BUILDINGID', 'SPACEID']]

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID,SPACEID,RELATIVEPOSITION,USERID,PHONEID,TIMESTAMP
0,100,100,100,100,100,100,100,100,100,100,...,100,-7541.2643,4864921.0,2,1,106,2,2,23,1371713733
1,100,100,100,100,100,100,100,100,100,100,...,100,-7536.6212,4864934.0,2,1,106,2,2,23,1371713691
2,100,100,100,100,100,100,100,-97,100,100,...,100,-7519.1524,4864950.0,2,1,103,2,2,23,1371714095
3,100,100,100,100,100,100,100,100,100,100,...,100,-7524.5704,4864934.0,2,1,102,2,2,23,1371713807
4,100,100,100,100,100,100,100,100,100,100,...,100,-7632.1436,4864982.0,0,0,122,2,11,13,1369909710


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19937 entries, 0 to 19936
Columns: 529 entries, WAP001 to TIMESTAMP
dtypes: float64(2), int64(527)
memory usage: 80.5 MB
None


In [4]:
# Step 4: Reduce dimensions using PCA to reduce memory usage
pca = PCA(n_components=50)  # Keep 50 principal components
X_reduced = pca.fit_transform(features)

# Step 5: Split the data into training and test sets for one target column (e.g., 'FLOOR')
X_train, X_test, y_train, y_test = train_test_split(X_reduced, target['FLOOR'], test_size=0.2, random_state=42)

# Step 6: Initialize individual models for KNN and Naive Bayes
knn = KNeighborsClassifier(n_neighbors=5)
nb = GaussianNB()

# Step 7: Combine the models into a Voting Classifier for fusion
fusion_model = VotingClassifier(estimators=[('knn', knn), ('nb', nb)], voting='hard')

# Step 8: Train the fusion model
fusion_model.fit(X_train, y_train)

# Step 9: Make predictions on the test set
y_pred = fusion_model.predict(X_test)


In [5]:
# Step 10: Calculate and display evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
hamming = hamming_loss(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Display results
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Hamming Loss:", hamming)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)

Accuracy: 0.8503009027081244
Precision: 0.8652987333071035
Recall: 0.8503009027081244
F1 Score: 0.8499749964889561
Hamming Loss: 0.14969909729187564
Mean Squared Error: 0.27181544633901705
Mean Absolute Error: 0.18355065195586762
