In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.multioutput import MultiOutputClassifier, MultiOutputRegressor
from sklearn.model_selection import train_test_split
from post_processing import plot_results, evaluate_model, l2_dist
import warnings
# NOTE(review): with no category/module argument this suppresses every warning
# for the rest of the kernel session, including pandas and scikit-learn
# diagnostics raised by later cells. Consider narrowing the filter to the
# specific category that is known to be noise.
warnings.filterwarnings('ignore')

# Reading the Data

In [10]:
# Read the two CSV splits (training first, then validation) and stack them
# row-wise into one frame with a fresh 0..n-1 index.
Trainingdf, Validationdf = (
    pd.read_csv(csv_name) for csv_name in ('TrainingData.csv', 'ValidationData.csv')
)
df = pd.concat([Trainingdf, Validationdf], ignore_index=True)

# Preview the first two rows of the combined frame.
df.head(2)

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID,SPACEID,RELATIVEPOSITION,USERID,PHONEID,TIMESTAMP
0,100,100,100,100,100,100,100,100,100,100,...,100,-7541.2643,4864921.0,2,1,106,2,2,23,1371713733
1,100,100,100,100,100,100,100,100,100,100,...,100,-7536.6212,4864934.0,2,1,106,2,2,23,1371713691


## Location prediction

In [3]:
# Features: every column except the trailing 9 (LONGITUDE ... TIMESTAMP in the
# preview of df). Targets: the two coordinate columns, so this is a
# two-output regression problem.
X = df[df.columns[:df.shape[1] - 9]]
Y = df.loc[:, ['LONGITUDE', 'LATITUDE']]

# Hold out 30% of the rows, shuffled with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, shuffle=True, random_state=42
)

In [4]:
# Fit a random-forest regressor for the coordinate targets through the
# MultiOutputRegressor wrapper (one underlying forest per target column),
# then predict the held-out rows. fit() returns the fitted estimator, so
# multi_target_forest and model refer to the same object.
model = MultiOutputRegressor(RandomForestRegressor(random_state=1))
multi_target_forest = model.fit(X_train, y_train)
y_pred = multi_target_forest.predict(X_test)

# Attach the predictions with assign(), which builds a new frame, instead of
# writing columns in place into the object returned by train_test_split.
# The in-place writes are the pattern pandas reports as SettingWithCopyWarning,
# and the notebook-wide warnings filter would hide that report.
# y_pred columns follow the target order used for fitting: column 0 pairs with
# LONGITUDE (pred_x) and column 1 with LATITUDE (pred_y).
y_test = y_test.assign(pred_x=y_pred[:, 0], pred_y=y_pred[:, 1])

In [5]:
# Distance between predicted and true coordinates. l2_dist comes from the
# project's post_processing module; its result is unpacked here as
# (mean, per-row distances) -- presumably an array, since .mean() is called
# on it below. TODO confirm against post_processing.
predicted_xy = (y_test['pred_x'], y_test['pred_y'])
actual_xy = (y_test['LONGITUDE'], y_test['LATITUDE'])
l2dists_mean, l2dists = l2_dist(predicted_xy, actual_xy)

# Keep the per-row error next to the predictions.
y_test['dist'] = list(l2dists)

# Order statistics are read straight off the sorted errors by index
# (no interpolation between neighbouring values).
srt = np.sort(l2dists)
n = len(srt)
prec_90 = srt[int(0.9*n)]

print("mean distances : ", l2dists.mean(), sep="")
print("median : ", srt[int(0.5*n)], sep="")
print("90 percentile : ", prec_90, sep="")

mean distances : 6.055419620106044
median : 3.5352180528246513
90 percentile : 13.507243651269079


## Building prediction

In [6]:
# Features: every column except the trailing 9. Label: BUILDINGID, making
# this a single-label classification problem.
X = df[df.columns[:df.shape[1] - 9]]
Y = df.loc[:, 'BUILDINGID']

# Hold out 30% of the rows, shuffled with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, shuffle=True, random_state=42
)

In [7]:
# Train a random forest on the building labels, predict the held-out rows,
# and hand both label vectors to evaluate_model (from post_processing) for
# the metric report.
model = RandomForestClassifier(random_state=1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
evaluate_model(y_test, y_pred)

Confusion matrix:
[[1742    0    0]
 [   1 1625   13]
 [   0    0 2934]]
f1 Score :  0.9977377772836262
Accuracy Score : 0.9977830562153602
Precision Score :  0.9983383369469281
Recall Score :  0.997152735407769
MCC Score : 0.9965447948547613


## Floor prediction

In [8]:
# Floor classification is set up for a single building only: keep the rows
# whose BUILDINGID equals 2.
df2 = df.loc[df['BUILDINGID'] == 2]

# Features: every column except the trailing 9. Label: FLOOR.
X = df2[df2.columns[:df2.shape[1] - 9]]
Y = df2.loc[:, 'FLOOR']

# Hold out 30% of the rows, shuffled with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, shuffle=True, random_state=42
)

In [9]:
# Train a random forest on the floor labels, predict the held-out rows, and
# hand both label vectors to evaluate_model (from post_processing) for the
# metric report.
model = RandomForestClassifier(random_state=1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
evaluate_model(y_test, y_pred)

Confusion matrix:
[[595   0   0   0   0]
 [ 13 637   5   1   0]
 [  6  19 472   5   0]
 [  0   2   7 829   0]
 [  0   0   1   7 329]]
f1 Score :  0.9770339039089333
Accuracy Score : 0.9774590163934426
Precision Score :  0.9789793854671462
Recall Score :  0.975359379997017
MCC Score : 0.9712802283034752
