In [72]:
import numpy as np
import pandas as pd
from sklearn import metrics as sm
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

In [73]:
training_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

print(training_data.shape)
print(test_data.shape)

(7352, 563)
(2947, 563)


In [None]:
combined = pd.concat([training_data, test_data], sort = False)
X = combined.iloc[:,:-2]
y = combined.iloc[:,-1]
X_train_act, X_test_act, y_train_act, y_test_act = train_test_split( X, y, test_size=0.2, random_state=42)

In [5]:
print("Training Data shape:", format(X_train_act.shape))
print("Test Data shape:",format(X_test_act.shape))
print("Training Data shape:", format(y_train_act.shape))
print("Test Data shape:",format(y_test_act.shape))

Training Data shape: (8239, 561)
Test Data shape: (2060, 561)
Training Data shape: (8239,)
Test Data shape: (2060,)


In [6]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestClassifier 
import pickle

activity_model = RandomForestClassifier()
activity_model.fit(X_train_act, y_train_act)
pickle.dump(activity_model, open('activity.sav', 'wb'))

In [7]:
y_hat = activity_model.predict(X_test_act)

In [8]:
print(sm.confusion_matrix(y_test_act,y_hat))
print(sm.classification_report(y_test_act, y_hat))

[[406   0   0   0   0   0]
 [  0 361  16   0   0   0]
 [  0  14 340   0   0   0]
 [  0   0   0 366   2   1]
 [  0   0   0   3 275   6]
 [  0   0   0   1   4 265]]
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       406
           SITTING       0.96      0.96      0.96       377
          STANDING       0.96      0.96      0.96       354
           WALKING       0.99      0.99      0.99       369
WALKING_DOWNSTAIRS       0.98      0.97      0.97       284
  WALKING_UPSTAIRS       0.97      0.98      0.98       270

          accuracy                           0.98      2060
         macro avg       0.98      0.98      0.98      2060
      weighted avg       0.98      0.98      0.98      2060



In [9]:
### ================= LOCALISATION ================= ###

In [10]:
from sklearn.metrics import classification_report, confusion_matrix

In [75]:
def getXY(X):
  
  x_drop = ["LONGITUDE", "LATITUDE", "FLOOR", "BUILDINGID", "SPACEID", "RELATIVEPOSITION", "USERID", "PHONEID", "TIMESTAMP"]
  y_cols = ["LONGITUDE", "LATITUDE","FLOOR","BUILDINGID","SPACEID","RELATIVEPOSITION"]

  x_cols = list(set(X.columns)-set(x_drop))
  X_train = X[x_cols]
  X_train.iloc[:, 0:520] = np.where(X_train.iloc[:, 0:520] <= 0, X_train.iloc[:, 0:520] + 105, X_train.iloc[:, 0:520] - 100)

  Y_train = X[y_cols]
  print(Y_train["LONGITUDE"].min())
  print(Y_train["LATITUDE"].min())

  Y_train["LONGITUDE"] += 7700
  Y_train["LATITUDE"] -= 4864745

  return X_train, Y_train

In [76]:
training_data = pd.read_csv('TrainingData.csv')
test_data = pd.read_csv('ValidationData.csv')
print(training_data["LONGITUDE"].max())
print(training_data["LONGITUDE"].min())
print(training_data["LATITUDE"].max())
print(training_data["LATITUDE"].min())
print(training_data.shape)
print(test_data.shape)

-7300.818990092725
-7691.338399998844
4865016.687799998
4864745.745015971
(19937, 529)
(1111, 529)


In [None]:
combined = pd.concat([training_data, test_data], sort = False)
X,y = getXY(combined)

X_train_loc, X_test_loc, Y_train_loc, Y_test_loc = train_test_split( X, y, test_size=0.2, random_state=42)

In [13]:
print(X_train_loc.shape)
print(Y_train_loc.shape)
print(X_test_loc.shape)
print(Y_test_loc.shape)

print(X_train_loc)
print(Y_train_loc)
print(X_test_loc)
print(Y_test_loc)

(16838, 520)
(16838, 6)
(4210, 520)
(4210, 6)
       WAP079  WAP168  WAP117  WAP221  WAP482  WAP113  WAP345  WAP313  WAP176  \
16667       0       0       0       0       0       0       0       0       0   
9792        0       0      24       0       0       0       0       0       0   
3753        0       0       0       0       0       0       0       0       0   
7270        0       0       0       0       0       0       0       0       0   
4046        0       0      39       0       0       0       0       0       0   
...       ...     ...     ...     ...     ...     ...     ...     ...     ...   
11284       0       0       0       0       0       0       0       0       0   
11964       0       0      33       0       0       0       0       0       0   
5390        0       0       0       0       0       0       0       0       0   
860         0       0       0       0       0       0       0       0       0   
15795       0       0       0       0       0       0       0  

In [16]:
from sklearn.ensemble import RandomForestRegressor

y_cols = ["LONGITUDE","LATITUDE","FLOOR","BUILDINGID","SPACEID","RELATIVEPOSITION"]
models = []
for i in range(2):
    model = RandomForestRegressor(random_state=1)
    model.fit(X_train_loc, Y_train_loc[y_cols[i]])
    models.append(model)

In [18]:
for i in range(2):    
    yhat = models[i].predict(X_test_loc)    
    print("====================",y_cols[i],"===============")
    print("MAE: ",sm.mean_absolute_error(Y_test_loc[y_cols[i]], yhat))
    print("Score: ",sm.r2_score(Y_test_loc[y_cols[i]], yhat))
    

MAE:  3.0670669854823003
Score:  0.9958232644879249
MAE:  2.495003954066567
Score:  0.9922451701183995


In [19]:
from sklearn.ensemble import RandomForestClassifier

for i in range(2,6):
    global models
    model = RandomForestClassifier()
    model.fit(X_train_loc, Y_train_loc[y_cols[i]])
    models.append(model)

In [20]:
for i in range(2,6):
    yhat = models[i].predict(X_test_loc)
    print("====================",y_cols[i],"===============")  
    print(sm.classification_report(Y_test_loc[y_cols[i]], yhat))
    print(" ")    

              precision    recall  f1-score   support

           0       1.00      0.98      0.99       927
           1       0.99      1.00      1.00      1121
           2       0.99      1.00      1.00       899
           3       0.99      1.00      0.99      1028
           4       1.00      1.00      1.00       235

    accuracy                           0.99      4210
   macro avg       0.99      0.99      0.99      4210
weighted avg       0.99      0.99      0.99      4210

 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1180
           1       0.99      1.00      1.00      1090
           2       1.00      1.00      1.00      1940

    accuracy                           1.00      4210
   macro avg       1.00      1.00      1.00      4210
weighted avg       1.00      1.00      1.00      4210

 
              precision    recall  f1-score   support

           0       0.84      0.73      0.78       214
           1     

In [21]:
# Save models

import pickle

for i in range(len(y_cols)):
    pickle.dump(models[i], open(y_cols[i]+str('.sav'), 'wb'))

In [22]:
### ============ MERGE TEST DATA ============= ###

In [42]:
n = min(y_test_act.shape[0], Y_test_loc.shape[0])
print(n)

2060


In [70]:
X_test_act.reset_index(inplace=True,drop=True)
y_test_act.reset_index(inplace=True,drop=True)
X_test_loc.reset_index(inplace=True,drop=True)
Y_test_loc.reset_index(inplace=True,drop=True)

merged = pd.concat([X_test_act.iloc[:n,:] , y_test_act.iloc[:n]] ,axis = 1)
merged = pd.concat([merged, X_test_loc.iloc[:n,:]] ,axis = 1)
merged = pd.concat([merged,Y_test_loc.iloc[:n,:]] , axis = 1)

print(merged.shape)

(2060, 1088)


In [71]:
merged.to_csv("merged1.csv")