# <center>COMP9336 Mobile Data Networking 2021 Term 2 Project</center>

## <div align="right">Hongxiao Jin (z5241154)</div>

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [2]:
# set label and obtain formatted data
def format_data(df, la):
    df["Time"] = df["Time"].map(lambda x: int(x))
    time = df["Time"].values.tolist()
    rss = df["RSS"].values.tolist()

    interval = 5
    temp, ds_rss = [], []
    for j in range(len(time)):
        if time[j] < interval and len(temp) < 45:
            temp.append(rss[j])
        else:
            ds_rss.append(temp)
            interval += 5
            temp = []

    ds = pd.DataFrame({"RSS": ds_rss, "label": [la] * len(ds_rss)})
    return ds

In [3]:
# read gesture data
df1 = pd.read_excel("../Dataset/pull-push.xlsx")
df2 = pd.read_excel("../Dataset/up-down.xlsx")
df3 = pd.read_excel("../Dataset/wave.xlsx")

In [4]:
# set labels
df1_ds = format_data(df1, "pull-push")
df2_ds = format_data(df2, "up-down")
df3_ds = format_data(df3, "wave")

In [5]:
# concat three excel file to a dataset
dataset = pd.concat([df1_ds, df2_ds, df3_ds], axis=0, ignore_index=True)

In [6]:
dataset

Unnamed: 0,RSS,label
0,"[-56, -55, -58, -57, -57, -56, -57, -56, -56, ...",pull-push
1,"[-57, -57, -57, -57, -57, -57, -58, -58, -58, ...",pull-push
2,"[-55, -55, -56, -56, -56, -56, -57, -56, -58, ...",pull-push
3,"[-57, -56, -56, -56, -56, -57, -57, -56, -57, ...",pull-push
4,"[-59, -59, -59, -59, -59, -59, -58, -57, -57, ...",pull-push
...,...,...
64,"[-55, -54, -54, -54, -54, -54, -54, -54, -55, ...",wave
65,"[-54, -54, -54, -54, -54, -54, -54, -54, -54, ...",wave
66,"[-55, -54, -53, -54, -54, -54, -54, -54, -54, ...",wave
67,"[-55, -55, -55, -54, -54, -54, -53, -53, -54, ...",wave


In [7]:
# obtain x: rss_list and y: label_list
rss_list = dataset["RSS"].values.tolist()
label_list = dataset["label"].values.tolist()

In [8]:
# Standardized data
rss_list = StandardScaler().fit(rss_list).transform(rss_list)

In [9]:
rss_list

array([[-0.40773893,  0.00655798, -1.39732487, ..., -0.47942503,
        -0.49480154, -0.47940708],
       [-0.87663871, -0.89844275, -0.92928887, ..., -0.47942503,
        -0.02061673, -0.47940708],
       [ 0.06116084,  0.00655798, -0.46125287, ..., -1.39832302,
        -1.44317116, -0.95196548],
       ...,
       [ 0.06116084,  0.45905834,  0.94285513, ..., -0.47942503,
        -0.02061673, -0.47940708],
       [ 0.06116084,  0.00655798,  0.00678313, ..., -0.01997604,
        -0.49480154, -0.47940708],
       [ 0.06116084, -0.44594239, -0.46125287, ..., -0.93887403,
        -0.02061673, -0.00684867]])

In [10]:
# Normalization
rss_list = MinMaxScaler().fit_transform(rss_list)

In [11]:
rss_list

array([[0.5       , 0.625     , 0.14285714, ..., 0.42857143, 0.42857143,
        0.5       ],
       [0.375     , 0.375     , 0.28571429, ..., 0.42857143, 0.57142857,
        0.5       ],
       [0.625     , 0.625     , 0.42857143, ..., 0.14285714, 0.14285714,
        0.375     ],
       ...,
       [0.625     , 0.75      , 0.85714286, ..., 0.42857143, 0.57142857,
        0.5       ],
       [0.625     , 0.625     , 0.57142857, ..., 0.57142857, 0.42857143,
        0.5       ],
       [0.625     , 0.5       , 0.42857143, ..., 0.28571429, 0.57142857,
        0.625     ]])

In [12]:
# split train set and test set
x_train, x_test, y_train, y_test = train_test_split(rss_list, label_list, test_size=0.2, shuffle=True, random_state=2)

In [13]:
k_fold = KFold(n_splits=10)
knn = GridSearchCV(KNeighborsClassifier(), param_grid={'n_neighbors': list([3, 10]), 'leaf_size': list([15, 30])},
                       cv=k_fold, scoring="accuracy")
knn.fit(x_train, y_train)

GridSearchCV(cv=KFold(n_splits=10, random_state=None, shuffle=False),
             estimator=KNeighborsClassifier(),
             param_grid={'leaf_size': [15, 30], 'n_neighbors': [3, 10]},
             scoring='accuracy')

In [14]:
knn.best_estimator_

KNeighborsClassifier(leaf_size=15, n_neighbors=3)

In [15]:
# fit data
knn_classification = KNeighborsClassifier(leaf_size=15, n_neighbors=3)
knn_classification.fit(x_train, y_train)

KNeighborsClassifier(leaf_size=15, n_neighbors=3)

In [16]:
# prediction
y_pred = knn_classification.predict(x_test)

In [17]:
confusion_matrix(y_test, y_pred, labels=["pull-push", "up-down", "wave"])

array([[4, 0, 0],
       [0, 7, 1],
       [0, 0, 2]], dtype=int64)

In [18]:
# Performance analysis
accuracy_score(y_test, y_pred)

0.9285714285714286