## Libraries and Helper functions

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import json
import pandas as pd
import os

def converter(dBm): #helper function to interpret RSSI values
    return 10**((dBm+90)/10)


In [2]:
#---------------------------Data Analysis--------------------------------------
#1. Get all the labels (locations)
#data file name is in structure of "label_attempt.json" (e.g "0,0_1.json"), each file contains a list of dictionaries {"MAC":mac, "RSSI":rssi}
base_path = "data"
labels_set = set()
#iterate through all the files in base_path and get all the labels first
for filename in os.listdir(base_path):
    label = filename.split("_")[0]
    labels_set.add(label)
labels = list(labels_set)
print(labels)
print("Number of labels: ", len(labels))

#2. for each location, get a mac list (set)
mac_set = set()
for label in labels:

    #initial set
    set_a = set()
    with open(os.path.join(base_path, label+"_0_0.json"),'r') as f: #file location
        data = json.load(f)
        for d in data:
            d = json.loads(d)
            set_a.add(d['MAC'])

    #for all files that start with label
    for filename in os.listdir(base_path):      
        if filename.startswith(label): #if the file starts with the label
            set_b = set()
            with open(os.path.join(base_path, filename), 'r') as f:
                data = json.load(f)
                for d in data:
                    d = json.loads(d)
                    set_b.add(d['MAC'])
            set_a = set_a.intersection(set_b)
    mac_set = mac_set.union(set_a)
mac_list = list(mac_set)
print("Number of mac addresses: ", len(mac_list))

#3. Now read data and put it into a dataframe
data_list = []
for label in labels:
    #first check the label is valid  
    for i in range(100):
        filename = label+"_0_"+str(i)+".json"
        try:
            with open(os.path.join(base_path, filename), 'r') as f:
                vector = np.full(len(mac_list),-100)
                data = json.load(f)
                for d in data:
                    d = json.loads(d) #for each mac entry
                    if d['MAC'] in mac_list:
                        idx = mac_list.index(d['MAC'])
                        vector[idx] = d['RSSI']
                data_list.append((label[0],vector))
        except:
            continue

#convert to dataframe
df = pd.DataFrame(data_list, columns=['label','vector'])


['5', '3', '0', '6', '7', '4', '2', '1']
Number of labels:  8
Number of mac addresses:  89


In [3]:
df

Unnamed: 0,label,vector
0,5,"[-100, -100, -51, -62, -100, -100, -100, -100,..."
1,5,"[-100, -100, -46, -58, -100, -100, -100, -100,..."
2,5,"[-100, -100, -43, -58, -100, -100, -100, -100,..."
3,5,"[-100, -100, -44, -61, -93, -100, -100, -100, ..."
4,5,"[-100, -100, -45, -61, -92, -100, -100, -100, ..."
...,...,...
695,1,"[-100, -77, -72, -86, -81, -100, -100, -79, -9..."
696,1,"[-100, -77, -75, -87, -80, -100, -100, -77, -9..."
697,1,"[-100, -77, -76, -86, -80, -100, -100, -80, -9..."
698,1,"[-100, -76, -73, -87, -78, -100, -100, -82, -9..."


## Data Visualization

In [4]:
#---------------------------Data Visualization--------------------------------------
#vector is len(mac_set) long, which is a large dimension. So let's use auto encoder to reduce the dimension and visualize the data
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

#standardize the data
scaler = StandardScaler()
vec = df["vector"].values.tolist()
scaled_data = scaler.fit_transform(vec)

#PCA
pca = PCA(n_components=2)
pca_data = pca.fit_transform(scaled_data)

#plot 3d
x = pca_data[:,0]
y = pca_data[:,1]

#import plotly
import plotly.graph_objects as go
import plotly.express as px

fig = px.scatter(x=x, y=y, color=df["label"])
#adjust figure size
fig.update_layout(
    autosize=False,
    width=800,
    height=800,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
)
fig.show()

## Classifier

In [9]:
#---------------------------Model Training------------...everything--------------------------
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier


classifier = KNeighborsClassifier(n_neighbors=len(labels))

#split data
X = df["vector"].values.tolist()
y = df["label"].values.tolist()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)

#train model
classifier.fit(X_train, y_train)

#predict
pred = classifier.predict(X_test)
#calculate accuracy
acc = accuracy_score(y_test, pred)
print("Accuracy: ", acc)

Accuracy:  1.0


In [6]:
#------------------------------------model validation------------------------------------
#10 samples from X_test, y_test
sample_idx =  np.random.choice(len(X_test), 10, replace=False)
X_sample = [X_test[i] for i in sample_idx]
y_sample = [y_test[i] for i in sample_idx]

#predict
pred = classifier.predict(X_sample)

#plot both ground truth and predict with color coding
x = []
y = []
x2 = []
y2 = []
for i in range(len(pred)):
    x.append(y_sample[i][0])
    y.append(y_sample[i][1])
    x2.append(pred[i][0])
    y2.append(pred[i][1])
#scatter plot, plot each point with different color
colors = [plt.cm.viridis(i) for i in np.linspace(0, 1, len(x))]
plt.scatter(x, y, c=colors)
colors = [plt.cm.viridis(i) for i in np.linspace(0, 1, len(x2))]
plt.scatter(x2, y2, c=colors)

for i, txt in enumerate(y_sample):
    plt.text(x[i], y[i], str(i), fontsize=8)

for i, txt in enumerate(pred):
    plt.text(x2[i], y2[i], str(i), fontsize=8)

plt.show()


IndexError: string index out of range

In [45]:
from client import Client

test_subject = Client("Tester")

#evaluation with separate evaluation data
eval_path = "test_data"
filenames = os.listdir(eval_path)
filenames.sort()
for i in range(16):
    try:
        filename = f"test0_{i}_0.json"
        with open(os.path.join(eval_path, filename), 'r') as f:
            data = json.load(f)
            vector = np.full(len(mac_list),-100)
            #creating vector...
            for d in data:
                d = json.loads(d)
                if d['MAC'] in mac_list:
                    idx = mac_list.index(d['MAC'])
                    vector[idx] = d['RSSI']
                    test_subject.put_vector(vector)
    except:
        print("File not found: ",filename)
        continue

path = []
for i in range(len(test_subject)):
    vec = test_subject.get_vector(i)
    pred = regressor.predict([vec])
    path.append(pred[0])

#use plotly to plot the path
x = []
y = []
for p in path:
    x.append(p[0])
    y.append(p[1])

fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=y, mode='lines+markers'))
fig.show()


File not found:  test0_10_0.json
