# Step 1: data engineering

In [None]:
# query data from the API and store it in a dictionary
import requests
import json

# query data from asset API
devices = ['Asset-0', 'Asset-1', 'Asset-2', 'Asset-3', 'Asset-A', 'Asset-B', 'Asset-C']
base_url = 'http://localhost:5000/api/asset/'
raw_data = {}
for device in devices:
    url = base_url + device + '?mins=100000&rssi=-100'
    resp = requests.get(url)
    data = json.loads(resp.text)
    raw_data[device] = data['data']
print(raw_data)
# Remove devices with no data
not_found_devices = []
for device in raw_data.keys():
    if len(raw_data[device]) == 0:
        not_found_devices.append(device)
for not_found_device in not_found_devices:
    print('Removing ' + not_found_device)
    del raw_data[not_found_device]
print(raw_data)

In [25]:
# re-organize the data into table format
rssi_data = []
idx = 0
for device in raw_data.keys():
    for data in raw_data[device]:
        record = []
        record.append(data['timestamp'])
        record.append(device)
        record.append(data['station'])
        record.append(data['rssi'])
        record.append(0)
        rssi_data.append(record)
print(len(rssi_data), rssi_data)

1076 [['2025-03-17T08:48:32.791000', 'Asset-0', 'ESP32', -55, 0], ['2025-03-17T08:48:14.729000', 'Asset-0', 'ESP32', -57, 0], ['2025-03-17T08:47:53.523000', 'Asset-0', 'ESP32', -65, 0], ['2025-03-17T08:43:19.361000', 'Asset-0', 'esp32', -77, 0], ['2025-03-17T08:43:16.258000', 'Asset-0', 'esp32', -76, 0], ['2025-03-17T08:43:13.322000', 'Asset-0', 'esp32', -72, 0], ['2025-03-17T08:42:55.093000', 'Asset-0', 'esp32', -77, 0], ['2025-03-17T08:42:43.079000', 'Asset-0', 'esp32', -67, 0], ['2025-03-17T08:42:40.043000', 'Asset-0', 'esp32', -69, 0], ['2025-03-17T08:42:27.964000', 'Asset-0', 'esp32', -67, 0], ['2025-03-17T08:42:18.885000', 'Asset-0', 'esp32', -67, 0], ['2025-03-17T08:42:12.905000', 'Asset-0', 'esp32', -67, 0], ['2025-03-17T08:42:09.844000', 'Asset-0', 'esp32', -75, 0], ['2025-03-17T08:42:03.808000', 'Asset-0', 'esp32', -75, 0], ['2025-03-17T08:41:54.774000', 'Asset-0', 'esp32', -68, 0], ['2025-03-17T08:41:36.605000', 'Asset-0', 'esp32', -69, 0], ['2025-03-17T08:41:27.545000', 'As

In [32]:
# import the data into a pandas dataframe
import pandas as pd
from datetime import datetime
df = pd.read_csv("dataset.csv")
df['timestamp'] = pd.to_datetime(df['timestamp'], format='mixed')
df

Unnamed: 0,timestamp,station,device,rssi
0,2025-03-17 20:38:21.936000+00:00,station1,Ton-M5StickC-0,-63
1,2025-03-17 20:38:22.962000+00:00,station2,Ton-M5StickC-0,-76
2,2025-03-17 20:38:24.950000+00:00,station1,Ton-M5StickC-0,-73
3,2025-03-17 20:38:26.044000+00:00,station2,Ton-M5StickC-0,-77
4,2025-03-17 20:38:28.021000+00:00,station1,Ton-M5StickC-0,-54
...,...,...,...,...
7843,2025-03-19 19:17:02.107000+00:00,station1,Asset-T1,-47
7844,2025-03-19 19:17:04.660000+00:00,station2,Asset-T1,-82
7845,2025-03-19 19:17:07.627000+00:00,station2,Asset-T1,-81
7846,2025-03-19 19:17:13.798000+00:00,station2,Asset-T1,-85


In [33]:
# group data by timestamp
station_list = df['station'].unique()
device_list = df['device'].unique()
df_dicts = {}
for station in station_list:
    for device in device_list:
        sub_df = df[(df['station'] == station) & (df['device'] == device)]
        if sub_df.empty:
            continue
        rssi_values = sub_df[['timestamp','rssi']].resample('1min', on='timestamp').mean()      # Mean -> Low Pass Filter
        rssi_values = rssi_values.dropna().reset_index()
        idx = station + '+' + device
        df_dicts[idx] = rssi_values
print(df_dicts)

{'station1+Ton-M5StickC-0':                    timestamp       rssi
0  2025-03-17 20:38:00+00:00 -61.222222
1  2025-03-17 20:39:00+00:00 -68.400000
2  2025-03-17 20:40:00+00:00 -62.000000
3  2025-03-17 20:43:00+00:00 -52.500000
4  2025-03-17 20:45:00+00:00 -52.500000
5  2025-03-17 20:46:00+00:00 -65.000000
6  2025-03-17 20:47:00+00:00 -77.000000
7  2025-03-17 20:50:00+00:00 -54.333333
8  2025-03-17 20:51:00+00:00 -67.090909
9  2025-03-17 20:52:00+00:00 -61.666667
10 2025-03-17 20:56:00+00:00 -56.000000
11 2025-03-17 20:57:00+00:00 -65.636364
12 2025-03-17 21:01:00+00:00 -50.000000
13 2025-03-17 21:02:00+00:00 -68.000000
14 2025-03-17 21:03:00+00:00 -74.000000, 'station1+Ton-M5StickC-1':                    timestamp       rssi
0  2025-03-17 20:40:00+00:00 -57.888889
1  2025-03-17 20:41:00+00:00 -70.000000
2  2025-03-17 20:47:00+00:00 -58.300000
3  2025-03-17 20:48:00+00:00 -69.250000
4  2025-03-17 20:52:00+00:00 -52.166667
5  2025-03-17 20:53:00+00:00 -69.916667
6  2025-03-17 20:54:00+0

In [34]:
# export the data to a CSV file with columns station, device, timestamp, rssi
import csv
import random

with open('dataset.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['station', 'device', 'timestamp', 'rssi', 'label'])
    for key in df_dicts.keys():
        station, device = key.split('+')
        for idx, row in df_dicts[key].iterrows():
            writer.writerow([station, device, row['timestamp'], row['rssi'], random.choice([0, 1, 2])])

# Step 2: ML engineering

In [35]:
import csv

# import label data from CSV file
label_data = []
with open('dataset.csv', mode='r') as file:
    reader = csv.reader(file)
    for idx, row in enumerate(reader):
        if idx == 0:
            continue
        label_data.append(row)
print(label_data)

[['station1', 'Ton-M5StickC-0', '2025-03-17 20:38:00+00:00', '-61.22222222222222', '2'], ['station1', 'Ton-M5StickC-0', '2025-03-17 20:39:00+00:00', '-68.4', '2'], ['station1', 'Ton-M5StickC-0', '2025-03-17 20:40:00+00:00', '-62.0', '2'], ['station1', 'Ton-M5StickC-0', '2025-03-17 20:43:00+00:00', '-52.5', '1'], ['station1', 'Ton-M5StickC-0', '2025-03-17 20:45:00+00:00', '-52.5', '0'], ['station1', 'Ton-M5StickC-0', '2025-03-17 20:46:00+00:00', '-65.0', '1'], ['station1', 'Ton-M5StickC-0', '2025-03-17 20:47:00+00:00', '-77.0', '2'], ['station1', 'Ton-M5StickC-0', '2025-03-17 20:50:00+00:00', '-54.333333333333336', '2'], ['station1', 'Ton-M5StickC-0', '2025-03-17 20:51:00+00:00', '-67.0909090909091', '0'], ['station1', 'Ton-M5StickC-0', '2025-03-17 20:52:00+00:00', '-61.666666666666664', '0'], ['station1', 'Ton-M5StickC-0', '2025-03-17 20:56:00+00:00', '-56.0', '1'], ['station1', 'Ton-M5StickC-0', '2025-03-17 20:57:00+00:00', '-65.63636363636364', '1'], ['station1', 'Ton-M5StickC-0', '2

In [36]:
# prepare data into ML training format
import pandas as pd

feature_df = pd.DataFrame(label_data, columns=["station", "device", "timestamp", "rssi", "label"])
feature_df["label"] = feature_df["label"].astype(int)
feature_df["rssi"] = feature_df["rssi"].astype(float)
feature_station1_df = feature_df[feature_df["station"] == "station1"].reset_index(drop=True)
feature_station2_df = feature_df[feature_df["station"] == "station2"].reset_index(drop=True)
feature_station3_df = feature_df[feature_df["station"] == "station3"].reset_index(drop=True)
data_len = min(feature_station1_df.shape[0], feature_station2_df.shape[0], feature_station3_df.shape[0])
rows = []
for index in range(data_len):
    row = [feature_station1_df.loc[index, "rssi"], feature_station1_df.loc[index, "rssi"], feature_station3_df.loc[index, "rssi"], feature_df.loc[index, "label"]]
    rows.append(row)

train_df = pd.DataFrame(rows, columns=["station1", "station2", "station3", "label"])
train_df

Unnamed: 0,station1,station2,station3,label
0,-61.222222,-61.222222,-88.111111,2
1,-68.400000,-68.400000,-77.200000,2
2,-62.000000,-62.000000,-81.000000,2
3,-52.500000,-52.500000,-87.000000,1
4,-52.500000,-52.500000,-82.500000,0
...,...,...,...,...
153,-66.800000,-66.800000,-83.937500,0
154,-67.400000,-67.400000,-83.894737,1
155,-65.666667,-65.666667,-83.555556,0
156,-68.000000,-68.000000,-84.058824,1


In [43]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

x = train_df[["station1", "station2", "station3"]]
y = train_df["label"]

model = DecisionTreeClassifier(max_depth=8)
model.fit(x, y)
pred = model.predict(x)
report = classification_report(y, pred)
print(report)

              precision    recall  f1-score   support

           0       0.80      0.77      0.79        48
           1       0.71      0.92      0.80        60
           2       0.94      0.64      0.76        50

    accuracy                           0.78       158
   macro avg       0.82      0.78      0.78       158
weighted avg       0.81      0.78      0.78       158

