# Step 1: data engineering

In [None]:
# Load the exported dataset from disk and parse its timestamp column.
import pandas as pd
from datetime import datetime

# format='mixed' makes pandas infer the format of every timestamp individually.
df = pd.read_csv("dataset.csv").assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'], format='mixed')
)
df

In [None]:
# Query the asset API once per device and store the returned records in a
# dictionary keyed by device name.
import requests
import json

# Upper bound (seconds) for each HTTP call. requests applies no timeout by
# default, so without it a stalled server would block the notebook forever.
REQUEST_TIMEOUT_S = 30

# query data from asset API
devices = ['Asset-T0', 'Asset-T1', 'Asset-T2', 'Ton-M5StickC-0', 'Ton-M5StickC-1', 'Ton-M5StickC-2', 'Ton-M5StickC-3']
base_url = 'http://localhost:5000/api/asset/'
raw_data = {}
for device in devices:
    # NOTE(review): 'mins' and 'rssi' look like a time-window / signal-strength
    # filter - confirm against the API documentation.
    url = base_url + device + '?mins=100000&rssi=-100'
    resp = requests.get(url, timeout=REQUEST_TIMEOUT_S)
    # The response body is a JSON object; its 'data' entry holds the records.
    data = json.loads(resp.text)
    raw_data[device] = data['data']
print(raw_data)

# Remove devices with no data. The names are collected first because a dict
# must not change size while it is being iterated.
not_found_devices = [name for name, records in raw_data.items() if len(records) == 0]
for not_found_device in not_found_devices:
    print('Removing ' + not_found_device)
    del raw_data[not_found_device]
print(raw_data)

In [None]:
# Flatten the per-device records into table rows shaped as
# [timestamp, device, station, rssi, label]; every label starts out as 0.
idx = 0
rssi_data = [
    [sample['timestamp'], device_name, sample['station'], sample['rssi'], 0]
    for device_name, samples in raw_data.items()
    for sample in samples
]
print(len(rssi_data), rssi_data)

In [None]:
# Turn the flattened rows into a DataFrame and parse the timestamps.
import pandas as pd
from datetime import datetime

column_names = ['timestamp', 'device', 'station', 'rssi', 'label']
df = pd.DataFrame(rssi_data, columns=column_names)
# format='mixed' makes pandas infer the format of every timestamp individually.
df = df.assign(timestamp=pd.to_datetime(df['timestamp'], format='mixed'))
df.shape

In [None]:
# Average the RSSI per minute for every (station, device) pair that has data.
# Results are stored in df_dicts under the key '<station>+<device>'.
station_list = df['station'].unique()
device_list = df['device'].unique()
df_dicts = {}
for station in station_list:
    station_mask = df['station'] == station
    for device in device_list:
        sub_df = df[station_mask & (df['device'] == device)]
        if not sub_df.empty:
            # The one-minute mean acts as a low-pass filter on the raw readings.
            per_minute = sub_df[['timestamp', 'rssi']].resample('1min', on='timestamp').mean()
            pair_key = station + '+' + device
            # Minutes without any reading average to NaN and are dropped.
            df_dicts[pair_key] = per_minute.dropna().reset_index()
print(df_dicts)

In [None]:
# Export the per-minute data to a CSV file with columns
# station, device, timestamp, rssi, label.
import csv
import random

# Each row gets a label drawn uniformly from {0, 1, 2} (presumably a
# placeholder - confirm). A dedicated, seeded generator makes every run write
# the same file and leaves the global `random` state untouched.
LABEL_SEED = 42
label_rng = random.Random(LABEL_SEED)

with open('dataset.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['station', 'device', 'timestamp', 'rssi', 'label'])
    for key, frame in df_dicts.items():
        # Keys have the form '<station>+<device>'; splitting once keeps any
        # further '+' inside the device name intact.
        station, device = key.split('+', 1)
        # Iterating the two columns directly avoids building a Series per row.
        for timestamp, rssi in zip(frame['timestamp'], frame['rssi']):
            writer.writerow([station, device, timestamp, rssi, label_rng.choice([0, 1, 2])])

# Step 2: ML engineering

In [None]:
import csv

# Read every data row of dataset.csv into label_data; each row is a list of
# strings and the header row is left out.
with open('dataset.csv', mode='r') as file:
    reader = csv.reader(file)
    next(reader, None)  # skip the header; the default avoids an error on an empty file
    label_data = list(reader)
print(label_data)

In [None]:
# Prepare the data in ML training format: one row per position with the RSSI
# seen by each of the three stations plus a label.
import pandas as pd

feature_df = pd.DataFrame(label_data, columns=["station", "device", "timestamp", "rssi", "label"])
# Cast the numeric columns explicitly before using them as features / target.
feature_df = feature_df.astype({"label": int, "rssi": float})

feature_station1_df = feature_df[feature_df["station"] == "station1"].reset_index(drop=True)
feature_station2_df = feature_df[feature_df["station"] == "station2"].reset_index(drop=True)
feature_station3_df = feature_df[feature_df["station"] == "station3"].reset_index(drop=True)

# The stations may have different row counts; only positions present in all
# three can form a complete training row.
data_len = min(feature_station1_df.shape[0], feature_station2_df.shape[0], feature_station3_df.shape[0])

# Rows are paired purely by position, and every station column must be read
# from its own frame (station2 from feature_station2_df, not station1).
# NOTE(review): the label comes from the first `data_len` rows of the
# unfiltered feature_df - confirm that this is the intended label source.
train_df = pd.DataFrame({
    "station1": feature_station1_df["rssi"].iloc[:data_len].to_numpy(),
    "station2": feature_station2_df["rssi"].iloc[:data_len].to_numpy(),
    "station3": feature_station3_df["rssi"].iloc[:data_len].to_numpy(),
    "label": feature_df["label"].iloc[:data_len].to_numpy(),
})
train_df

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Features are the three station RSSI columns; the target is the label column.
x = train_df[["station1", "station2", "station3"]]
y = train_df["label"]

# A fixed random_state makes repeated runs build the same tree; without it
# scikit-learn may resolve equally good splits differently on each run.
model = DecisionTreeClassifier(max_depth=8, random_state=42)
model.fit(x, y)

# NOTE: predictions are made on the training rows themselves, so the report
# describes how well the tree fits this data, not how well it generalizes.
pred = model.predict(x)
report = classification_report(y, pred)
print(report)

## Assignment

In [None]:
# Load dataset.csv into a DataFrame and parse its timestamp column.
import pandas as pd
from datetime import datetime

# format='mixed' makes pandas infer the format of every timestamp individually.
df = pd.read_csv("dataset.csv").assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'], format='mixed')
)
df.head()

In [None]:
# Per station: the one-minute mean RSSI over all rows of that station,
# indexed by timestamp.
station_dfs = {
    station: (
        df.loc[df["station"] == station, ["timestamp", "rssi"]]
        .resample("1min", on="timestamp")
        .mean()
    )
    for station in df["station"].unique()
}

# Put the stations side by side (one column each), keep only the minutes for
# which every station has a value, and add a label column initialised to 0.
station_df = pd.concat(list(station_dfs.values()), axis=1)
station_df.columns = list(station_dfs)
station_df = station_df.dropna().assign(label=0)
station_df.head()

In [None]:
def label_create(x):
    """Return the index label at which `x` holds its largest value.

    `x` is expected to offer an `idxmax()` method, as a pandas Series does.
    """
    max_label = x.idxmax()
    return max_label

# Numeric class assigned to each station column.
column_mapping = dict(station1=0, station2=1, station3=2)

# For every row, find the station column with the highest RSSI and store the
# numeric class of that station as the label.
strongest_station = station_df[list(column_mapping)].apply(label_create, axis=1)
station_df["label"] = strongest_station.map(column_mapping)
print(station_df)

In [None]:
# Persist the labelled table; the index is written as the first CSV column.
station_df.to_csv("station_df.csv", index=True)

In [None]:
import csv

# Reload the exported table as raw string rows, leaving out the header row.
with open("station_df.csv", mode="r") as file:
    reader = csv.reader(file)
    next(reader, None)  # skip the header; the default avoids an error on an empty file
    label_data = list(reader)

# Name the columns explicitly and convert the text fields to numeric types;
# the timestamp column stays as text.
train_df = pd.DataFrame(
    label_data,
    columns=["timestamp", "station1", "station2", "station3", "label"],
).astype({"station1": float, "station2": float, "station3": float, "label": int})

In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Features are the three station RSSI columns; the target is the label column.
feature_columns = ["station1", "station2", "station3"]
x = train_df[feature_columns]
y = train_df["label"]

# A fixed random_state makes repeated runs build the same tree.
model = DecisionTreeClassifier(max_depth=8, random_state=42)
model.fit(x, y)

# The model was fitted on a DataFrame, so the query carries the same column
# names; a bare list would make scikit-learn warn about missing feature names.
sample = pd.DataFrame([[-60, -80, -90]], columns=feature_columns)
pred = model.predict(sample)

# predict() returns one entry per sample. Comparing against the scalar for the
# single sample avoids relying on the truth value of an array comparison.
predicted_label = pred[0]
result = [key for key, val in column_mapping.items() if val == predicted_label]
result