## Pre-processing code
This code converts the original Aruba dataset file into a CSV file. The result is saved as pre_processed_data.csv.

In [2]:
import re
from datetime import datetime
import time
import csv
import numpy as np
import pandas as pd

In [13]:
# Convert the raw, whitespace-separated Aruba event log into a CSV file.
#
# Each non-blank input line is expected to look like
#     <date> <time> <device id> <device status> [<activity> [<activity status>]]
# and becomes one CSV row: Date, Time, Device ID, Status, Activity,
# Activity Status (the last two are empty strings when absent).

# Possible activities
activities = ["Meal_Preparation", "Relax", "Eating", "Work", "Sleeping", "Wash_Dishes", "Bed_to_Toilet", "Enter_Home", "Leave_Home", "Housekeeping", "Respirate"]


def _format_timestamp(date_str, time_str):
    """Turn "YYYY-MM-DD" and "HH:MM:SS[.ffffff]" into two integers.

    The date becomes YYYYMMDD. The time becomes HHMMSS followed by the
    fractional-second digits. The fraction is right-padded with zeros to
    six digits so that a timestamp with a missing or short fraction gets
    the same magnitude as one with a full six-digit fraction (slicing at
    fixed offsets would turn "12:00:00" into 120000 but
    "12:00:00.000001" into 120000000001).
    """
    formatted_date = int(date_str.replace("-", ""))
    clock, _, fraction = time_str.partition(".")
    formatted_time = int(clock.replace(":", "") + fraction.ljust(6, "0"))
    return formatted_date, formatted_time


# Create an empty list to store the processed data
processed_data = []

# Stream the input file line by line instead of loading it all at once
with open("Raw Data/Aruba_17/data", "r") as f:
    for line_number, line in enumerate(f, start=1):
        # str.split() with no argument splits on any run of whitespace
        components = line.split()

        if not components:
            # Blank line (e.g. a trailing newline at end of file): nothing to record
            continue
        if len(components) < 4:
            raise ValueError(
                f"Line {line_number}: expected at least 4 fields, got {len(components)}: {line!r}"
            )

        # Named date_str/time_str so the imported `time` module is not shadowed
        date_str, time_str, device_id, device_status = components[:4]

        # Activity annotation is optional; default to empty strings per row so a
        # value from a previous line can never leak into this one
        activity = components[4] if len(components) > 4 else ""
        activity_status = components[5] if len(components) > 5 else ""

        formatted_date, formatted_time = _format_timestamp(date_str, time_str)

        # For "M*" devices, collapse any status that merely starts with ON/OFF
        # to exactly "ON"/"OFF" (presumably to absorb trailing junk characters
        # in the raw log -- TODO confirm against the dataset)
        if device_id.startswith("M"):
            if device_status.startswith("ON"):
                device_status = "ON"
            elif device_status.startswith("OFF"):
                device_status = "OFF"

        # Append the processed data to the list
        processed_data.append([formatted_date, formatted_time, device_id, device_status, activity, activity_status])

# Write the processed data to a new file
with open("Processed Data/Aruba_17/pre_processed_data.csv", "w", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["Date","Time", "Device ID", "Status", "Activity", "Activity Status"])
    writer.writerows(processed_data)

## Model Processing Code
The pre-processed data is stored in a form that the model cannot use, because the model needs numerical data. This code converts the previous file into a CSV file that the model can be trained on. The result is saved as processed_data.csv.

In [5]:
from sklearn.preprocessing import LabelEncoder


def _class_to_code(encoder):
    """Return a {original label: integer code} dict for a fitted encoder."""
    labels = encoder.classes_
    return dict(zip(labels, encoder.transform(labels)))


# Load the pre-processed CSV produced by the previous step
data = pd.read_csv("Processed Data/Aruba_17/pre_processed_data.csv")

# Label-encode these columns: Device ID, Status, Activity, Activity Status.
# Date and Time are already numeric and are left untouched.
# One encoder per column, each fitted on that column's values.
device_id_encoder = LabelEncoder().fit(data['Device ID'])
status_encoder = LabelEncoder().fit(data['Status'])
activity_encoder = LabelEncoder().fit(data['Activity'])
activity_status_encoder = LabelEncoder().fit(data['Activity Status'])

# Keep the label -> code lookup tables so the encoding can be inspected
# and later reversed
device_id_mapping = _class_to_code(device_id_encoder)
status_mapping = _class_to_code(status_encoder)
activity_mapping = _class_to_code(activity_encoder)
activity_status_mapping = _class_to_code(activity_status_encoder)

# Replace each categorical column with its integer codes
for column, encoder in (
    ('Device ID', device_id_encoder),
    ('Status', status_encoder),
    ('Activity', activity_encoder),
    ('Activity Status', activity_status_encoder),
):
    data[column] = encoder.transform(data[column])

data.to_csv('Processed Data/Aruba_17/processed_data.csv', index=False)

# Show the mappings
for title, lookup in (
    ("Device ID Mapping:", device_id_mapping),
    ("Status Mapping:", status_mapping),
    ("Activity Mapping:", activity_mapping),
    ("Activity Status Mapping:", activity_status_mapping),
):
    print(title, lookup)

Device ID Mapping: {'D001': 0, 'D002': 1, 'D004': 2, 'M001': 3, 'M002': 4, 'M003': 5, 'M004': 6, 'M005': 7, 'M006': 8, 'M007': 9, 'M008': 10, 'M009': 11, 'M010': 12, 'M011': 13, 'M012': 14, 'M013': 15, 'M014': 16, 'M015': 17, 'M016': 18, 'M017': 19, 'M018': 20, 'M019': 21, 'M020': 22, 'M021': 23, 'M022': 24, 'M023': 25, 'M024': 26, 'M025': 27, 'M026': 28, 'M027': 29, 'M028': 30, 'M029': 31, 'M030': 32, 'M031': 33, 'T001': 34, 'T002': 35, 'T003': 36, 'T004': 37, 'T005': 38}
Status Mapping: {'16': 0, '16.5': 1, '17': 2, '17.5': 3, '18': 4, '18.5': 5, '19': 6, '19.5': 7, '20': 8, '20.5': 9, '21': 10, '21.5': 11, '22': 12, '22.5': 13, '23': 14, '23.5': 15, '24': 16, '24.5': 17, '25': 18, '25.5': 19, '26': 20, '26.5': 21, '27': 22, '27.5': 23, '28': 24, '28.5': 25, '29': 26, '29.5': 27, '30': 28, '30.5': 29, '31': 30, '31.5': 31, '32': 32, '32.5': 33, '33': 34, '33.5': 35, '34': 36, '34.5': 37, '35': 38, '35.5': 39, '36': 40, '36.5': 41, '37': 42, '37.5': 43, '38': 44, '38.5': 45, '39': 46,

## Model Post-processing Code
This code imports the prediction data from the model and then converts the label-encoded data back into the original labels. The result is saved as COMPLETE_PREDICTION.csv

In [12]:
def round_and_inverse_transform(value, mapping, encoder):
    """Decode one predicted numeric value back to its original label.

    The value is rounded with the built-in ``round``, clamped into the
    range ``0 .. len(mapping) - 1``, and then passed to
    ``encoder.inverse_transform``.

    Args:
        value: Numeric prediction to decode.
        mapping: Label-to-code lookup; only its length is used, to find
            the highest valid code.
        encoder: Object exposing ``inverse_transform(list_of_codes)``.

    Returns:
        The first element of ``encoder.inverse_transform([code])``.
    """
    highest_code = len(mapping) - 1
    code = round(value)
    # Clamp from below first, then from above (same order as min(max(...)))
    if code < 0:
        code = 0
    if code > highest_code:
        code = highest_code
    decoded = encoder.inverse_transform([code])
    return decoded[0]

# Load the model's raw predictions and give the columns their usual names
data = pd.read_csv("Predictions/Aruba_17_prediction_OLD.txt")
data.columns = ['Date', 'Time', 'Device ID', 'Status', 'Activity', 'Activity Status']
print(data.head())

# For every label-encoded column: the (mapping, encoder) pair used to decode it
decoders = {
    'Device ID': (device_id_mapping, device_id_encoder),
    'Status': (status_mapping, status_encoder),
    'Activity': (activity_mapping, activity_encoder),
    'Activity Status': (activity_status_mapping, activity_status_encoder),
}

# Round/clamp each predicted value and turn it back into its original label
for column, decode_args in decoders.items():
    data[column] = data[column].apply(round_and_inverse_transform, args=decode_args)

# save the data to a new file
data.to_csv('Predictions/Aruba_17_completed_prediction_test.txt', index=False)


         Date          Time  Device ID  Status  Activity  Activity Status
0  20101690.0  3.656886e+10       19.0    48.0      12.0              2.0
1  20101512.0  3.318584e+10       19.0    48.0      12.0              2.0
2  20101472.0  3.691396e+10       17.0    48.0      11.0              2.0
3  20101430.0  2.712579e+10       17.0    50.0      12.0              2.0
4  20101244.0  3.607466e+10       16.0    50.0      11.0              2.0
