In [3]:
import sys
#sys.path.append('')
#print(sys.path)

from AretasPythonAPI.api_config import *
from AretasPythonAPI.sensor_data_query import *
from AretasPythonAPI.auth import *
from AretasPythonAPI.aretas_client import *
from AretasPythonAPI.data_classifier import DataClassifierCRUD
from AretasPythonAPI.labelled_data_query import LabelledDataQuery
from AretasPythonAPI.sensor_type_info import *
from AretasPythonAPI.utils import Utils as AUtils

import pandas as pd
from datetime import datetime
import logging
import math


C:\Users\aretas\Documents\GitHub\CANBusMiddleware


In [4]:
config = APIConfig()
auth = APIAuth(config)
client = APIClient(auth)

# we will almost always need the sensortypeinfo class
sensor_type_info = APISensorTypeInfo(auth)

# even if we don't need it right away, it's good practice to fetch the client location view
client_location_view = client.get_client_location_view()

data_classifier_crud = DataClassifierCRUD(auth)
labelled_data_query = LabelledDataQuery(auth)

data_classifiers = data_classifier_crud.list()

for data_classifier in data_classifiers:
    print("Description: {0} Label: {1} Id: {2}".format(
        data_classifier['description'],
        data_classifier['label'],
        data_classifier['id']))

Description: Amperage Testing Label: amptest Id: d19359107b474399bb4c9a9ff61ef3dd
Description: Discharge Label: discharge Id: d8f903be53f546ad95d80d58c4f9759d
Description: Charge Label: charge Id: f1714cd045a14ab6b3ae01d32c2488df


In [12]:
labelled_data_amp_test = labelled_data_query.get_labelled_data("d19359107b474399bb4c9a9ff61ef3dd")

print(len(labelled_data_amp_test))

print(labelled_data_amp_test[:20])

1299
[{'key': 1701984714689, 'value': {'514': 46.56, '515': 63.8, '516': -1.5, '518': 372.0, '550': 13.59, '520': 17.0, '521': 18.0, '522': 16.0, '523': 17.0, '525': 100.0}}, {'key': 1701984734036, 'value': {'514': 46.56, '515': 63.8, '516': -1.5, '518': 372.5, '550': 13.56, '520': 17.0, '521': 17.0, '522': 16.0, '523': 17.0, '525': 100.0}}, {'key': 1701984749119, 'value': {'514': 46.56, '515': 63.8, '516': -1.5, '518': 372.0, '550': 13.56, '520': 17.0, '521': 17.0, '522': 16.0, '523': 17.0, '525': 100.0}}, {'key': 1701984773185, 'value': {'514': 46.56, '515': 63.8, '516': -1.5, '518': 372.0, '550': 13.53, '520': 17.0, '521': 17.0, '522': 16.0, '523': 17.0, '525': 100.0}}, {'key': 1701984792435, 'value': {'514': 46.56, '515': 63.8, '516': -2.0, '518': 372.0, '550': 13.56, '520': 17.0, '521': 17.0, '522': 16.0, '523': 17.0, '525': 100.0}}, {'key': 1701984814412, 'value': {'514': 46.56, '515': 63.8, '516': -2.0, '518': 372.0, '550': 13.55, '520': 17.0, '521': 17.0, '522': 16.0, '523': 17

In [6]:
def reshape_dataset_clean(dataset: dict, can_cols: list):
    """
    We are building a list of rows for pandas, however, we need to account for missing values
    As such, we should either discard the datum or let pandas/numpy fill it in for us 
    the canonical definition of 'what columns belong in the rows' should be defined beforehand
    """
    count = 0
    data = []
    for datum in dataset:
        row = []
        timestamp = datum['key']
        row.append(int(timestamp))
        for sensor_type in can_cols:
            data_dict_keyset = [int(i) for i in datum['value'].keys()]
            if sensor_type in data_dict_keyset:
                data_value = float(datum['value'][str(sensor_type)])
            else:
                data_value = float("NaN")

            count += 1
            row.append(data_value)
            
        data.append(row)

    return data

def get_columns(dataset: dict) -> list:
    """
    get the distinct columns from the dataset to use for indexing
    """
    cols = set[int]()
    for datum in dataset:
        dict_data = datum['value']
        sensor_types = [int(i) for i in dict_data.keys()]
        for k in sensor_types:
            cols.add(k)

    return sorted(cols)


sensor_types_set = get_columns(labelled_data_amp_test)
print(sensor_types_set)

data_amperage_reshaped = reshape_dataset_clean(labelled_data_amp_test, sensor_types_set)
print(np.array(data_amperage_reshaped).shape)


[514, 515, 516, 518, 520, 521, 522, 523, 525, 550]
(1299, 11)


In [9]:
# get the labels from the sensor metadata service
columns = sensor_type_info.get_labels(sensor_types_set)

# prepend the timestamp column label
columns.insert(0, 'timestamp')
print(columns)

numeric_columns = list(sensor_types_set)
numeric_columns = [str(i) for i in numeric_columns]
numeric_columns.insert(0, 'timestamp')
print(numeric_columns)

['timestamp', 'HV Bat State of Charge %', 'HV Bat Capacity Ah', 'HV Bat Current A', 'HV Bat Voltage V', 'HV Bat Temp °C', 'HV Bat Temp 2 °C', 'HV Bat Temp 3 °C', 'HV Bat Temp 4 °C', 'HV Bat SoH %', 'LV Supply V']
['timestamp', '514', '515', '516', '518', '520', '521', '522', '523', '525', '550']


In [11]:
df_amperage = pd.DataFrame((data_amperage_reshaped), columns=numeric_columns, dtype='float32')

df_amperage.head(10)

Unnamed: 0,timestamp,514,515,516,518,520,521,522,523,525,550
0,1701985000000.0,46.560001,63.799999,-1.5,372.0,17.0,18.0,16.0,17.0,100.0,13.59
1,1701985000000.0,46.560001,63.799999,-1.5,372.5,17.0,17.0,16.0,17.0,100.0,13.56
2,1701985000000.0,46.560001,63.799999,-1.5,372.0,17.0,17.0,16.0,17.0,100.0,13.56
3,1701985000000.0,46.560001,63.799999,-1.5,372.0,17.0,17.0,16.0,17.0,100.0,13.53
4,1701985000000.0,46.560001,63.799999,-2.0,372.0,17.0,17.0,16.0,17.0,100.0,13.56
5,1701985000000.0,46.560001,63.799999,-2.0,372.0,17.0,17.0,16.0,17.0,100.0,13.55
6,1701985000000.0,46.560001,63.799999,-2.0,372.0,17.0,17.0,16.0,17.0,100.0,13.51
7,1701985000000.0,46.560001,63.799999,-1.5,372.0,17.0,17.0,16.0,17.0,100.0,13.58
8,1701985000000.0,46.560001,63.799999,-2.0,372.0,17.0,17.0,16.0,17.0,100.0,13.55
9,1701985000000.0,46.560001,63.799999,-2.0,372.0,17.0,17.0,16.0,17.0,100.0,13.55
