In [24]:
import json
import os.path
import datetime

In [25]:
DATA_JUNKS = [
    {
        'prefix': '2022-Q4',
        'first_date': '2022-10-01',
        'last_date': '2022-12-31'
    },
    {
        'prefix': '2023-Q1',
        'first_date': '2023-01-01',
        'last_date': '2023-03-31'
    },
    {
        'prefix': '2023-Q2',
        'first_date': '2023-04-01',
        'last_date': '2023-06-30'
    },
    {
        'prefix': '2023-Q3',
        'first_date': '2023-07-01',
        'last_date': '2023-09-30'
    },    
    {
        'prefix': '2023-Q4',
        'first_date': '2023-10-01',
        'last_date': '2023-12-31'
    },
]

SENSOR_NAMES = ['Arkaden', 'FuZo_Eugen-Jaekle-Platz', 'FuZo_Olgastr', 'Brenzstr', 'Knoepfle_Nord', 'Knoepfle_Ost']

PATH_RAW_DATA = '../raw-data/'

VALID_VALUE_KEYS = ['bicycle.in', 'bicycle.out', 'person.in', 'person.out']

## Test if all files are present

In [26]:
for sensorName in SENSOR_NAMES:
    for dataJunk in DATA_JUNKS:
        
        fileName = "{0}-{1}.json".format(dataJunk['prefix'], sensorName)
        if not os.path.exists(PATH_RAW_DATA + fileName):
            print("Error! Could not find file: " + fileName)

## Create filter methods

Filter methods get the singleDataPoint element as first parameter and some filter criterians on further parameters:

```json
{
    "datetime": "2024-01-01T23:30:00+00:00",
    "value": "0",
    "valueKey": "van.out"
}

```

In [27]:
def filterDate(dataPoint, firstDate, lastDate):
    firstD = datetime.date.fromisoformat(firstDate)
    lastD = datetime.date.fromisoformat(lastDate)

    dataPointDateTime = datetime.datetime.strptime(dataPoint['datetime'][:10], '%Y-%m-%d')
    dataPointDate = dataPointDateTime.date()
    
    if dataPointDate < firstD:
        return False
    
    if dataPointDate > lastD:
        return False

    return True

def filterValueKey(dataPoint, allowedValueKeys):
    return dataPoint['valueKey'] in allowedValueKeys

**Test Filter**

In [28]:
singleDataPoints = [{
        "datetime": "2024-01-01T23:30:00+00:00",
        "value": "7",
        "valueKey": "bicycle.in"
    },
    {
        "datetime": "2024-01-02T23:30:00+00:00",
        "value": "2",
        "valueKey": "motorbike.in"
    },
    {
        "datetime": "2024-01-03T23:30:00+00:00",
        "value": "5",
        "valueKey": "bus.out"
    }
]

filter_WithoutEntries = [d for d in singleDataPoints if filterDate(d, "2020-01-02", "2020-01-03")] 
print("Should find no entries for date:", filter_WithoutEntries)

filter_LastOfQuarter = [d for d in singleDataPoints if filterDate(d, "2023-12-03", "2024-01-01")] 
print("Filter date First of Data:", filter_LastOfQuarter)

filter_FirstOfQuarter = [d for d in singleDataPoints if filterDate(d, "2024-01-03", "2024-01-06")] 
print("Filter date Last of Data:", filter_FirstOfQuarter)

filter_ValueKey = list(filter(lambda entry : filterValueKey(entry, ['bus.out']), singleDataPoints))
print("Filter all 'bus.out'", filter_ValueKey)

filter_ValueKey_Empty = list(filter(lambda entry : filterValueKey(entry, ['foo.bar']), singleDataPoints))
print("Filter all 'foo.bar'", filter_ValueKey_Empty)



Should find no entries for date: []
Filter date First of Data: [{'datetime': '2024-01-01T23:30:00+00:00', 'value': '7', 'valueKey': 'bicycle.in'}]
Filter date Last of Data: [{'datetime': '2024-01-03T23:30:00+00:00', 'value': '5', 'valueKey': 'bus.out'}]
Filter all 'bus.out' [{'datetime': '2024-01-03T23:30:00+00:00', 'value': '5', 'valueKey': 'bus.out'}]
Filter all 'foo.bar' []


## Create map methods


In [29]:
def mapSingleDataPointsToDateFormat(singleDataPoints, allowedValueKeys):
    mappedDates = {}

    for singleDataPoint in singleDataPoints:

        key = singleDataPoint['valueKey']

        if key not in allowedValueKeys:
            continue

        datetimeRoundedToMinute = datetime.datetime.strptime(singleDataPoint['datetime'][:16], '%Y-%m-%dT%H:%M')
        datetimeString = datetimeRoundedToMinute.strftime("%Y-%m-%d %H:%M:%S")

        if datetimeString not in mappedDates:
            emptyValues = { 
                "datetime": datetimeString,
                "timestamp": datetime.datetime.timestamp(datetimeRoundedToMinute)
            }
            for allowedKey in allowedValueKeys:
                emptyValues[allowedKey] = 0
            mappedDates[datetimeString] = emptyValues
        
        mappedDates[datetimeString][key] = singleDataPoint["value"]

    return mappedDates


**Test map methods:**

In [30]:

print(mapSingleDataPointsToDateFormat(singleDataPoints, ['bus.out', 'motorbike.in']))

{'2024-01-02 23:30:00': {'datetime': '2024-01-02 23:30:00', 'timestamp': 1704234600.0, 'bus.out': 0, 'motorbike.in': '2'}, '2024-01-03 23:30:00': {'datetime': '2024-01-03 23:30:00', 'timestamp': 1704321000.0, 'bus.out': '5', 'motorbike.in': 0}}


## Process Files

In [31]:
fileData = {}

for sensorName in SENSOR_NAMES:

    sensorData = {
        "id": None,
        "name": None,
        "latitude": None,
        "longitude": None,
        "labelIdentifier": None,
        "groupIdentifier": None,
        "singleDataPoints": {}        
    }

    for dataJunk in DATA_JUNKS:
        
        fileName = "{0}-{1}.json".format(dataJunk['prefix'], sensorName)
        with open(PATH_RAW_DATA + fileName) as jsonFile:
            jsonContent = json.load(jsonFile)
            data = jsonContent["data"][0]

            if sensorData["id"] is None:
                sensorData["id"] = data["id"]
                sensorData["name"] = data["name"]
                sensorData["latitude"] = data["latitude"]
                sensorData["longitude"] = data["longitude"]
                sensorData["labelIdentifier"] = data["labelIdentifier"]
                sensorData["groupIdentifier"] = data["groupIdentifier"]

            dataPointsInDateRange = list(filter(lambda dataPoint: filterDate(dataPoint, dataJunk['first_date'], dataJunk['last_date']), data['singleDataPoints']))
            
            mappedDataPoints = mapSingleDataPointsToDateFormat(dataPointsInDateRange, VALID_VALUE_KEYS)
                
            sensorData['singleDataPoints'] = {**sensorData['singleDataPoints'], **mappedDataPoints}

    fileData[sensorName] = sensorData

In [32]:
for sensorName, sensorData in fileData.items():
    with open(sensorName + '.json', 'w') as fp:
        json.dump(sensorData, fp, indent=2)


### Validate if all values are present in any sensor

In [33]:
allDatesSet = set()

for sensorName, sensorData in fileData.items():
    datesOfSensor = list(sensorData['singleDataPoints'].keys())
    allDatesSet.update(datesOfSensor)

missingDatesForSensor = {}

for sensorName, sensorData in fileData.items():
    datesOfSensor = list(sensorData['singleDataPoints'].keys())

    missingDatesForSensor[sensorName] = []

    for date in allDatesSet:
        if date not in datesOfSensor:
            missingDatesForSensor[sensorName].append(date)

    missingDatesForSensor[sensorName].sort()

for sensorName, missingDates in missingDatesForSensor.items():
    print("{0}: {1}".format(sensorName, len(missingDates)))

with open('missing_sensor_values.json', 'w') as fp:
    json.dump(missingDatesForSensor, fp, indent=2)

Arkaden: 567
FuZo_Eugen-Jaekle-Platz: 257
FuZo_Olgastr: 446
Brenzstr: 253
Knoepfle_Nord: 13
Knoepfle_Ost: 20
