In [5]:
# Martin Konečnik, https://git.siwim.si/machine-learning/fix-qa-binary-classification
# Notebook intended for getting data ready for the main script to process.
import tomllib
from pathlib import Path
from shutil import copy
from typing import Dict

from swm import factory

In [6]:
# Load the notebook settings from the TOML configuration file in the working directory.
with Path('conf.toml').open('rb') as config_stream:
    conf = tomllib.load(config_stream)

# Events directory; later cells join it onto the user's home directory.
EVENTS_PATH = Path(conf['event_dir'])
# Value of the 'channel' setting.
INDEX = conf['channel']

In [7]:
# Sort single-vehicle events into per-lane lists of event files:
# `unaltered` (changed flag clear) and `corrected` (changed flag set).
# Events that do not contain exactly one vehicle are only counted.

# Bit of a vehicle's `flags` that is set on events with the changed flag.
CHANGED_FLAG = 0x00080000

# NOTE(review): `is_changed` and `total` are never used in this cell — confirm no
# other cell needs them before removing.
is_changed: Dict[str, bool] = {}
total = 0
multiple = 0
# Path.home() is a classmethod, so call it on the class (as the copy cell does).
data_dir = Path.home() / EVENTS_PATH / 'raw'
# parents=True: without it a missing events directory raises FileNotFoundError.
data_dir.mkdir(parents=True, exist_ok=True)
unaltered = {}
corrected = {}

for file in data_dir.glob('**/*.event'):
    vehicles = factory.read_file(file).detected_vehicles
    if len(vehicles) != 1:
        # NOTE(review): this also counts events with zero vehicles as "multiple".
        multiple += 1
        continue
    vehicle = vehicles[0]
    lane = vehicle.lane
    # Register a new lane in both dicts so it shows up in both summaries below.
    unaltered.setdefault(lane, [])
    corrected.setdefault(lane, [])
    if (vehicle.flags & CHANGED_FLAG) != 0:  # Find events with the changed flag set.
        corrected[lane].append(file)
    else:
        unaltered[lane].append(file)

# Report the number of events per lane and class.
for lane, files in unaltered.items():
    print(f'Unaltered lane {lane}: {len(files)}')
for lane, files in corrected.items():
    print(f'Corrected lane {lane}: {len(files)}')
print(f'Multiple: {multiple}')

Unaltered lane 0: 60309
Unaltered lane 1: 39
Corrected lane 0: 1192
Corrected lane 1: 8
Multiple: 4464


In [8]:
# Copy data to folders split based on classification:
# prepared/<unaltered|corrected>/<lane>/<event file>
data_dir = Path.home() / EVENTS_PATH / 'prepared'
unaltered_dir = data_dir / 'unaltered'
corrected_dir = data_dir / 'corrected'

# Copy the data over, one class directory at a time.
for target_dir, files_by_lane in ((unaltered_dir, unaltered), (corrected_dir, corrected)):
    for lane, files in files_by_lane.items():
        folder = target_dir / str(lane)
        # exist_ok=True keeps the cell re-runnable; without it a second run
        # raises FileExistsError because the folders already exist.
        folder.mkdir(parents=True, exist_ok=True)
        for file in files:
            # NOTE(review): events are collected recursively but copied into one
            # flat folder, so same-named files from different subfolders would
            # overwrite each other — confirm event file names are unique.
            copy(file, folder)