Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 38 additions & 12 deletions client/platform/desktop/backend/native/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import {
import processTrackAttributes from './attributeProcessor';
import { upgrade } from './migrations';
import { getMultiCamUrls, transcodeMultiCam } from './multiCamUtils';
import { splitExt } from './utils';


const ProjectsFolderName = 'DIVE_Projects';
Expand Down Expand Up @@ -564,6 +565,7 @@ async function _ingestFilePath(
settings: Settings,
datasetId: string,
path: string,
imageMap?: Map<string, number>,
): Promise<(DatasetMetaMutable & { fps?: number }) | null> {
if (!fs.existsSync(path)) {
return null;
Expand Down Expand Up @@ -594,7 +596,7 @@ async function _ingestFilePath(
}
} else if (CsvFileName.test(path)) {
// VIAME CSV File
const data = await viameSerializers.parseFile(path);
const data = await viameSerializers.parseFile(path, imageMap);
tracks = data.tracks;
meta.fps = data.fps;
}
Expand Down Expand Up @@ -623,6 +625,7 @@ async function ingestDataFiles(
datasetId: string,
absPaths: string[],
multiCamResults?: Record<string, string>,
imageMap?: Map<string, number>,
): Promise<{
processedFiles: string[];
meta: DatasetMetaMutable & { fps?: number };
Expand All @@ -633,7 +636,7 @@ async function ingestDataFiles(
for (let i = 0; i < absPaths.length; i += 1) {
const path = absPaths[i];
// eslint-disable-next-line no-await-in-loop
const newMeta = await _ingestFilePath(settings, datasetId, path);
const newMeta = await _ingestFilePath(settings, datasetId, path, imageMap);
if (newMeta !== null) {
merge(meta, newMeta);
processedFiles.push(path);
Expand All @@ -647,7 +650,7 @@ async function ingestDataFiles(
const path = cameraAndPath[i][1];
const cameraDatasetId = `${datasetId}/${cameraName}`;
// eslint-disable-next-line no-await-in-loop
const newMeta = await _ingestFilePath(settings, cameraDatasetId, path);
const newMeta = await _ingestFilePath(settings, cameraDatasetId, path, imageMap);
if (newMeta !== null) {
merge(meta, newMeta);
processedFiles.push(path);
Expand Down Expand Up @@ -866,10 +869,31 @@ async function beginMediaImport(
};
}

function validImageNamesMap(jsonMeta: JsonMeta) {
if (jsonMeta.originalImageFiles.length > 0) {
const imageMap = new Map<string, number>();
jsonMeta.originalImageFiles.forEach((imgPath, i) => {
const [imageBaseName] = splitExt(imgPath);
if (imageMap.get(imageBaseName) !== undefined) {
throw new Error([
`An image named ${imageBaseName} was found twice in the dataset,`,
'probably in different folders. DIVE cannot handle this case.',
'Please contact support.',
].join(' '));
}
imageMap.set(imageBaseName, i);
});
return imageMap;
}
return undefined;
}

async function dataFileImport(settings: Settings, id: string, path: string) {
const result = await ingestDataFiles(settings, id, [path]);
const projectDirData = await getValidatedProjectDir(settings, id);
const jsonMeta = await loadJsonMetadata(projectDirData.metaFileAbsPath);
const result = await ingestDataFiles(
settings, id, [path], undefined, validImageNamesMap(jsonMeta),
);
merge(jsonMeta, result.meta);
await _saveAsJson(npath.join(projectDirData.basePath, JsonMetaFileName), jsonMeta);
return result;
Expand All @@ -882,22 +906,24 @@ async function _importTrackFile(
jsonMeta: JsonMeta,
userTrackFileAbsPath: string,
) {
/* custom image sort */
if (jsonMeta.imageListPath === undefined) {
jsonMeta.originalImageFiles.sort(strNumericCompare);
}
if (jsonMeta.transcodedImageFiles) {
jsonMeta.transcodedImageFiles.sort(strNumericCompare);
}
if (userTrackFileAbsPath) {
const processed = await ingestDataFiles(settings, dsId, [userTrackFileAbsPath]);
const processed = await ingestDataFiles(
settings, dsId, [userTrackFileAbsPath], undefined, validImageNamesMap(jsonMeta),
);
merge(jsonMeta, processed.meta);
if (processed.processedFiles.length === 0) {
await _saveSerialized(settings, dsId, {}, true);
}
} else {
await _saveSerialized(settings, dsId, {}, true);
}
/* custom image sort */
if (jsonMeta.imageListPath === undefined) {
jsonMeta.originalImageFiles.sort(strNumericCompare);
}
if (jsonMeta.transcodedImageFiles) {
jsonMeta.transcodedImageFiles.sort(strNumericCompare);
}
await _saveAsJson(npath.join(projectDirAbsPath, JsonMetaFileName), jsonMeta);
return jsonMeta;
}
Expand Down
9 changes: 9 additions & 0 deletions client/platform/desktop/backend/native/utils.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { spawn } from 'child_process';
import fs from 'fs-extra';
import path from 'path';

import { observeChild } from 'platform/desktop/backend/native/processManager';
import { DesktopJob, DesktopJobUpdater } from 'platform/desktop/constants';

Expand Down Expand Up @@ -62,7 +64,14 @@ Promise<{ output: null | string; exitCode: number | null; error: string}> {
});
}

/* same as os.path.splitext */
function splitExt(input: string): [string, string] {
const ext = path.extname(input);
return [path.basename(input, ext), ext];
}

export {
jobFileEchoMiddleware,
spawnResult,
splitExt,
};
35 changes: 31 additions & 4 deletions client/platform/desktop/backend/serializers/viame.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import { pipeline, Readable, Writable } from 'stream';

import { MultiTrackRecord } from 'dive-common/apispec';
import { JsonMeta } from 'platform/desktop/constants';

import { splitExt } from 'platform/desktop/backend/native/utils';
// Imports that involve actual code require relative imports because ts-node barely works
// https://github.com/TypeStrong/ts-node/issues/422
import Track, {
Expand Down Expand Up @@ -236,14 +236,15 @@ function _parseFeature(row: string[]) {
};
}

async function parse(input: Readable): Promise<AnnotationFileData> {
async function parse(input: Readable, imageMap?: Map<string, number>): Promise<AnnotationFileData> {
const parser = csvparser({
delimiter: ',',
// comment lines may not have the correct number of columns
relaxColumnCount: true,
});
let fps: number | undefined;
const dataMap = new Map<number, TrackData>();
let reordered = false;

return new Promise<AnnotationFileData>((resolve, reject) => {
pipeline(input, parser, (err) => {
Expand All @@ -261,6 +262,23 @@ async function parse(input: Readable): Promise<AnnotationFileData> {
const {
rowInfo, feature, trackAttributes, confidencePairs,
} = _parseFeature(record);

if (imageMap !== undefined) {
// validate image ordering if the imageMap is provided.
const [imageName] = splitExt(rowInfo.filename);
const expectedFrameNumber = imageMap.get(imageName);
if (expectedFrameNumber === undefined) {
throw new Error(
`encountered annotation for image not found in dataset: ${rowInfo.filename}`,
);
} else if (expectedFrameNumber !== feature.frame) {
// force reorder the annotations
reordered = true;
feature.frame = expectedFrameNumber;
rowInfo.frame = expectedFrameNumber;
}
}

let track = dataMap.get(rowInfo.trackId);
if (track === undefined) {
// Create new track if not exists in map
Expand All @@ -274,6 +292,11 @@ async function parse(input: Readable): Promise<AnnotationFileData> {
features: [],
};
dataMap.set(rowInfo.trackId, track);
} else if (reordered) {
// trackId was already in dataMap, so the track has more than 1 detection.
throw new Error(
'annotations were provided in an unexpected order and dataset contains multi-frame tracks',
);
}
track.begin = Math.min(rowInfo.frame, track.begin);
track.end = Math.max(rowInfo.frame, track.end);
Expand All @@ -286,6 +309,9 @@ async function parse(input: Readable): Promise<AnnotationFileData> {
track.attributes[key] = val;
});
} catch (err) {
if (!(err instanceof Error)) {
throw new Error(`Caught unexpected error ${err}`);
}
if (err.toString().includes('comment row')) {
// parse comment row
fps = fps || parseCommentRow(record).fps;
Expand All @@ -303,9 +329,10 @@ async function parse(input: Readable): Promise<AnnotationFileData> {
});
}

async function parseFile(path: string): Promise<AnnotationFileData> {
async function parseFile(path: string, imageMap?: Map<string, number>):
Promise<AnnotationFileData> {
const stream = fs.createReadStream(path);
return parse(stream);
return parse(stream, imageMap);
}

async function writeHeader(writer: Writable, meta: JsonMeta) {
Expand Down
9 changes: 9 additions & 0 deletions server/dive_server/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,12 @@ def unwrapItem(item1, item2):
images,
key=functools.cmp_to_key(unwrapItem),
)


def valid_image_names_dict(images: List[GirderModel]):
"""Get a map of image names (without extension) to frame numbers"""
imageNameMap = {}
for i, image in enumerate(images):
imageName, _ = os.path.splitext(image['name'])
imageNameMap[imageName] = i
return imageNameMap
13 changes: 10 additions & 3 deletions server/dive_server/crud_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,9 @@ def run_training(


def _get_data_by_type(
file: Optional[types.GirderModel], as_type: Optional[crud.FileType] = None
file: types.GirderModel,
image_map: Optional[Dict[str, int]] = None,
as_type: Optional[crud.FileType] = None,
) -> Tuple[Optional[crud.FileType], dict, dict]:
"""
Given an arbitrary Girder file model, figure out what kind of file it is and
Expand Down Expand Up @@ -295,7 +297,9 @@ def _get_data_by_type(

# Parse the file as the now known type
if as_type == crud.FileType.VIAME_CSV:
tracks, attributes = viame.load_csv_as_tracks_and_attributes(file_string.splitlines())
tracks, attributes = viame.load_csv_as_tracks_and_attributes(
file_string.splitlines(), image_map
)
return as_type, tracks, attributes

# All filetypes below are JSON, so if as_type was specified, it needs to be loaded.
Expand Down Expand Up @@ -332,7 +336,10 @@ def process_items(folder: types.GirderModel, user: types.GirderUserModel):
raise RestException('Item had no associated files')

try:
filetype, data, attrs = _get_data_by_type(file)
image_map = None
if fromMeta(folder, constants.TypeMarker) == 'image-sequence':
image_map = crud.valid_image_names_dict(crud.valid_images(folder, user))
filetype, data, attrs = _get_data_by_type(file, image_map=image_map)
except Exception as e:
Item().remove(item)
raise RestException(f'{file["name"]} was not valid JSON or CSV: {e}') from e
Expand Down
49 changes: 40 additions & 9 deletions server/dive_utils/serializers/viame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import datetime
import io
import json
import os
import re
from typing import Any, Dict, Generator, List, Tuple, Union

Expand Down Expand Up @@ -166,7 +167,7 @@ def _parse_row(row: List[str]) -> Tuple[Dict, Dict, Dict, List]:

def _parse_row_for_tracks(row: List[str]) -> Tuple[Feature, Dict, Dict, List]:
head_tail_feature, attributes, track_attributes, confidence_pairs = _parse_row(row)
trackId, filename, frame, bounds, fishLength = row_info(row)
_, _, frame, bounds, fishLength = row_info(row)

feature = Feature(
frame=frame,
Expand Down Expand Up @@ -233,31 +234,61 @@ def calculate_attribute_types(
metadata_attributes[attributeKey]['datatype'] = attribute_type


def load_csv_as_tracks_and_attributes(rows: List[str]) -> Tuple[dict, dict]:
def load_csv_as_tracks_and_attributes(
rows: List[str], imageMap: Dict[str, int] = None
) -> Tuple[dict, dict]:
"""
Convert VIAME CSV to json tracks.
Expect detections to be in increasing order (either globally or by track).
Convert VIAME CSV to json tracks

:param rows: string rows of a VIAME CSV file
:param imageMap: map of image names to frame numbers. keys do NOT include file extension
"""
reader = csv.reader(row for row in rows if (not row.startswith("#") and row))
reader = csv.reader(row for row in rows)
tracks: Dict[int, Track] = {}
metadata_attributes: Dict[str, Dict[str, Any]] = {}
test_vals: Dict[str, Dict[str, int]] = {}
reordered = False

for row in reader:
if len(row) == 0 or row[0].startswith('#'):
# This is not a data row
continue
(
feature,
attributes,
track_attributes,
confidence_pairs,
) = _parse_row_for_tracks(row)

trackId, _, frame, _, _ = row_info(row)
trackId, imageFile, _, _, _ = row_info(row)

if imageMap:
# validate image ordering if the imageMap is provided
imageName, _ = os.path.splitext(os.path.basename(imageFile))
expectedFrameNumber = imageMap.get(imageName)
if expectedFrameNumber is None:
raise ValueError(
f'encountered annotation for image not found in dataset: {imageFile}'
)
elif expectedFrameNumber is not feature.frame:
# force reorder the annotations
reordered = True
feature.frame = expectedFrameNumber

if trackId not in tracks:
tracks[trackId] = Track(begin=frame, end=frame, trackId=trackId)
tracks[trackId] = Track(begin=feature.frame, end=feature.frame, trackId=trackId)
elif reordered:
# trackId was already in tracks, so the track consists of multiple frames
raise ValueError(
(
'images were provided in an unexpected order '
'and dataset contains multi-frame tracks. '
)
)

track = tracks[trackId]
track.begin = min(frame, track.begin)
track.end = max(track.end, frame)
track.begin = min(feature.frame, track.begin)
track.end = max(track.end, feature.frame)
track.features.append(feature)
track.confidencePairs = confidence_pairs

Expand Down