Skip to content

Commit

Permalink
adding juritools information in document collection from ner response (
Browse files Browse the repository at this point in the history
#104)

* adding juritools information in document collection from ner response

* renaming versions field in documentModel to nlpVersions

* adding nlpVersions on expert

* exporting nlpVersion in treatments

* rename nlpVersion to version

* adding sder nlp version commit
  • Loading branch information
Bouba-cassation committed Jun 21, 2024
1 parent 5ac69f1 commit 31afe6c
Show file tree
Hide file tree
Showing 20 changed files with 205 additions and 6 deletions.
16 changes: 15 additions & 1 deletion packages/courDeCassation/src/annotator/fetcher/api/nlpApiType.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { labelTreatmentsType } from '@label/backend';
import { documentType, settingsType } from '@label/core';

export type { nlpApiType, nlpResponseType, nlpLossType };
export type { nlpApiType, nlpResponseType, nlpLossType, nlpVersion };

type nlpApiType = {
fetchNlpAnnotations: (
Expand All @@ -14,6 +14,19 @@ type nlpApiType = {
) => Promise<nlpLossType>;
};

/** A `version` string and a `date` string describing one NLP component. */
type nlpVersionDetails = {
  version: string;
  date: string;
};

/**
 * Version information carried by the `versions` field of `nlpResponseType`:
 * one `nlpVersionDetails` entry per component, plus the name of the model.
 */
type nlpVersion = Record<
  'juriSpacyTokenizer' | 'juritools' | 'pseudonymisationApi',
  nlpVersionDetails
> & {
  model: { name: string };
};

type nlpResponseType = {
entities: nlpAnnotationType[];
checklist: string[];
Expand All @@ -22,6 +35,7 @@ type nlpResponseType = {
additionalTermsToAnnotate?: string[];
additionalTermsToUnAnnotate?: string[];
additionalTermsParsingFailed?: boolean;
versions: nlpVersion;
};

type nlpAnnotationType = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,24 @@ import { documentModule } from '@label/core';
import { nlpResponseType } from '../api';
import { nlpMapper } from './nlpMapper';

// Fixed version payload attached to the NLP response fixtures in this test file.
const nlpVersion = (() => {
  // A fresh object per component so the three entries never share a reference.
  const fixedDetails = () => ({
    version: '0.13.21',
    date: '2024-01-01 12:00:00',
  });

  return {
    juriSpacyTokenizer: fixedDetails(),
    juritools: fixedDetails(),
    pseudonymisationApi: fixedDetails(),
    model: { name: 'new_categories_model.pt' },
  };
})();

const nlpAnnotations: nlpResponseType = {
entities: [
{
Expand All @@ -24,6 +42,7 @@ const nlpAnnotations: nlpResponseType = {
},
],
checklist: ['CHECK 1', 'CHECK 2'],
versions: nlpVersion,
};

const nlpAnnotationsWithAdditionalTerms: nlpResponseType = {
Expand All @@ -49,6 +68,7 @@ const nlpAnnotationsWithAdditionalTerms: nlpResponseType = {
],
checklist: ['CHECK 1', 'CHECK 2'],
additionalTermsToUnAnnotate: ['blabla', 'toto'],
versions: nlpVersion,
};

const document = documentModule.generator.generate({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ function buildNlpFetcher(nlpApiBaseUrl: string | undefined) {
),
additionalTermsParsingFailed:
nlpAnnotations.additionalTermsParsingFailed,
version: nlpAnnotations.versions,
};
},
async fetchLossOfDocument(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ import { nlpResponseType } from '../../fetcher/api';
export { nlpAnnotationsGenerator };

const nlpAnnotationsGenerator: generatorType<nlpResponseType> = {
generate: ({ entities, checklist } = {}) => ({
generate: ({ entities, checklist, versions } = {}) => ({
entities: entities ? entities : generateRandomNlpAnnotations(),
checklist: checklist ? checklist : [],
versions: versions ? versions : generateRandomNlpVersion(),
}),
};

Expand All @@ -29,3 +30,24 @@ function generateRandomNlpAnnotation() {
};
return entity;
}

/**
 * Builds a version payload whose every string is a fixed prefix followed by
 * a fresh `Math.random()` value, used as the default `versions` of generated
 * NLP responses.
 */
function generateRandomNlpVersion() {
  // Each call draws two random numbers: first for `version`, then for `date`.
  const randomDetails = () => ({
    version: `VERSION_${Math.random()}`,
    date: `DATE_${Math.random()}`,
  });

  return {
    juriSpacyTokenizer: randomDetails(),
    juritools: randomDetails(),
    pseudonymisationApi: randomDetails(),
    model: { name: `MODEL_${Math.random()}` },
  };
}
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ async function mapCourtDecisionToDocument(
title,
text: sderCourtDecision.originalText,
zoning: zoning,
nlpVersions: {} as documentType['nlpVersions'],
});
}
function getNumberPrefix(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ type annotatorConfigType = {
newCategoriesToUnAnnotate?: string[];
computedAdditionalTerms?: documentType['decisionMetadata']['computedAdditionalTerms'];
additionalTermsParsingFailed?: boolean;
version: documentType['nlpVersions'];
}>;
fetchLossOfDocument: (
document: documentType,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,9 @@ function buildFakeAnnotatorConfig(): annotatorConfigType {
documentId: document._id,
});

return { annotations, documentId: document._id, report };
const version = documentModule.generator.generate().nlpVersions;

return { annotations, documentId: document._id, report, version };
},
async fetchLossOfDocument() {
return 0;
Expand Down
2 changes: 2 additions & 0 deletions packages/generic/backend/src/lib/annotator/buildAnnotator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,13 @@ function buildAnnotator(
newCategoriesToUnAnnotate,
computedAdditionalTerms,
additionalTermsParsingFailed,
version,
} = await annotatorConfig.fetchAnnotationOfDocument(settings, document);
logger.log({
operationName: 'annotateDocument',
msg: 'NLP annotation succeeded',
});
// Await the write: updateDocumentNlpVersions rejects when the document is not
// found, and an un-awaited call would surface that as an unhandled rejection
// while the following document updates proceed concurrently.
await documentService.updateDocumentNlpVersions(documentId, version);

if (document.route == 'simple' && annotations.length == 0) {
await documentService.updateDocumentRoute(documentId, 'automatic');
Expand Down
4 changes: 3 additions & 1 deletion packages/generic/backend/src/lib/exporter/buildExporter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,12 @@ function buildExporter(
const anonymizer = buildAnonymizer(settingsForDocument, annotations, seed);

try {
const version = document.nlpVersions;
// check treatments in concat lib source if nlp set nlpVersions in labelTreatments
await exporterConfig.sendDocumentPseudonymisationAndTreatments({
externalId: document.externalId,
pseudonymizationText: anonymizer.anonymizeDocument(document).text,
labelTreatments: treatmentModule.lib.concat(treatments),
labelTreatments: treatmentModule.lib.concat(treatments, version),
});

await statisticService.saveStatisticsOfDocument(document, settings);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ type labelTreatmentsType = Array<{
start: number;
text: string;
}>;
version?: documentType['nlpVersions'];
source: string;
order: number;
}>;
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,12 @@ const buildDocumentRepository = buildRepositoryBuilder<
.toArray();
},

// Sets `nlpVersions` on the document matching `_id`, then reads the document
// back; resolves to `undefined` when the lookup finds nothing.
async updateNlpVersionsById(_id, nlpVersions) {
  const nlpVersionsPatch = { $set: { nlpVersions } };
  await collection.updateOne({ _id }, nlpVersionsPatch);

  const refreshedDocument = await collection.findOne({ _id });
  return refreshedDocument ?? undefined;
},

async updateLossById(_id, loss) {
await collection.updateOne({ _id }, { $set: { loss } });
const updatedDocument = await collection.findOne({ _id });
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,25 @@ const buildFakeDocumentRepository = buildFakeRepositoryBuilder<
return documents;
},

// Fake-repository counterpart of the Mongo update: swaps in a copy of the
// matching document carrying the new `nlpVersions`, then looks it up again.
async updateNlpVersionsById(_id, nlpVersions) {
  const nextDocuments = collection.map((candidate) => {
    if (!idModule.lib.equalId(_id, candidate._id)) {
      return candidate;
    }
    return { ...candidate, nlpVersions };
  });
  updateFakeCollection(collection, nextDocuments);

  // Looked up on `collection` again after the update call; resolves to
  // `undefined` when no document has this id.
  return collection.find((candidate) =>
    idModule.lib.equalId(_id, candidate._id),
  );
},

async updateLossById(_id, loss) {
updateFakeCollection(
collection,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ type customDocumentRepositoryType = {
_id: idType,
loss: documentType['loss'],
) => Promise<documentType | undefined>;
updateNlpVersionsById: (
_id: idType,
nlpVersions: documentType['nlpVersions'],
) => Promise<documentType | undefined>;
updateAdditionalTermsParsingFailed: (
_id: idType,
additionalTermsParsingFailed: documentType['decisionMetadata']['additionalTermsParsingFailed'],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import { updateDocumentComputedAdditionalTerms } from './updateDocumentComputedA
import { updateDocumentStatus } from './updateDocumentStatus';
import { fetchAllImporters } from './fetchAllImporters';
import { updateDocumentAdditionalTermsParsingFailed } from './updateDocumentAdditionalTermsParsingFailed';
import { updateDocumentNlpVersions } from './updateDocumentNlpVersions';

export { buildDocumentService, documentService };

Expand Down Expand Up @@ -109,5 +110,6 @@ function buildDocumentService() {
updateDocumentCategoriesToOmit,
updateDocumentComputedAdditionalTerms,
updateDocumentAdditionalTermsParsingFailed,
updateDocumentNlpVersions,
};
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { documentModule } from '@label/core';
import { buildDocumentRepository } from '../../repository';
import { updateDocumentNlpVersions } from './updateDocumentNlpVersions';
describe('updateDocumentNlpVersions', () => {
  const documentRepository = buildDocumentRepository();

  it('should update document nlpVersions', async () => {
    const storedDocument = documentModule.generator.generate();
    await documentRepository.insert(storedDocument);

    // Every component carries the same placeholder details.
    const placeholderDetails = () => ({ version: 'VERSION', date: 'DATE' });
    const nlpVersionsMock = {
      juriSpacyTokenizer: placeholderDetails(),
      juritools: placeholderDetails(),
      pseudonymisationApi: placeholderDetails(),
      model: { name: 'MODEL' },
    };

    const returnedNlpVersions = await updateDocumentNlpVersions(
      storedDocument._id,
      nlpVersionsMock,
    );

    // NOTE(review): this only checks the value returned by the service, which
    // is the versions object and not the stored document. Consider also
    // reading the document back from the repository.
    expect(returnedNlpVersions).toEqual(nlpVersionsMock);
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { errorHandlers } from 'sder-core';
import { documentType, idModule } from '@label/core';
import { buildDocumentRepository } from '../../repository';

export { updateDocumentNlpVersions };

/**
 * Stores `nlpVersions` on the document identified by `_id`.
 *
 * @param _id - Identifier of the document to update.
 * @param nlpVersions - Version information to store on the document.
 * @returns The `nlpVersions` value that was passed in.
 * @throws A not-found error built by `errorHandlers.notFoundErrorHandler`
 * when the repository returns no document for `_id`.
 */
async function updateDocumentNlpVersions(
  _id: documentType['_id'],
  nlpVersions: documentType['nlpVersions'],
) {
  const updatedDocument = await buildDocumentRepository().updateNlpVersionsById(
    _id,
    nlpVersions,
  );

  if (updatedDocument) {
    return nlpVersions;
  }

  const readableId = idModule.lib.convertToString(_id);
  throw errorHandlers.notFoundErrorHandler.build(
    `The document ${readableId} was not found in the document collection`,
  );
}
2 changes: 1 addition & 1 deletion packages/generic/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
"bcryptjs": "^2.4.3",
"lodash": "^4.17.21",
"mongodb": "^3.6.1",
"sder": "https://github.com/Cour-de-cassation/sder#5333e7ac962fe80850cddbd3cd83b73fbb4fd656",
"sder": "https://github.com/Cour-de-cassation/sder#68f786a46e2e8d2aa45dbf26641e98567c152730",
"string-template": "^1.0.0",
"typescript": "~4.0.0"
},
Expand Down
38 changes: 38 additions & 0 deletions packages/generic/core/src/modules/document/documentType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,44 @@ const documentModelCommonFields = {
{ kind: 'primitive', content: 'undefined' },
],
},
nlpVersions: {
kind: 'or',
content: [
{
kind: 'object',
content: {
juriSpacyTokenizer: {
kind: 'object',
content: {
version: { kind: 'primitive', content: 'string' },
date: { kind: 'primitive', content: 'string' },
},
},
juritools: {
kind: 'object',
content: {
version: { kind: 'primitive', content: 'string' },
date: { kind: 'primitive', content: 'string' },
},
},
pseudonymisationApi: {
kind: 'object',
content: {
version: { kind: 'primitive', content: 'string' },
date: { kind: 'primitive', content: 'string' },
},
},
model: {
kind: 'object',
content: {
name: { kind: 'primitive', content: 'string' },
},
},
},
},
{ kind: 'primitive', content: 'undefined' },
],
},
publicationCategory: { kind: 'array', content: { kind: 'primitive', content: 'string' } },
reviewStatus: {
kind: 'object',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ const documentGenerator: generatorType<documentType> = {
text,
updateDate,
zoning,
nlpVersions,
} = {}) => ({
creationDate: creationDate ? creationDate : new Date().getTime(),
decisionMetadata: decisionMetadata ? decisionMetadata : decisionMetadataGenerator.generate(),
Expand All @@ -84,5 +85,6 @@ const documentGenerator: generatorType<documentType> = {
text: text ?? `TEXT_${Math.random()}`,
updateDate: updateDate ?? new Date().getTime(),
zoning: zoning ?? undefined,
nlpVersions: nlpVersions ?? undefined,
}),
};
Loading

0 comments on commit 31afe6c

Please sign in to comment.