Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions __tests__/utils/schemaOrg/anvilDataset.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ describe("buildAnvilDatasetJsonLd", () => {
response.datasets[0].description = "Short.";
const result = buildAnvilDatasetJsonLd(response, BROWSER_URL);
expect(result!.description).toBe(
"Rare disease dataset — Short. — A genomic dataset in the AnVIL Data Explorer catalog."
"Rare disease dataset — Short. — A dataset in the AnVIL Data Explorer for NHGRI's Analysis Visualization and Informatics Lab-space."
);
expect(result!.description.length).toBeGreaterThanOrEqual(
DESCRIPTION_LENGTH.MIN
Expand All @@ -110,7 +110,7 @@ describe("buildAnvilDatasetJsonLd", () => {
response.datasets[0].description = undefined;
const result = buildAnvilDatasetJsonLd(response, BROWSER_URL);
expect(result!.description).toBe(
"Rare disease dataset — A genomic dataset in the AnVIL Data Explorer catalog."
"Rare disease dataset — A dataset in the AnVIL Data Explorer for NHGRI's Analysis Visualization and Informatics Lab-space."
);
expect(result!.description.length).toBeGreaterThanOrEqual(
DESCRIPTION_LENGTH.MIN
Expand Down
6 changes: 3 additions & 3 deletions __tests__/utils/schemaOrg/hcaProjectDataset.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ describe("buildHcaProjectJsonLd", () => {
expect(result!.isAccessibleForFree).toBe(true);
expect(result!.includedInDataCatalog).toEqual({
"@type": "DataCatalog",
name: "Human Cell Atlas Data Coordination Platform",
name: "Human Cell Atlas Data Explorer",
url: BROWSER_URL,
});
});
Expand All @@ -95,7 +95,7 @@ describe("buildHcaProjectJsonLd", () => {
response.projects[0].projectDescription = "Short.";
const result = buildHcaProjectJsonLd(response, BROWSER_URL);
expect(result!.description).toBe(
"Cells of the body — Short. — Human Cell Atlas Data Coordination Platform project."
"Cells of the body — Short. — A project in the Human Cell Atlas Data Explorer."
);
expect(result!.description.length).toBeGreaterThanOrEqual(
DESCRIPTION_LENGTH.MIN
Expand All @@ -107,7 +107,7 @@ describe("buildHcaProjectJsonLd", () => {
response.projects[0].projectDescription = "";
const result = buildHcaProjectJsonLd(response, BROWSER_URL);
expect(result!.description).toBe(
"Cells of the body — Human Cell Atlas Data Coordination Platform project."
"Cells of the body — A project in the Human Cell Atlas Data Explorer."
);
expect(result!.description.length).toBeGreaterThanOrEqual(
DESCRIPTION_LENGTH.MIN
Expand Down
74 changes: 74 additions & 0 deletions __tests__/utils/schemaOrg/lungmapProjectDataset.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import type { ProjectsResponse } from "../../../app/apis/azul/hca-dcp/common/responses";
import { buildLungmapProjectJsonLd } from "../../../app/utils/schemaOrg/lungmapProjectDataset";

/** Site base URL supplied as the `browserURL` argument in every test in this file. */
const BROWSER_URL = "https://data-browser.lungmap.net";

/**
 * Produces a fresh, minimal project-detail fixture for the LungMAP wrapper
 * tests. Only the LungMAP catalog identity is asserted in this file; the full
 * field mapping is exercised by `hcaProjectDataset.test.ts`, which covers the
 * same shared core.
 * @returns A new `ProjectsResponse`-shaped object holding a single project.
 */
function makeProjectsResponse(): ProjectsResponse {
  // The one project the tests inspect (id, title and description).
  const project = {
    accessible: true,
    accessions: [],
    bionetworkName: [],
    contributedAnalyses: {},
    contributors: [],
    dataUseRestriction: null,
    duosId: null,
    estimatedCellCount: null,
    laboratory: [],
    matrices: {},
    projectDescription:
      "A study of lung development and disease across many donors.",
    projectId: "uuid-1",
    projectShortname: "Lung Study",
    projectTitle: "Lung development atlas",
  };

  // Every aggregate list stays empty; only `projects` carries data.
  const response = {
    dates: [],
    donorOrganisms: [],
    entryId: "abc",
    fileTypeSummaries: [],
    projects: [project],
    protocols: [],
    samples: [],
    specimens: [],
    status: 200,
  };

  // The literal is asserted to the response type rather than checked against it.
  return response as unknown as ProjectsResponse;
}

// Covers only the LungMAP-specific wiring: the empty-project guard, the
// catalog identity / project URL, and the short-description suffix.
describe("buildLungmapProjectJsonLd", () => {
  it("returns undefined when no project is present", () => {
    const withoutProjects = { ...makeProjectsResponse(), projects: [] };
    const jsonLd = buildLungmapProjectJsonLd(
      withoutProjects as ProjectsResponse,
      BROWSER_URL
    );
    expect(jsonLd).toBeUndefined();
  });

  it("surfaces LungMAP as the catalog identity and uses the projects URL pattern", () => {
    const expectedCatalog = {
      "@type": "DataCatalog",
      name: "LungMAP Data Explorer",
      url: BROWSER_URL,
    };
    const jsonLd = buildLungmapProjectJsonLd(
      makeProjectsResponse(),
      BROWSER_URL
    );
    expect(jsonLd).toBeDefined();
    expect(jsonLd!.includedInDataCatalog).toEqual(expectedCatalog);
    expect(jsonLd!.url).toBe(`${BROWSER_URL}/projects/uuid-1`);
  });

  it("pads short descriptions with the LungMAP catalog suffix", () => {
    const fixture = makeProjectsResponse();
    // A one-word description forces the builder to append the fallback suffix.
    fixture.projects[0].projectDescription = "Short.";
    const jsonLd = buildLungmapProjectJsonLd(fixture, BROWSER_URL);
    const expectedDescription =
      "Lung development atlas — Short. — A project in the LungMAP Data Explorer.";
    expect(jsonLd!.description).toBe(expectedDescription);
  });
});
2 changes: 1 addition & 1 deletion app/utils/schemaOrg/anvilDataset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import type { SchemaDataset } from "./types";
import { buildDescription, uniqueNonEmpty } from "./utils";

const CATALOG_NAME = "AnVIL Data Explorer";
const DESCRIPTION_FALLBACK_SUFFIX = `A genomic dataset in the ${CATALOG_NAME} catalog.`;
const DESCRIPTION_FALLBACK_SUFFIX = `A dataset in the AnVIL Data Explorer for NHGRI's Analysis Visualization and Informatics Lab-space.`;

/**
* Builds a Schema.org Dataset JSON-LD object for an AnVIL CMG dataset.
Expand Down
202 changes: 9 additions & 193 deletions app/utils/schemaOrg/hcaProjectDataset.ts
Original file line number Diff line number Diff line change
@@ -1,81 +1,17 @@
import type {
AccessionResponse,
ContributorResponse,
PublicationResponse,
} from "../../apis/azul/hca-dcp/common/entities";
import type { ProjectsResponse } from "../../apis/azul/hca-dcp/common/responses";
import { transformAccessionURL } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/accessionMapper";
import { ACCESSION_CONFIGS_BY_RESPONSE_KEY } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/constants";
import { MAX_KEYWORDS } from "./constants";
import type {
SchemaDataset,
SchemaOrganization,
SchemaPerson,
SchemaScholarlyArticle,
} from "./types";
import { buildDescription, uniqueNonEmpty } from "./utils";
import type { ProjectCatalogOptions } from "./projectDataset";
import { buildProjectJsonLd } from "./projectDataset";
import type { SchemaDataset } from "./types";

const CATALOG_NAME = "Human Cell Atlas Data Coordination Platform";
const DESCRIPTION_FALLBACK_SUFFIX = `${CATALOG_NAME} project.`;
const CATALOG_NAME = "Human Cell Atlas Data Explorer";

/**
 * Maps project publications to schema.org citations. Publications without a
 * title are dropped. `sameAs` is the DOI resolver URL when a DOI is present,
 * otherwise the publication URL; it is omitted when neither is available.
 * @param publications - HCA project publications (a nullish value yields `[]`).
 * @returns Array of schema.org ScholarlyArticle objects.
 */
function buildCitations(
  publications: PublicationResponse[]
): SchemaScholarlyArticle[] {
  return (publications ?? [])
    .filter((publication) => Boolean(publication.publicationTitle))
    .map(({ doi, publicationTitle, publicationUrl }) => {
      const article: SchemaScholarlyArticle = {
        "@type": "ScholarlyArticle",
        headline: publicationTitle,
        name: publicationTitle,
      };
      // DOI wins over the plain URL; an empty result leaves `sameAs` unset.
      const link = doi ? `https://doi.org/${doi}` : publicationUrl;
      if (link) article.sameAs = link;
      return article;
    });
}

/**
 * Maps project contributors to schema.org creators. Contributors without a
 * contact name are dropped; the name is passed through
 * `normaliseContactName`, and an institution, when present, becomes the
 * person's affiliation.
 * @param contributors - HCA project contributors (a nullish value yields `[]`).
 * @returns Array of schema.org Person objects.
 */
function buildCreators(contributors: ContributorResponse[]): SchemaPerson[] {
  return (contributors ?? [])
    .filter((contributor) => Boolean(contributor.contactName))
    .map(({ contactName, institution }) => {
      const person: SchemaPerson = {
        "@type": "Person",
        name: normaliseContactName(contactName),
      };
      if (institution) {
        const organization: SchemaOrganization = {
          "@type": "Organization",
          name: institution,
        };
        person.affiliation = organization;
      }
      return person;
    });
}
/**
 * Catalog identity handed to the shared `buildProjectJsonLd` core for HCA
 * projects: the DataCatalog name and the sentence used as the description
 * fallback suffix.
 */
const OPTIONS: ProjectCatalogOptions = {
  catalogName: CATALOG_NAME,
  // Interpolated so the suffix cannot drift from the catalog name.
  descriptionFallbackSuffix: `A project in the ${CATALOG_NAME}.`,
};

/**
* Builds a Schema.org Dataset JSON-LD object for an HCA DCP project.
*
* Returns `undefined` when the response does not carry a project we can
* describe (i.e. no project entity), so the caller can skip rendering.
* @param data - HCA DCP project detail response from Azul.
* @param browserURL - Site base URL used for canonical and catalog URLs.
* @returns Schema.org Dataset JSON-LD object, or `undefined` if not buildable.
Expand All @@ -84,125 +20,5 @@ export function buildHcaProjectJsonLd(
data: ProjectsResponse,
browserURL: string
): SchemaDataset | undefined {
const project = data.projects?.[0];
if (!project) return undefined;

const name = project.projectTitle || project.projectShortname;
const description = buildDescription(
project.projectDescription,
name,
DESCRIPTION_FALLBACK_SUFFIX
);
const identifier = uniqueNonEmpty([
project.projectId,
...project.accessions.flatMap((accession) =>
splitAccessionIds(accession.accession)
),
]);

const jsonLd: SchemaDataset = {
"@context": "https://schema.org",
"@type": "Dataset",
description,
identifier,
includedInDataCatalog: {
"@type": "DataCatalog",
name: CATALOG_NAME,
url: browserURL,
},
isAccessibleForFree: true,
name,
url: `${browserURL}/projects/${project.projectId}`,
};

const sameAs = buildSameAs(project.accessions);
if (sameAs.length > 0) jsonLd.sameAs = sameAs;

const keywords = buildKeywords(data);
if (keywords.length > 0) jsonLd.keywords = keywords;

const creator = buildCreators(project.contributors);
if (creator.length > 0) jsonLd.creator = creator;

const citation = buildCitations(project.publications);
if (citation.length > 0) jsonLd.citation = citation;

return jsonLd;
}

/**
 * Collects keyword candidates from the response's aggregated donor, sample,
 * specimen and protocol entries (in that order), then de-duplicates them via
 * `uniqueNonEmpty` and caps the list at `MAX_KEYWORDS`.
 * @param data - HCA project detail response.
 * @returns Deduplicated keywords array.
 */
function buildKeywords(data: ProjectsResponse): string[] {
  const fromDonors = (data.donorOrganisms ?? []).flatMap((donor) => [
    ...(donor.genusSpecies ?? []),
    ...(donor.disease ?? []),
  ]);
  const fromSamples = (data.samples ?? []).flatMap((sample) => [
    ...(sample.organ ?? []),
    ...(sample.organPart ?? []),
    ...(sample.disease ?? []),
    ...(sample.sampleEntityType ?? []),
  ]);
  const fromSpecimens = (data.specimens ?? []).flatMap((specimen) => [
    ...(specimen.organ ?? []),
    ...(specimen.organPart ?? []),
    ...(specimen.disease ?? []),
  ]);
  const fromProtocols = (data.protocols ?? []).flatMap((protocol) => [
    ...(protocol.libraryConstructionApproach ?? []),
    ...(protocol.instrumentManufacturerModel ?? []),
  ]);

  // Source order is preserved because the cap keeps only the leading entries.
  const candidates: (string | null | undefined)[] = [
    ...fromDonors,
    ...fromSamples,
    ...fromSpecimens,
    ...fromProtocols,
  ];
  return uniqueNonEmpty(candidates).slice(0, MAX_KEYWORDS);
}

/**
 * Resolves project accessions to external URLs for the `sameAs` property.
 * An accession contributes only when its namespace has an
 * `identifierOrgPrefix` configured; each ID obtained from
 * `splitAccessionIds` is then passed to `transformAccessionURL`, and empty
 * results are discarded.
 * @param accessions - Project accessions from the Azul response.
 * @returns Array of canonical accession URLs.
 */
function buildSameAs(accessions: AccessionResponse[]): string[] {
  const resolved = accessions.flatMap(({ accession, namespace }): string[] => {
    const config = ACCESSION_CONFIGS_BY_RESPONSE_KEY.get(namespace);
    const prefix = config?.identifierOrgPrefix;
    // Namespaces without a configured prefix contribute nothing.
    if (!prefix) return [];
    const links: string[] = [];
    for (const id of splitAccessionIds(accession)) {
      const link = transformAccessionURL(id, prefix);
      if (link) links.push(link);
    }
    return links;
  });
  return uniqueNonEmpty(resolved);
}

/**
 * Normalises an HCA contributor's contactName from "Last,First,Middle" to
 * "First Middle Last" for use as a Schema.org Person.name value. Each
 * comma-separated part is trimmed and empty parts are dropped. A name with no
 * comma is returned trimmed, so surrounding whitespace is handled the same way
 * on both paths.
 * @param contactName - Raw contactName from the Azul response.
 * @returns Human-readable contributor name.
 */
function normaliseContactName(contactName: string): string {
  const parts = contactName.split(",").map((part) => part.trim());
  // No comma: nothing to reorder, but still strip stray whitespace.
  if (parts.length < 2) return contactName.trim();
  const [last, ...rest] = parts;
  return [...rest, last].filter(Boolean).join(" ");
}

/**
* Splits an Azul accession string into individual accession IDs. Azul returns
* accessions as a semicolon-separated string when a project carries multiple
* IDs under the same namespace (mirrors the split done by `mapAccessions`).
* @param accession - Raw accession value from the Azul response.
* @returns Trimmed, non-empty accession IDs.
*/
function splitAccessionIds(accession: string): string[] {
return accession
.split(";")
.map((id) => id.trim())
.filter(Boolean);
return buildProjectJsonLd(data, browserURL, OPTIONS);
}
27 changes: 27 additions & 0 deletions app/utils/schemaOrg/lungmapProjectDataset.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import type { ProjectsResponse } from "../../apis/azul/hca-dcp/common/responses";
import type { ProjectCatalogOptions } from "./projectDataset";
import { buildProjectJsonLd } from "./projectDataset";
import type { SchemaDataset } from "./types";

/** Display name of the LungMAP catalog; the suffix below is derived from it. */
const CATALOG_NAME = "LungMAP Data Explorer";

/**
 * Catalog identity handed to the shared `buildProjectJsonLd` core for LungMAP
 * projects: the DataCatalog name and the sentence used as the description
 * fallback suffix.
 */
const OPTIONS: ProjectCatalogOptions = {
  catalogName: CATALOG_NAME,
  // Interpolated so the suffix cannot drift from the catalog name.
  descriptionFallbackSuffix: `A project in the ${CATALOG_NAME}.`,
};

/**
 * Builds a Schema.org Dataset JSON-LD object for a LungMAP project. LungMAP
 * shares the HCA Azul backend, so the response shape matches HCA's
 * `ProjectsResponse` and the shared `buildProjectJsonLd` core does the
 * mapping; this wrapper just supplies LungMAP-specific catalog identity
 * (the module-level `OPTIONS`) and forwards both arguments unchanged.
 * @param data - LungMAP project detail response from Azul.
 * @param browserURL - Site base URL used for canonical and catalog URLs.
 * @returns Schema.org Dataset JSON-LD object, or `undefined` if not buildable.
 */
export function buildLungmapProjectJsonLd(
data: ProjectsResponse,
browserURL: string
): SchemaDataset | undefined {
// All mapping logic lives in the shared core; nothing LungMAP-specific happens here.
return buildProjectJsonLd(data, browserURL, OPTIONS);
}
Loading
Loading