From b067ec9f1d2586e0bcbe1c04a9d85bc60b6a5791 Mon Sep 17 00:00:00 2001 From: Rishi Pandey Date: Mon, 30 Oct 2023 15:25:20 -0400 Subject: [PATCH 001/101] WIP - handle blur in AutoComplete --- .../migration.sql | 2 + .../migration.sql | 24 ++ .../migration.sql | 15 + api/prisma/schema.prisma | 137 ++++--- api/prisma/seed.js | 4 + api/prisma/seed_data/data.js | 15 + api/src/routes/datasets.js | 11 + .../dataset/CreateDataProductStepper.vue | 368 ++++++++++++++++++ ui/src/components/dataset/DatasetList.vue | 13 + ui/src/components/dataset/DatasetSelect.vue | 6 +- .../project/datasets/ProjectDatasetsForm.vue | 1 + ui/src/components/utils/AutoComplete.vue | 205 +++++++++- ui/src/pages/dataproducts/new.vue | 43 ++ ui/src/services/dataset.js | 4 + 14 files changed, 777 insertions(+), 71 deletions(-) create mode 100644 api/prisma/migrations/20231027111820_added_data_product_file_type_col/migration.sql create mode 100644 api/prisma/migrations/20231027222509_added_table_for_file_type/migration.sql create mode 100644 api/prisma/migrations/20231029012607_added_data_product_file_type_column/migration.sql create mode 100644 ui/src/components/dataset/CreateDataProductStepper.vue create mode 100644 ui/src/pages/dataproducts/new.vue diff --git a/api/prisma/migrations/20231027111820_added_data_product_file_type_col/migration.sql b/api/prisma/migrations/20231027111820_added_data_product_file_type_col/migration.sql new file mode 100644 index 000000000..20f7fa7b4 --- /dev/null +++ b/api/prisma/migrations/20231027111820_added_data_product_file_type_col/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "dataset" ADD COLUMN "data_product_file_type" TEXT; diff --git a/api/prisma/migrations/20231027222509_added_table_for_file_type/migration.sql b/api/prisma/migrations/20231027222509_added_table_for_file_type/migration.sql new file mode 100644 index 000000000..ed6be5156 --- /dev/null +++ b/api/prisma/migrations/20231027222509_added_table_for_file_type/migration.sql @@ -0,0 +1,24 @@ +/* + Warnings: + + - You are about to drop the column `data_product_file_type` on the `dataset` table. All the data in the column will be lost. + +*/ +-- AlterTable +ALTER TABLE "dataset" DROP COLUMN "data_product_file_type", +ADD COLUMN "data_product_file_typeId" INTEGER; + +-- CreateTable +CREATE TABLE "data_product_file_type" ( + "id" SERIAL NOT NULL, + "name" TEXT NOT NULL, + "extension" TEXT NOT NULL, + + CONSTRAINT "data_product_file_type_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE UNIQUE INDEX "data_product_file_type_name_extension_key" ON "data_product_file_type"("name", "extension"); + +-- AddForeignKey +ALTER TABLE "dataset" ADD CONSTRAINT "dataset_data_product_file_typeId_fkey" FOREIGN KEY ("data_product_file_typeId") REFERENCES "data_product_file_type"("id") ON DELETE SET NULL ON UPDATE CASCADE; diff --git a/api/prisma/migrations/20231029012607_added_data_product_file_type_column/migration.sql b/api/prisma/migrations/20231029012607_added_data_product_file_type_column/migration.sql new file mode 100644 index 000000000..3a246adb0 --- /dev/null +++ b/api/prisma/migrations/20231029012607_added_data_product_file_type_column/migration.sql @@ -0,0 +1,15 @@ +/* + Warnings: + + - You are about to drop the column `data_product_file_typeId` on the `dataset` table. All the data in the column will be lost. 
+ +*/ +-- DropForeignKey +ALTER TABLE "dataset" DROP CONSTRAINT "dataset_data_product_file_typeId_fkey"; + +-- AlterTable +ALTER TABLE "dataset" DROP COLUMN "data_product_file_typeId", +ADD COLUMN "data_product_file_type_id" INTEGER; + +-- AddForeignKey +ALTER TABLE "dataset" ADD CONSTRAINT "dataset_data_product_file_type_id_fkey" FOREIGN KEY ("data_product_file_type_id") REFERENCES "data_product_file_type"("id") ON DELETE SET NULL ON UPDATE CASCADE; diff --git a/api/prisma/schema.prisma b/api/prisma/schema.prisma index d07afb790..670cf3b6e 100644 --- a/api/prisma/schema.prisma +++ b/api/prisma/schema.prisma @@ -8,34 +8,46 @@ datasource db { } model dataset { - id Int @id @default(autoincrement()) - name String - type String - num_directories Int? - num_files Int? - du_size BigInt? - size BigInt? - bundle_size BigInt? - description String? - created_at DateTime @default(now()) @db.Timestamp(6) - updated_at DateTime @default(now()) @updatedAt @db.Timestamp(6) - origin_path String? - archive_path String? - is_deleted Boolean @default(false) - is_staged Boolean @default(false) - metadata Json? - workflows workflow[] - files dataset_file[] - audit_logs dataset_audit[] - states dataset_state[] - derived_datasets dataset_hierarchy[] @relation("source_datasets") - source_datasets dataset_hierarchy[] @relation("derived_datasets") - projects project_dataset[] - accesses data_access_log[] - stage_requests stage_request_log[] + id Int @id @default(autoincrement()) + name String + type String + file_type data_product_file_type? @relation(name: "data_products", fields: [data_product_file_type_id], references: [id]) + data_product_file_type_id Int? + num_directories Int? + num_files Int? + du_size BigInt? + size BigInt? + bundle_size BigInt? + description String? + created_at DateTime @default(now()) @db.Timestamp(6) + updated_at DateTime @default(now()) @updatedAt @db.Timestamp(6) + origin_path String? + archive_path String? + is_deleted Boolean @default(false) + is_staged Boolean @default(false) + metadata Json? + workflows workflow[] + files dataset_file[] + audit_logs dataset_audit[] + states dataset_state[] + derived_datasets dataset_hierarchy[] @relation("source_datasets") + source_datasets dataset_hierarchy[] @relation("derived_datasets") + projects project_dataset[] + accesses data_access_log[] + stage_requests stage_request_log[] + @@unique([name, type, is_deleted]) } +model data_product_file_type { + id Int @id @default(autoincrement()) + name String + extension String + data_products dataset[] @relation(name: "data_products") + + @@unique([name, extension]) +} + model dataset_hierarchy { source_id Int derived_id Int @@ -47,19 +59,20 @@ model dataset_hierarchy { } model dataset_file { - id Int @id @default(autoincrement()) - name String? - path String - md5 String? - size BigInt? - filetype String? - metadata Json? - status String? - dataset_id Int - dataset dataset @relation(fields: [dataset_id], references: [id], onDelete: Cascade) - parents dataset_file_hierarchy[] @relation("child") - children dataset_file_hierarchy[] @relation("parent") - accesses data_access_log[] + id Int @id @default(autoincrement()) + name String? + path String + md5 String? + size BigInt? + filetype String? + metadata Json? + status String? 
+ dataset_id Int + dataset dataset @relation(fields: [dataset_id], references: [id], onDelete: Cascade) + parents dataset_file_hierarchy[] @relation("child") + children dataset_file_hierarchy[] @relation("parent") + accesses data_access_log[] + @@unique([path, dataset_id]) @@index([dataset_id]) } @@ -96,36 +109,36 @@ model dataset_state { } model data_access_log { - id Int @id @default(autoincrement()) - timestamp DateTime @default(now()) @db.Timestamp(6) - access_type access_type - file_id Int? - dataset_file dataset_file? @relation(fields: [file_id], references: [id]) - dataset_id Int? - dataset dataset? @relation(fields: [dataset_id], references: [id]) - user_id Int - user user @relation(fields: [user_id], references: [id]) + id Int @id @default(autoincrement()) + timestamp DateTime @default(now()) @db.Timestamp(6) + access_type access_type + file_id Int? + dataset_file dataset_file? @relation(fields: [file_id], references: [id]) + dataset_id Int? + dataset dataset? @relation(fields: [dataset_id], references: [id]) + user_id Int + user user @relation(fields: [user_id], references: [id]) } model stage_request_log { - id Int @id @default(autoincrement()) - timestamp DateTime @default(now()) @db.Timestamp(6) - dataset_id Int? - dataset dataset? @relation(fields: [dataset_id], references: [id]) - user_id Int - user user @relation(fields: [user_id], references: [id]) + id Int @id @default(autoincrement()) + timestamp DateTime @default(now()) @db.Timestamp(6) + dataset_id Int? + dataset dataset? @relation(fields: [dataset_id], references: [id]) + user_id Int + user user @relation(fields: [user_id], references: [id]) } model user { - id Int @id @default(autoincrement()) - username String @unique @db.VarChar(100) - name String? @db.VarChar(100) - email String @unique @db.VarChar(100) - cas_id String? @unique @db.VarChar(100) + id Int @id @default(autoincrement()) + username String @unique @db.VarChar(100) + name String? @db.VarChar(100) + email String @unique @db.VarChar(100) + cas_id String? @unique @db.VarChar(100) notes String? - created_at DateTime @default(now()) @db.Timestamp(6) - updated_at DateTime @default(now()) @updatedAt @db.Timestamp(6) - is_deleted Boolean @default(false) + created_at DateTime @default(now()) @db.Timestamp(6) + updated_at DateTime @default(now()) @updatedAt @db.Timestamp(6) + is_deleted Boolean @default(false) user_role user_role[] settings user_settings? 
contacts contact[] @@ -279,4 +292,4 @@ model worker_process { enum access_type { BROWSER SLATE_SCRATCH -} \ No newline at end of file +} diff --git a/api/prisma/seed.js b/api/prisma/seed.js index cd99065bb..24cf61ab1 100644 --- a/api/prisma/seed.js +++ b/api/prisma/seed.js @@ -240,6 +240,10 @@ async function main() { })), ); + // create data_product_file_type + await prisma.data_product_file_type.deleteMany(); + await prisma.data_product_file_type.createMany({ data: data.data_product_file_type }); + // upsert dataset_files await put_dataset_files({ dataset_id: 1, num_files: 100, max_depth: 1 }); await put_dataset_files({ dataset_id: 2, num_files: 100, max_depth: 3 }); diff --git a/api/prisma/seed_data/data.js b/api/prisma/seed_data/data.js index 48a46d662..dfebf78d3 100644 --- a/api/prisma/seed_data/data.js +++ b/api/prisma/seed_data/data.js @@ -314,6 +314,20 @@ const project_contact_assoc = [ }, ]; +const data_product_file_type = [{ + name: 'FASTQ', + extension: 'fastq', +}, { + name: 'BAM', + extension: 'bam', +}, { + name: 'BIGWIG', + extension: 'bw', +}, { + name: 'IMAGE_HE', + extension: '.tif', +}]; + module.exports = { roles, admins, @@ -323,6 +337,7 @@ module.exports = { dataset_heirarchical_association, metrics, dataset_audit_data, + data_product_file_type, contacts, projects, project_user_assoc, diff --git a/api/src/routes/datasets.js b/api/src/routes/datasets.js index cb6e8b3f0..73a7fcb0e 100644 --- a/api/src/routes/datasets.js +++ b/api/src/routes/datasets.js @@ -240,6 +240,17 @@ router.get( }), ); +// Data Products - UI +router.get( + '/data_product_file_types', + isPermittedTo('read'), + asyncHandler(async (req, res, next) => { + const data_product_file_types = await prisma.data_product_file_type.findMany(); + res.json(data_product_file_types); + // res.json([]); + }), +); + const dataset_access_check = asyncHandler(async (req, res, next) => { // assumes req.params.id is the dataset id user is requesting // access check diff --git a/ui/src/components/dataset/CreateDataProductStepper.vue b/ui/src/components/dataset/CreateDataProductStepper.vue new file mode 100644 index 000000000..75e6f4eeb --- /dev/null +++ b/ui/src/components/dataset/CreateDataProductStepper.vue @@ -0,0 +1,368 @@ + + + + + diff --git a/ui/src/components/dataset/DatasetList.vue b/ui/src/components/dataset/DatasetList.vue index 19d773c78..0e6366b1f 100644 --- a/ui/src/components/dataset/DatasetList.vue +++ b/ui/src/components/dataset/DatasetList.vue @@ -22,6 +22,18 @@
+ + +
+ + Create Data Product + +
@@ -208,6 +220,7 @@ import { useToastStore } from "@/stores/toast"; import _ from "lodash"; const toast = useToastStore(); +const router = useRouter(); useSearchKeyShortcut(); const props = defineProps({ diff --git a/ui/src/components/dataset/DatasetSelect.vue b/ui/src/components/dataset/DatasetSelect.vue index bbb6b68fb..007237ac9 100644 --- a/ui/src/components/dataset/DatasetSelect.vue +++ b/ui/src/components/dataset/DatasetSelect.vue @@ -27,7 +27,11 @@ import { formatBytes } from "@/services/utils"; const datasets = ref([]); -datasetService.getAll().then((res) => { +const props = defineProps({ + dtype: { type: String }, +}); + +datasetService.getAll({ type: props.dtype }).then((res) => { datasets.value = res.data.datasets; }); diff --git a/ui/src/components/project/datasets/ProjectDatasetsForm.vue b/ui/src/components/project/datasets/ProjectDatasetsForm.vue index 1e79caebd..e1e589ad2 100644 --- a/ui/src/components/project/datasets/ProjectDatasetsForm.vue +++ b/ui/src/components/project/datasets/ProjectDatasetsForm.vue @@ -24,6 +24,7 @@ import { useProjectFormStore } from "@/stores/projects/projectForm"; const projectFormStore = useProjectFormStore(); function handleSelect(ds) { + // debugger; projectFormStore.addDataset(ds); } diff --git a/ui/src/components/utils/AutoComplete.vue b/ui/src/components/utils/AutoComplete.vue index 4c3ffd92f..aaf8ca6b6 100644 --- a/ui/src/components/utils/AutoComplete.vue +++ b/ui/src/components/utils/AutoComplete.vue @@ -3,14 +3,30 @@
+ + + + + + + + + + + + + @@ -34,7 +50,10 @@ {{ item[props.displayBy] }} -
+
@@ -42,6 +61,14 @@ None matched
+
+ {{ props.addNewButtonText }} +
  • @@ -51,6 +78,8 @@ import { OnClickOutside } from "@vueuse/components"; const props = defineProps({ + // testProp: { type: String }, + modelValue: { type: String }, placeholder: { type: String, default: "Type here", @@ -71,11 +100,148 @@ const props = defineProps({ type: String, default: "name", }, + showSelectedOption: { + type: Boolean, + default: () => false, + }, + formatSelectedOption: { + type: Function, + default: (option) => option, + }, + areOptionsEqual: { + type: Function, + default: (option1, option2) => option1 === option2, + }, + getOptionValue: { + type: Function, + default: (option) => option, + }, + showAddNewButton: { + type: Boolean, + default: () => false, + }, + addNewButtonText: { type: String, default: () => "Add New" }, +}); + +// const test_prop_ref = toRef(() => props.testProp); +// const test_prop_ref_computed = computed(() => test_prop_ref.value, { +// onTrack(e) { +// // triggered when count.value is tracked as a dependency +// // debugger; +// }, +// onTrigger(e) { +// // triggered when count.value is mutated +// // debugger; +// }, +// }); + +const _model_value = toRef(() => props.modelValue, { + onTrack(e) { + // triggered when count.value is tracked as a dependency + // debugger; + }, + onTrigger(e) { + // triggered when count.value is mutated + // debugger; + }, }); +// const model_value_prop_ref_computed = computed( +// () => _model_value.value, +// { +// onTrack(e) { +// // triggered when count.value is tracked as a dependency +// // debugger; +// }, +// onTrigger(e) { +// // triggered when count.value is mutated +// // debugger; +// }, +// }, +// ); + +const emit = defineEmits([ + "select", + "createNewElement", + "clear", + "input", + "change", + "update", +]); + +const emit_event = (event) => { + // debugger; + emit(event); +}; + +const handle_blur = () => { + // debugger; + + // If AutoComplete is open, and an invalid value (i.e. one not present in the list of options) + // is typed, clear AutoComplete's value upon blur + // debugger; + // if (input.value === "") { + // // // close AutoComplete + // // closeResults(); + // debugger; + // // return; + // } else { + if ( + input.value && + (props.data.filter((e) => props.getOptionValue(e) === input.value) || []) + .length === 0 + ) { + // debugger; + // // clear selected value + // updateSelection(""); + + // input.value = ""; + // selection.value = undefined; + + emit("clear"); + } + // } + // // close AutoComplete + // closeResults(); + + // emit("blur"); +}; -const emit = defineEmits(["select"]); +// User's text input. Also stores the currently selected value. +const input = ref(""); +// If selected value has other attributes besides text, it is tracked as the `selection` reactive +// object +const selection = ref(); + +// Provided as v-model to the AutoComplete's . Getter returns modelValue if modelValue is +// provided (indicating that the value is being set externally), and user-typed input otherwise. +// Setter is needed for the to be writable. +const auto_complete_val = computed( + { + get() { + let ret = + _model_value.value && _model_value.value + ? 
props.formatSelectedOption(_model_value.value) + : input.value; + // debugger; + + return ret; + }, + set(newValue) { + input.value = newValue; + }, + }, + { + onTrack(e) { + // triggered when count.value is tracked as a dependency + // debugger; + }, + onTrigger(e) { + // triggered when count.value is mutated + // debugger; + }, + }, +); -const text = ref(""); const visible = ref(false); // when clicked outside, hide the results ul @@ -83,15 +249,15 @@ const visible = ref(false); // when clicked on a search result, clear text and hide the results ul const search_results = computed(() => { - if (text.value === "") return props.data; + if (input.value === "") return props.data; const filterFn = props.filterFn instanceof Function - ? props.filterFn(text.value) + ? props.filterFn(input.value) : (item) => (item[props.filterBy] || "") .toLowerCase() - .includes(text.value.toLowerCase()); + .includes(input.value.toLowerCase()); return (props.data || []).filter(filterFn); }); @@ -104,11 +270,34 @@ function openResults() { visible.value = true; } +const updateSelection = (item) => { + // debugger; + // allows AutoComplete value to be updated based on option selected by user + input.value = + props.showSelectedOption && item ? props.formatSelectedOption(item) : ""; + // track the entire `item` object as well, in case it has other attributes that may need to be + // tracked + selection.value = item || undefined; + + // emit event to indicate that selected option has changed + emit_event("change"); +}; + function handleSelect(item) { - text.value = ""; + // debugger; + updateSelection(item); closeResults(); emit("select", item); } + +// If client is attempting to set the value of AutoComplete externally, treat it as an option +// being selected. +watch(_model_value, () => { + // debugger; + if (_model_value.value) { + updateSelection(_model_value.value); + } +}); diff --git a/ui/src/components/layout/Sidebar.vue b/ui/src/components/layout/Sidebar.vue index b0ae5f4d7..d37228d50 100644 --- a/ui/src/components/layout/Sidebar.vue +++ b/ui/src/components/layout/Sidebar.vue @@ -176,6 +176,11 @@ const operator_items = ref([ title: "Workflows", path: "/workflows", }, + { + icon: "mdi:folder-upload", + title: "Data Product Upload", + path: "/dataProductUploads", + }, // { // icon: "mdi-account-multiple", // title: "Group Management", diff --git a/ui/src/components/utils/AutoCompleteSelect.vue b/ui/src/components/utils/AutoCompleteSelect.vue new file mode 100644 index 000000000..e34af2fc6 --- /dev/null +++ b/ui/src/components/utils/AutoCompleteSelect.vue @@ -0,0 +1,60 @@ + + + diff --git a/ui/src/components/utils/AutocompleteSelect.vue b/ui/src/components/utils/AutocompleteSelect.vue deleted file mode 100644 index 644af6ee4..000000000 --- a/ui/src/components/utils/AutocompleteSelect.vue +++ /dev/null @@ -1,51 +0,0 @@ - - - diff --git a/ui/src/components/utils/UploadFile.vue b/ui/src/components/utils/UploadFile.vue new file mode 100644 index 000000000..498da3b8f --- /dev/null +++ b/ui/src/components/utils/UploadFile.vue @@ -0,0 +1,29 @@ + + + diff --git a/ui/src/pages/dataProductUploads/index.vue b/ui/src/pages/dataProductUploads/index.vue new file mode 100644 index 000000000..8541392ce --- /dev/null +++ b/ui/src/pages/dataProductUploads/index.vue @@ -0,0 +1,17 @@ + + + + + diff --git a/ui/src/pages/dataProductUploads/new.vue b/ui/src/pages/dataProductUploads/new.vue new file mode 100644 index 000000000..56a8310d0 --- /dev/null +++ b/ui/src/pages/dataProductUploads/new.vue @@ -0,0 +1,25 @@ + + + diff --git 
a/ui/src/pages/test_file_upload.vue b/ui/src/pages/test_file_upload.vue new file mode 100644 index 000000000..873b3c7ab --- /dev/null +++ b/ui/src/pages/test_file_upload.vue @@ -0,0 +1,270 @@ + + + + + diff --git a/ui/src/services/dataset.js b/ui/src/services/dataset.js index c569ef00c..ecfafa1d0 100644 --- a/ui/src/services/dataset.js +++ b/ui/src/services/dataset.js @@ -7,19 +7,21 @@ const toast = useToastStore(); class DatasetService { /** * - * @param deleted Boolean field to filter datasets by `is_deleted` field - * @param processed Field to filter datasets by number of associated workflows. Can be one of - * 'some' or 'none' - * @param archived Boolean field to filter datasets by the presence/absence of `archive_path` - * field - * @param staged Boolean field to filter datasets by `is_deleted` field - * @param type Field to filter datasets by `type`. One of 'RAW_DATA' or 'DATA_PRODUCT' - * @param name Field to filter datasets by `name` - * @param limit The number of datasets to be retrieved - * @param offset Database offset starting at which results will be retrieved - * @param sortBy Object containing property to sort datasets by, whose key is the name - * of said property, and value is one of 'asc' or 'desc' - * @returns Object containing matching datasets, and count of matching datasets + * @param deleted Boolean field to filter datasets by `is_deleted` field + * @param processed Field to filter datasets by number of associated workflows. Can be one of + * 'some' or 'none' + * @param archived Boolean field to filter datasets by the presence/absence of `archive_path` + * field + * @param staged Boolean field to filter datasets by `is_deleted` field + * @param type Field to filter datasets by `type`. One of 'RAW_DATA' or 'DATA_PRODUCT' + * @param name Field to filter datasets by `name` + * @param match_name_exact Boolean field to determine whether records will be matched by + * exact or matching values of `name` + * @param limit The number of datasets to be retrieved + * @param offset Database offset starting at which results will be retrieved + * @param sortBy Object containing property to sort datasets by, whose key is the name + * of said property, and value is one of 'asc' or 'desc' + * @returns Object containing matching datasets, and count of matching datasets */ getAll({ deleted = null, @@ -28,6 +30,7 @@ class DatasetService { staged = null, type = null, name = null, + match_name_exact = null, limit = null, offset = null, sortBy = null, @@ -40,6 +43,7 @@ class DatasetService { staged, type, name, + match_name_exact, limit, offset, sortBy, @@ -67,7 +71,15 @@ class DatasetService { } getDataProductFileTypes() { - return api.get("/datasets/data_product_file_types"); + return api.get("/datasets/data-product-file-types"); + } + + getDataProductUploads() { + return api.get("/datasets/data-product-uploads"); + } + + uploadFileChunk(data) { + return api.post("/datasets/file-chunk", data); } stage_dataset(id) { diff --git a/workers/workers/tasks/create_files.py b/workers/workers/tasks/create_files.py new file mode 100644 index 000000000..d1a7a2e18 --- /dev/null +++ b/workers/workers/tasks/create_files.py @@ -0,0 +1,62 @@ +from pathlib import Path + +from celery import Celery +from celery.utils.log import get_task_logger +from sca_rhythm.progress import Progress + +import workers.api as api +import workers.cmd as cmd +import workers.config.celeryconfig as celeryconfig +import workers.utils as utils +from workers import exceptions as exc +from workers.config import config + +app = 
Celery("tasks") +app.config_from_object(celeryconfig) +logger = get_task_logger(__name__) + +def create_dataset_files(celery_task, files_details, **kwargs): + # TODO - file_details should be sent through the HTTP request + # that initiates the file merge + # files_details: [{ + # data_product: '', + # file: { + # file_name: '', + # file_hash: '', + # file_destination: '', + # num_chunks: '', + # } + # }] + + # TODO - move path to config + data_products_upload_dir = Path('/tmp/dataProductUploads') + data_product = files_details['data_product'] + + for f in files_details: + source_path = data_products_upload_dir / data_product / f.file_name + merged_file_path = Path(f.file_destination) + num_chunks_expected = f.num_chunks + file_name = f.file_name + file_hash = f.file_hash + chunks_path = source_path / 'chunks' + + print(f'Processing file {file_name}') + num_chunk_files = len([p for p in chunks_path.iterdir()]) + if num_chunk_files != num_chunks_expected: + print(f'Expected {num_chunks_expected} chunks, but found {num_chunk_files}') + print('This file\'s chunks will not be merged') + break + for i in range(num_chunk_files): + chunk_file = chunks_path / f'{file_hash}-{i}' + print(f'Processing chunk {chunk_file}') + print(f'Appending chunk {chunk_file.name} to {merged_file_path}') + + with open(chunk_file, 'rb') as chunk: + with open(merged_file_path, 'ab') as destination: + destination.write(chunk.read()) + print(f'Successfully appended chunk {chunk_file.name} to {merged_file_path}') + print(f'Deleting chunk {chunk_file}') + chunk_file.unlink() + + + diff --git a/workers/workers/tasks/declarations.py b/workers/workers/tasks/declarations.py index b523576db..77d477bde 100644 --- a/workers/workers/tasks/declarations.py +++ b/workers/workers/tasks/declarations.py @@ -49,6 +49,15 @@ def inspect_dataset(celery_task, dataset_id, **kwargs): raise exc.RetryableException(e) +@app.task(base=WorkflowTask, bind=True, name='create_dataset_files', + autoretry_for=(Exception,), + max_retries=3, + default_retry_delay=5) +def create_dataset_files(celery_task, files_path, **kwargs): + from workers.tasks.create_files import create_dataset_files as task_body + return task_body(celery_task, files_path, **kwargs) + + @app.task(base=WorkflowTask, bind=True, name='generate_qc', autoretry_for=(Exception,), max_retries=3, From 16109a4c3b47b4026450db042cd83719f2e0f14d Mon Sep 17 00:00:00 2001 From: Rishi Pandey Date: Tue, 7 Nov 2023 12:08:23 -0500 Subject: [PATCH 007/101] tested merging code locally --- workers/workers/tasks/create_files.py | 39 ++++++++++++++------------- workers/workers/tasks/declarations.py | 4 +-- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/workers/workers/tasks/create_files.py b/workers/workers/tasks/create_files.py index d1a7a2e18..1c7328108 100644 --- a/workers/workers/tasks/create_files.py +++ b/workers/workers/tasks/create_files.py @@ -15,29 +15,33 @@ app.config_from_object(celeryconfig) logger = get_task_logger(__name__) -def create_dataset_files(celery_task, files_details, **kwargs): +def create_dataset_files(celery_task, files_attrs, **kwargs): # TODO - file_details should be sent through the HTTP request # that initiates the file merge - # files_details: [{ - # data_product: '', - # file: { - # file_name: '', - # file_hash: '', - # file_destination: '', - # num_chunks: '', - # } - # }] + # files_attrs = { + # 'data_product': 'test', + # 'files': [{ + # 'file_name': 'file_1.pdf', + # 'file_hash': '31904f92c817767de2bb7e9241f0f7fc', + # 'num_chunks': 3 + # }, { + # 
'file_name': 'file_2.pdf', + # 'file_hash': '50ddf82278203f3813749b90c77aee24', + # 'num_chunks': 1 + # }] + # } # TODO - move path to config data_products_upload_dir = Path('/tmp/dataProductUploads') - data_product = files_details['data_product'] + data_product = files_attrs['data_product'] + files_details = files_attrs['files'] for f in files_details: - source_path = data_products_upload_dir / data_product / f.file_name - merged_file_path = Path(f.file_destination) - num_chunks_expected = f.num_chunks - file_name = f.file_name - file_hash = f.file_hash + source_path = data_products_upload_dir / data_product / f['file_hash'] + file_name = f['file_name'] + merged_file_path = source_path / file_name + num_chunks_expected = f['num_chunks'] + file_hash = f['file_hash'] chunks_path = source_path / 'chunks' print(f'Processing file {file_name}') @@ -57,6 +61,3 @@ def create_dataset_files(celery_task, files_details, **kwargs): print(f'Successfully appended chunk {chunk_file.name} to {merged_file_path}') print(f'Deleting chunk {chunk_file}') chunk_file.unlink() - - - diff --git a/workers/workers/tasks/declarations.py b/workers/workers/tasks/declarations.py index 77d477bde..db9f49452 100644 --- a/workers/workers/tasks/declarations.py +++ b/workers/workers/tasks/declarations.py @@ -53,9 +53,9 @@ def inspect_dataset(celery_task, dataset_id, **kwargs): autoretry_for=(Exception,), max_retries=3, default_retry_delay=5) -def create_dataset_files(celery_task, files_path, **kwargs): +def create_dataset_files(celery_task, files_attrs, **kwargs): from workers.tasks.create_files import create_dataset_files as task_body - return task_body(celery_task, files_path, **kwargs) + return task_body(celery_task, files_attrs, **kwargs) @app.task(base=WorkflowTask, bind=True, name='generate_qc', From ca43727c08b8d364b9b1d9f921ac16d861a1da1b Mon Sep 17 00:00:00 2001 From: Rishi Pandey Date: Tue, 7 Nov 2023 17:42:31 -0500 Subject: [PATCH 008/101] moved - - From d45b562f44b597396af0d5a0029e1495a1dafa72 Mon Sep 17 00:00:00 2001 From: Rishi Pandey Date: Tue, 7 Nov 2023 19:26:47 -0500 Subject: [PATCH 010/101] merged New File Type and File Type Select into one component again --- .../dataset/DataProductFileTypeSelect.vue | 133 ++++++++++++++-- .../dataset/DataProductFilesUpload.vue | 67 ++++---- .../dataset/DataProductNewFileType.vue | 143 ------------------ 3 files changed, 156 insertions(+), 187 deletions(-) delete mode 100644 ui/src/components/dataset/DataProductNewFileType.vue diff --git a/ui/src/components/dataset/DataProductFileTypeSelect.vue b/ui/src/components/dataset/DataProductFileTypeSelect.vue index 47f91c9d3..25ab661b9 100644 --- a/ui/src/components/dataset/DataProductFileTypeSelect.vue +++ b/ui/src/components/dataset/DataProductFileTypeSelect.vue @@ -2,7 +2,7 @@ Create New File Type + + + +
+ + + +
+ {{ error }} + +
+
+
+
    diff --git a/ui/src/components/dataset/DataProductFilesUpload.vue b/ui/src/components/dataset/DataProductFilesUpload.vue index 77d89b51f..6c1ade65e 100644 --- a/ui/src/components/dataset/DataProductFilesUpload.vue +++ b/ui/src/components/dataset/DataProductFilesUpload.vue @@ -49,10 +49,15 @@ @@ -165,8 +170,6 @@ - - @@ -379,31 +382,6 @@ const removeFile = (index) => { dataProductFiles.value.splice(index, 1); }; -onMounted(() => { - // https://developer.mozilla.org/en-US/docs/Web/API/Window/beforeunload_event - window.addEventListener("beforeunload", (e) => { - if (inProgress.value) { - // show warning before user leaves page - e.returnValue = true; - } - }); -}); - -onMounted(() => { - datasetService.getAll({ type: "DATA_PRODUCT" }).then((res) => { - raw_data_list.value = res.data.datasets; - }); -}); - -// show warning before user moves to a different route -onBeforeRouteLeave(() => { - const answer = window.confirm( - "Leaving this page before all files have been processed/uploaded will" + - " cancel the upload. Do you wish to continue?", - ); - if (!answer) return false; -}); - function isValid_new_data_product_form() { // debugger; validate_data_product_upload_form(); @@ -435,18 +413,37 @@ const validateNotExists = (value) => { }); }; -watch(fileTypeSelected, () => { - // debugger; - console.log(fileTypeSelected.value); - fileTypeList.value.push(fileTypeSelected.value); -}); - onMounted(() => { datasetService.getDataProductFileTypes().then((res) => { // debugger; fileTypeList.value = res.data; }); }); + +onMounted(() => { + // https://developer.mozilla.org/en-US/docs/Web/API/Window/beforeunload_event + window.addEventListener("beforeunload", (e) => { + if (inProgress.value) { + // show warning before user leaves page + e.returnValue = true; + } + }); +}); + +onMounted(() => { + datasetService.getAll({ type: "DATA_PRODUCT" }).then((res) => { + raw_data_list.value = res.data.datasets; + }); +}); + +// show warning before user moves to a different route +onBeforeRouteLeave(() => { + const answer = window.confirm( + "Leaving this page before all files have been processed/uploaded will" + + " cancel the upload. Do you wish to continue?", + ); + if (!answer) return false; +}); From 162f42225db31ee198cfa2bf806100cae2438cd5 Mon Sep 17 00:00:00 2001 From: Rishi Pandey Date: Tue, 7 Nov 2023 20:53:14 -0500 Subject: [PATCH 011/101] persist value of Raw Data select through v-model --- ui/src/components/dataset/DataProductFilesUpload.vue | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ui/src/components/dataset/DataProductFilesUpload.vue b/ui/src/components/dataset/DataProductFilesUpload.vue index 6c1ade65e..571deba00 100644 --- a/ui/src/components/dataset/DataProductFilesUpload.vue +++ b/ui/src/components/dataset/DataProductFilesUpload.vue @@ -62,13 +62,15 @@
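
The worker's chunk-merge loop appends chunks named '{file_hash}-{i}' in index order, but it never verifies the reassembled file against file_hash, and the break on a chunk-count mismatch abandons every remaining file rather than just the incomplete one. Below is a minimal, self-contained sketch of that merge step with verification added. The merge_chunks helper is illustrative, not part of the patch, and it assumes file_hash is the hex MD5 digest of the whole file — the 32-hex-character hashes in the task's comments look like MD5, but the patch never says so.

import hashlib
from pathlib import Path

def merge_chunks(chunks_dir: Path, dest: Path, file_hash: str, num_chunks: int) -> bool:
    """Reassemble chunks named '{file_hash}-{i}' into dest; True only if the result verifies."""
    # Same naming convention as create_files.py: one file per chunk, indexed from 0.
    chunk_paths = [chunks_dir / f'{file_hash}-{i}' for i in range(num_chunks)]
    if not all(p.is_file() for p in chunk_paths):
        # Incomplete upload: leave the chunks in place so the client can resume.
        return False
    md5 = hashlib.md5()
    with open(dest, 'wb') as out:
        for p in chunk_paths:
            data = p.read_bytes()
            md5.update(data)  # hash while merging to avoid a second pass over the file
            out.write(data)
    # Assumption (not stated in the patch): file_hash is the MD5 of the complete file.
    if md5.hexdigest() != file_hash:
        dest.unlink()  # discard the corrupt merge; keep the chunks for a retry
        return False
    for p in chunk_paths:
        p.unlink()
    return True

Returning a per-file boolean instead of break-ing lets the caller keep processing the remaining files when one upload is incomplete, and lets the task report which files failed instead of stopping silently partway through the list.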