From 380873ed5c0d09c23753d087dd546c7a68b08d36 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Mon, 12 Jan 2026 16:18:40 +0800 Subject: [PATCH 1/7] feat: update error handling in RagEtlService and add commons-io dependency --- backend/pom.xml | 6 ++++++ backend/services/rag-indexer-service/pom.xml | 6 ++++++ .../rag/indexer/infrastructure/event/RagEtlService.java | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/backend/pom.xml b/backend/pom.xml index f57ea1571..7105089b6 100644 --- a/backend/pom.xml +++ b/backend/pom.xml @@ -65,6 +65,12 @@ pom import + + commons-io + commons-io + 2.16.1 + compile + com.google.protobuf protobuf-bom diff --git a/backend/services/rag-indexer-service/pom.xml b/backend/services/rag-indexer-service/pom.xml index 5f7a4fb33..23db9dd33 100644 --- a/backend/services/rag-indexer-service/pom.xml +++ b/backend/services/rag-indexer-service/pom.xml @@ -86,6 +86,12 @@ dev.langchain4j langchain4j-document-parser-apache-poi + + + commons-io + commons-io + + dev.langchain4j diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java index 53c78fa8a..355e4b2ce 100644 --- a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java @@ -78,7 +78,7 @@ public void processAfterCommit(DataInsertedEvent event) { // 更新文件状态为已处理 ragFile.setStatus(FileStatus.PROCESSED); ragFileRepository.updateById(ragFile); - } catch (Exception e) { + } catch (Throwable e) { // 处理异常 log.error("Error processing RAG file: {}", ragFile.getFileId(), e); ragFile.setStatus(FileStatus.PROCESS_FAILED); From 751b97acedfb5af41cad1a54300fcb25e21e9fb3 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Mon, 12 Jan 2026 17:05:33 +0800 Subject: [PATCH 2/7] feat: enhance Docker configuration with additional services and profiles --- .gitignore | 2 +- Makefile | 52 ++--- deployment/docker/datamate/docker-compose.yml | 212 +++++++++++++++++- .../docker/deer-flow/docker-compose.yml | 33 --- .../docker/label-studio/docker-compose.yml | 62 ----- deployment/docker/milvus/docker-compose.yml | 73 ------ editions/community/config/application.yml | 184 --------------- editions/community/config/log4j2.xml | 42 ---- editions/enterprise/config/application.yml | 181 --------------- editions/enterprise/config/log4j2.xml | 42 ---- scripts/images/frontend/Dockerfile | 2 +- .../images/frontend}/backend.conf | 0 12 files changed, 229 insertions(+), 656 deletions(-) delete mode 100644 deployment/docker/deer-flow/docker-compose.yml delete mode 100644 deployment/docker/label-studio/docker-compose.yml delete mode 100644 deployment/docker/milvus/docker-compose.yml delete mode 100644 editions/community/config/application.yml delete mode 100644 editions/community/config/log4j2.xml delete mode 100644 editions/enterprise/config/application.yml delete mode 100644 editions/enterprise/config/log4j2.xml rename {deployment/docker/datamate => scripts/images/frontend}/backend.conf (100%) diff --git a/.gitignore b/.gitignore index c0906e850..53ae0ad89 100644 --- a/.gitignore +++ b/.gitignore @@ -189,4 +189,4 @@ Thumbs.db *.sublime-workspace # Milvus -deployment/docker/milvus/volumes/ \ No newline at end of file +**/volumes/ diff --git a/Makefile b/Makefile index 4778f07d2..960e8b88b 100644 --- a/Makefile +++ b/Makefile @@ -238,7 +238,7 @@ endif # ========== Docker Install/Uninstall Targets ========== # Valid service targets for docker install/uninstall -VALID_SERVICE_TARGETS := datamate backend frontend runtime mineru "deer-flow" milvus "label-studio" "data-juicer" dj +VALID_SERVICE_TARGETS := datamate backend frontend runtime backend-python database gateway redis mineru deer-flow milvus label-studio data-juicer dj # Generic docker service install target .PHONY: %-docker-install @@ -252,21 +252,23 @@ VALID_SERVICE_TARGETS := datamate backend frontend runtime mineru "deer-flow" mi exit 1; \ fi @if [ "$*" = "label-studio" ]; then \ - $(call docker-compose-service,label-studio,up -d,deployment/docker/label-studio); \ - elif [ "$*" = "mineru" ]; then \ - REGISTRY=$(REGISTRY) && docker compose -f deployment/docker/datamate/docker-compose.yml up -d datamate-mineru; \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile label-studio up -d; \ elif [ "$*" = "datamate" ]; then \ REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml up -d; \ - elif [ "$*" = "deer-flow" ]; then \ - cp runtime/deer-flow/.env deployment/docker/deer-flow/.env; \ - cp runtime/deer-flow/conf.yaml deployment/docker/deer-flow/conf.yaml; \ - REGISTRY=$(REGISTRY) docker compose -f deployment/docker/deer-flow/docker-compose.yml up -d; \ - elif [ "$*" = "milvus" ]; then \ - docker compose -f deployment/docker/milvus/docker-compose.yml up -d; \ + elif [ "$*" = "mineru" ]; then \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru up -d datamate-mineru; \ elif [ "$*" = "data-juicer" ] || [ "$*" = "dj" ]; then \ - REGISTRY=$(REGISTRY) && docker compose -f deployment/docker/datamate/docker-compose.yml up -d datamate-data-juicer; \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile data-juicer up -d datamate-data-juicer; \ + elif [ "$*" = "redis" ]; then \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile redis up -d datamate-redis; \ + elif [ "$*" = "milvus" ]; then \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile milvus up -d; \ + elif [ "$*" = "deer-flow" ]; then \ + cp runtime/deer-flow/.env deployment/docker/datamate/.env; \ + cp runtime/deer-flow/conf.yaml deployment/docker/datamate/conf.yaml; \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile deer-flow up -d; \ else \ - $(call docker-compose-service,$*,up -d,deployment/docker/datamate); \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml up -d datamate-$*; \ fi # Generic docker service uninstall target @@ -281,29 +283,23 @@ VALID_SERVICE_TARGETS := datamate backend frontend runtime mineru "deer-flow" mi exit 1; \ fi @if [ "$*" = "label-studio" ]; then \ - if [ "$(DELETE_VOLUMES_CHOICE)" = "1" ]; then \ - cd deployment/docker/label-studio && docker compose down -v && cd - >/dev/null; \ - else \ - cd deployment/docker/label-studio && docker compose down && cd - >/dev/null; \ - fi; \ + docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s label-studio pg-db; \ elif [ "$*" = "mineru" ]; then \ - $(call docker-compose-service,datamate-mineru,down,deployment/docker/datamate); \ + docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s datamate-mineru; \ + elif [ "$*" = "data-juicer" ] || [ "$*" = "dj" ]; then \ + docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s datamate-data-juicer; \ + elif [ "$*" = "redis" ]; then \ + docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s datamate-redis; \ elif [ "$*" = "datamate" ]; then \ if [ "$(DELETE_VOLUMES_CHOICE)" = "1" ]; then \ - docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru down -v; \ + docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru --profile redis --profile data-juicer --profile deer-flow --profile label-studio --profile milvus down -v; \ else \ - docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru down; \ + docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru --profile redis --profile data-juicer --profile deer-flow --profile label-studio --profile milvus down; \ fi; \ elif [ "$*" = "deer-flow" ]; then \ - docker compose -f deployment/docker/deer-flow/docker-compose.yml down; \ + docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s deer-flow-backend deer-flow-frontend; \ elif [ "$*" = "milvus" ]; then \ - if [ "$(DELETE_VOLUMES_CHOICE)" = "1" ]; then \ - docker compose -f deployment/docker/milvus/docker-compose.yml down -v; \ - else \ - docker compose -f deployment/docker/milvus/docker-compose.yml down; \ - fi; \ - elif [ "$*" = "data-juicer" ] || [ "$*" = "dj" ]; then \ - $(call docker-compose-service,datamate-data-juicer,down,deployment/docker/datamate); \ + docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s milvus etcd minio; \ else \ $(call docker-compose-service,$*,down,deployment/docker/datamate); \ fi diff --git a/deployment/docker/datamate/docker-compose.yml b/deployment/docker/datamate/docker-compose.yml index 422f5bd8c..191d9749a 100644 --- a/deployment/docker/datamate/docker-compose.yml +++ b/deployment/docker/datamate/docker-compose.yml @@ -1,5 +1,8 @@ services: - # 1) backend + # ============================== + # Core Datamate Services (always enabled) + # ============================== + datamate-backend: container_name: datamate-backend image: ${REGISTRY:-}datamate-backend @@ -15,7 +18,6 @@ services: depends_on: - datamate-database - # 1) backend (Python) datamate-backend-python: container_name: datamate-backend-python image: ${REGISTRY:-}datamate-backend-python @@ -38,13 +40,12 @@ services: privileged: true networks: [ datamate ] - # 2) frontend(NodePort 30000) datamate-frontend: container_name: datamate-frontend image: ${REGISTRY:-}datamate-frontend restart: on-failure ports: - - "30000:80" # nodePort → hostPort + - "30000:80" volumes: - frontend_log_volume:/var/log/datamate/frontend networks: [ datamate ] @@ -52,7 +53,6 @@ services: - datamate-backend - datamate-backend-python - # 3) database datamate-database: container_name: datamate-database image: ${REGISTRY:-}datamate-database @@ -72,7 +72,6 @@ services: - "3306:3306" networks: [ datamate ] - # 3) runtime datamate-runtime: container_name: datamate-runtime image: ${REGISTRY:-}datamate-runtime @@ -99,14 +98,16 @@ services: - operator-packages-volume:/usr/local/lib/ops/site-packages networks: [ datamate ] - # 4) mineru + # ============================= + # Optional: Mineru NPU Engine (profile: mineru) + # ============================== datamate-mineru: container_name: datamate-mineru image: datamate-mineru restart: on-failure environment: MINERU_MODEL_SOURCE: local - MINERU_DEVICE_MODE: npu # cpu|cuda|npu|mps + MINERU_DEVICE_MODE: npu VLLM_WORKER_MULTIPROC_METHOD: spawn privileged: true entrypoint: mineru-openai-server @@ -129,7 +130,9 @@ services: - /dev/devmm_svm - /dev/hisi_hdc - # 5) redis + # ============================== + # Optional: Redis (profile: redis) + # ============================== datamate-redis: container_name: datamate-redis image: redis:8.2.3 @@ -137,7 +140,11 @@ services: ports: - "6379:6379" networks: [ datamate ] + profiles: [ redis ] + # ============================== + # Optional: Data Juicer (profile: data-juicer) + # ============================== datamate-data-juicer: container_name: datamate-data-juicer image: datajuicer/data-juicer:v1.4.4 @@ -153,6 +160,171 @@ services: networks: [ datamate ] profiles: [ data-juicer ] + + # ============================== + # Optional: Deer Flow (profile: deer-flow) + # ============================== + deer-flow-backend: + image: ${REGISTRY:-}deer-flow-backend + container_name: deer-flow-backend + env_file: + - .env + volumes: + - ./conf.yaml:/app/conf.yaml:ro + - deer-flow-log-volume:/var/log/deer-flow + restart: unless-stopped + networks: + - datamate + profiles: + - deer-flow + + deer-flow-frontend: + image: ${REGISTRY:-}deer-flow-frontend + container_name: deer-flow-frontend + env_file: + - .env + depends_on: + - deer-flow-backend + restart: unless-stopped + networks: + - datamate + profiles: + - deer-flow + + # ============================== + # Optional: Label Studio (profile: label-studio) + # ============================== + label-studio: + container_name: label-studio + stdin_open: true + tty: true + image: heartexlabs/label-studio:latest + privileged: true + restart: unless-stopped + user: root + expose: + - "8000" + ports: + - "30001:8000" + depends_on: + - pg-db + environment: + - DJANGO_DB=default + - POSTGRE_NAME=postgres + - POSTGRE_USER=postgres + - POSTGRE_PASSWORD= + - POSTGRE_PORT=5432 + - POSTGRE_HOST=db + - LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-} + - LOCAL_FILES_SERVING_ENABLED=true + - LOCAL_FILES_DOCUMENT_ROOT=/label-studio/local + - USE_USERNAME_FOR_LOGIN=true + - LABEL_STUDIO_USERNAME=admin@demo.com + - LABEL_STUDIO_PASSWORD=demoadmin + - LABEL_STUDIO_ENABLE_LEGACY_API_TOKEN=true + - LABEL_STUDIO_USER_TOKEN=abc123abc123 + - LOG_LEVEL=DEBUG + volumes: + - label-studio-data:/label-studio/data:rw + - dataset_volume:/label-studio/local:rw + networks: + - datamate + command: label-studio-uwsgi + profiles: [ label-studio ] + + pg-db: + container_name: pg-db + image: pgautoupgrade/pgautoupgrade:13-alpine + hostname: db + restart: unless-stopped + environment: + - POSTGRES_HOST_AUTH_METHOD=trust + - POSTGRES_USER=postgres + volumes: + - label-studio-db:/var/lib/postgresql/data + networks: + - datamate + profiles: [ label-studio ] + + # ============================== + # Optional: Milvus (profile: milvus) + # ============================== + etcd: + container_name: milvus-etcd + image: quay.io/coreos/etcd:v3.5.18 + environment: + - ETCD_AUTO_COMPACTION_MODE=revision + - ETCD_AUTO_COMPACTION_RETENTION=1000 + - ETCD_QUOTA_BACKEND_BYTES=4294967296 + - ETCD_SNAPSHOT_COUNT=50000 + volumes: + - etcd-volume:/etcd + command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd + restart: always + networks: + - datamate + healthcheck: + test: ["CMD", "etcdctl", "endpoint", "health"] + interval: 30s + timeout: 20s + retries: 3 + profiles: + - milvus + + minio: + container_name: milvus-minio + image: minio/minio:RELEASE.2024-12-18T13-15-44Z + environment: + MINIO_ACCESS_KEY: minioadmin + MINIO_SECRET_KEY: minioadmin + ports: + - "9001:9001" + - "9000:9000" + volumes: + - minio-volume:/minio_data + command: minio server /minio_data --console-address ":9001" + networks: + - datamate + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + profiles: + - milvus + + milvus: + container_name: milvus-standalone + image: milvusdb/milvus:v2.6.5 + command: ["milvus", "run", "standalone"] + security_opt: + - seccomp:unconfined + environment: + ETCD_ENDPOINTS: etcd:2379 + MINIO_ADDRESS: minio:9000 + MQ_TYPE: woodpecker + volumes: + - milvus-volume:/var/lib/milvus + networks: + - datamate + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] + interval: 30s + start_period: 90s + timeout: 20s + retries: 3 + ports: + - "19530:19530" + - "9091:9091" + depends_on: + - "etcd" + - "minio" + profiles: + - milvus + +# ============================== +# Volumes +# ============================== volumes: dataset_volume: name: datamate-dataset-volume @@ -177,6 +349,28 @@ volumes: mineru_log_volume: name: datamate-mineru_log_volume + # Deer Flow + deer-flow-log-volume: + name: deer-flow-log-volume + + # Label Studio + label-studio-data: + name: label-studio-data-volume + label-studio-db: + name: label-studio-db-volume + + # Milvus + etcd-volume: + name: milvus-etcd-volume + minio-volume: + name: milvus-minio-volume + milvus-volume: + name: milvus-milvus-volume + + +# ============================== +# Networks +# ============================== networks: datamate: driver: bridge diff --git a/deployment/docker/deer-flow/docker-compose.yml b/deployment/docker/deer-flow/docker-compose.yml deleted file mode 100644 index edc2018a1..000000000 --- a/deployment/docker/deer-flow/docker-compose.yml +++ /dev/null @@ -1,33 +0,0 @@ -services: - deer-flow-backend: - image: ${REGISTRY:-}deer-flow-backend - container_name: deer-flow-backend - env_file: - - .env - volumes: - - ./conf.yaml:/app/conf.yaml:ro - - deer-flow-log-volume:/var/log/deer-flow - restart: unless-stopped - networks: - - datamate - - deer-flow-frontend: - image: ${REGISTRY:-}deer-flow-frontend - container_name: deer-flow-frontend - env_file: - - .env - depends_on: - - deer-flow-backend - restart: unless-stopped - networks: - - datamate - -volumes: - deer-flow-log-volume: - name: deer-flow-log-volume - -networks: - datamate: - driver: bridge - name: datamate-network - external: true diff --git a/deployment/docker/label-studio/docker-compose.yml b/deployment/docker/label-studio/docker-compose.yml deleted file mode 100644 index d68c6a0b9..000000000 --- a/deployment/docker/label-studio/docker-compose.yml +++ /dev/null @@ -1,62 +0,0 @@ -services: - - label-studio: - stdin_open: true - tty: true - image: heartexlabs/label-studio:latest - privileged: true - restart: unless-stopped - user: root - expose: - - "8000" - ports: - - "30001:8000" - depends_on: - - pg-db - environment: - - DJANGO_DB=default - - POSTGRE_NAME=postgres - - POSTGRE_USER=postgres - - POSTGRE_PASSWORD= - - POSTGRE_PORT=5432 - - POSTGRE_HOST=db - - LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-} - - LOCAL_FILES_SERVING_ENABLED=true - - LOCAL_FILES_DOCUMENT_ROOT=/label-studio/local - - USE_USERNAME_FOR_LOGIN=true - - LABEL_STUDIO_USERNAME=admin@demo.com - - LABEL_STUDIO_PASSWORD=demoadmin - - LABEL_STUDIO_ENABLE_LEGACY_API_TOKEN=true - - LABEL_STUDIO_USER_TOKEN=abc123abc123 - - LOG_LEVEL=DEBUG - volumes: - - label-studio-data:/label-studio/data:rw - - dataset_volume:/label-studio/local:rw - networks: - - datamate - command: label-studio-uwsgi - - pg-db: - image: pgautoupgrade/pgautoupgrade:13-alpine - hostname: db - restart: unless-stopped - environment: - - POSTGRES_HOST_AUTH_METHOD=trust - - POSTGRES_USER=postgres - volumes: - - label-studio-db:/var/lib/postgresql/data - networks: - - datamate - -volumes: - label-studio-data: - label-studio-db: - dataset_volume: - name: datamate-dataset-volume - external: true - -networks: - datamate: - driver: bridge - name: datamate-network - external: true diff --git a/deployment/docker/milvus/docker-compose.yml b/deployment/docker/milvus/docker-compose.yml deleted file mode 100644 index f2f9ceb86..000000000 --- a/deployment/docker/milvus/docker-compose.yml +++ /dev/null @@ -1,73 +0,0 @@ -services: - etcd: - container_name: milvus-etcd - image: quay.io/coreos/etcd:v3.5.18 - environment: - - ETCD_AUTO_COMPACTION_MODE=revision - - ETCD_AUTO_COMPACTION_RETENTION=1000 - - ETCD_QUOTA_BACKEND_BYTES=4294967296 - - ETCD_SNAPSHOT_COUNT=50000 - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd - command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd - restart: always - networks: - - datamate - healthcheck: - test: ["CMD", "etcdctl", "endpoint", "health"] - interval: 30s - timeout: 20s - retries: 3 - - minio: - container_name: milvus-minio - image: minio/minio:RELEASE.2024-12-18T13-15-44Z - environment: - MINIO_ACCESS_KEY: minioadmin - MINIO_SECRET_KEY: minioadmin - ports: - - "9001:9001" - - "9000:9000" - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data - command: minio server /minio_data --console-address ":9001" - networks: - - datamate - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] - interval: 30s - timeout: 20s - retries: 3 - - milvus: - container_name: milvus-standalone - image: milvusdb/milvus:v2.6.5 - command: ["milvus", "run", "standalone"] - security_opt: - - seccomp:unconfined - environment: - ETCD_ENDPOINTS: etcd:2379 - MINIO_ADDRESS: minio:9000 - MQ_TYPE: woodpecker - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus - networks: - - datamate - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] - interval: 30s - start_period: 90s - timeout: 20s - retries: 3 - ports: - - "19530:19530" - - "9091:9091" - depends_on: - - "etcd" - - "minio" - -networks: - datamate: - name: datamate-network - external: true - driver: bridge diff --git a/editions/community/config/application.yml b/editions/community/config/application.yml deleted file mode 100644 index 689d09554..000000000 --- a/editions/community/config/application.yml +++ /dev/null @@ -1,184 +0,0 @@ -# 数据引擎平台 - 主应用配置 -spring: - application: - name: datamate - - # 暂时排除Spring Security自动配置(开发阶段使用) - autoconfigure: - exclude: - - org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration - - org.springframework.boot.autoconfigure.security.servlet.UserDetailsServiceAutoConfiguration - - # 数据源配置 - datasource: - driver-class-name: com.mysql.cj.jdbc.Driver - url: jdbc:mysql://datamate-database:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true - username: ${DB_USERNAME:root} - password: ${DB_PASSWORD:password} - hikari: - maximum-pool-size: 20 - minimum-idle: 5 - connection-timeout: 30000 - idle-timeout: 600000 - max-lifetime: 1800000 - - # Elasticsearch配置 - elasticsearch: - uris: ${ES_URIS:http://localhost:9200} - username: ${ES_USERNAME:} - password: ${ES_PASSWORD:} - connection-timeout: 10s - socket-timeout: 30s - - # Jackson配置 - jackson: - time-zone: Asia/Shanghai - date-format: yyyy-MM-dd HH:mm:ss - serialization: - write-dates-as-timestamps: false - deserialization: - fail-on-unknown-properties: false - - # 文件上传配置 - servlet: - multipart: - max-file-size: 100MB - max-request-size: 100MB - - # 任务调度配置 - task: - execution: - pool: - core-size: ${TASK_EXECUTION_CORE_SIZE:10} - max-size: ${TASK_EXECUTION_MAX_SIZE:20} - queue-capacity: ${TASK_EXECUTION_QUEUE_CAPACITY:100} - keep-alive: ${TASK_EXECUTION_KEEP_ALIVE:60s} - scheduling: - pool: - size: ${TASK_SCHEDULING_POOL_SIZE:5} - config: - import: - - classpath:config/application-datacollection.yml - - classpath:config/application-datamanagement.yml - - ai: - mcp: - server: - name: datamate-backend-mcp-server - base-url: /api - capabilities: - resource: false - prompt: false - completion: false - tool: true - -# MyBatis配置(需在顶层,不在 spring 下) -mybatis-plus: - configuration: - map-underscore-to-camel-case: true - default-fetch-size: 100 - default-statement-timeout: 30 - use-generated-keys: true - cache-enabled: true - lazy-loading-enabled: false - multiple-result-sets-enabled: true - use-column-label: true - auto-mapping-behavior: partial - auto-mapping-unknown-column-behavior: none - default-executor-type: simple - call-setters-on-nulls: false - return-instance-for-empty-row: false - log-impl: org.apache.ibatis.logging.slf4j.Slf4jImpl - mapper-locations: - - classpath*:mappers/**/*.xml - type-aliases-package: - - com.datamate.collection.domain.model - - com.datamate.datamanagement.domain.model.dataset - -# 应用配置 -server: - port: ${SERVER_PORT:8080} - servlet: - context-path: /api - encoding: - charset: UTF-8 - enabled: true - force: true - -# 日志配置 -logging: - config: file:/opt/backend/log4j2.xml - -# Actuator配置 -management: - endpoints: - web: - exposure: - include: health,info,metrics,prometheus - endpoint: - health: - show-details: when-authorized - health: - elasticsearch: - enabled: false # 禁用Elasticsearch健康检查 - -# 平台配置 -datamate: - # JWT配置 - jwt: - secret: ${JWT_SECRET:dataMateSecretKey2024ForJWTTokenGeneration} - expiration: ${JWT_EXPIRATION:86400} # 24小时,单位秒 - header: Authorization - prefix: "Bearer " - - # 文件存储配置 - storage: - type: ${STORAGE_TYPE:local} # local, minio, s3 - local: - base-path: ${STORAGE_LOCAL_PATH:./data/storage} - minio: - endpoint: ${MINIO_ENDPOINT:http://localhost:9000} - access-key: ${MINIO_ACCESS_KEY:minioadmin} - secret-key: ${MINIO_SECRET_KEY:minioadmin} - bucket-name: ${MINIO_BUCKET:data-mate} - - # Ray执行器配置 - ray: - enabled: ${RAY_ENABLED:false} - address: ${RAY_ADDRESS:ray://localhost:10001} - runtime-env: - working-dir: ${RAY_WORKING_DIR:./runtime/python-executor} - pip-packages: - - "ray[default]==2.7.0" - - "pandas" - - "numpy" - - "data-juicer" - - # 数据归集服务配置(可由模块导入叠加) - data-collection: {} - - # 算子市场配置 - operator-market: - repository-path: ${OPERATOR_REPO_PATH:./runtime/operators} - registry-url: ${OPERATOR_REGISTRY_URL:} - max-upload-size: ${OPERATOR_MAX_UPLOAD_SIZE:50MB} - - # 数据处理配置 - data-processing: - max-file-size: ${MAX_FILE_SIZE:1GB} - temp-dir: ${TEMP_DIR:./data/temp} - batch-size: ${BATCH_SIZE:1000} - - # 标注配置 - annotation: - auto-annotation: - enabled: ${AUTO_ANNOTATION_ENABLED:true} - model-endpoint: ${ANNOTATION_MODEL_ENDPOINT:} - quality-control: - enabled: ${QC_ENABLED:true} - threshold: ${QC_THRESHOLD:0.8} - - # RAG配置 - rag: - milvus-host: ${MILVUS_HOST:milvus-standalone} - milvus-port: ${MILVUS_PORT:19530} diff --git a/editions/community/config/log4j2.xml b/editions/community/config/log4j2.xml deleted file mode 100644 index 5358fc94d..000000000 --- a/editions/community/config/log4j2.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - /var/log/datamate/backend - %d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n - 100MB - 30 - INFO - WARN - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/editions/enterprise/config/application.yml b/editions/enterprise/config/application.yml deleted file mode 100644 index 9326ab629..000000000 --- a/editions/enterprise/config/application.yml +++ /dev/null @@ -1,181 +0,0 @@ -# 数据引擎平台 - 主应用配置 -spring: - application: - name: datamate - - # 暂时排除Spring Security自动配置(开发阶段使用) - autoconfigure: - exclude: - - org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration - - org.springframework.boot.autoconfigure.security.servlet.UserDetailsServiceAutoConfiguration - - # 数据源配置 - datasource: - driver-class-name: com.mysql.cj.jdbc.Driver - url: jdbc:mysql://mysql:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true - username: ${DB_USERNAME:root} - password: ${DB_PASSWORD:password} - hikari: - maximum-pool-size: 20 - minimum-idle: 5 - connection-timeout: 30000 - idle-timeout: 600000 - max-lifetime: 1800000 - - # Elasticsearch配置 - elasticsearch: - uris: ${ES_URIS:http://localhost:9200} - username: ${ES_USERNAME:} - password: ${ES_PASSWORD:} - connection-timeout: 10s - socket-timeout: 30s - - # Jackson配置 - jackson: - time-zone: Asia/Shanghai - date-format: yyyy-MM-dd HH:mm:ss - serialization: - write-dates-as-timestamps: false - deserialization: - fail-on-unknown-properties: false - - # 文件上传配置 - servlet: - multipart: - max-file-size: 100MB - max-request-size: 100MB - - # 任务调度配置 - task: - execution: - pool: - core-size: ${TASK_EXECUTION_CORE_SIZE:10} - max-size: ${TASK_EXECUTION_MAX_SIZE:20} - queue-capacity: ${TASK_EXECUTION_QUEUE_CAPACITY:100} - keep-alive: ${TASK_EXECUTION_KEEP_ALIVE:60s} - scheduling: - pool: - size: ${TASK_SCHEDULING_POOL_SIZE:5} - config: - import: - - classpath:config/application-datacollection.yml - - classpath:config/application-datamanagement.yml - -# MyBatis配置(需在顶层,不在 spring 下) -mybatis-plus: - configuration: - map-underscore-to-camel-case: true - default-fetch-size: 100 - default-statement-timeout: 30 - use-generated-keys: true - cache-enabled: true - lazy-loading-enabled: false - multiple-result-sets-enabled: true - use-column-label: true - auto-mapping-behavior: partial - auto-mapping-unknown-column-behavior: none - default-executor-type: simple - call-setters-on-nulls: false - return-instance-for-empty-row: false - log-impl: org.apache.ibatis.logging.slf4j.Slf4jImpl - mapper-locations: - - classpath*:mappers/**/*.xml - type-aliases-package: - - com.datamate.collection.domain.model - - com.datamate.datamanagement.domain.model.dataset - -# 应用配置 -server: - port: ${SERVER_PORT:8080} - servlet: - context-path: /api - encoding: - charset: UTF-8 - enabled: true - force: true - -# 日志配置 -logging: - config: file:/opt/backend/log4j2.xml - -# Actuator配置 -management: - endpoints: - web: - exposure: - include: health,info,metrics,prometheus - endpoint: - health: - show-details: when-authorized - health: - elasticsearch: - enabled: false # 禁用Elasticsearch健康检查 - -# 平台配置 -datamate: - # JWT配置 - jwt: - secret: ${JWT_SECRET:dataMateSecretKey2024ForJWTTokenGeneration} - expiration: ${JWT_EXPIRATION:86400} # 24小时,单位秒 - header: Authorization - prefix: "Bearer " - - # 文件存储配置 - storage: - type: ${STORAGE_TYPE:local} # local, minio, s3 - local: - base-path: ${STORAGE_LOCAL_PATH:./data/storage} - minio: - endpoint: ${MINIO_ENDPOINT:http://localhost:9000} - access-key: ${MINIO_ACCESS_KEY:minioadmin} - secret-key: ${MINIO_SECRET_KEY:minioadmin} - bucket-name: ${MINIO_BUCKET:data-mate} - - # Ray执行器配置 - ray: - enabled: ${RAY_ENABLED:false} - address: ${RAY_ADDRESS:ray://localhost:10001} - runtime-env: - working-dir: ${RAY_WORKING_DIR:./runtime/python-executor} - pip-packages: - - "ray[default]==2.7.0" - - "pandas" - - "numpy" - - "data-juicer" - - # 数据归集服务配置(可由模块导入叠加) - data-collection: {} - - # 算子市场配置 - operator-market: - repository-path: ${OPERATOR_REPO_PATH:./runtime/operators} - registry-url: ${OPERATOR_REGISTRY_URL:} - max-upload-size: ${OPERATOR_MAX_UPLOAD_SIZE:50MB} - - # 数据处理配置 - data-processing: - max-file-size: ${MAX_FILE_SIZE:1GB} - temp-dir: ${TEMP_DIR:./data/temp} - batch-size: ${BATCH_SIZE:1000} - - # 标注配置 - annotation: - auto-annotation: - enabled: ${AUTO_ANNOTATION_ENABLED:true} - model-endpoint: ${ANNOTATION_MODEL_ENDPOINT:} - quality-control: - enabled: ${QC_ENABLED:true} - threshold: ${QC_THRESHOLD:0.8} - - # RAG配置 - rag: - embedding: - model: ${RAG_EMBEDDING_MODEL:text-embedding-ada-002} - api-key: ${RAG_API_KEY:} - dimension: ${RAG_DIMENSION:1536} - chunk: - size: ${RAG_CHUNK_SIZE:512} - overlap: ${RAG_CHUNK_OVERLAP:50} - retrieval: - top-k: ${RAG_TOP_K:5} - score-threshold: ${RAG_SCORE_THRESHOLD:0.7} diff --git a/editions/enterprise/config/log4j2.xml b/editions/enterprise/config/log4j2.xml deleted file mode 100644 index f9d0cf3a5..000000000 --- a/editions/enterprise/config/log4j2.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - /var/log/data-mate/backend - %d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n - 100MB - 30 - INFO - WARN - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/scripts/images/frontend/Dockerfile b/scripts/images/frontend/Dockerfile index db19a567a..fcdc78ef9 100644 --- a/scripts/images/frontend/Dockerfile +++ b/scripts/images/frontend/Dockerfile @@ -10,7 +10,7 @@ RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi && \ FROM nginx:1.29 AS runner COPY --from=builder /app/dist /opt/frontend -COPY deployment/docker/datamate/backend.conf /etc/nginx/conf.d/backend.conf +COPY scripts/images/frontend/backend.conf /etc/nginx/conf.d/backend.conf RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ && rm -f /etc/nginx/conf.d/default.conf diff --git a/deployment/docker/datamate/backend.conf b/scripts/images/frontend/backend.conf similarity index 100% rename from deployment/docker/datamate/backend.conf rename to scripts/images/frontend/backend.conf From 4bae1ede33f2206fc762cd3b9b718f1ab8435f5b Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Mon, 12 Jan 2026 19:45:08 +0800 Subject: [PATCH 3/7] feat: update knowledge base file detail view with pagination and metadata display --- deployment/docker/datamate/docker-compose.yml | 4 +- .../Detail/KnowledgeBaseDetail.tsx | 5 + .../FileDetail/KnowledgeBaseFileDetail.tsx | 595 +++++------------- .../pages/KnowledgeBase/knowledge-base.api.ts | 29 +- 4 files changed, 177 insertions(+), 456 deletions(-) diff --git a/deployment/docker/datamate/docker-compose.yml b/deployment/docker/datamate/docker-compose.yml index 191d9749a..62165ea02 100644 --- a/deployment/docker/datamate/docker-compose.yml +++ b/deployment/docker/datamate/docker-compose.yml @@ -281,8 +281,8 @@ services: - "9001:9001" - "9000:9000" volumes: - - minio-volume:/minio_data - command: minio server /minio_data --console-address ":9001" + - minio-volume:/data + command: minio server /data --console-address ":9001" networks: - datamate healthcheck: diff --git a/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx b/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx index 962d36bd9..4cd0c0836 100644 --- a/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx +++ b/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx @@ -170,6 +170,11 @@ const KnowledgeBaseDetailPage: React.FC = () => { width: 200, ellipsis: true, fixed: "left" as const, + render: (_: unknown, file: KBFile) => ( + navigate(`/data/knowledge-base/file-detail/${file.id}?knowledgeBaseId=${knowledgeBase?.id || ''}`)}> + {file.name} + + ) }, { title: "状态", diff --git a/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx b/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx index de47e1189..bcff2f6b4 100644 --- a/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx +++ b/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx @@ -1,25 +1,15 @@ -import React, { useState } from "react"; -import { - Plus, - Eye, - Edit, - Clock, - Trash2, - Scissors, - VideoIcon as Vector, - Server, - FileText, - Download, -} from "lucide-react"; -import { Card, Button, Badge, Input, Tabs, Modal, Breadcrumb, Tag } from "antd"; -import { mockChunks, mockQAPairs, sliceOperators } from "@/mock/knowledgeBase"; -import type { - KnowledgeBase, - KBFile, -} from "@/pages/KnowledgeBase/knowledge-base.model"; -import { Link, useNavigate } from "react-router"; +import React, { useEffect, useState } from "react"; +import { Eye, Edit, Trash2, FileText, Download } from "lucide-react"; +import { Card, Button, Badge, Input, Tabs, Modal, Breadcrumb, Tag, Spin, Empty, Alert } from "antd"; +import { queryKnowledgeBaseFileDetailUsingGet } from "@/pages/KnowledgeBase/knowledge-base.api"; +import { Link, useParams } from "react-router"; import DetailHeader from "@/components/DetailHeader"; -import DevelopmentInProgress from "@/components/DevelopmentInProgress"; + +interface RagChunk { + id: string; + text: string; + metadata: unknown; // may be string or object +} // 状态标签 const getStatusLabel = (status: string) => { @@ -49,114 +39,127 @@ const getStatusColor = (status: string) => { }; const KnowledgeBaseFileDetail: React.FC = () => { - const navigate = useNavigate(); - // 假设通过 props 或路由参数获取 selectedFile/selectedKB - const [selectedFile] = useState( - mockChunks.length - ? { - id: 1, - name: "API文档.pdf", - type: "pdf", - size: "2.5 MB", - status: "completed", - chunkCount: mockChunks.length, - progress: 100, - uploadedAt: "2024-01-22 10:30", - source: "upload", - vectorizationStatus: "completed", - } - : ({} as KBFile) - ); - const [selectedKB] = useState({ - id: 1, - name: "API知识库", - description: "", - type: "unstructured", - status: "ready", - fileCount: 1, - chunkCount: mockChunks.length, - vectorCount: mockChunks.length, - size: "2.5 MB", - progress: 100, - createdAt: "2024-01-22", - lastUpdated: "2024-01-22", - vectorDatabase: "pinecone", - config: { - embeddingModel: "text-embedding-3-large", - chunkSize: 512, - overlap: 50, - sliceMethod: "semantic", - enableQA: true, - vectorDimension: 1536, - sliceOperators: ["semantic-split", "paragraph-split"], - }, - files: [], - vectorizationHistory: [], - }); + const { id } = useParams(); + // id 为路由中的 ragFileId,knowledgeBaseId 通过上一级 detail 路由或 query 传入,这里尝试从 URLSearchParams 获取 + const search = new URLSearchParams(window.location.search); + const knowledgeBaseId = search.get("knowledgeBaseId") || ""; + const ragFileId = id || ""; + const kbLink = knowledgeBaseId ? `/data/knowledge-base/detail/${knowledgeBaseId}` : "/data/knowledge-base"; - const [currentChunkPage, setCurrentChunkPage] = useState(1); - const chunksPerPage = 5; - const totalPages = Math.ceil(mockChunks.length / chunksPerPage); - const startIndex = (currentChunkPage - 1) * chunksPerPage; - const currentChunks = mockChunks.slice( - startIndex, - startIndex + chunksPerPage - ); + // 远程数据状态 + const [paged, setPaged] = useState<{ + page: number; + size: number; + totalElements: number; + totalPages: number; + content: RagChunk[]; + } | null>(null); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); - const [editingChunk, setEditingChunk] = useState(null); + // 本地 UI 状态 + const [editingChunk, setEditingChunk] = useState(null); const [editChunkContent, setEditChunkContent] = useState(""); - const [chunkDetailModal, setChunkDetailModal] = useState(null); - const [showSliceTraceDialog, setShowSliceTraceDialog] = useState< - number | null - >(null); + const [chunkDetailModal, setChunkDetailModal] = useState(null); + const [showSliceTraceDialog, setShowSliceTraceDialog] = useState(null); + + const pageSize = 20; + const [currentPageZeroBased, setCurrentPageZeroBased] = useState(0); + + const safeParse = (meta: unknown): unknown => { + if (typeof meta === "string") { + try { + return JSON.parse(meta); + } catch { + return meta; // 保持原样 + } + } + return meta; + }; - const handleEditChunk = (chunkId: number, content: string) => { + const fetchChunks = async (page: number) => { + if (!knowledgeBaseId || !ragFileId) return; + setLoading(true); + setError(null); + try { + const res = await queryKnowledgeBaseFileDetailUsingGet(knowledgeBaseId, ragFileId, { page, size: pageSize }); + // 兼容返回结构 ResponsePagedResponseRagChunk -> { code, message, data } + const raw = (res?.data ?? res) as { + page: number; + size: number; + totalElements: number; + totalPages: number; + content: RagChunk[]; + }; + const normalized = { + ...raw, + content: (raw?.content ?? []).map((c) => ({ + ...c, + metadata: safeParse((c as RagChunk)?.metadata), + })), + }; + setPaged(normalized); + } catch (err: unknown) { + const msg = typeof err === "object" && err !== null && "message" in err ? String((err as { message?: string }).message) : "加载失败"; + setError(msg); + } finally { + setLoading(false); + } + }; + + useEffect(() => { + fetchChunks(currentPageZeroBased); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [knowledgeBaseId, ragFileId, currentPageZeroBased]); + + const totalElements = paged?.totalElements ?? 0; + const totalPages = paged?.totalPages ?? 0; + const currentChunks = paged?.content ?? []; + + const handleEditChunk = (chunkId: string, content: string) => { setEditingChunk(chunkId); setEditChunkContent(content); }; - const handleSaveChunk = (chunkId: number) => { - // 实际保存逻辑 + const handleSaveChunk = (chunkId: string) => { + // TODO: 保存到后端(暂不实现) setEditingChunk(null); setEditChunkContent(""); }; - const handleDeleteChunk = (chunkId: number) => { - // 实际删除逻辑 + const handleDeleteChunk = (chunkId: string) => { + // TODO: 删除后端分块(暂不实现) setEditingChunk(null); setEditChunkContent(""); }; - const handleViewChunkDetail = (chunkId: number) => { + const handleViewChunkDetail = (chunkId: string) => { setChunkDetailModal(chunkId); }; const renderChunks = () => (
+ {error && }
- 共 {mockChunks.length} 个分块,第 {startIndex + 1}- - {Math.min(startIndex + chunksPerPage, mockChunks.length)} 个 + 共 {totalElements} 个分块,第 {totalElements === 0 ? 0 : currentPageZeroBased * pageSize + 1}- + {Math.min((currentPageZeroBased + 1) * pageSize, totalElements)} 个
- {currentChunkPage} / {totalPages} + {totalPages === 0 ? 0 : currentPageZeroBased + 1} / {totalPages} @@ -170,11 +173,12 @@ const KnowledgeBaseFileDetail: React.FC = () => {

分块 {chunk.id}

- - {sliceOperators.find( - (op) => op.id === chunk.sliceOperator - )?.name || chunk.sliceOperator} - + {/* 算子名:从 metadata.sliceOperator 显示 */} + {chunk.metadata?.sliceOperator && ( + + {chunk.metadata.sliceOperator} + + )}
{editingChunk === chunk.id ? ( @@ -198,25 +202,13 @@ const KnowledgeBaseFileDetail: React.FC = () => { ) : ( <> - - - @@ -231,23 +223,32 @@ const KnowledgeBaseFileDetail: React.FC = () => { rows={3} /> ) : ( - chunk.content + chunk.text )}
+ {/* 元数据展示,保持和召回结果风格一致 */} +
+
metadata:
+
+                    {typeof chunk.metadata === "string"
+                      ? chunk.metadata
+                      : JSON.stringify(chunk.metadata ?? {}, null, 2)}
+                  
+
+ {/* 结构化元数据的快捷标签(若可用) */}
- 位置: {chunk.position} - Token: {chunk.tokens} - {chunk.metadata?.page && ( - 页码: {chunk.metadata.page} - )} - {chunk.metadata?.section && ( - 章节: {chunk.metadata.section} - )} + {chunk?.metadata?.position && 位置: {chunk.metadata.position}} + {chunk?.metadata?.tokens && Token: {chunk.metadata.tokens}} + {chunk?.metadata?.page && 页码: {chunk.metadata.page}} + {chunk?.metadata?.section && 章节: {chunk.metadata.section}}
))} + {!loading && currentChunks.length === 0 && ( + + )}
); @@ -261,81 +262,42 @@ const KnowledgeBaseFileDetail: React.FC = () => { }, { title: ( - - {selectedKB?.name} - + 知识库详情 ), }, { - title: selectedFile.name, + title: `文件 ${ragFileId}`, }, ]} /> + {/* 头部统计使用最简占位,后续可扩展 */} , iconColor: "bg-blue-500 text-blue-600", status: { - label: getStatusLabel(selectedFile.status), - color: getStatusColor(selectedFile.status), + label: "", + color: "default", }, - name: selectedFile.name, - description: `${selectedFile.size} • ${ - selectedFile.chunkCount - } 个分块${ - selectedFile.source === "dataset" - ? ` • 数据集: ${selectedFile.datasetId}` - : "" - }`, - createdAt: selectedFile.uploadedAt, - lastUpdated: selectedFile.uploadedAt, + name: `文件 ${ragFileId}`, + description: `${totalElements} 个分块`, + createdAt: "", + lastUpdated: "", }} - statistics={[ - { - icon: , - label: "分块", - value: selectedFile.chunkCount, - }, - { - icon: , - label: "向量化状态", - value: getStatusLabel( - selectedFile.vectorizationStatus || "pending" - ), - }, - { - icon: , - label: "文件大小", - value: selectedFile.size, - }, - { - icon: , - label: "上传时间", - value: selectedFile.uploadedAt, - }, - ]} + statistics={[]} operations={[ { key: "download", label: "下载", icon: , - onClick: () => { - // 下载逻辑 - }, - }, - { - key: "delete", - label: "删除", - icon: , - danger: true, - onClick: () => { - // 删除逻辑 - }, + onClick: () => {}, }, ]} /> - {renderChunks()} + + {loading ?
: renderChunks()} +
{/* Slice Trace Modal */} { width={800} destroyOnClose > + {/* 简化为内容占位,真实数据待后端提供更多字段 */}

切片处理流程

@@ -356,117 +319,12 @@ const KnowledgeBaseFileDetail: React.FC = () => {

原始文档导入

-

- 文档: {selectedFile.name} -

-
- 完成 -
-
-
- 2 -
-
-

语义分割算子

-

- 基于语义相似度智能分割,阈值: 0.7 -

-
- 完成 -
-
-
- 3 -
-
-

段落分割算子

-

按段落边界进一步细分

+

文件: {ragFileId}

完成
-
-
- 4 -
-
-

向量化处理

-

- 使用 {selectedKB?.config.embeddingModel} 生成向量 -

-
- - {selectedFile.vectorizationStatus === "completed" - ? "完成" - : "处理中"} - -
- -
- -

分块信息

-
-
- 分块ID: - {showSliceTraceDialog} -
-
- 父分块: - - {mockChunks.find((c) => c.id === showSliceTraceDialog) - ?.parentChunkId || "无"} - -
-
- Token数: - - { - mockChunks.find((c) => c.id === showSliceTraceDialog) - ?.tokens - } - -
-
- 创建时间: - - { - mockChunks.find((c) => c.id === showSliceTraceDialog) - ?.createdAt - } - -
-
-
- - -

向量信息

-
-
- 向量ID: - - { - mockChunks.find((c) => c.id === showSliceTraceDialog) - ?.vectorId - } - -
-
- 向量维度: - {selectedKB?.config.vectorDimension} -
-
- 相似度: - - { - mockChunks.find((c) => c.id === showSliceTraceDialog) - ?.similarity - } - -
-
-
-
@@ -475,7 +333,7 @@ const KnowledgeBaseFileDetail: React.FC = () => { open={!!chunkDetailModal} onCancel={() => setChunkDetailModal(null)} footer={null} - title={`分块详细信息 - 分块 ${chunkDetailModal}`} + title={`分块详细信息 - 分块 ${chunkDetailModal ?? ""}`} width={900} destroyOnClose > @@ -489,10 +347,7 @@ const KnowledgeBaseFileDetail: React.FC = () => {
分块内容
c.id === chunkDetailModal) - ?.content || "" - } + value={currentChunks.find((c) => c.id === chunkDetailModal)?.text || ""} rows={8} readOnly className="mt-2" @@ -507,169 +362,19 @@ const KnowledgeBaseFileDetail: React.FC = () => {
位置
- c.id === chunkDetailModal) - ?.position || "" - } - readOnly - /> + c.id === chunkDetailModal)?.metadata?.position || ""} readOnly />
Token数量
- c.id === chunkDetailModal) - ?.tokens || "" - } - readOnly - /> -
-
-
相似度
- c.id === chunkDetailModal) - ?.similarity || "" - } - readOnly - /> -
-
-
向量维度
- c.id === chunkDetailModal) - ?.embedding?.length || "" - } - readOnly - /> -
-
-
创建时间
- c.id === chunkDetailModal) - ?.createdAt || "" - } - readOnly - /> -
-
-
更新时间
- c.id === chunkDetailModal) - ?.updatedAt || "" - } - readOnly - /> + c.id === chunkDetailModal)?.metadata?.tokens || ""} readOnly />
-
向量ID
- c.id === chunkDetailModal) - ?.vectorId || "" - } - readOnly - /> +
页码
+ c.id === chunkDetailModal)?.metadata?.page || ""} readOnly />
-
切片算子
- c.id === chunkDetailModal) - ?.sliceOperator || "" - } - readOnly - /> -
-
- ), - }, - { - key: "qa", - label: "Q&A对", - children: ( -
-
- 关联的问答对 - -
- {mockQAPairs.map((qa) => ( - -
-
- - 问题 {qa.id} - -

{qa.question}

-
-
- - 答案 - -

{qa.answer}

-
-
- - -
-
-
- ))} -
- ), - }, - { - key: "trace", - label: "切片回溯", - children: ( -
-
- -
-

原始文档

-

- {selectedFile.name} -

-
-
-
- -
-

切片算子处理

-

- 应用算子:{" "} - { - sliceOperators.find( - (op) => - op.id === - mockChunks.find((c) => c.id === chunkDetailModal) - ?.sliceOperator - )?.name - } -

-
-
-
- -
-

向量化处理

-

- 生成 {selectedKB?.config.vectorDimension} 维向量 -

-
+
章节
+ c.id === chunkDetailModal)?.metadata?.section || ""} readOnly />
), diff --git a/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts b/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts index b7992da9f..a26c949ca 100644 --- a/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts +++ b/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts @@ -1,12 +1,12 @@ import { get, post, put, del } from "@/utils/request"; // 获取知识库列表 -export function queryKnowledgeBasesUsingPost(params: any) { +export function queryKnowledgeBasesUsingPost(params: object) { return post("/api/knowledge-base/list", params); } // 创建知识库 -export function createKnowledgeBaseUsingPost(data: any) { +export function createKnowledgeBaseUsingPost(data: object) { return post("/api/knowledge-base/create", data); } @@ -16,8 +16,8 @@ export function queryKnowledgeBaseByIdUsingGet(baseId: string) { } // 更新知识库 -export function updateKnowledgeBaseByIdUsingPut(baseId: string, data: any) { - return put(`/api/knowledge-base/${baseId}`, data); +export function updateKnowledgeBaseByIdUsingPut(baseId: string, data: object) { + return (put as unknown as (url: string, data?: object) => Promise)(`/api/knowledge-base/${baseId}`, data); } // 删除知识库 @@ -26,18 +26,18 @@ export function deleteKnowledgeBaseByIdUsingDelete(baseId: string) { } // 获取知识生成文件列表 -export function queryKnowledgeBaseFilesUsingGet(baseId: string, data) { - return get(`/api/knowledge-base/${baseId}/files`, data); +export function queryKnowledgeBaseFilesUsingGet(baseId: string, params?: Record) { + return get(`/api/knowledge-base/${baseId}/files${params ? `?${new URLSearchParams(params).toString()}` : ""}`); } // 添加文件到知识库 -export function addKnowledgeBaseFilesUsingPost(baseId: string, data: any) { +export function addKnowledgeBaseFilesUsingPost(baseId: string, data: object) { return post(`/api/knowledge-base/${baseId}/files`, data); } // 删除知识生成文件 -export function deleteKnowledgeBaseFileByIdUsingDelete(baseId: string, data: any) { - return del(`/api/knowledge-base/${baseId}/files`, data); +export function deleteKnowledgeBaseFileByIdUsingDelete(baseId: string, data: object | null) { + return (del as unknown as (url: string, data?: object | null) => Promise)(`/api/knowledge-base/${baseId}/files`, data ?? null); } // 检索知识库内容 @@ -49,3 +49,14 @@ export function retrieveKnowledgeBaseContent(data: { }) { return post("/api/knowledge-base/retrieve", data); } + +// 新增:获取知识库文件详情(分页的切片数据) +export function queryKnowledgeBaseFileDetailUsingGet( + knowledgeBaseId: string, + ragFileId: string, + params: { page?: number; size?: number } = { page: 0, size: 20 } +) { + const page = params.page ?? 0; + const size = params.size ?? 20; + return get(`/api/knowledge-base/${knowledgeBaseId}/files/${ragFileId}?page=${page}&size=${size}`); +} From eb6b655f6edbb48df78730b856e9cfd3c3b4d5e8 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Mon, 12 Jan 2026 20:54:20 +0800 Subject: [PATCH 4/7] feat: enhance knowledge base file detail view with file name display in breadcrumbs --- .../Detail/KnowledgeBaseDetail.tsx | 2 +- .../FileDetail/KnowledgeBaseFileDetail.tsx | 33 +++++-------------- 2 files changed, 9 insertions(+), 26 deletions(-) diff --git a/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx b/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx index 4cd0c0836..ecc197df4 100644 --- a/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx +++ b/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx @@ -171,7 +171,7 @@ const KnowledgeBaseDetailPage: React.FC = () => { ellipsis: true, fixed: "left" as const, render: (_: unknown, file: KBFile) => ( - navigate(`/data/knowledge-base/file-detail/${file.id}?knowledgeBaseId=${knowledgeBase?.id || ''}`)}> + navigate(`/data/knowledge-base/file-detail/${file.id}?knowledgeBaseId=${knowledgeBase?.id || ''}&fileName=${encodeURIComponent(file.name || file.fileName || '')}`)}> {file.name} ) diff --git a/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx b/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx index bcff2f6b4..694ec8749 100644 --- a/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx +++ b/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx @@ -40,9 +40,10 @@ const getStatusColor = (status: string) => { const KnowledgeBaseFileDetail: React.FC = () => { const { id } = useParams(); - // id 为路由中的 ragFileId,knowledgeBaseId 通过上一级 detail 路由或 query 传入,这里尝试从 URLSearchParams 获取 + // id 为路由中的 ragFileId,knowledgeBaseId 通过上一级 detail 路由或 query 传入 const search = new URLSearchParams(window.location.search); const knowledgeBaseId = search.get("knowledgeBaseId") || ""; + const fileName = search.get("fileName") || ""; const ragFileId = id || ""; const kbLink = knowledgeBaseId ? `/data/knowledge-base/detail/${knowledgeBaseId}` : "/data/knowledge-base"; @@ -257,17 +258,9 @@ const KnowledgeBaseFileDetail: React.FC = () => {
知识库, - }, - { - title: ( - 知识库详情 - ), - }, - { - title: `文件 ${ragFileId}`, - }, + { title: 知识库 }, + { title: (知识库详情) }, + { title: fileName || `文件 ${ragFileId}` }, ]} /> {/* 头部统计使用最简占位,后续可扩展 */} @@ -276,24 +269,14 @@ const KnowledgeBaseFileDetail: React.FC = () => { id: ragFileId, icon: , iconColor: "bg-blue-500 text-blue-600", - status: { - label: "", - color: "default", - }, - name: `文件 ${ragFileId}`, + status: { label: "就绪", color: "default" }, + name: fileName || `文件 ${ragFileId}`, description: `${totalElements} 个分块`, createdAt: "", lastUpdated: "", }} statistics={[]} - operations={[ - { - key: "download", - label: "下载", - icon: , - onClick: () => {}, - }, - ]} + operations={[{ key: "download", label: "下载", icon: , onClick: () => {} }]} /> {loading ?
: renderChunks()} From a06bcaffec05bd71cc30b3810510bc53c75c7fa2 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Mon, 12 Jan 2026 21:16:10 +0800 Subject: [PATCH 5/7] feat: update KnowledgeBaseFileDetail component with improved icon size and color --- frontend/src/components/DetailHeader.tsx | 26 ++++++++++--------- .../FileDetail/KnowledgeBaseFileDetail.tsx | 6 ++--- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/frontend/src/components/DetailHeader.tsx b/frontend/src/components/DetailHeader.tsx index 516485200..385b393f9 100644 --- a/frontend/src/components/DetailHeader.tsx +++ b/frontend/src/components/DetailHeader.tsx @@ -49,32 +49,34 @@ function DetailHeader({
- {
{(data as any)?.icon}
|| ( + {(data as any)?.icon ? ( +
{(data as any).icon}
+ ) : ( )}
-

{data?.name}

- {data?.status && ( - +

{(data as any)?.name}

+ {(data as any)?.status && ( +
- {data.status?.icon && {data.status?.icon}} - {data.status?.label} + {(data as any).status?.icon && {(data as any).status?.icon}} + {(data as any).status?.label}
)}
- {data?.tags && ( + {(data as any)?.tags && (
- {data?.tags?.map((tag) => ( + {(data as any)?.tags?.map((tag: any) => ( {tag.name} @@ -89,9 +91,9 @@ function DetailHeader({ )}
)} -

{data?.description}

+

{(data as any)?.description}

- {statistics.map((stat) => ( + {statistics.map((stat: any) => (
{stat.icon} {stat.value} @@ -101,7 +103,7 @@ function DetailHeader({
- {operations.map((op) => { + {operations.map((op: any) => { if (op.isDropdown) { return ( { , - iconColor: "bg-blue-500 text-blue-600", + icon: , + iconColor: "#a27e7e", status: { label: "就绪", color: "default" }, name: fileName || `文件 ${ragFileId}`, description: `${totalElements} 个分块`, From 4d00da7e0bb10745a970bf44854f5deac449b3a7 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Mon, 12 Jan 2026 21:19:23 +0800 Subject: [PATCH 6/7] feat: update KnowledgeBaseFileDetail component with improved icon size and color --- deployment/docker/datamate/docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/docker/datamate/docker-compose.yml b/deployment/docker/datamate/docker-compose.yml index 191d9749a..62165ea02 100644 --- a/deployment/docker/datamate/docker-compose.yml +++ b/deployment/docker/datamate/docker-compose.yml @@ -281,8 +281,8 @@ services: - "9001:9001" - "9000:9000" volumes: - - minio-volume:/minio_data - command: minio server /minio_data --console-address ":9001" + - minio-volume:/data + command: minio server /data --console-address ":9001" networks: - datamate healthcheck: From 967cfafae6445bfca0023c254cd6bd42c3cb1ea1 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Mon, 12 Jan 2026 21:24:12 +0800 Subject: [PATCH 7/7] feat: update KnowledgeBaseFileDetail component with improved icon size and color --- .../FileDetail/KnowledgeBaseFileDetail.tsx | 20 +++++++++---------- .../pages/KnowledgeBase/knowledge-base.api.ts | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx b/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx index d0c907c3f..826dd7ec7 100644 --- a/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx +++ b/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx @@ -65,7 +65,7 @@ const KnowledgeBaseFileDetail: React.FC = () => { const [showSliceTraceDialog, setShowSliceTraceDialog] = useState(null); const pageSize = 20; - const [currentPageZeroBased, setCurrentPageZeroBased] = useState(0); + const [currentPage, setCurrentPage] = useState(1); const safeParse = (meta: unknown): unknown => { if (typeof meta === "string") { @@ -109,9 +109,9 @@ const KnowledgeBaseFileDetail: React.FC = () => { }; useEffect(() => { - fetchChunks(currentPageZeroBased); + fetchChunks(currentPage); // eslint-disable-next-line react-hooks/exhaustive-deps - }, [knowledgeBaseId, ragFileId, currentPageZeroBased]); + }, [knowledgeBaseId, ragFileId, currentPage]); const totalElements = paged?.totalElements ?? 0; const totalPages = paged?.totalPages ?? 0; @@ -143,24 +143,24 @@ const KnowledgeBaseFileDetail: React.FC = () => { {error && }
- 共 {totalElements} 个分块,第 {totalElements === 0 ? 0 : currentPageZeroBased * pageSize + 1}- - {Math.min((currentPageZeroBased + 1) * pageSize, totalElements)} 个 + 共 {totalElements} 个分块,第 {totalElements === 0 ? 0 : (currentPage - 1) * pageSize + 1}- + {Math.min(currentPage * pageSize, totalElements)} 个
- {totalPages === 0 ? 0 : currentPageZeroBased + 1} / {totalPages} + {totalPages === 0 ? 0 : currentPage} / {totalPages} diff --git a/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts b/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts index a26c949ca..18dc34d09 100644 --- a/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts +++ b/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts @@ -54,9 +54,9 @@ export function retrieveKnowledgeBaseContent(data: { export function queryKnowledgeBaseFileDetailUsingGet( knowledgeBaseId: string, ragFileId: string, - params: { page?: number; size?: number } = { page: 0, size: 20 } + params: { page?: number; size?: number } = { page: 1, size: 20 } ) { - const page = params.page ?? 0; + const page = params.page ?? 1; const size = params.size ?? 20; return get(`/api/knowledge-base/${knowledgeBaseId}/files/${ragFileId}?page=${page}&size=${size}`); }