From 380873ed5c0d09c23753d087dd546c7a68b08d36 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Mon, 12 Jan 2026 16:18:40 +0800 Subject: [PATCH 1/2] feat: update error handling in RagEtlService and add commons-io dependency --- backend/pom.xml | 6 ++++++ backend/services/rag-indexer-service/pom.xml | 6 ++++++ .../rag/indexer/infrastructure/event/RagEtlService.java | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/backend/pom.xml b/backend/pom.xml index f57ea1571..7105089b6 100644 --- a/backend/pom.xml +++ b/backend/pom.xml @@ -65,6 +65,12 @@ pom import + + commons-io + commons-io + 2.16.1 + compile + com.google.protobuf protobuf-bom diff --git a/backend/services/rag-indexer-service/pom.xml b/backend/services/rag-indexer-service/pom.xml index 5f7a4fb33..23db9dd33 100644 --- a/backend/services/rag-indexer-service/pom.xml +++ b/backend/services/rag-indexer-service/pom.xml @@ -86,6 +86,12 @@ dev.langchain4j langchain4j-document-parser-apache-poi + + + commons-io + commons-io + + dev.langchain4j diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java index 53c78fa8a..355e4b2ce 100644 --- a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java @@ -78,7 +78,7 @@ public void processAfterCommit(DataInsertedEvent event) { // 更新文件状态为已处理 ragFile.setStatus(FileStatus.PROCESSED); ragFileRepository.updateById(ragFile); - } catch (Exception e) { + } catch (Throwable e) { // 处理异常 log.error("Error processing RAG file: {}", ragFile.getFileId(), e); ragFile.setStatus(FileStatus.PROCESS_FAILED); From 751b97acedfb5af41cad1a54300fcb25e21e9fb3 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Mon, 12 Jan 2026 17:05:33 +0800 Subject: [PATCH 2/2] feat: enhance Docker configuration with additional services and profiles --- .gitignore | 2 +- Makefile | 52 ++--- deployment/docker/datamate/docker-compose.yml | 212 +++++++++++++++++- .../docker/deer-flow/docker-compose.yml | 33 --- .../docker/label-studio/docker-compose.yml | 62 ----- deployment/docker/milvus/docker-compose.yml | 73 ------ editions/community/config/application.yml | 184 --------------- editions/community/config/log4j2.xml | 42 ---- editions/enterprise/config/application.yml | 181 --------------- editions/enterprise/config/log4j2.xml | 42 ---- scripts/images/frontend/Dockerfile | 2 +- .../images/frontend}/backend.conf | 0 12 files changed, 229 insertions(+), 656 deletions(-) delete mode 100644 deployment/docker/deer-flow/docker-compose.yml delete mode 100644 deployment/docker/label-studio/docker-compose.yml delete mode 100644 deployment/docker/milvus/docker-compose.yml delete mode 100644 editions/community/config/application.yml delete mode 100644 editions/community/config/log4j2.xml delete mode 100644 editions/enterprise/config/application.yml delete mode 100644 editions/enterprise/config/log4j2.xml rename {deployment/docker/datamate => scripts/images/frontend}/backend.conf (100%) diff --git a/.gitignore b/.gitignore index c0906e850..53ae0ad89 100644 --- a/.gitignore +++ b/.gitignore @@ -189,4 +189,4 @@ Thumbs.db *.sublime-workspace # Milvus -deployment/docker/milvus/volumes/ \ No newline at end of file +**/volumes/ diff --git a/Makefile b/Makefile index 4778f07d2..960e8b88b 100644 --- a/Makefile +++ b/Makefile @@ -238,7 +238,7 @@ endif # ========== Docker Install/Uninstall Targets ========== # Valid service targets for docker install/uninstall -VALID_SERVICE_TARGETS := datamate backend frontend runtime mineru "deer-flow" milvus "label-studio" "data-juicer" dj +VALID_SERVICE_TARGETS := datamate backend frontend runtime backend-python database gateway redis mineru deer-flow milvus label-studio data-juicer dj # Generic docker service install target .PHONY: %-docker-install @@ -252,21 +252,23 @@ VALID_SERVICE_TARGETS := datamate backend frontend runtime mineru "deer-flow" mi exit 1; \ fi @if [ "$*" = "label-studio" ]; then \ - $(call docker-compose-service,label-studio,up -d,deployment/docker/label-studio); \ - elif [ "$*" = "mineru" ]; then \ - REGISTRY=$(REGISTRY) && docker compose -f deployment/docker/datamate/docker-compose.yml up -d datamate-mineru; \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile label-studio up -d; \ elif [ "$*" = "datamate" ]; then \ REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml up -d; \ - elif [ "$*" = "deer-flow" ]; then \ - cp runtime/deer-flow/.env deployment/docker/deer-flow/.env; \ - cp runtime/deer-flow/conf.yaml deployment/docker/deer-flow/conf.yaml; \ - REGISTRY=$(REGISTRY) docker compose -f deployment/docker/deer-flow/docker-compose.yml up -d; \ - elif [ "$*" = "milvus" ]; then \ - docker compose -f deployment/docker/milvus/docker-compose.yml up -d; \ + elif [ "$*" = "mineru" ]; then \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru up -d datamate-mineru; \ elif [ "$*" = "data-juicer" ] || [ "$*" = "dj" ]; then \ - REGISTRY=$(REGISTRY) && docker compose -f deployment/docker/datamate/docker-compose.yml up -d datamate-data-juicer; \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile data-juicer up -d datamate-data-juicer; \ + elif [ "$*" = "redis" ]; then \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile redis up -d datamate-redis; \ + elif [ "$*" = "milvus" ]; then \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile milvus up -d; \ + elif [ "$*" = "deer-flow" ]; then \ + cp runtime/deer-flow/.env deployment/docker/datamate/.env; \ + cp runtime/deer-flow/conf.yaml deployment/docker/datamate/conf.yaml; \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile deer-flow up -d; \ else \ - $(call docker-compose-service,$*,up -d,deployment/docker/datamate); \ + REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml up -d datamate-$*; \ fi # Generic docker service uninstall target @@ -281,29 +283,23 @@ VALID_SERVICE_TARGETS := datamate backend frontend runtime mineru "deer-flow" mi exit 1; \ fi @if [ "$*" = "label-studio" ]; then \ - if [ "$(DELETE_VOLUMES_CHOICE)" = "1" ]; then \ - cd deployment/docker/label-studio && docker compose down -v && cd - >/dev/null; \ - else \ - cd deployment/docker/label-studio && docker compose down && cd - >/dev/null; \ - fi; \ + docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s label-studio pg-db; \ elif [ "$*" = "mineru" ]; then \ - $(call docker-compose-service,datamate-mineru,down,deployment/docker/datamate); \ + docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s datamate-mineru; \ + elif [ "$*" = "data-juicer" ] || [ "$*" = "dj" ]; then \ + docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s datamate-data-juicer; \ + elif [ "$*" = "redis" ]; then \ + docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s datamate-redis; \ elif [ "$*" = "datamate" ]; then \ if [ "$(DELETE_VOLUMES_CHOICE)" = "1" ]; then \ - docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru down -v; \ + docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru --profile redis --profile data-juicer --profile deer-flow --profile label-studio --profile milvus down -v; \ else \ - docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru down; \ + docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru --profile redis --profile data-juicer --profile deer-flow --profile label-studio --profile milvus down; \ fi; \ elif [ "$*" = "deer-flow" ]; then \ - docker compose -f deployment/docker/deer-flow/docker-compose.yml down; \ + docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s deer-flow-backend deer-flow-frontend; \ elif [ "$*" = "milvus" ]; then \ - if [ "$(DELETE_VOLUMES_CHOICE)" = "1" ]; then \ - docker compose -f deployment/docker/milvus/docker-compose.yml down -v; \ - else \ - docker compose -f deployment/docker/milvus/docker-compose.yml down; \ - fi; \ - elif [ "$*" = "data-juicer" ] || [ "$*" = "dj" ]; then \ - $(call docker-compose-service,datamate-data-juicer,down,deployment/docker/datamate); \ + docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s milvus etcd minio; \ else \ $(call docker-compose-service,$*,down,deployment/docker/datamate); \ fi diff --git a/deployment/docker/datamate/docker-compose.yml b/deployment/docker/datamate/docker-compose.yml index 422f5bd8c..191d9749a 100644 --- a/deployment/docker/datamate/docker-compose.yml +++ b/deployment/docker/datamate/docker-compose.yml @@ -1,5 +1,8 @@ services: - # 1) backend + # ============================== + # Core Datamate Services (always enabled) + # ============================== + datamate-backend: container_name: datamate-backend image: ${REGISTRY:-}datamate-backend @@ -15,7 +18,6 @@ services: depends_on: - datamate-database - # 1) backend (Python) datamate-backend-python: container_name: datamate-backend-python image: ${REGISTRY:-}datamate-backend-python @@ -38,13 +40,12 @@ services: privileged: true networks: [ datamate ] - # 2) frontend(NodePort 30000) datamate-frontend: container_name: datamate-frontend image: ${REGISTRY:-}datamate-frontend restart: on-failure ports: - - "30000:80" # nodePort → hostPort + - "30000:80" volumes: - frontend_log_volume:/var/log/datamate/frontend networks: [ datamate ] @@ -52,7 +53,6 @@ services: - datamate-backend - datamate-backend-python - # 3) database datamate-database: container_name: datamate-database image: ${REGISTRY:-}datamate-database @@ -72,7 +72,6 @@ services: - "3306:3306" networks: [ datamate ] - # 3) runtime datamate-runtime: container_name: datamate-runtime image: ${REGISTRY:-}datamate-runtime @@ -99,14 +98,16 @@ services: - operator-packages-volume:/usr/local/lib/ops/site-packages networks: [ datamate ] - # 4) mineru + # ============================= + # Optional: Mineru NPU Engine (profile: mineru) + # ============================== datamate-mineru: container_name: datamate-mineru image: datamate-mineru restart: on-failure environment: MINERU_MODEL_SOURCE: local - MINERU_DEVICE_MODE: npu # cpu|cuda|npu|mps + MINERU_DEVICE_MODE: npu VLLM_WORKER_MULTIPROC_METHOD: spawn privileged: true entrypoint: mineru-openai-server @@ -129,7 +130,9 @@ services: - /dev/devmm_svm - /dev/hisi_hdc - # 5) redis + # ============================== + # Optional: Redis (profile: redis) + # ============================== datamate-redis: container_name: datamate-redis image: redis:8.2.3 @@ -137,7 +140,11 @@ services: ports: - "6379:6379" networks: [ datamate ] + profiles: [ redis ] + # ============================== + # Optional: Data Juicer (profile: data-juicer) + # ============================== datamate-data-juicer: container_name: datamate-data-juicer image: datajuicer/data-juicer:v1.4.4 @@ -153,6 +160,171 @@ services: networks: [ datamate ] profiles: [ data-juicer ] + + # ============================== + # Optional: Deer Flow (profile: deer-flow) + # ============================== + deer-flow-backend: + image: ${REGISTRY:-}deer-flow-backend + container_name: deer-flow-backend + env_file: + - .env + volumes: + - ./conf.yaml:/app/conf.yaml:ro + - deer-flow-log-volume:/var/log/deer-flow + restart: unless-stopped + networks: + - datamate + profiles: + - deer-flow + + deer-flow-frontend: + image: ${REGISTRY:-}deer-flow-frontend + container_name: deer-flow-frontend + env_file: + - .env + depends_on: + - deer-flow-backend + restart: unless-stopped + networks: + - datamate + profiles: + - deer-flow + + # ============================== + # Optional: Label Studio (profile: label-studio) + # ============================== + label-studio: + container_name: label-studio + stdin_open: true + tty: true + image: heartexlabs/label-studio:latest + privileged: true + restart: unless-stopped + user: root + expose: + - "8000" + ports: + - "30001:8000" + depends_on: + - pg-db + environment: + - DJANGO_DB=default + - POSTGRE_NAME=postgres + - POSTGRE_USER=postgres + - POSTGRE_PASSWORD= + - POSTGRE_PORT=5432 + - POSTGRE_HOST=db + - LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-} + - LOCAL_FILES_SERVING_ENABLED=true + - LOCAL_FILES_DOCUMENT_ROOT=/label-studio/local + - USE_USERNAME_FOR_LOGIN=true + - LABEL_STUDIO_USERNAME=admin@demo.com + - LABEL_STUDIO_PASSWORD=demoadmin + - LABEL_STUDIO_ENABLE_LEGACY_API_TOKEN=true + - LABEL_STUDIO_USER_TOKEN=abc123abc123 + - LOG_LEVEL=DEBUG + volumes: + - label-studio-data:/label-studio/data:rw + - dataset_volume:/label-studio/local:rw + networks: + - datamate + command: label-studio-uwsgi + profiles: [ label-studio ] + + pg-db: + container_name: pg-db + image: pgautoupgrade/pgautoupgrade:13-alpine + hostname: db + restart: unless-stopped + environment: + - POSTGRES_HOST_AUTH_METHOD=trust + - POSTGRES_USER=postgres + volumes: + - label-studio-db:/var/lib/postgresql/data + networks: + - datamate + profiles: [ label-studio ] + + # ============================== + # Optional: Milvus (profile: milvus) + # ============================== + etcd: + container_name: milvus-etcd + image: quay.io/coreos/etcd:v3.5.18 + environment: + - ETCD_AUTO_COMPACTION_MODE=revision + - ETCD_AUTO_COMPACTION_RETENTION=1000 + - ETCD_QUOTA_BACKEND_BYTES=4294967296 + - ETCD_SNAPSHOT_COUNT=50000 + volumes: + - etcd-volume:/etcd + command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd + restart: always + networks: + - datamate + healthcheck: + test: ["CMD", "etcdctl", "endpoint", "health"] + interval: 30s + timeout: 20s + retries: 3 + profiles: + - milvus + + minio: + container_name: milvus-minio + image: minio/minio:RELEASE.2024-12-18T13-15-44Z + environment: + MINIO_ACCESS_KEY: minioadmin + MINIO_SECRET_KEY: minioadmin + ports: + - "9001:9001" + - "9000:9000" + volumes: + - minio-volume:/minio_data + command: minio server /minio_data --console-address ":9001" + networks: + - datamate + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + profiles: + - milvus + + milvus: + container_name: milvus-standalone + image: milvusdb/milvus:v2.6.5 + command: ["milvus", "run", "standalone"] + security_opt: + - seccomp:unconfined + environment: + ETCD_ENDPOINTS: etcd:2379 + MINIO_ADDRESS: minio:9000 + MQ_TYPE: woodpecker + volumes: + - milvus-volume:/var/lib/milvus + networks: + - datamate + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] + interval: 30s + start_period: 90s + timeout: 20s + retries: 3 + ports: + - "19530:19530" + - "9091:9091" + depends_on: + - "etcd" + - "minio" + profiles: + - milvus + +# ============================== +# Volumes +# ============================== volumes: dataset_volume: name: datamate-dataset-volume @@ -177,6 +349,28 @@ volumes: mineru_log_volume: name: datamate-mineru_log_volume + # Deer Flow + deer-flow-log-volume: + name: deer-flow-log-volume + + # Label Studio + label-studio-data: + name: label-studio-data-volume + label-studio-db: + name: label-studio-db-volume + + # Milvus + etcd-volume: + name: milvus-etcd-volume + minio-volume: + name: milvus-minio-volume + milvus-volume: + name: milvus-milvus-volume + + +# ============================== +# Networks +# ============================== networks: datamate: driver: bridge diff --git a/deployment/docker/deer-flow/docker-compose.yml b/deployment/docker/deer-flow/docker-compose.yml deleted file mode 100644 index edc2018a1..000000000 --- a/deployment/docker/deer-flow/docker-compose.yml +++ /dev/null @@ -1,33 +0,0 @@ -services: - deer-flow-backend: - image: ${REGISTRY:-}deer-flow-backend - container_name: deer-flow-backend - env_file: - - .env - volumes: - - ./conf.yaml:/app/conf.yaml:ro - - deer-flow-log-volume:/var/log/deer-flow - restart: unless-stopped - networks: - - datamate - - deer-flow-frontend: - image: ${REGISTRY:-}deer-flow-frontend - container_name: deer-flow-frontend - env_file: - - .env - depends_on: - - deer-flow-backend - restart: unless-stopped - networks: - - datamate - -volumes: - deer-flow-log-volume: - name: deer-flow-log-volume - -networks: - datamate: - driver: bridge - name: datamate-network - external: true diff --git a/deployment/docker/label-studio/docker-compose.yml b/deployment/docker/label-studio/docker-compose.yml deleted file mode 100644 index d68c6a0b9..000000000 --- a/deployment/docker/label-studio/docker-compose.yml +++ /dev/null @@ -1,62 +0,0 @@ -services: - - label-studio: - stdin_open: true - tty: true - image: heartexlabs/label-studio:latest - privileged: true - restart: unless-stopped - user: root - expose: - - "8000" - ports: - - "30001:8000" - depends_on: - - pg-db - environment: - - DJANGO_DB=default - - POSTGRE_NAME=postgres - - POSTGRE_USER=postgres - - POSTGRE_PASSWORD= - - POSTGRE_PORT=5432 - - POSTGRE_HOST=db - - LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-} - - LOCAL_FILES_SERVING_ENABLED=true - - LOCAL_FILES_DOCUMENT_ROOT=/label-studio/local - - USE_USERNAME_FOR_LOGIN=true - - LABEL_STUDIO_USERNAME=admin@demo.com - - LABEL_STUDIO_PASSWORD=demoadmin - - LABEL_STUDIO_ENABLE_LEGACY_API_TOKEN=true - - LABEL_STUDIO_USER_TOKEN=abc123abc123 - - LOG_LEVEL=DEBUG - volumes: - - label-studio-data:/label-studio/data:rw - - dataset_volume:/label-studio/local:rw - networks: - - datamate - command: label-studio-uwsgi - - pg-db: - image: pgautoupgrade/pgautoupgrade:13-alpine - hostname: db - restart: unless-stopped - environment: - - POSTGRES_HOST_AUTH_METHOD=trust - - POSTGRES_USER=postgres - volumes: - - label-studio-db:/var/lib/postgresql/data - networks: - - datamate - -volumes: - label-studio-data: - label-studio-db: - dataset_volume: - name: datamate-dataset-volume - external: true - -networks: - datamate: - driver: bridge - name: datamate-network - external: true diff --git a/deployment/docker/milvus/docker-compose.yml b/deployment/docker/milvus/docker-compose.yml deleted file mode 100644 index f2f9ceb86..000000000 --- a/deployment/docker/milvus/docker-compose.yml +++ /dev/null @@ -1,73 +0,0 @@ -services: - etcd: - container_name: milvus-etcd - image: quay.io/coreos/etcd:v3.5.18 - environment: - - ETCD_AUTO_COMPACTION_MODE=revision - - ETCD_AUTO_COMPACTION_RETENTION=1000 - - ETCD_QUOTA_BACKEND_BYTES=4294967296 - - ETCD_SNAPSHOT_COUNT=50000 - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd - command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd - restart: always - networks: - - datamate - healthcheck: - test: ["CMD", "etcdctl", "endpoint", "health"] - interval: 30s - timeout: 20s - retries: 3 - - minio: - container_name: milvus-minio - image: minio/minio:RELEASE.2024-12-18T13-15-44Z - environment: - MINIO_ACCESS_KEY: minioadmin - MINIO_SECRET_KEY: minioadmin - ports: - - "9001:9001" - - "9000:9000" - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data - command: minio server /minio_data --console-address ":9001" - networks: - - datamate - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] - interval: 30s - timeout: 20s - retries: 3 - - milvus: - container_name: milvus-standalone - image: milvusdb/milvus:v2.6.5 - command: ["milvus", "run", "standalone"] - security_opt: - - seccomp:unconfined - environment: - ETCD_ENDPOINTS: etcd:2379 - MINIO_ADDRESS: minio:9000 - MQ_TYPE: woodpecker - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus - networks: - - datamate - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] - interval: 30s - start_period: 90s - timeout: 20s - retries: 3 - ports: - - "19530:19530" - - "9091:9091" - depends_on: - - "etcd" - - "minio" - -networks: - datamate: - name: datamate-network - external: true - driver: bridge diff --git a/editions/community/config/application.yml b/editions/community/config/application.yml deleted file mode 100644 index 689d09554..000000000 --- a/editions/community/config/application.yml +++ /dev/null @@ -1,184 +0,0 @@ -# 数据引擎平台 - 主应用配置 -spring: - application: - name: datamate - - # 暂时排除Spring Security自动配置(开发阶段使用) - autoconfigure: - exclude: - - org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration - - org.springframework.boot.autoconfigure.security.servlet.UserDetailsServiceAutoConfiguration - - # 数据源配置 - datasource: - driver-class-name: com.mysql.cj.jdbc.Driver - url: jdbc:mysql://datamate-database:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true - username: ${DB_USERNAME:root} - password: ${DB_PASSWORD:password} - hikari: - maximum-pool-size: 20 - minimum-idle: 5 - connection-timeout: 30000 - idle-timeout: 600000 - max-lifetime: 1800000 - - # Elasticsearch配置 - elasticsearch: - uris: ${ES_URIS:http://localhost:9200} - username: ${ES_USERNAME:} - password: ${ES_PASSWORD:} - connection-timeout: 10s - socket-timeout: 30s - - # Jackson配置 - jackson: - time-zone: Asia/Shanghai - date-format: yyyy-MM-dd HH:mm:ss - serialization: - write-dates-as-timestamps: false - deserialization: - fail-on-unknown-properties: false - - # 文件上传配置 - servlet: - multipart: - max-file-size: 100MB - max-request-size: 100MB - - # 任务调度配置 - task: - execution: - pool: - core-size: ${TASK_EXECUTION_CORE_SIZE:10} - max-size: ${TASK_EXECUTION_MAX_SIZE:20} - queue-capacity: ${TASK_EXECUTION_QUEUE_CAPACITY:100} - keep-alive: ${TASK_EXECUTION_KEEP_ALIVE:60s} - scheduling: - pool: - size: ${TASK_SCHEDULING_POOL_SIZE:5} - config: - import: - - classpath:config/application-datacollection.yml - - classpath:config/application-datamanagement.yml - - ai: - mcp: - server: - name: datamate-backend-mcp-server - base-url: /api - capabilities: - resource: false - prompt: false - completion: false - tool: true - -# MyBatis配置(需在顶层,不在 spring 下) -mybatis-plus: - configuration: - map-underscore-to-camel-case: true - default-fetch-size: 100 - default-statement-timeout: 30 - use-generated-keys: true - cache-enabled: true - lazy-loading-enabled: false - multiple-result-sets-enabled: true - use-column-label: true - auto-mapping-behavior: partial - auto-mapping-unknown-column-behavior: none - default-executor-type: simple - call-setters-on-nulls: false - return-instance-for-empty-row: false - log-impl: org.apache.ibatis.logging.slf4j.Slf4jImpl - mapper-locations: - - classpath*:mappers/**/*.xml - type-aliases-package: - - com.datamate.collection.domain.model - - com.datamate.datamanagement.domain.model.dataset - -# 应用配置 -server: - port: ${SERVER_PORT:8080} - servlet: - context-path: /api - encoding: - charset: UTF-8 - enabled: true - force: true - -# 日志配置 -logging: - config: file:/opt/backend/log4j2.xml - -# Actuator配置 -management: - endpoints: - web: - exposure: - include: health,info,metrics,prometheus - endpoint: - health: - show-details: when-authorized - health: - elasticsearch: - enabled: false # 禁用Elasticsearch健康检查 - -# 平台配置 -datamate: - # JWT配置 - jwt: - secret: ${JWT_SECRET:dataMateSecretKey2024ForJWTTokenGeneration} - expiration: ${JWT_EXPIRATION:86400} # 24小时,单位秒 - header: Authorization - prefix: "Bearer " - - # 文件存储配置 - storage: - type: ${STORAGE_TYPE:local} # local, minio, s3 - local: - base-path: ${STORAGE_LOCAL_PATH:./data/storage} - minio: - endpoint: ${MINIO_ENDPOINT:http://localhost:9000} - access-key: ${MINIO_ACCESS_KEY:minioadmin} - secret-key: ${MINIO_SECRET_KEY:minioadmin} - bucket-name: ${MINIO_BUCKET:data-mate} - - # Ray执行器配置 - ray: - enabled: ${RAY_ENABLED:false} - address: ${RAY_ADDRESS:ray://localhost:10001} - runtime-env: - working-dir: ${RAY_WORKING_DIR:./runtime/python-executor} - pip-packages: - - "ray[default]==2.7.0" - - "pandas" - - "numpy" - - "data-juicer" - - # 数据归集服务配置(可由模块导入叠加) - data-collection: {} - - # 算子市场配置 - operator-market: - repository-path: ${OPERATOR_REPO_PATH:./runtime/operators} - registry-url: ${OPERATOR_REGISTRY_URL:} - max-upload-size: ${OPERATOR_MAX_UPLOAD_SIZE:50MB} - - # 数据处理配置 - data-processing: - max-file-size: ${MAX_FILE_SIZE:1GB} - temp-dir: ${TEMP_DIR:./data/temp} - batch-size: ${BATCH_SIZE:1000} - - # 标注配置 - annotation: - auto-annotation: - enabled: ${AUTO_ANNOTATION_ENABLED:true} - model-endpoint: ${ANNOTATION_MODEL_ENDPOINT:} - quality-control: - enabled: ${QC_ENABLED:true} - threshold: ${QC_THRESHOLD:0.8} - - # RAG配置 - rag: - milvus-host: ${MILVUS_HOST:milvus-standalone} - milvus-port: ${MILVUS_PORT:19530} diff --git a/editions/community/config/log4j2.xml b/editions/community/config/log4j2.xml deleted file mode 100644 index 5358fc94d..000000000 --- a/editions/community/config/log4j2.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - /var/log/datamate/backend - %d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n - 100MB - 30 - INFO - WARN - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/editions/enterprise/config/application.yml b/editions/enterprise/config/application.yml deleted file mode 100644 index 9326ab629..000000000 --- a/editions/enterprise/config/application.yml +++ /dev/null @@ -1,181 +0,0 @@ -# 数据引擎平台 - 主应用配置 -spring: - application: - name: datamate - - # 暂时排除Spring Security自动配置(开发阶段使用) - autoconfigure: - exclude: - - org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration - - org.springframework.boot.autoconfigure.security.servlet.UserDetailsServiceAutoConfiguration - - # 数据源配置 - datasource: - driver-class-name: com.mysql.cj.jdbc.Driver - url: jdbc:mysql://mysql:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true - username: ${DB_USERNAME:root} - password: ${DB_PASSWORD:password} - hikari: - maximum-pool-size: 20 - minimum-idle: 5 - connection-timeout: 30000 - idle-timeout: 600000 - max-lifetime: 1800000 - - # Elasticsearch配置 - elasticsearch: - uris: ${ES_URIS:http://localhost:9200} - username: ${ES_USERNAME:} - password: ${ES_PASSWORD:} - connection-timeout: 10s - socket-timeout: 30s - - # Jackson配置 - jackson: - time-zone: Asia/Shanghai - date-format: yyyy-MM-dd HH:mm:ss - serialization: - write-dates-as-timestamps: false - deserialization: - fail-on-unknown-properties: false - - # 文件上传配置 - servlet: - multipart: - max-file-size: 100MB - max-request-size: 100MB - - # 任务调度配置 - task: - execution: - pool: - core-size: ${TASK_EXECUTION_CORE_SIZE:10} - max-size: ${TASK_EXECUTION_MAX_SIZE:20} - queue-capacity: ${TASK_EXECUTION_QUEUE_CAPACITY:100} - keep-alive: ${TASK_EXECUTION_KEEP_ALIVE:60s} - scheduling: - pool: - size: ${TASK_SCHEDULING_POOL_SIZE:5} - config: - import: - - classpath:config/application-datacollection.yml - - classpath:config/application-datamanagement.yml - -# MyBatis配置(需在顶层,不在 spring 下) -mybatis-plus: - configuration: - map-underscore-to-camel-case: true - default-fetch-size: 100 - default-statement-timeout: 30 - use-generated-keys: true - cache-enabled: true - lazy-loading-enabled: false - multiple-result-sets-enabled: true - use-column-label: true - auto-mapping-behavior: partial - auto-mapping-unknown-column-behavior: none - default-executor-type: simple - call-setters-on-nulls: false - return-instance-for-empty-row: false - log-impl: org.apache.ibatis.logging.slf4j.Slf4jImpl - mapper-locations: - - classpath*:mappers/**/*.xml - type-aliases-package: - - com.datamate.collection.domain.model - - com.datamate.datamanagement.domain.model.dataset - -# 应用配置 -server: - port: ${SERVER_PORT:8080} - servlet: - context-path: /api - encoding: - charset: UTF-8 - enabled: true - force: true - -# 日志配置 -logging: - config: file:/opt/backend/log4j2.xml - -# Actuator配置 -management: - endpoints: - web: - exposure: - include: health,info,metrics,prometheus - endpoint: - health: - show-details: when-authorized - health: - elasticsearch: - enabled: false # 禁用Elasticsearch健康检查 - -# 平台配置 -datamate: - # JWT配置 - jwt: - secret: ${JWT_SECRET:dataMateSecretKey2024ForJWTTokenGeneration} - expiration: ${JWT_EXPIRATION:86400} # 24小时,单位秒 - header: Authorization - prefix: "Bearer " - - # 文件存储配置 - storage: - type: ${STORAGE_TYPE:local} # local, minio, s3 - local: - base-path: ${STORAGE_LOCAL_PATH:./data/storage} - minio: - endpoint: ${MINIO_ENDPOINT:http://localhost:9000} - access-key: ${MINIO_ACCESS_KEY:minioadmin} - secret-key: ${MINIO_SECRET_KEY:minioadmin} - bucket-name: ${MINIO_BUCKET:data-mate} - - # Ray执行器配置 - ray: - enabled: ${RAY_ENABLED:false} - address: ${RAY_ADDRESS:ray://localhost:10001} - runtime-env: - working-dir: ${RAY_WORKING_DIR:./runtime/python-executor} - pip-packages: - - "ray[default]==2.7.0" - - "pandas" - - "numpy" - - "data-juicer" - - # 数据归集服务配置(可由模块导入叠加) - data-collection: {} - - # 算子市场配置 - operator-market: - repository-path: ${OPERATOR_REPO_PATH:./runtime/operators} - registry-url: ${OPERATOR_REGISTRY_URL:} - max-upload-size: ${OPERATOR_MAX_UPLOAD_SIZE:50MB} - - # 数据处理配置 - data-processing: - max-file-size: ${MAX_FILE_SIZE:1GB} - temp-dir: ${TEMP_DIR:./data/temp} - batch-size: ${BATCH_SIZE:1000} - - # 标注配置 - annotation: - auto-annotation: - enabled: ${AUTO_ANNOTATION_ENABLED:true} - model-endpoint: ${ANNOTATION_MODEL_ENDPOINT:} - quality-control: - enabled: ${QC_ENABLED:true} - threshold: ${QC_THRESHOLD:0.8} - - # RAG配置 - rag: - embedding: - model: ${RAG_EMBEDDING_MODEL:text-embedding-ada-002} - api-key: ${RAG_API_KEY:} - dimension: ${RAG_DIMENSION:1536} - chunk: - size: ${RAG_CHUNK_SIZE:512} - overlap: ${RAG_CHUNK_OVERLAP:50} - retrieval: - top-k: ${RAG_TOP_K:5} - score-threshold: ${RAG_SCORE_THRESHOLD:0.7} diff --git a/editions/enterprise/config/log4j2.xml b/editions/enterprise/config/log4j2.xml deleted file mode 100644 index f9d0cf3a5..000000000 --- a/editions/enterprise/config/log4j2.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - /var/log/data-mate/backend - %d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n - 100MB - 30 - INFO - WARN - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/scripts/images/frontend/Dockerfile b/scripts/images/frontend/Dockerfile index db19a567a..fcdc78ef9 100644 --- a/scripts/images/frontend/Dockerfile +++ b/scripts/images/frontend/Dockerfile @@ -10,7 +10,7 @@ RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi && \ FROM nginx:1.29 AS runner COPY --from=builder /app/dist /opt/frontend -COPY deployment/docker/datamate/backend.conf /etc/nginx/conf.d/backend.conf +COPY scripts/images/frontend/backend.conf /etc/nginx/conf.d/backend.conf RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ && rm -f /etc/nginx/conf.d/default.conf diff --git a/deployment/docker/datamate/backend.conf b/scripts/images/frontend/backend.conf similarity index 100% rename from deployment/docker/datamate/backend.conf rename to scripts/images/frontend/backend.conf