From 380873ed5c0d09c23753d087dd546c7a68b08d36 Mon Sep 17 00:00:00 2001
From: Dallas98 <990259227@qq.com>
Date: Mon, 12 Jan 2026 16:18:40 +0800
Subject: [PATCH 1/2] feat: update error handling in RagEtlService and add
commons-io dependency
---
backend/pom.xml | 6 ++++++
backend/services/rag-indexer-service/pom.xml | 6 ++++++
.../rag/indexer/infrastructure/event/RagEtlService.java | 2 +-
3 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/backend/pom.xml b/backend/pom.xml
index f57ea1571..7105089b6 100644
--- a/backend/pom.xml
+++ b/backend/pom.xml
@@ -65,6 +65,12 @@
pom
import
+
+ commons-io
+ commons-io
+ 2.16.1
+ compile
+
com.google.protobuf
protobuf-bom
diff --git a/backend/services/rag-indexer-service/pom.xml b/backend/services/rag-indexer-service/pom.xml
index 5f7a4fb33..23db9dd33 100644
--- a/backend/services/rag-indexer-service/pom.xml
+++ b/backend/services/rag-indexer-service/pom.xml
@@ -86,6 +86,12 @@
dev.langchain4j
langchain4j-document-parser-apache-poi
+
+
+ commons-io
+ commons-io
+
+
dev.langchain4j
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java
index 53c78fa8a..355e4b2ce 100644
--- a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java
@@ -78,7 +78,7 @@ public void processAfterCommit(DataInsertedEvent event) {
// 更新文件状态为已处理
ragFile.setStatus(FileStatus.PROCESSED);
ragFileRepository.updateById(ragFile);
- } catch (Exception e) {
+ } catch (Throwable e) {
// 处理异常
log.error("Error processing RAG file: {}", ragFile.getFileId(), e);
ragFile.setStatus(FileStatus.PROCESS_FAILED);
From 751b97acedfb5af41cad1a54300fcb25e21e9fb3 Mon Sep 17 00:00:00 2001
From: Dallas98 <990259227@qq.com>
Date: Mon, 12 Jan 2026 17:05:33 +0800
Subject: [PATCH 2/2] feat: enhance Docker configuration with additional
services and profiles
---
.gitignore | 2 +-
Makefile | 52 ++---
deployment/docker/datamate/docker-compose.yml | 212 +++++++++++++++++-
.../docker/deer-flow/docker-compose.yml | 33 ---
.../docker/label-studio/docker-compose.yml | 62 -----
deployment/docker/milvus/docker-compose.yml | 73 ------
editions/community/config/application.yml | 184 ---------------
editions/community/config/log4j2.xml | 42 ----
editions/enterprise/config/application.yml | 181 ---------------
editions/enterprise/config/log4j2.xml | 42 ----
scripts/images/frontend/Dockerfile | 2 +-
.../images/frontend}/backend.conf | 0
12 files changed, 229 insertions(+), 656 deletions(-)
delete mode 100644 deployment/docker/deer-flow/docker-compose.yml
delete mode 100644 deployment/docker/label-studio/docker-compose.yml
delete mode 100644 deployment/docker/milvus/docker-compose.yml
delete mode 100644 editions/community/config/application.yml
delete mode 100644 editions/community/config/log4j2.xml
delete mode 100644 editions/enterprise/config/application.yml
delete mode 100644 editions/enterprise/config/log4j2.xml
rename {deployment/docker/datamate => scripts/images/frontend}/backend.conf (100%)
diff --git a/.gitignore b/.gitignore
index c0906e850..53ae0ad89 100644
--- a/.gitignore
+++ b/.gitignore
@@ -189,4 +189,4 @@ Thumbs.db
*.sublime-workspace
# Milvus
-deployment/docker/milvus/volumes/
\ No newline at end of file
+**/volumes/
diff --git a/Makefile b/Makefile
index 4778f07d2..960e8b88b 100644
--- a/Makefile
+++ b/Makefile
@@ -238,7 +238,7 @@ endif
# ========== Docker Install/Uninstall Targets ==========
# Valid service targets for docker install/uninstall
-VALID_SERVICE_TARGETS := datamate backend frontend runtime mineru "deer-flow" milvus "label-studio" "data-juicer" dj
+VALID_SERVICE_TARGETS := datamate backend frontend runtime backend-python database gateway redis mineru deer-flow milvus label-studio data-juicer dj
# Generic docker service install target
.PHONY: %-docker-install
@@ -252,21 +252,23 @@ VALID_SERVICE_TARGETS := datamate backend frontend runtime mineru "deer-flow" mi
exit 1; \
fi
@if [ "$*" = "label-studio" ]; then \
- $(call docker-compose-service,label-studio,up -d,deployment/docker/label-studio); \
- elif [ "$*" = "mineru" ]; then \
- REGISTRY=$(REGISTRY) && docker compose -f deployment/docker/datamate/docker-compose.yml up -d datamate-mineru; \
+ REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile label-studio up -d; \
elif [ "$*" = "datamate" ]; then \
REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml up -d; \
- elif [ "$*" = "deer-flow" ]; then \
- cp runtime/deer-flow/.env deployment/docker/deer-flow/.env; \
- cp runtime/deer-flow/conf.yaml deployment/docker/deer-flow/conf.yaml; \
- REGISTRY=$(REGISTRY) docker compose -f deployment/docker/deer-flow/docker-compose.yml up -d; \
- elif [ "$*" = "milvus" ]; then \
- docker compose -f deployment/docker/milvus/docker-compose.yml up -d; \
+ elif [ "$*" = "mineru" ]; then \
+ REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru up -d datamate-mineru; \
elif [ "$*" = "data-juicer" ] || [ "$*" = "dj" ]; then \
- REGISTRY=$(REGISTRY) && docker compose -f deployment/docker/datamate/docker-compose.yml up -d datamate-data-juicer; \
+ REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile data-juicer up -d datamate-data-juicer; \
+ elif [ "$*" = "redis" ]; then \
+ REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile redis up -d datamate-redis; \
+ elif [ "$*" = "milvus" ]; then \
+ REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile milvus up -d; \
+ elif [ "$*" = "deer-flow" ]; then \
+ cp runtime/deer-flow/.env deployment/docker/datamate/.env; \
+ cp runtime/deer-flow/conf.yaml deployment/docker/datamate/conf.yaml; \
+ REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml --profile deer-flow up -d; \
else \
- $(call docker-compose-service,$*,up -d,deployment/docker/datamate); \
+ REGISTRY=$(REGISTRY) docker compose -f deployment/docker/datamate/docker-compose.yml up -d datamate-$*; \
fi
# Generic docker service uninstall target
@@ -281,29 +283,23 @@ VALID_SERVICE_TARGETS := datamate backend frontend runtime mineru "deer-flow" mi
exit 1; \
fi
@if [ "$*" = "label-studio" ]; then \
- if [ "$(DELETE_VOLUMES_CHOICE)" = "1" ]; then \
- cd deployment/docker/label-studio && docker compose down -v && cd - >/dev/null; \
- else \
- cd deployment/docker/label-studio && docker compose down && cd - >/dev/null; \
- fi; \
+ docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s label-studio pg-db; \
elif [ "$*" = "mineru" ]; then \
- $(call docker-compose-service,datamate-mineru,down,deployment/docker/datamate); \
+ docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s datamate-mineru; \
+ elif [ "$*" = "data-juicer" ] || [ "$*" = "dj" ]; then \
+ docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s datamate-data-juicer; \
+ elif [ "$*" = "redis" ]; then \
+ docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s datamate-redis; \
elif [ "$*" = "datamate" ]; then \
if [ "$(DELETE_VOLUMES_CHOICE)" = "1" ]; then \
- docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru down -v; \
+ docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru --profile redis --profile data-juicer --profile deer-flow --profile label-studio --profile milvus down -v; \
else \
- docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru down; \
+ docker compose -f deployment/docker/datamate/docker-compose.yml --profile mineru --profile redis --profile data-juicer --profile deer-flow --profile label-studio --profile milvus down; \
fi; \
elif [ "$*" = "deer-flow" ]; then \
- docker compose -f deployment/docker/deer-flow/docker-compose.yml down; \
+ docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s deer-flow-backend deer-flow-frontend; \
elif [ "$*" = "milvus" ]; then \
- if [ "$(DELETE_VOLUMES_CHOICE)" = "1" ]; then \
- docker compose -f deployment/docker/milvus/docker-compose.yml down -v; \
- else \
- docker compose -f deployment/docker/milvus/docker-compose.yml down; \
- fi; \
- elif [ "$*" = "data-juicer" ] || [ "$*" = "dj" ]; then \
- $(call docker-compose-service,datamate-data-juicer,down,deployment/docker/datamate); \
+ docker compose -f deployment/docker/datamate/docker-compose.yml rm -f -s milvus etcd minio; \
else \
$(call docker-compose-service,$*,down,deployment/docker/datamate); \
fi
diff --git a/deployment/docker/datamate/docker-compose.yml b/deployment/docker/datamate/docker-compose.yml
index 422f5bd8c..191d9749a 100644
--- a/deployment/docker/datamate/docker-compose.yml
+++ b/deployment/docker/datamate/docker-compose.yml
@@ -1,5 +1,8 @@
services:
- # 1) backend
+ # ==============================
+ # Core Datamate Services (always enabled)
+ # ==============================
+
datamate-backend:
container_name: datamate-backend
image: ${REGISTRY:-}datamate-backend
@@ -15,7 +18,6 @@ services:
depends_on:
- datamate-database
- # 1) backend (Python)
datamate-backend-python:
container_name: datamate-backend-python
image: ${REGISTRY:-}datamate-backend-python
@@ -38,13 +40,12 @@ services:
privileged: true
networks: [ datamate ]
- # 2) frontend(NodePort 30000)
datamate-frontend:
container_name: datamate-frontend
image: ${REGISTRY:-}datamate-frontend
restart: on-failure
ports:
- - "30000:80" # nodePort → hostPort
+ - "30000:80"
volumes:
- frontend_log_volume:/var/log/datamate/frontend
networks: [ datamate ]
@@ -52,7 +53,6 @@ services:
- datamate-backend
- datamate-backend-python
- # 3) database
datamate-database:
container_name: datamate-database
image: ${REGISTRY:-}datamate-database
@@ -72,7 +72,6 @@ services:
- "3306:3306"
networks: [ datamate ]
- # 3) runtime
datamate-runtime:
container_name: datamate-runtime
image: ${REGISTRY:-}datamate-runtime
@@ -99,14 +98,16 @@ services:
- operator-packages-volume:/usr/local/lib/ops/site-packages
networks: [ datamate ]
- # 4) mineru
+ # ==============================
+ # Optional: Mineru NPU Engine (profile: mineru)
+ # ==============================
datamate-mineru:
container_name: datamate-mineru
image: datamate-mineru
restart: on-failure
environment:
MINERU_MODEL_SOURCE: local
- MINERU_DEVICE_MODE: npu # cpu|cuda|npu|mps
+ MINERU_DEVICE_MODE: npu
VLLM_WORKER_MULTIPROC_METHOD: spawn
privileged: true
entrypoint: mineru-openai-server
@@ -129,7 +130,9 @@ services:
- /dev/devmm_svm
- /dev/hisi_hdc
- # 5) redis
+ # ==============================
+ # Optional: Redis (profile: redis)
+ # ==============================
datamate-redis:
container_name: datamate-redis
image: redis:8.2.3
@@ -137,7 +140,11 @@ services:
ports:
- "6379:6379"
networks: [ datamate ]
+ profiles: [ redis ]
+ # ==============================
+ # Optional: Data Juicer (profile: data-juicer)
+ # ==============================
datamate-data-juicer:
container_name: datamate-data-juicer
image: datajuicer/data-juicer:v1.4.4
@@ -153,6 +160,171 @@ services:
networks: [ datamate ]
profiles: [ data-juicer ]
+
+ # ==============================
+ # Optional: Deer Flow backend + frontend (profile: deer-flow); expects .env and conf.yaml beside this compose file
+ # ==============================
+ deer-flow-backend:
+ image: ${REGISTRY:-}deer-flow-backend
+ container_name: deer-flow-backend
+ env_file:
+ - .env
+ volumes:
+ - ./conf.yaml:/app/conf.yaml:ro
+ - deer-flow-log-volume:/var/log/deer-flow
+ restart: unless-stopped
+ networks:
+ - datamate
+ profiles:
+ - deer-flow
+
+ deer-flow-frontend:
+ image: ${REGISTRY:-}deer-flow-frontend
+ container_name: deer-flow-frontend
+ env_file:
+ - .env
+ depends_on:
+ - deer-flow-backend
+ restart: unless-stopped
+ networks:
+ - datamate
+ profiles:
+ - deer-flow
+
+ # ==============================
+ # Optional: Label Studio and its PostgreSQL database pg-db (profile: label-studio)
+ # ==============================
+ label-studio:
+ container_name: label-studio
+ stdin_open: true
+ tty: true
+ image: heartexlabs/label-studio:latest
+ privileged: true
+ restart: unless-stopped
+ user: root
+ expose:
+ - "8000"
+ ports:
+ - "30001:8000"
+ depends_on:
+ - pg-db
+ environment:
+ - DJANGO_DB=default
+ - POSTGRE_NAME=postgres
+ - POSTGRE_USER=postgres
+ - POSTGRE_PASSWORD=
+ - POSTGRE_PORT=5432
+ - POSTGRE_HOST=db
+ - LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-}
+ - LOCAL_FILES_SERVING_ENABLED=true
+ - LOCAL_FILES_DOCUMENT_ROOT=/label-studio/local
+ - USE_USERNAME_FOR_LOGIN=true
+ - LABEL_STUDIO_USERNAME=admin@demo.com
+ - LABEL_STUDIO_PASSWORD=demoadmin
+ - LABEL_STUDIO_ENABLE_LEGACY_API_TOKEN=true
+ - LABEL_STUDIO_USER_TOKEN=abc123abc123
+ - LOG_LEVEL=DEBUG
+ volumes:
+ - label-studio-data:/label-studio/data:rw
+ - dataset_volume:/label-studio/local:rw
+ networks:
+ - datamate
+ command: label-studio-uwsgi
+ profiles: [ label-studio ]
+
+ pg-db:
+ container_name: pg-db
+ image: pgautoupgrade/pgautoupgrade:13-alpine
+ hostname: db
+ restart: unless-stopped
+ environment:
+ - POSTGRES_HOST_AUTH_METHOD=trust
+ - POSTGRES_USER=postgres
+ volumes:
+ - label-studio-db:/var/lib/postgresql/data
+ networks:
+ - datamate
+ profiles: [ label-studio ]
+
+ # ==============================
+ # Optional: Milvus standalone with its etcd and MinIO dependencies (profile: milvus)
+ # ==============================
+ etcd:
+ container_name: milvus-etcd
+ image: quay.io/coreos/etcd:v3.5.18
+ environment:
+ - ETCD_AUTO_COMPACTION_MODE=revision
+ - ETCD_AUTO_COMPACTION_RETENTION=1000
+ - ETCD_QUOTA_BACKEND_BYTES=4294967296
+ - ETCD_SNAPSHOT_COUNT=50000
+ volumes:
+ - etcd-volume:/etcd
+ command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
+ restart: always
+ networks:
+ - datamate
+ healthcheck:
+ test: ["CMD", "etcdctl", "endpoint", "health"]
+ interval: 30s
+ timeout: 20s
+ retries: 3
+ profiles:
+ - milvus
+
+ minio:
+ container_name: milvus-minio
+ image: minio/minio:RELEASE.2024-12-18T13-15-44Z
+ environment:
+ MINIO_ACCESS_KEY: minioadmin
+ MINIO_SECRET_KEY: minioadmin
+ ports:
+ - "9001:9001"
+ - "9000:9000"
+ volumes:
+ - minio-volume:/minio_data
+ command: minio server /minio_data --console-address ":9001"
+ networks:
+ - datamate
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
+ interval: 30s
+ timeout: 20s
+ retries: 3
+ profiles:
+ - milvus
+
+ milvus:
+ container_name: milvus-standalone
+ image: milvusdb/milvus:v2.6.5
+ command: ["milvus", "run", "standalone"]
+ security_opt:
+ - seccomp:unconfined
+ environment:
+ ETCD_ENDPOINTS: etcd:2379
+ MINIO_ADDRESS: minio:9000
+ MQ_TYPE: woodpecker
+ volumes:
+ - milvus-volume:/var/lib/milvus
+ networks:
+ - datamate
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
+ interval: 30s
+ start_period: 90s
+ timeout: 20s
+ retries: 3
+ ports:
+ - "19530:19530"
+ - "9091:9091"
+ depends_on:
+ - "etcd"
+ - "minio"
+ profiles:
+ - milvus
+
+# ==============================
+# Volumes
+# ==============================
volumes:
dataset_volume:
name: datamate-dataset-volume
@@ -177,6 +349,28 @@ volumes:
mineru_log_volume:
name: datamate-mineru_log_volume
+ # Deer Flow
+ deer-flow-log-volume:
+ name: deer-flow-log-volume
+
+ # Label Studio
+ label-studio-data:
+ name: label-studio-data-volume
+ label-studio-db:
+ name: label-studio-db-volume
+
+ # Milvus
+ etcd-volume:
+ name: milvus-etcd-volume
+ minio-volume:
+ name: milvus-minio-volume
+ milvus-volume:
+ name: milvus-milvus-volume
+
+
+# ==============================
+# Networks
+# ==============================
networks:
datamate:
driver: bridge
diff --git a/deployment/docker/deer-flow/docker-compose.yml b/deployment/docker/deer-flow/docker-compose.yml
deleted file mode 100644
index edc2018a1..000000000
--- a/deployment/docker/deer-flow/docker-compose.yml
+++ /dev/null
@@ -1,33 +0,0 @@
-services:
- deer-flow-backend:
- image: ${REGISTRY:-}deer-flow-backend
- container_name: deer-flow-backend
- env_file:
- - .env
- volumes:
- - ./conf.yaml:/app/conf.yaml:ro
- - deer-flow-log-volume:/var/log/deer-flow
- restart: unless-stopped
- networks:
- - datamate
-
- deer-flow-frontend:
- image: ${REGISTRY:-}deer-flow-frontend
- container_name: deer-flow-frontend
- env_file:
- - .env
- depends_on:
- - deer-flow-backend
- restart: unless-stopped
- networks:
- - datamate
-
-volumes:
- deer-flow-log-volume:
- name: deer-flow-log-volume
-
-networks:
- datamate:
- driver: bridge
- name: datamate-network
- external: true
diff --git a/deployment/docker/label-studio/docker-compose.yml b/deployment/docker/label-studio/docker-compose.yml
deleted file mode 100644
index d68c6a0b9..000000000
--- a/deployment/docker/label-studio/docker-compose.yml
+++ /dev/null
@@ -1,62 +0,0 @@
-services:
-
- label-studio:
- stdin_open: true
- tty: true
- image: heartexlabs/label-studio:latest
- privileged: true
- restart: unless-stopped
- user: root
- expose:
- - "8000"
- ports:
- - "30001:8000"
- depends_on:
- - pg-db
- environment:
- - DJANGO_DB=default
- - POSTGRE_NAME=postgres
- - POSTGRE_USER=postgres
- - POSTGRE_PASSWORD=
- - POSTGRE_PORT=5432
- - POSTGRE_HOST=db
- - LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-}
- - LOCAL_FILES_SERVING_ENABLED=true
- - LOCAL_FILES_DOCUMENT_ROOT=/label-studio/local
- - USE_USERNAME_FOR_LOGIN=true
- - LABEL_STUDIO_USERNAME=admin@demo.com
- - LABEL_STUDIO_PASSWORD=demoadmin
- - LABEL_STUDIO_ENABLE_LEGACY_API_TOKEN=true
- - LABEL_STUDIO_USER_TOKEN=abc123abc123
- - LOG_LEVEL=DEBUG
- volumes:
- - label-studio-data:/label-studio/data:rw
- - dataset_volume:/label-studio/local:rw
- networks:
- - datamate
- command: label-studio-uwsgi
-
- pg-db:
- image: pgautoupgrade/pgautoupgrade:13-alpine
- hostname: db
- restart: unless-stopped
- environment:
- - POSTGRES_HOST_AUTH_METHOD=trust
- - POSTGRES_USER=postgres
- volumes:
- - label-studio-db:/var/lib/postgresql/data
- networks:
- - datamate
-
-volumes:
- label-studio-data:
- label-studio-db:
- dataset_volume:
- name: datamate-dataset-volume
- external: true
-
-networks:
- datamate:
- driver: bridge
- name: datamate-network
- external: true
diff --git a/deployment/docker/milvus/docker-compose.yml b/deployment/docker/milvus/docker-compose.yml
deleted file mode 100644
index f2f9ceb86..000000000
--- a/deployment/docker/milvus/docker-compose.yml
+++ /dev/null
@@ -1,73 +0,0 @@
-services:
- etcd:
- container_name: milvus-etcd
- image: quay.io/coreos/etcd:v3.5.18
- environment:
- - ETCD_AUTO_COMPACTION_MODE=revision
- - ETCD_AUTO_COMPACTION_RETENTION=1000
- - ETCD_QUOTA_BACKEND_BYTES=4294967296
- - ETCD_SNAPSHOT_COUNT=50000
- volumes:
- - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
- command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
- restart: always
- networks:
- - datamate
- healthcheck:
- test: ["CMD", "etcdctl", "endpoint", "health"]
- interval: 30s
- timeout: 20s
- retries: 3
-
- minio:
- container_name: milvus-minio
- image: minio/minio:RELEASE.2024-12-18T13-15-44Z
- environment:
- MINIO_ACCESS_KEY: minioadmin
- MINIO_SECRET_KEY: minioadmin
- ports:
- - "9001:9001"
- - "9000:9000"
- volumes:
- - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
- command: minio server /minio_data --console-address ":9001"
- networks:
- - datamate
- healthcheck:
- test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
- interval: 30s
- timeout: 20s
- retries: 3
-
- milvus:
- container_name: milvus-standalone
- image: milvusdb/milvus:v2.6.5
- command: ["milvus", "run", "standalone"]
- security_opt:
- - seccomp:unconfined
- environment:
- ETCD_ENDPOINTS: etcd:2379
- MINIO_ADDRESS: minio:9000
- MQ_TYPE: woodpecker
- volumes:
- - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
- networks:
- - datamate
- healthcheck:
- test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
- interval: 30s
- start_period: 90s
- timeout: 20s
- retries: 3
- ports:
- - "19530:19530"
- - "9091:9091"
- depends_on:
- - "etcd"
- - "minio"
-
-networks:
- datamate:
- name: datamate-network
- external: true
- driver: bridge
diff --git a/editions/community/config/application.yml b/editions/community/config/application.yml
deleted file mode 100644
index 689d09554..000000000
--- a/editions/community/config/application.yml
+++ /dev/null
@@ -1,184 +0,0 @@
-# 数据引擎平台 - 主应用配置
-spring:
- application:
- name: datamate
-
- # 暂时排除Spring Security自动配置(开发阶段使用)
- autoconfigure:
- exclude:
- - org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration
- - org.springframework.boot.autoconfigure.security.servlet.UserDetailsServiceAutoConfiguration
-
- # 数据源配置
- datasource:
- driver-class-name: com.mysql.cj.jdbc.Driver
- url: jdbc:mysql://datamate-database:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
- username: ${DB_USERNAME:root}
- password: ${DB_PASSWORD:password}
- hikari:
- maximum-pool-size: 20
- minimum-idle: 5
- connection-timeout: 30000
- idle-timeout: 600000
- max-lifetime: 1800000
-
- # Elasticsearch配置
- elasticsearch:
- uris: ${ES_URIS:http://localhost:9200}
- username: ${ES_USERNAME:}
- password: ${ES_PASSWORD:}
- connection-timeout: 10s
- socket-timeout: 30s
-
- # Jackson配置
- jackson:
- time-zone: Asia/Shanghai
- date-format: yyyy-MM-dd HH:mm:ss
- serialization:
- write-dates-as-timestamps: false
- deserialization:
- fail-on-unknown-properties: false
-
- # 文件上传配置
- servlet:
- multipart:
- max-file-size: 100MB
- max-request-size: 100MB
-
- # 任务调度配置
- task:
- execution:
- pool:
- core-size: ${TASK_EXECUTION_CORE_SIZE:10}
- max-size: ${TASK_EXECUTION_MAX_SIZE:20}
- queue-capacity: ${TASK_EXECUTION_QUEUE_CAPACITY:100}
- keep-alive: ${TASK_EXECUTION_KEEP_ALIVE:60s}
- scheduling:
- pool:
- size: ${TASK_SCHEDULING_POOL_SIZE:5}
- config:
- import:
- - classpath:config/application-datacollection.yml
- - classpath:config/application-datamanagement.yml
-
- ai:
- mcp:
- server:
- name: datamate-backend-mcp-server
- base-url: /api
- capabilities:
- resource: false
- prompt: false
- completion: false
- tool: true
-
-# MyBatis配置(需在顶层,不在 spring 下)
-mybatis-plus:
- configuration:
- map-underscore-to-camel-case: true
- default-fetch-size: 100
- default-statement-timeout: 30
- use-generated-keys: true
- cache-enabled: true
- lazy-loading-enabled: false
- multiple-result-sets-enabled: true
- use-column-label: true
- auto-mapping-behavior: partial
- auto-mapping-unknown-column-behavior: none
- default-executor-type: simple
- call-setters-on-nulls: false
- return-instance-for-empty-row: false
- log-impl: org.apache.ibatis.logging.slf4j.Slf4jImpl
- mapper-locations:
- - classpath*:mappers/**/*.xml
- type-aliases-package:
- - com.datamate.collection.domain.model
- - com.datamate.datamanagement.domain.model.dataset
-
-# 应用配置
-server:
- port: ${SERVER_PORT:8080}
- servlet:
- context-path: /api
- encoding:
- charset: UTF-8
- enabled: true
- force: true
-
-# 日志配置
-logging:
- config: file:/opt/backend/log4j2.xml
-
-# Actuator配置
-management:
- endpoints:
- web:
- exposure:
- include: health,info,metrics,prometheus
- endpoint:
- health:
- show-details: when-authorized
- health:
- elasticsearch:
- enabled: false # 禁用Elasticsearch健康检查
-
-# 平台配置
-datamate:
- # JWT配置
- jwt:
- secret: ${JWT_SECRET:dataMateSecretKey2024ForJWTTokenGeneration}
- expiration: ${JWT_EXPIRATION:86400} # 24小时,单位秒
- header: Authorization
- prefix: "Bearer "
-
- # 文件存储配置
- storage:
- type: ${STORAGE_TYPE:local} # local, minio, s3
- local:
- base-path: ${STORAGE_LOCAL_PATH:./data/storage}
- minio:
- endpoint: ${MINIO_ENDPOINT:http://localhost:9000}
- access-key: ${MINIO_ACCESS_KEY:minioadmin}
- secret-key: ${MINIO_SECRET_KEY:minioadmin}
- bucket-name: ${MINIO_BUCKET:data-mate}
-
- # Ray执行器配置
- ray:
- enabled: ${RAY_ENABLED:false}
- address: ${RAY_ADDRESS:ray://localhost:10001}
- runtime-env:
- working-dir: ${RAY_WORKING_DIR:./runtime/python-executor}
- pip-packages:
- - "ray[default]==2.7.0"
- - "pandas"
- - "numpy"
- - "data-juicer"
-
- # 数据归集服务配置(可由模块导入叠加)
- data-collection: {}
-
- # 算子市场配置
- operator-market:
- repository-path: ${OPERATOR_REPO_PATH:./runtime/operators}
- registry-url: ${OPERATOR_REGISTRY_URL:}
- max-upload-size: ${OPERATOR_MAX_UPLOAD_SIZE:50MB}
-
- # 数据处理配置
- data-processing:
- max-file-size: ${MAX_FILE_SIZE:1GB}
- temp-dir: ${TEMP_DIR:./data/temp}
- batch-size: ${BATCH_SIZE:1000}
-
- # 标注配置
- annotation:
- auto-annotation:
- enabled: ${AUTO_ANNOTATION_ENABLED:true}
- model-endpoint: ${ANNOTATION_MODEL_ENDPOINT:}
- quality-control:
- enabled: ${QC_ENABLED:true}
- threshold: ${QC_THRESHOLD:0.8}
-
- # RAG配置
- rag:
- milvus-host: ${MILVUS_HOST:milvus-standalone}
- milvus-port: ${MILVUS_PORT:19530}
diff --git a/editions/community/config/log4j2.xml b/editions/community/config/log4j2.xml
deleted file mode 100644
index 5358fc94d..000000000
--- a/editions/community/config/log4j2.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
-
- /var/log/datamate/backend
- %d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n
- 100MB
- 30
- INFO
- WARN
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/editions/enterprise/config/application.yml b/editions/enterprise/config/application.yml
deleted file mode 100644
index 9326ab629..000000000
--- a/editions/enterprise/config/application.yml
+++ /dev/null
@@ -1,181 +0,0 @@
-# 数据引擎平台 - 主应用配置
-spring:
- application:
- name: datamate
-
- # 暂时排除Spring Security自动配置(开发阶段使用)
- autoconfigure:
- exclude:
- - org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration
- - org.springframework.boot.autoconfigure.security.servlet.UserDetailsServiceAutoConfiguration
-
- # 数据源配置
- datasource:
- driver-class-name: com.mysql.cj.jdbc.Driver
- url: jdbc:mysql://mysql:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
- username: ${DB_USERNAME:root}
- password: ${DB_PASSWORD:password}
- hikari:
- maximum-pool-size: 20
- minimum-idle: 5
- connection-timeout: 30000
- idle-timeout: 600000
- max-lifetime: 1800000
-
- # Elasticsearch配置
- elasticsearch:
- uris: ${ES_URIS:http://localhost:9200}
- username: ${ES_USERNAME:}
- password: ${ES_PASSWORD:}
- connection-timeout: 10s
- socket-timeout: 30s
-
- # Jackson配置
- jackson:
- time-zone: Asia/Shanghai
- date-format: yyyy-MM-dd HH:mm:ss
- serialization:
- write-dates-as-timestamps: false
- deserialization:
- fail-on-unknown-properties: false
-
- # 文件上传配置
- servlet:
- multipart:
- max-file-size: 100MB
- max-request-size: 100MB
-
- # 任务调度配置
- task:
- execution:
- pool:
- core-size: ${TASK_EXECUTION_CORE_SIZE:10}
- max-size: ${TASK_EXECUTION_MAX_SIZE:20}
- queue-capacity: ${TASK_EXECUTION_QUEUE_CAPACITY:100}
- keep-alive: ${TASK_EXECUTION_KEEP_ALIVE:60s}
- scheduling:
- pool:
- size: ${TASK_SCHEDULING_POOL_SIZE:5}
- config:
- import:
- - classpath:config/application-datacollection.yml
- - classpath:config/application-datamanagement.yml
-
-# MyBatis配置(需在顶层,不在 spring 下)
-mybatis-plus:
- configuration:
- map-underscore-to-camel-case: true
- default-fetch-size: 100
- default-statement-timeout: 30
- use-generated-keys: true
- cache-enabled: true
- lazy-loading-enabled: false
- multiple-result-sets-enabled: true
- use-column-label: true
- auto-mapping-behavior: partial
- auto-mapping-unknown-column-behavior: none
- default-executor-type: simple
- call-setters-on-nulls: false
- return-instance-for-empty-row: false
- log-impl: org.apache.ibatis.logging.slf4j.Slf4jImpl
- mapper-locations:
- - classpath*:mappers/**/*.xml
- type-aliases-package:
- - com.datamate.collection.domain.model
- - com.datamate.datamanagement.domain.model.dataset
-
-# 应用配置
-server:
- port: ${SERVER_PORT:8080}
- servlet:
- context-path: /api
- encoding:
- charset: UTF-8
- enabled: true
- force: true
-
-# 日志配置
-logging:
- config: file:/opt/backend/log4j2.xml
-
-# Actuator配置
-management:
- endpoints:
- web:
- exposure:
- include: health,info,metrics,prometheus
- endpoint:
- health:
- show-details: when-authorized
- health:
- elasticsearch:
- enabled: false # 禁用Elasticsearch健康检查
-
-# 平台配置
-datamate:
- # JWT配置
- jwt:
- secret: ${JWT_SECRET:dataMateSecretKey2024ForJWTTokenGeneration}
- expiration: ${JWT_EXPIRATION:86400} # 24小时,单位秒
- header: Authorization
- prefix: "Bearer "
-
- # 文件存储配置
- storage:
- type: ${STORAGE_TYPE:local} # local, minio, s3
- local:
- base-path: ${STORAGE_LOCAL_PATH:./data/storage}
- minio:
- endpoint: ${MINIO_ENDPOINT:http://localhost:9000}
- access-key: ${MINIO_ACCESS_KEY:minioadmin}
- secret-key: ${MINIO_SECRET_KEY:minioadmin}
- bucket-name: ${MINIO_BUCKET:data-mate}
-
- # Ray执行器配置
- ray:
- enabled: ${RAY_ENABLED:false}
- address: ${RAY_ADDRESS:ray://localhost:10001}
- runtime-env:
- working-dir: ${RAY_WORKING_DIR:./runtime/python-executor}
- pip-packages:
- - "ray[default]==2.7.0"
- - "pandas"
- - "numpy"
- - "data-juicer"
-
- # 数据归集服务配置(可由模块导入叠加)
- data-collection: {}
-
- # 算子市场配置
- operator-market:
- repository-path: ${OPERATOR_REPO_PATH:./runtime/operators}
- registry-url: ${OPERATOR_REGISTRY_URL:}
- max-upload-size: ${OPERATOR_MAX_UPLOAD_SIZE:50MB}
-
- # 数据处理配置
- data-processing:
- max-file-size: ${MAX_FILE_SIZE:1GB}
- temp-dir: ${TEMP_DIR:./data/temp}
- batch-size: ${BATCH_SIZE:1000}
-
- # 标注配置
- annotation:
- auto-annotation:
- enabled: ${AUTO_ANNOTATION_ENABLED:true}
- model-endpoint: ${ANNOTATION_MODEL_ENDPOINT:}
- quality-control:
- enabled: ${QC_ENABLED:true}
- threshold: ${QC_THRESHOLD:0.8}
-
- # RAG配置
- rag:
- embedding:
- model: ${RAG_EMBEDDING_MODEL:text-embedding-ada-002}
- api-key: ${RAG_API_KEY:}
- dimension: ${RAG_DIMENSION:1536}
- chunk:
- size: ${RAG_CHUNK_SIZE:512}
- overlap: ${RAG_CHUNK_OVERLAP:50}
- retrieval:
- top-k: ${RAG_TOP_K:5}
- score-threshold: ${RAG_SCORE_THRESHOLD:0.7}
diff --git a/editions/enterprise/config/log4j2.xml b/editions/enterprise/config/log4j2.xml
deleted file mode 100644
index f9d0cf3a5..000000000
--- a/editions/enterprise/config/log4j2.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
-
- /var/log/data-mate/backend
- %d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n
- 100MB
- 30
- INFO
- WARN
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/scripts/images/frontend/Dockerfile b/scripts/images/frontend/Dockerfile
index db19a567a..fcdc78ef9 100644
--- a/scripts/images/frontend/Dockerfile
+++ b/scripts/images/frontend/Dockerfile
@@ -10,7 +10,7 @@ RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi && \
FROM nginx:1.29 AS runner
COPY --from=builder /app/dist /opt/frontend
-COPY deployment/docker/datamate/backend.conf /etc/nginx/conf.d/backend.conf
+COPY scripts/images/frontend/backend.conf /etc/nginx/conf.d/backend.conf
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
&& rm -f /etc/nginx/conf.d/default.conf
diff --git a/deployment/docker/datamate/backend.conf b/scripts/images/frontend/backend.conf
similarity index 100%
rename from deployment/docker/datamate/backend.conf
rename to scripts/images/frontend/backend.conf