Skip to content
Merged
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ Building blocks for the following big data project use cases are supported in th
- Spark
- Kafka
- Elasticsearch
- GreenplumDB
3 changes: 1 addition & 2 deletions docker_bigdata/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Distributed under the terms of the Modified BSD License.

# Optional registry namespace; when set it is prepended (followed by "/") to the base image name in FROM.
ARG BASE_NAMESPACE
# Name of the base image this image is built on.
ARG BASE_IMG="base"
ARG BASE_IMG="jdk11"
FROM ${BASE_NAMESPACE:+$BASE_NAMESPACE/}${BASE_IMG}

LABEL maintainer="haobibo@gmail.com"
Expand All @@ -20,7 +20,6 @@ RUN source /opt/utils/script-setup.sh \
&& echo "Install mysql client:" && setup_mysql_client \
&& echo "Install mongosh:" && setup_mongosh_client \
&& echo "Install redis-cli:" && setup_redis_client \
&& echo "Install JDK:" && VERSION_JDK=11 setup_java_base \
&& echo "Install pyflink:" && install_pip /opt/utils/list_install_pip_pyflink.txt \
&& echo "Install pyspark:" && install_pip /opt/utils/list_install_pip_pyspark.txt \
&& echo "Clean up" && list_installed_packages && install__clean
78 changes: 54 additions & 24 deletions docker_greenplum/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,39 +1,69 @@
# Distributed under the terms of the Modified BSD License.
# Build arguments declared before the first FROM: they parameterise the base
# image reference used by the FROM lines of this file.
ARG BASE_NAMESPACE
ARG BASE_IMG="base"

# Stage "builder": compiles Greenplum 7.0.0-beta.2 from its source archive and
# installs the result under /opt/gpdb so a later stage can copy it out.
FROM ${BASE_NAMESPACE:+$BASE_NAMESPACE/}${BASE_IMG} AS builder

COPY rootfs /

# install_apt / install_tar_gz are helpers loaded from script-utils.sh; the
# archive is presumably unpacked to /opt/gpdb_src (the directory entered below).
RUN source /opt/utils/script-utils.sh \
 && install_apt /opt/utils/install_list_greenplum.apt \
 && apt-get -qq install -yq --no-install-recommends \
      bison \
      ccache \
      cmake \
      flex \
      g++ \
      gcc \
      ninja-build \
      pkg-config \
 && install_tar_gz https://github.com/greenplum-db/gpdb/releases/download/7.0.0-beta.2/7.0.0-beta.2-src-full.tar.gz \
 && cd /opt/gpdb_src \
 && PYTHON=/opt/conda/bin/python3 ./configure \
      --prefix=/opt/gpdb \
      --with-perl \
      --with-python \
      --with-libxml \
      --with-gssapi \
      --with-openssl \
 && sudo make -j16 \
 && sudo make install -j16

# Image stage built from the parameterised base image reference
# (optional namespace prefix + base image name).
FROM ${BASE_NAMESPACE:+$BASE_NAMESPACE/}${BASE_IMG}

LABEL maintainer="haobibo@gmail.com"

# Copy build-context files into the image, then bring in the /opt/gpdb tree
# from the "builder" stage with owner UID:GID 1000:1000.
COPY work /opt/utils/
COPY rootfs /
COPY --from=builder --chown=1000:1000 /opt/gpdb /opt/gpdb

# Greenplum install prefix, data directory and OS account name.
ENV GPDB_HOME="/opt/gpdb" \
GPDB_DATA="/data/gpdb" \
GPDB_USER="gpadmin"
# The same three settings under the GPHOME / GPDATA / GPUSER names.
ENV GPHOME="/opt/gpdb" \
GPDATA="/data/gpdb" \
GPUSER="gpadmin"

# Provision the image in a single layer: load the helper functions, create the
# Greenplum OS account, install packages, link Greenplum's Python modules into
# the interpreter's site directory, configure SSH, and clean up.
RUN source /opt/utils/script-utils.sh \
# Append a line to /etc/profile that sources Greenplum's environment script.
&& echo "source ${GPHOME}/greenplum_path.sh" >> /etc/profile \
# Create the account with UID 1000, add it to the root group, grant it
# passwordless sudo, and set its password to its own user name.
&& useradd -u 1000 ${GPUSER} -s /bin/bash -d /home/${GPUSER} \
&& usermod -aG root ${GPUSER} \
&& echo "${GPUSER} ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers \
&& echo "${GPUSER}:${GPUSER}" | chpasswd \
# Create the config, data and ~/.ssh directories and hand them to that account.
&& mkdir -pv ${GPHOME}/conf ${GPDATA} /home/${GPUSER}/.ssh \
&& chown -R ${GPUSER}:${GPUSER} ${GPHOME} ${GPDATA} /home/${GPUSER} \
# Install the packages listed in install_list_greenplum.apt via the install_apt helper.
&& install_apt /opt/utils/install_list_greenplum.apt \
# Account setup using the GPDB_USER name.
&& useradd ${GPDB_USER} \
&& usermod -aG root ${GPDB_USER} \
&& echo "${GPDB_USER} ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers \
&& mkdir -p /home/${GPDB_USER}/.ssh \
&& chown -R ${GPDB_USER}:${GPDB_USER} /home/${GPDB_USER} \
&& echo "${GPDB_USER}:${GPDB_USER}" | chpasswd \
# In-place source build: fetch the 7.0.0-beta.2 source archive, configure with
# the conda Python, build, and install into GPDB_HOME.
&& install_tar_gz https://github.com/greenplum-db/gpdb/releases/download/7.0.0-beta.2/7.0.0-beta.2-src-full.tar.gz \
&& cd /opt/gpdb_src \
&& export PATH="${PATH}":${GPDB_HOME}/bin \
&& PYTHON=/opt/conda/bin/python3 ./configure --prefix=${GPDB_HOME} --with-perl --with-python --with-libxml --with-gssapi --with-openssl \
&& sudo make -j16 && sudo make install -j16 \
&& mkdir -pv ${GPDB_HOME}/conf ${GPDB_DATA} \
&& sudo chown -R ${GPDB_USER}:${GPDB_USER} ${GPDB_HOME} ${GPDB_DATA} \
&& cp ${GPDB_HOME}/docs/cli_help/gpconfigs/gpinitsystem_config ${GPDB_HOME}/conf/gpinitsystem_config \
# Put the Greenplum binaries on PATH for the rest of this step and list them.
&& export PATH="${PATH}:${GPHOME}/bin" && cd ${GPHOME}/bin && ls -alh \
# PYTHON_SITE = first sys.path entry whose text contains "site"; Greenplum's
# bundled Python modules are symlinked into it.
&& PYTHON_SITE=$(python3 -c 'import sys;print(list(filter(lambda s: "site" in s, sys.path))[0])') \
&& sudo ln -s ${GPDB_HOME}/lib/python/* ${PYTHON_SITE}/ \
&& pip install conan psutil pygresql \
&& cp -rf /opt/utils/etc / && rm -rf /opt/utils/etc /opt/gpdb_src \
&& echo "source ${GPDB_HOME}/greenplum_path.sh" >> /etc/profile \
&& sudo ln -s ${GPHOME}/lib/python/* ${PYTHON_SITE}/ \
&& pip install -U psutil pygresql pyyaml \
# Seed the conf directory with the sample gpinitsystem configuration shipped in the install tree.
&& cp ${GPHOME}/docs/cli_help/gpconfigs/gpinitsystem_config ${GPHOME}/conf/gpinitsystem_config \
# Configure SSH: permit root login in sshd_config, turn off StrictHostKeyChecking
# in the client config, and make pam_loginuid optional for sshd sessions.
&& sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config \
&& sed -i -r 's/^.*StrictHostKeyChecking\s+\w+/StrictHostKeyChecking no/' /etc/ssh/ssh_config \
&& sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd \
# Presumably the runtime directory sshd expects at start-up -- confirm.
&& mkdir -pv /run/sshd \
# NOTE(review): ulimit is a shell builtin, so this presumably only affects the
# shell of this RUN step and is not carried into containers; it is also given
# two limit values -- confirm the intent.
&& ulimit -n 65536 65536 \
&& echo "Clean up" && list_installed_packages && install__clean

# Switch to the Greenplum account for the remaining build steps and as the
# default user of the image.
USER ${GPDB_USER}
USER ${GPUSER}
# Create a passphrase-less 4096-bit RSA key pair unless a public key already
# exists, authorise that key for logins to this same account, restrict the
# authorized_keys permissions, and mark the *.sh scripts in GPHOME executable.
# NOTE(review): `ssh-keygen -A` generates missing host keys; it runs here as the
# non-root account -- confirm it has the permissions it needs.
RUN [ -e ~/.ssh/id_rsa.pub ] || ssh-keygen -t rsa -b 4096 -N "" -C GreenplumDB -f ~/.ssh/id_rsa \
&& cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys \
&& ssh-keygen -A -v \
&& chmod 600 ~/.ssh/authorized_keys \
&& chmod +x ${GPHOME}/*.sh

# Runtime environment: Greenplum binaries on PATH and libraries on
# LD_LIBRARY_PATH, coordinator directories located under the data directory,
# and default PostgreSQL client settings (port, user, database).
ENV PATH="$PATH:${GPHOME}/bin" \
USER=${GPUSER} \
LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${GPHOME}/lib" \
COORDINATOR_DIRECTORY="${GPDATA}/coordinator" \
COORDINATOR_DATA_DIRECTORY="${GPDATA}/coordinator/gpseg-1" \
PGPORT=5432 \
PGUSER=gpadmin \
PGDATABASE=postgres

WORKDIR /home/${GPUSER}
# 5432 matches PGPORT above; 22 is presumably for the sshd configured during the build.
EXPOSE 5432 22
# Declared after the build steps that populate these paths.
VOLUME ["${GPDB_HOME}"]
VOLUME ["${GPDATA}", "${GPHOME}/conf"]
# tini is the entrypoint; the default command goes through `bash -c` so that
# ${GPHOME} is expanded by the shell when the container starts.
ENTRYPOINT ["tini", "-g", "--"]
CMD ["/bin/bash", "-c", "${GPHOME}/entrypoint.sh"]
28 changes: 14 additions & 14 deletions docker_greenplum/README-gpdb-install.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

本文档介绍GPDB的安装,以目前Github上开源的最新版本为例。

GPDB集群为主从模式,主结点称为master结点或者coordinator结点,从结点成为segment结点
GPDB集群为主从模式,主结点称为master结点或者coordinator结点,从结点称为segment结点

## 准备工作(均为必须)

Expand Down Expand Up @@ -32,7 +32,7 @@ ssh-keygen -t rsa -b 4096 -N "" -C `hostname` -f ~/.ssh/id_rsa

在主节点上,也即第1台服务器上,授权通过ssh访问各台服务器:
建议先安装sshpass: apt-get install sshpass,然后再从第1台服务器上执行下面的命令(最后一个参数为主机名)
```sshpass -p "P@ssw0rd!" ssh-copy-id -o StrictHostKeyChecking=no gpadmin@KGDB-001```
```sshpass -p "P@ssw0rd!" ssh-copy-id -o StrictHostKeyChecking=no gpadmin@GPDB-001```

### 1.3 安装依赖

Expand All @@ -58,16 +58,16 @@ sudo pip3 install conan

```txt
# IP Address Node Name
30.23.109.100 KGDB-001
30.23.109.107 KGDB-002
30.23.109.109 KGDB-003
30.23.109.100 GPDB-001
30.23.109.101 GPDB-002
30.23.109.102 GPDB-003
```

## 2 编译安装GPDB

在第一台服务器上,以gpadmin用户,从gpdb的源代码编译安装GPDB到/opt/gpdb/
本例中,下载的源代码来自于gpdb官方代码库的master branch: https://github.com/greenplum-db/gpdb.git
master branch编译出的版本为基于PostgreSQL 12.0的版本,且该版本可以只使用Python3。
master branch编译出的版本为基于PostgreSQL 12.x的版本,且该版本可以只使用Python3。

### 2.1 准备Python依赖

Expand Down Expand Up @@ -121,15 +121,15 @@ PYTHON_SITE=$(python3 -c 'import sys;print(list(filter(lambda s: "site" in s, sy

```txt
# hostlist
KGDB-001
KGDB-002
KDGB-003
GPDB-001
GPDB-002
GPDB-003
```

```txt
# seg_host
KDGB-002
KGDB-003
GPDB-002
GPDB-003
```

### 2.6 授权服务器之间SSH互访
Expand All @@ -149,7 +149,7 @@ gpscp -f seg_host /opt/conda /opt/gpdb =:~/
例如通过下面的echo命令来追加到/etc/hosts文件(参照步骤1.4):

```shell
sudo echo "30.23.109.100 KGDB-001" >> /etc/hosts
# The ">>" redirection is performed by the calling (non-root) shell, not by sudo,
# so pipe the line into `sudo tee -a` to append to /etc/hosts with root privileges.
echo "30.23.109.100 GPDB-001" | sudo tee -a /etc/hosts > /dev/null
```

### 2.8. 修改Linux系统配置
Expand Down Expand Up @@ -223,14 +223,14 @@ mkdir -pv /data/gpdb/primary1 /data/gpdb/primary2 /data/gpdb/mirror1 /data/gpdb/

```conf
#集群名称
ARRAY_NAME="KGDB"
ARRAY_NAME="GPDB"

SEG_PREFIX="gpseg"

MACHINE_LIST_FILE="/opt/gpdb/conf/seg_host"

# Master结点主机名
COORDINATOR_HOSTNAME=GP7-001
COORDINATOR_HOSTNAME=GPDB-001

#master的数据目录
COORDINATOR_DIRECTORY=/data/gpdb/coordinator
Expand Down
17 changes: 14 additions & 3 deletions docker_greenplum/README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
```bash
# Build the image from this directory, resolving the base image inside the "qpod" namespace.
docker build -t qpod/gpdb --build-arg "BASE_NAMESPACE=qpod" .
docker build -t qpod0dev/greenplum --build-arg "BASE_NAMESPACE=qpod" .

# Start an interactive container running bash: host ports 15432/10022 map to
# container ports 5432/22, and both the hostname and container name are gpdb-cdw.
docker run -it \
-p 15432:5432 -p 10022:22 \
-v $(pwd):/opt/dev \
docker.io/qpod/greenplum \
-v /data/database/gpdb:/data/gpdb \
-h gpdb-cdw \
--name gpdb-cdw \
docker.io/qpod0dev/greenplum \
bash

# Start the same container detached, using the image's default command.
docker run -d \
-p 15432:5432 -p 10022:22 \
-v /data/database/gpdb:/data/gpdb \
-h gpdb-cdw \
--name gpdb-cdw \
docker.io/qpod0dev/greenplum

# Run the entrypoint script by hand (presumably from inside the interactive container -- confirm).
/bin/bash -c ${GPHOME}/entrypoint.sh
```
22 changes: 22 additions & 0 deletions docker_greenplum/example/gpdb-single-node/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Single-node example: one Greenplum container ("cdw") attached to a dedicated
# network with a fixed subnet.
version: '3'

networks:
  greenplum:
    ipam:
      config:
        - gateway: 10.188.0.1
          subnet: 10.188.0.0/24

services:
  cdw:
    image: 'docker.io/qpod/greenplum'
    hostname: gpdb-cdw
    container_name: gpdb-cdw
    tty: true
    networks:
      - greenplum
    # host port -> container port: 10022 -> 22, 15432 -> 5432
    ports:
      - '10022:22'
      - '15432:5432'
    # host directory /data/database/gpdb is mounted at /data/gpdb in the container
    volumes:
      - /data/database/gpdb:/data/gpdb
58 changes: 58 additions & 0 deletions docker_greenplum/example/gpdb-single-vm/conf/gpinitsystem.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# reference file:/opt/gpdb/docs/cli_help/gpconfigs/gpinitsystem_config

# Optional - GPDB Cluster Name
ARRAY_NAME="GPDB"

################################################
#### REQUIRED PARAMETERS
################################################

#### Naming convention for utility-generated data directories.
SEG_PREFIX=gpseg

#### Base number by which primary segment port numbers are calculated.
PORT_BASE=6000

#### File system location(s) where primary segment data directories will be created.
#### The number of locations in the list dictate the number of primary segments that will get created per physical host.
#### (If multiple addresses for a host are listed in the hostfile, the number of segments will be spread evenly across the specified interface addresses).
declare -a DATA_DIRECTORY=(${GPDATA}/primary1 ${GPDATA}/primary1 ${GPDATA}/primary1 ${GPDATA}/primary2 ${GPDATA}/primary2 ${GPDATA}/primary2)

#### OS-configured hostname or IP address of the coordinator host.
COORDINATOR_HOSTNAME=gpdb-cdw

#### File system location where the coordinator data directory will be created.
#### Derived from GPDATA like DATA_DIRECTORY above, so coordinator and segment data
#### stay under the same root; falls back to /data/gpdb when GPDATA is unset.
COORDINATOR_DIRECTORY=${GPDATA:-/data/gpdb}/coordinator

#### Port number for the coordinator instance.
COORDINATOR_PORT=5432

#### Shell utility used to connect to remote hosts.
TRUSTED_SHELL=ssh

#### Default server-side character set encoding.
ENCODING=UNICODE

#### Maximum log file segments between automatic WAL checkpoints.
CHECK_POINT_SEGMENTS=8

################################################
#### OPTIONAL MIRROR PARAMETERS
################################################

#### Base number by which mirror segment port numbers are calculated.
#MIRROR_PORT_BASE=7000

#### File system location(s) where mirror segment data directories will be created.
#### The number of mirror locations must equal the number of primary locations as specified in the DATA_DIRECTORY parameter.
#declare -a MIRROR_DATA_DIRECTORY=(${GPDATA}/mirror1 ${GPDATA}/mirror1 ${GPDATA}/mirror1 ${GPDATA}/mirror2 ${GPDATA}/mirror2 ${GPDATA}/mirror2)


################################################
#### OTHER OPTIONAL PARAMETERS
################################################

#### Create a database of this name after initialization.
#DATABASE_NAME=name_of_database

#### Specify the location of the host address file here instead of with the -h option of gpinitsystem.
MACHINE_LIST_FILE="/opt/gpdb/conf/seg_host"
3 changes: 3 additions & 0 deletions docker_greenplum/example/gpdb-single-vm/conf/seg_host
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
gpdb-cdw
gpdb-sdw1
gpdb-sdw2
51 changes: 51 additions & 0 deletions docker_greenplum/example/gpdb-single-vm/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Three-container example on one machine: one coordinator ("cdw") and two
# segment hosts ("sdw1", "sdw2") on a dedicated network with a fixed subnet.
# All three mount the same ./conf directory and the same host data directory.
version: "3"

networks:
  greenplum:
    ipam:
      config:
        - subnet: 10.189.0.0/24
          gateway: 10.189.0.1

services:
  cdw:
    image: "docker.io/qpod/greenplum"
    container_name: gpdb-cdw
    hostname: gpdb-cdw
    # Only the coordinator publishes ports: host 10022 -> 22, host 15432 -> 5432.
    ports:
      - "10022:22"
      - "15432:5432"
    tty: true
    networks:
      - greenplum
    environment:
      # No quotes around the value: in list form they would become part of the value.
      - ROLE=COORDINATOR
    volumes:
      - ./conf:/opt/gpdb/conf
      - /data/database/greenplum:/data/gpdb

  sdw1:
    image: "docker.io/qpod/greenplum"
    container_name: gpdb-sdw1
    hostname: gpdb-sdw1
    tty: true
    networks:
      - greenplum
    environment:
      - ROLE=SEGMENT
    volumes:
      - ./conf:/opt/gpdb/conf
      - /data/database/greenplum:/data/gpdb

  sdw2:
    image: "docker.io/qpod/greenplum"
    container_name: gpdb-sdw2
    hostname: gpdb-sdw2
    tty: true
    networks:
      - greenplum
    environment:
      - ROLE=SEGMENT
    volumes:
      - ./conf:/opt/gpdb/conf
      - /data/database/greenplum:/data/gpdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Columns: <domain> <type> <item> <value>.
# For the wildcard domain (*), set both the soft and hard limits on open files
# (nofile) and on processes (nproc) to 1048576.
* soft nofile 1048576
* hard nofile 1048576
* soft nproc 1048576
* hard nproc 1048576
15 changes: 15 additions & 0 deletions docker_greenplum/rootfs/etc/sysctl.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Kernel parameters shipped with the image at /etc/sysctl.conf.

# System V shared memory: maximum segment size (bytes), maximum number of
# segments, and total shared memory allowed (in pages).
kernel.shmmax = 5000000000000
kernel.shmmni = 32768
kernel.shmall = 40000000000
# System V semaphores; the four fields are SEMMSL SEMMNS SEMOPM SEMMNI.
kernel.sem = 1000 32768000 1000 32768
# System V message queues: default queue size, maximum message size (bytes),
# and maximum number of queues.
kernel.msgmnb = 1048576
kernel.msgmax = 1048576
kernel.msgmni = 32768

# Network: input packet backlog length and socket receive/send buffer sizes (bytes).
net.core.netdev_max_backlog = 80000
net.core.rmem_default = 2097152
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216

# Memory overcommit: mode 2 disables overcommit; the commit limit is swap plus
# overcommit_ratio percent of physical RAM.
vm.overcommit_memory = 2
vm.overcommit_ratio = 95
Loading