# Setup

In [1]:
HADOOP_START_FROM_SCRATCH = True
DOCKER_INTERNAL_HOST = "host.docker.internal"
DOCKER_DNS = ["10.15.20.1"]

HADOOP_NAMENODE_HOSTNAME = "namenode.mavasbel.vpn.itam.mx"
HADOOP_NAMENODE_IP = "10.15.20.2"
HADOOP_NAMENODE_PORT = 8020
HADOOP_NAMENODE_WEBUI_PORT = 9870

HADOOP_RESOURCEMANAGER_HOSTNAME = "resourcemanager.mavasbel.vpn.itam.mx"
HADOOP_RESOURCEMANAGER_IP = "10.15.20.2"
HADOOP_RESOURCEMANAGER_WEBUI_PORT = 8088
HADOOP_RESOURCEMANAGER_RPC_APP_MANAGER_PORT = 8032
HADOOP_RESOURCEMANAGER_TRACKER_PORT = 8031
HADOOP_RESOURCEMANAGER_SCHEDULER_PORT = 8030
HADOOP_RESOURCEMANAGER_ADMIN_PORT = 8033

HADOOP_REPLICATION = 3
HADOOP_NUM_WORKERS = 3

HADOOP_DATANODE_IPS = ["10.15.20.2"] * 3
HADOOP_DATANODE_NAMES = [f"datanode-{i+1}" for i in range(HADOOP_NUM_WORKERS)]
HADOOP_DATANODE_HOSTNAMES = [
    f"{HADOOP_DATANODE_NAMES[i]}.mavasbel.vpn.itam.mx"
    for i in range(HADOOP_NUM_WORKERS)
]
HADOOP_DATANODE_WEBUI_PORTS = [9864 + (i * 10) for i in range(HADOOP_NUM_WORKERS)]
HADOOP_DATANODE_TRANSFER_PORTS = [9866 + (i * 10) for i in range(HADOOP_NUM_WORKERS)]
HADOOP_DATANODE_IPC_PORTS = [6867 + (i * 10) for i in range(HADOOP_NUM_WORKERS)]

HADOOP_NODEMANAGER_IPS = ["10.15.20.2"] * 3
HADOOP_NODEMANAGER_NAMES = [f"nodemanager-{i+1}" for i in range(HADOOP_NUM_WORKERS)]
HADOOP_NODEMANAGER_HOSTNAMES = [
    f"{HADOOP_NODEMANAGER_NAMES[i]}.mavasbel.vpn.itam.mx"
    for i in range(HADOOP_NUM_WORKERS)
]
HADOOP_NODEMANAGER_WEBUI_PORTS = [8050 + (i * 10) for i in range(HADOOP_NUM_WORKERS)]
HADOOP_NODEMANAGER_RPC_PORTS = [8051 + (i * 10) for i in range(HADOOP_NUM_WORKERS)]

HADOOP_WORKDIR = "/opt/hadoop/work-dir"
HADOOP_NAMENODE_NAMEDIR = "/opt/hadoop/dfs/name"
HADOOP_DATANODE_DATADIR = "/opt/hadoop/dfs/data"

HADOOP_HDFS_DATADIR = "/opt/hadoop/work-dir"

In [2]:
import os
from pathlib import Path

LOCALHOST_WORKDIR = f"{os.path.join(os.path.relpath(Path.cwd()))}"
DOCKER_MOUNTDIR = os.path.join(LOCALHOST_WORKDIR, "mount")

Path(DOCKER_MOUNTDIR).mkdir(parents=True, exist_ok=True)

# Stop hadoop-cluster.docker-compose.yml

In [3]:
!docker compose -f hadoop-cluster.docker-compose.yml down -v

 Container nodemanager-3  Stopping
 Container nodemanager-1  Stopping
 Container nodemanager-2  Stopping
 Container nodemanager-3  Stopped
 Container nodemanager-3  Removing
 Container nodemanager-3  Removed
 Container datanode-3  Stopping
 Container nodemanager-2  Stopped
 Container nodemanager-2  Removing
 Container nodemanager-2  Removed
 Container datanode-2  Stopping
 Container nodemanager-1  Stopped
 Container nodemanager-1  Removing
 Container nodemanager-1  Removed
 Container datanode-1  Stopping
 Container resourcemanager  Stopping
 Container datanode-3  Stopped
 Container datanode-3  Removing
 Container datanode-3  Removed
 Container datanode-2  Stopped
 Container datanode-2  Removing
 Container datanode-2  Removed
 Container resourcemanager  Stopped
 Container resourcemanager  Removing
 Container resourcemanager  Removed
 Container datanode-1  Stopped
 Container datanode-1  Removing
 Container datanode-1  Removed
 Container namenode  Stopping
 Container namenode  Stopped
 Co

In [4]:
import shutil

if HADOOP_START_FROM_SCRATCH:
    shutil.rmtree(DOCKER_MOUNTDIR, ignore_errors=True)
    Path(DOCKER_MOUNTDIR).mkdir(parents=True, exist_ok=True)
    

# Start hadoop-cluster.docker-compose.yml

In [5]:
import os
import yaml
from IPython.display import Markdown, display

env_content = {
    "HADOOP_HOME": "/opt/hadoop",
    "HADOOP_HEAPSIZE_MAX": "1536",
    "YARN_HEAPSIZE": "1536",
    "MAPRED-SITE.XML_mapreduce.application.classpath": ":".join(
        [
            "$HADOOP_HOME/share/hadoop/mapreduce/*",
            "$HADOOP_HOME/share/hadoop/common/*",
            "$HADOOP_HOME/share/hadoop/common/lib/*",
            "$HADOOP_HOME/share/hadoop/hdfs/*",
            "$HADOOP_HOME/share/hadoop/hdfs/lib/*",
            "$HADOOP_HOME/share/hadoop/yarn/*",
            "$HADOOP_HOME/share/hadoop/yarn/lib/*",
        ]
    ),
    "MAPRED-SITE.XML_mapreduce.framework.name": "yarn",
    "CORE-SITE.XML_dfs.replication": HADOOP_REPLICATION,
    "CORE-SITE.XML_fs.defaultFS": f"hdfs://{HADOOP_NAMENODE_HOSTNAME}:{HADOOP_NAMENODE_PORT}",
    "YARN-SITE.XML_yarn.resourcemanager.hostname": HADOOP_RESOURCEMANAGER_HOSTNAME,
}
with open("envs/common.env", "w") as f:
    for key, value in env_content.items():
        f.write(f"{key}={value}\n")

hadoop_compose_file_name = "hadoop-cluster.docker-compose.yml"
hadoop_compose_dict = {
    "name": "hadoop-cluster",
    "networks": {"hadoop-cluster": {"driver": "bridge"}},
    "services": {
        "namenode": {
            "image": "apache/hadoop:3.4.2",
            "container_name": "namenode",
            "command": [
                "bash",
                "-c",
                " ".join(
                    [
                        f"sudo chown -R $(id -u hadoop):$(id -g hadoop) {HADOOP_WORKDIR} &&",
                        f"sudo mkdir -p {HADOOP_NAMENODE_NAMEDIR} &&",
                        f"sudo chown -R $(id -u hadoop):$(id -g hadoop) {HADOOP_NAMENODE_NAMEDIR} &&",
                        f"if [ ! -d {HADOOP_NAMENODE_NAMEDIR}/current ]; then hdfs namenode -Ddfs.namenode.name.dir={HADOOP_NAMENODE_NAMEDIR} -format -force; fi &&",
                        "hdfs",
                        "namenode",
                        f"-Dfs.defaultFS=hdfs://{HADOOP_NAMENODE_HOSTNAME}:{HADOOP_NAMENODE_PORT}",
                        f"-Ddfs.replication={HADOOP_REPLICATION}",
                        f"-Ddfs.namenode.name.dir={HADOOP_NAMENODE_NAMEDIR}",
                        "-Ddfs.namenode.name.dir.perm=775",
                        "-Ddfs.permissions.enabled=false",
                        f"-Ddfs.namenode.rpc-address=0.0.0.0:{HADOOP_NAMENODE_PORT}",
                        f"-Ddfs.namenode.http-address=0.0.0.0:{HADOOP_NAMENODE_WEBUI_PORT}",
                    ]
                ),
            ],
            "env_file": ["envs/common.env"],
            "volumes": [
                f"{os.path.join(DOCKER_MOUNTDIR,"namenode","work-dir")}:{HADOOP_WORKDIR}",
                f"{os.path.join(DOCKER_MOUNTDIR,"namenode","name-dir")}:{HADOOP_NAMENODE_NAMEDIR}",
            ],
            "networks": ["hadoop-cluster"],
            "hostname": HADOOP_NAMENODE_HOSTNAME,
            "ports": [
                f"{HADOOP_NAMENODE_PORT}:{HADOOP_NAMENODE_PORT}",
                f"{HADOOP_NAMENODE_WEBUI_PORT}:{HADOOP_NAMENODE_WEBUI_PORT}",
            ],
            "extra_hosts": [
                f"{DOCKER_INTERNAL_HOST}:host-gateway",
            ],
            "dns": DOCKER_DNS,
            "deploy": {"resources": {"limits": {"memory": "2048M"}}},
        },
        "resourcemanager": {
            "image": "apache/hadoop:3.4.2",
            "container_name": "resourcemanager",
            "command": [
                "bash",
                "-c",
                " ".join(
                    [
                        f"sudo chown -R $(id -u hadoop):$(id -g hadoop) {HADOOP_WORKDIR} &&",
                        "yarn",
                        "resourcemanager",
                        f"-Dfs.defaultFS=hdfs://{HADOOP_NAMENODE_HOSTNAME}:{HADOOP_NAMENODE_PORT}",
                        f"-Dyarn.resourcemanager.hostname={HADOOP_RESOURCEMANAGER_HOSTNAME}",
                        f"-Dyarn.resourcemanager.webapp.address=0.0.0.0:{HADOOP_RESOURCEMANAGER_WEBUI_PORT}",
                        f"-Dyarn.resourcemanager.address=0.0.0.0:{HADOOP_RESOURCEMANAGER_RPC_APP_MANAGER_PORT}",
                        f"-Dyarn.resourcemanager.scheduler.address=0.0.0.0:{HADOOP_RESOURCEMANAGER_SCHEDULER_PORT}",
                        f"-Dyarn.resourcemanager.resource-tracker.address=0.0.0.0:{HADOOP_RESOURCEMANAGER_TRACKER_PORT}",
                        f"-Dyarn.resourcemanager.admin.address=0.0.0.0:{HADOOP_RESOURCEMANAGER_ADMIN_PORT}",
                    ]
                ),
            ],
            "env_file": ["envs/common.env"],
            "volumes": [
                f"{os.path.join(DOCKER_MOUNTDIR,"resourcemanager","work-dir")}:{HADOOP_WORKDIR}",
            ],
            "networks": ["hadoop-cluster"],
            "hostname": HADOOP_RESOURCEMANAGER_HOSTNAME,
            "ports": [
                f"{HADOOP_RESOURCEMANAGER_WEBUI_PORT}:{HADOOP_RESOURCEMANAGER_WEBUI_PORT}",
                f"{HADOOP_RESOURCEMANAGER_RPC_APP_MANAGER_PORT}:{HADOOP_RESOURCEMANAGER_RPC_APP_MANAGER_PORT}",
                f"{HADOOP_RESOURCEMANAGER_SCHEDULER_PORT}:{HADOOP_RESOURCEMANAGER_SCHEDULER_PORT}",
                f"{HADOOP_RESOURCEMANAGER_TRACKER_PORT}:{HADOOP_RESOURCEMANAGER_TRACKER_PORT}",
                f"{HADOOP_RESOURCEMANAGER_ADMIN_PORT}:{HADOOP_RESOURCEMANAGER_ADMIN_PORT}",
            ],
            "extra_hosts": [
                f"{DOCKER_INTERNAL_HOST}:host-gateway",
            ],
            "dns": DOCKER_DNS,
            "depends_on": {"namenode": {"condition": "service_started"}},
            "deploy": {"resources": {"limits": {"memory": "2048M"}}},
        },
    },
}

# Programmatically add DataNodes and NodeManagers
for i in range(0, HADOOP_NUM_WORKERS):

    hadoop_compose_dict["services"][HADOOP_DATANODE_NAMES[i]] = {
        "image": "apache/hadoop:3.4.2",
        "container_name": HADOOP_DATANODE_NAMES[i],
        "command": [
            "bash",
            "-c",
            " ".join(
                [
                    f"sudo chown -R $(id -u hadoop):$(id -g hadoop) {HADOOP_WORKDIR} &&",
                    f"sudo mkdir -p {HADOOP_DATANODE_DATADIR} &&",
                    f"sudo chown -R $(id -u hadoop):$(id -g hadoop) {HADOOP_DATANODE_DATADIR} &&",
                    "hdfs",
                    "datanode",
                    f"-Dfs.defaultFS=hdfs://{HADOOP_NAMENODE_HOSTNAME}:{HADOOP_NAMENODE_PORT}",
                    f"-Ddfs.datanode.data.dir={HADOOP_DATANODE_DATADIR}",
                    "-Ddfs.datanode.data.dir.perm=775",
                    "-Ddfs.permissions.enabled=false",
                    f"-Ddfs.datanode.address=0.0.0.0:{HADOOP_DATANODE_TRANSFER_PORTS[i]}",
                    f"-Ddfs.datanode.http.address=0.0.0.0:{HADOOP_DATANODE_WEBUI_PORTS[i]}",
                    f"-Ddfs.datanode.ipc.address=0.0.0.0:{HADOOP_DATANODE_IPC_PORTS[i]}",
                    "-Ddfs.datanode.use.datanode.hostname=true",
                ]
            ),
        ],
        "env_file": ["envs/common.env"],
        "volumes": [
            f"{os.path.join(DOCKER_MOUNTDIR,HADOOP_DATANODE_NAMES[i],"work-dir")}:{HADOOP_WORKDIR}",
            f"{os.path.join(DOCKER_MOUNTDIR,HADOOP_DATANODE_NAMES[i],"data-dir")}:{HADOOP_DATANODE_DATADIR}",
        ],
        "networks": ["hadoop-cluster"],
        "hostname": HADOOP_DATANODE_HOSTNAMES[i],
        "ports": [
            f"{HADOOP_DATANODE_WEBUI_PORTS[i]}:{HADOOP_DATANODE_WEBUI_PORTS[i]}",
            f"{HADOOP_DATANODE_TRANSFER_PORTS[i]}:{HADOOP_DATANODE_TRANSFER_PORTS[i]}",
            f"{HADOOP_DATANODE_IPC_PORTS[i]}:{HADOOP_DATANODE_IPC_PORTS[i]}",
        ],
        "extra_hosts": [
            f"{DOCKER_INTERNAL_HOST}:host-gateway",
        ],
        "dns": DOCKER_DNS,
        "deploy": {"resources": {"limits": {"memory": "2048M"}}},
        "depends_on": {
            "namenode": {"condition": "service_started"},
            "resourcemanager": {"condition": "service_started"},
        }
        | {
            HADOOP_DATANODE_NAMES[j]: {"condition": "service_started"}
            for j in range(0, i)
        },
    }

    hadoop_compose_dict["services"][HADOOP_NODEMANAGER_NAMES[i]] = {
        "image": "apache/hadoop:3.4.2",
        "container_name": HADOOP_NODEMANAGER_NAMES[i],
        "command": [
            "bash",
            "-c",
            " ".join(
                [
                    f"sudo chown -R $(id -u hadoop):$(id -g hadoop) {HADOOP_WORKDIR} &&",
                    "yarn",
                    "nodemanager",
                    f"-Dyarn.resourcemanager.hostname={HADOOP_RESOURCEMANAGER_HOSTNAME}",
                    "-Dyarn.nodemanager.aux-services=mapreduce_shuffle",
                    f"-Dyarn.resourcemanager.resource-tracker.address={HADOOP_RESOURCEMANAGER_HOSTNAME}:{HADOOP_RESOURCEMANAGER_TRACKER_PORT}",
                    f"-Dyarn.nodemanager.address=0.0.0.0:{HADOOP_NODEMANAGER_RPC_PORTS[i]}",
                    f"-Dyarn.nodemanager.webapp.address=0.0.0.0:{HADOOP_DATANODE_WEBUI_PORTS[i]}",
                ]
            ),
        ],
        "env_file": ["envs/common.env"],
        "volumes": [
            f"{os.path.join(DOCKER_MOUNTDIR,HADOOP_NODEMANAGER_NAMES[i],"work-dir")}:{HADOOP_WORKDIR}",
        ],
        "networks": ["hadoop-cluster"],
        "hostname": HADOOP_NODEMANAGER_HOSTNAMES[i],
        "ports": [
            f"{HADOOP_NODEMANAGER_WEBUI_PORTS[i]}:{HADOOP_NODEMANAGER_WEBUI_PORTS[i]}",
            f"{HADOOP_NODEMANAGER_RPC_PORTS[i]}:{HADOOP_NODEMANAGER_RPC_PORTS[i]}",
        ],
        "extra_hosts": [
            f"{DOCKER_INTERNAL_HOST}:host-gateway",
        ],
        "dns": DOCKER_DNS,
        "deploy": {"resources": {"limits": {"memory": "2048M"}}},
        "depends_on": {
            "namenode": {"condition": "service_started"},
            "resourcemanager": {"condition": "service_started"},
        }
        | {
            HADOOP_DATANODE_NAMES[j]: {"condition": "service_started"}
            for j in range(HADOOP_NUM_WORKERS)
        }
        | {
            HADOOP_NODEMANAGER_NAMES[j]: {"condition": "service_started"}
            for j in range(0, i)
        },
    }


hadoop_compose_yaml_contents = yaml.dump(
    hadoop_compose_dict, default_flow_style=False, sort_keys=False, indent=4
)
with open(
    os.path.join(LOCALHOST_WORKDIR, hadoop_compose_file_name), "w"
) as hadoop_compose_file:
    hadoop_compose_file.write(hadoop_compose_yaml_contents)
print(
    f"Successfully created: {os.path.abspath(os.path.join(LOCALHOST_WORKDIR,hadoop_compose_file_name))}"
)
display(Markdown(f"```yaml\n{hadoop_compose_yaml_contents}\n```"))

Successfully created: c:\Users\Marco\Documents\ITAM\2026 - Bases de Datos No Relacionales\code\hadoop\hadoop-cluster.docker-compose.yml


```yaml
name: hadoop-cluster
networks:
    hadoop-cluster:
        driver: bridge
services:
    namenode:
        image: apache/hadoop:3.4.2
        container_name: namenode
        command:
        - bash
        - -c
        - sudo chown -R $(id -u hadoop):$(id -g hadoop) /opt/hadoop/work-dir && sudo
            mkdir -p /opt/hadoop/dfs/name && sudo chown -R $(id -u hadoop):$(id -g
            hadoop) /opt/hadoop/dfs/name && if [ ! -d /opt/hadoop/dfs/name/current
            ]; then hdfs namenode -Ddfs.namenode.name.dir=/opt/hadoop/dfs/name -format
            -force; fi && hdfs namenode -Dfs.defaultFS=hdfs://namenode.mavasbel.vpn.itam.mx:8020
            -Ddfs.replication=3 -Ddfs.namenode.name.dir=/opt/hadoop/dfs/name -Ddfs.namenode.name.dir.perm=775
            -Ddfs.permissions.enabled=false -Ddfs.namenode.rpc-address=0.0.0.0:8020
            -Ddfs.namenode.http-address=0.0.0.0:9870
        env_file:
        - envs/common.env
        volumes:
        - .\mount\namenode\work-dir:/opt/hadoop/work-dir
        - .\mount\namenode\name-dir:/opt/hadoop/dfs/name
        networks:
        - hadoop-cluster
        hostname: namenode.mavasbel.vpn.itam.mx
        ports:
        - 8020:8020
        - 9870:9870
        extra_hosts:
        - host.docker.internal:host-gateway
        dns: &id001
        - 10.15.20.1
        deploy:
            resources:
                limits:
                    memory: 2048M
    resourcemanager:
        image: apache/hadoop:3.4.2
        container_name: resourcemanager
        command:
        - bash
        - -c
        - sudo chown -R $(id -u hadoop):$(id -g hadoop) /opt/hadoop/work-dir && yarn
            resourcemanager -Dfs.defaultFS=hdfs://namenode.mavasbel.vpn.itam.mx:8020
            -Dyarn.resourcemanager.hostname=resourcemanager.mavasbel.vpn.itam.mx -Dyarn.resourcemanager.webapp.address=0.0.0.0:8088
            -Dyarn.resourcemanager.address=0.0.0.0:8032 -Dyarn.resourcemanager.scheduler.address=0.0.0.0:8030
            -Dyarn.resourcemanager.resource-tracker.address=0.0.0.0:8031 -Dyarn.resourcemanager.admin.address=0.0.0.0:8033
        env_file:
        - envs/common.env
        volumes:
        - .\mount\resourcemanager\work-dir:/opt/hadoop/work-dir
        networks:
        - hadoop-cluster
        hostname: resourcemanager.mavasbel.vpn.itam.mx
        ports:
        - 8088:8088
        - 8032:8032
        - 8030:8030
        - 8031:8031
        - 8033:8033
        extra_hosts:
        - host.docker.internal:host-gateway
        dns: *id001
        depends_on:
            namenode:
                condition: service_started
        deploy:
            resources:
                limits:
                    memory: 2048M
    datanode-1:
        image: apache/hadoop:3.4.2
        container_name: datanode-1
        command:
        - bash
        - -c
        - sudo chown -R $(id -u hadoop):$(id -g hadoop) /opt/hadoop/work-dir && sudo
            mkdir -p /opt/hadoop/dfs/data && sudo chown -R $(id -u hadoop):$(id -g
            hadoop) /opt/hadoop/dfs/data && hdfs datanode -Dfs.defaultFS=hdfs://namenode.mavasbel.vpn.itam.mx:8020
            -Ddfs.datanode.data.dir=/opt/hadoop/dfs/data -Ddfs.datanode.data.dir.perm=775
            -Ddfs.permissions.enabled=false -Ddfs.datanode.address=0.0.0.0:9866 -Ddfs.datanode.http.address=0.0.0.0:9864
            -Ddfs.datanode.ipc.address=0.0.0.0:6867 -Ddfs.datanode.use.datanode.hostname=true
        env_file:
        - envs/common.env
        volumes:
        - .\mount\datanode-1\work-dir:/opt/hadoop/work-dir
        - .\mount\datanode-1\data-dir:/opt/hadoop/dfs/data
        networks:
        - hadoop-cluster
        hostname: datanode-1.mavasbel.vpn.itam.mx
        ports:
        - 9864:9864
        - 9866:9866
        - 6867:6867
        extra_hosts:
        - host.docker.internal:host-gateway
        dns: *id001
        deploy:
            resources:
                limits:
                    memory: 2048M
        depends_on:
            namenode:
                condition: service_started
            resourcemanager:
                condition: service_started
    nodemanager-1:
        image: apache/hadoop:3.4.2
        container_name: nodemanager-1
        command:
        - bash
        - -c
        - sudo chown -R $(id -u hadoop):$(id -g hadoop) /opt/hadoop/work-dir && yarn
            nodemanager -Dyarn.resourcemanager.hostname=resourcemanager.mavasbel.vpn.itam.mx
            -Dyarn.nodemanager.aux-services=mapreduce_shuffle -Dyarn.resourcemanager.resource-tracker.address=resourcemanager.mavasbel.vpn.itam.mx:8031
            -Dyarn.nodemanager.address=0.0.0.0:8051 -Dyarn.nodemanager.webapp.address=0.0.0.0:9864
        env_file:
        - envs/common.env
        volumes:
        - .\mount\nodemanager-1\work-dir:/opt/hadoop/work-dir
        networks:
        - hadoop-cluster
        hostname: nodemanager-1.mavasbel.vpn.itam.mx
        ports:
        - 8050:8050
        - 8051:8051
        extra_hosts:
        - host.docker.internal:host-gateway
        dns: *id001
        deploy:
            resources:
                limits:
                    memory: 2048M
        depends_on:
            namenode:
                condition: service_started
            resourcemanager:
                condition: service_started
            datanode-1:
                condition: service_started
            datanode-2:
                condition: service_started
            datanode-3:
                condition: service_started
    datanode-2:
        image: apache/hadoop:3.4.2
        container_name: datanode-2
        command:
        - bash
        - -c
        - sudo chown -R $(id -u hadoop):$(id -g hadoop) /opt/hadoop/work-dir && sudo
            mkdir -p /opt/hadoop/dfs/data && sudo chown -R $(id -u hadoop):$(id -g
            hadoop) /opt/hadoop/dfs/data && hdfs datanode -Dfs.defaultFS=hdfs://namenode.mavasbel.vpn.itam.mx:8020
            -Ddfs.datanode.data.dir=/opt/hadoop/dfs/data -Ddfs.datanode.data.dir.perm=775
            -Ddfs.permissions.enabled=false -Ddfs.datanode.address=0.0.0.0:9876 -Ddfs.datanode.http.address=0.0.0.0:9874
            -Ddfs.datanode.ipc.address=0.0.0.0:6877 -Ddfs.datanode.use.datanode.hostname=true
        env_file:
        - envs/common.env
        volumes:
        - .\mount\datanode-2\work-dir:/opt/hadoop/work-dir
        - .\mount\datanode-2\data-dir:/opt/hadoop/dfs/data
        networks:
        - hadoop-cluster
        hostname: datanode-2.mavasbel.vpn.itam.mx
        ports:
        - 9874:9874
        - 9876:9876
        - 6877:6877
        extra_hosts:
        - host.docker.internal:host-gateway
        dns: *id001
        deploy:
            resources:
                limits:
                    memory: 2048M
        depends_on:
            namenode:
                condition: service_started
            resourcemanager:
                condition: service_started
            datanode-1:
                condition: service_started
    nodemanager-2:
        image: apache/hadoop:3.4.2
        container_name: nodemanager-2
        command:
        - bash
        - -c
        - sudo chown -R $(id -u hadoop):$(id -g hadoop) /opt/hadoop/work-dir && yarn
            nodemanager -Dyarn.resourcemanager.hostname=resourcemanager.mavasbel.vpn.itam.mx
            -Dyarn.nodemanager.aux-services=mapreduce_shuffle -Dyarn.resourcemanager.resource-tracker.address=resourcemanager.mavasbel.vpn.itam.mx:8031
            -Dyarn.nodemanager.address=0.0.0.0:8061 -Dyarn.nodemanager.webapp.address=0.0.0.0:9874
        env_file:
        - envs/common.env
        volumes:
        - .\mount\nodemanager-2\work-dir:/opt/hadoop/work-dir
        networks:
        - hadoop-cluster
        hostname: nodemanager-2.mavasbel.vpn.itam.mx
        ports:
        - 8060:8060
        - 8061:8061
        extra_hosts:
        - host.docker.internal:host-gateway
        dns: *id001
        deploy:
            resources:
                limits:
                    memory: 2048M
        depends_on:
            namenode:
                condition: service_started
            resourcemanager:
                condition: service_started
            datanode-1:
                condition: service_started
            datanode-2:
                condition: service_started
            datanode-3:
                condition: service_started
            nodemanager-1:
                condition: service_started
    datanode-3:
        image: apache/hadoop:3.4.2
        container_name: datanode-3
        command:
        - bash
        - -c
        - sudo chown -R $(id -u hadoop):$(id -g hadoop) /opt/hadoop/work-dir && sudo
            mkdir -p /opt/hadoop/dfs/data && sudo chown -R $(id -u hadoop):$(id -g
            hadoop) /opt/hadoop/dfs/data && hdfs datanode -Dfs.defaultFS=hdfs://namenode.mavasbel.vpn.itam.mx:8020
            -Ddfs.datanode.data.dir=/opt/hadoop/dfs/data -Ddfs.datanode.data.dir.perm=775
            -Ddfs.permissions.enabled=false -Ddfs.datanode.address=0.0.0.0:9886 -Ddfs.datanode.http.address=0.0.0.0:9884
            -Ddfs.datanode.ipc.address=0.0.0.0:6887 -Ddfs.datanode.use.datanode.hostname=true
        env_file:
        - envs/common.env
        volumes:
        - .\mount\datanode-3\work-dir:/opt/hadoop/work-dir
        - .\mount\datanode-3\data-dir:/opt/hadoop/dfs/data
        networks:
        - hadoop-cluster
        hostname: datanode-3.mavasbel.vpn.itam.mx
        ports:
        - 9884:9884
        - 9886:9886
        - 6887:6887
        extra_hosts:
        - host.docker.internal:host-gateway
        dns: *id001
        deploy:
            resources:
                limits:
                    memory: 2048M
        depends_on:
            namenode:
                condition: service_started
            resourcemanager:
                condition: service_started
            datanode-1:
                condition: service_started
            datanode-2:
                condition: service_started
    nodemanager-3:
        image: apache/hadoop:3.4.2
        container_name: nodemanager-3
        command:
        - bash
        - -c
        - sudo chown -R $(id -u hadoop):$(id -g hadoop) /opt/hadoop/work-dir && yarn
            nodemanager -Dyarn.resourcemanager.hostname=resourcemanager.mavasbel.vpn.itam.mx
            -Dyarn.nodemanager.aux-services=mapreduce_shuffle -Dyarn.resourcemanager.resource-tracker.address=resourcemanager.mavasbel.vpn.itam.mx:8031
            -Dyarn.nodemanager.address=0.0.0.0:8071 -Dyarn.nodemanager.webapp.address=0.0.0.0:9884
        env_file:
        - envs/common.env
        volumes:
        - .\mount\nodemanager-3\work-dir:/opt/hadoop/work-dir
        networks:
        - hadoop-cluster
        hostname: nodemanager-3.mavasbel.vpn.itam.mx
        ports:
        - 8070:8070
        - 8071:8071
        extra_hosts:
        - host.docker.internal:host-gateway
        dns: *id001
        deploy:
            resources:
                limits:
                    memory: 2048M
        depends_on:
            namenode:
                condition: service_started
            resourcemanager:
                condition: service_started
            datanode-1:
                condition: service_started
            datanode-2:
                condition: service_started
            datanode-3:
                condition: service_started
            nodemanager-1:
                condition: service_started
            nodemanager-2:
                condition: service_started

```

In [6]:
!docker compose -f hadoop-cluster.docker-compose.yml up -d --wait

 Network hadoop-cluster_hadoop-cluster  Creating
 Network hadoop-cluster_hadoop-cluster  Created
 Container namenode  Creating
 Container namenode  Created
 Container resourcemanager  Creating
 Container resourcemanager  Created
 Container datanode-1  Creating
 Container datanode-1  Created
 Container datanode-2  Creating
 Container datanode-2  Created
 Container datanode-3  Creating
 Container datanode-3  Created
 Container nodemanager-1  Creating
 Container nodemanager-1  Created
 Container nodemanager-2  Creating
 Container nodemanager-2  Created
 Container nodemanager-3  Creating
 Container nodemanager-3  Created
 Container namenode  Starting
 Container namenode  Started
 Container resourcemanager  Starting
 Container resourcemanager  Started
 Container datanode-1  Starting
 Container datanode-1  Started
 Container datanode-2  Starting
 Container datanode-2  Started
 Container datanode-3  Starting
 Container datanode-3  Started
 Container nodemanager-1  Starting
 Container nodemana