diff --git a/README.md b/README.md index 359e6c4ac..88d19cfaa 100644 --- a/README.md +++ b/README.md @@ -129,3 +129,5 @@ Please refer to the README-HELM.md 43. ~~vrouter agent entrypoint: add ability to configure metadata_proxy_secret=contrail in [METADATA]~~ 44. Sort out with KUBERNETES_public_fip_pool 45. ~~Ubuntu containers~~ +46. Introduce base container for agent and move agent specific functions to it +47. Remove net-watchdog contrail as the problem with vhost0 rest by agent be solved (dpdk case) diff --git a/common.env.sample b/common.env.sample index 6f87319c5..0741b3b09 100644 --- a/common.env.sample +++ b/common.env.sample @@ -156,7 +156,7 @@ # --- # Agent mode. For DPDK case should be set to 'dpdk'. -#AGENT_MODE=${AGENT_MODE:-'vrouter'} +#AGENT_MODE=${AGENT_MODE:-'nic'} # --- # Cpu coremask, it is used for DPDK mode for cores pinning diff --git a/containers/agent/net-watchdog/Dockerfile b/containers/agent/net-watchdog/Dockerfile new file mode 100644 index 000000000..b0eae56bd --- /dev/null +++ b/containers/agent/net-watchdog/Dockerfile @@ -0,0 +1,18 @@ +ARG CONTRAIL_REGISTRY=localhost:5000 +ARG CONTRAIL_CONTAINER_TAG=4.1.0.0-6-centos7-newton +FROM ${CONTRAIL_REGISTRY}/contrail-base:${CONTRAIL_CONTAINER_TAG} + +RUN yum install -y \ + ethtool pciutils initscripts NetworkManager \ + contrail-vrouter-utils && \ + yum clean all && \ + rm -rf /var/cache/yum && \ + ldconfig + +COPY entrypoint.sh / + +ENTRYPOINT ["/entrypoint.sh"] + +LABEL net.juniper.nodemgr.filter.name=contrail-vrouter-network-init +LABEL net.juniper.contrail=agent +LABEL net.juniper.node=agent diff --git a/containers/agent/net-watchdog/entrypoint.sh b/containers/agent/net-watchdog/entrypoint.sh new file mode 100755 index 000000000..f6cd532c1 --- /dev/null +++ b/containers/agent/net-watchdog/entrypoint.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +source /common.sh +source /agent-functions.sh + +TRACK_VHOST0=${TRACK_VHOST0:-'true'} +TRACK_VHOST0_PAUSE=${TRACK_VHOST0_PAUSE:-5} + +while (true) ; do + + echo
"INFO: ip address show:" + ip address show + + init_vhost0 + + if [[ "$TRACK_VHOST0" != 'true' ]] ; then + echo "INFO: TRACK_VHOST0 is $TRACK_VHOST0. Stop stracking." + break + fi + echo "INFO: wait for $TRACK_VHOST0_PAUSE seconds..." + sleep $TRACK_VHOST0_PAUSE +done + +exec $@ diff --git a/containers/agent/vrouter-dpdk/entrypoint.sh b/containers/agent/vrouter-dpdk/entrypoint.sh index 07e813262..deaf114a3 100755 --- a/containers/agent/vrouter-dpdk/entrypoint.sh +++ b/containers/agent/vrouter-dpdk/entrypoint.sh @@ -3,13 +3,25 @@ source /common.sh echo "INFO: agent started in $AGENT_MODE mode" -IFS=' ' read -r phys_int phys_int_mac <<< $(get_physical_nic_and_mac) -pci_address=$(get_pci_address_for_nic $phys_int) + +function assert_file() { + local file=$1 + if [[ ! -f "$file" ]] ; then + echo "ERROR: there is no file $file" + exit -1 + fi +} + +phys_int=`get_default_physical_iface` +binding_data_dir='/var/run/vrouter' +assert_file "$binding_data_dir/${phys_int}_mac" +phys_int_mac=`cat "$binding_data_dir/${phys_int}_mac"` +assert_file "$binding_data_dir/${phys_int}_pci" +pci_address=`cat "$binding_data_dir/${phys_int}_pci"` echo "INFO: Physical interface: $phys_int, mac=$phys_int_mac, pci=$pci_address" -VROUTER_GATEWAY=${VROUTER_GATEWAY:-`get_default_gateway_for_nic $phys_int`} -vrouter_cidr=$(get_cidr_for_nic $phys_int) -echo "INFO: $phys_int cidr $vrouter_cidr, gateway $VROUTER_GATEWAY" +# ensure device is bind to dpdk driver +wait_device_for_driver $DPDK_UIO_DRIVER $pci_address # TODO: consider to avoid taskset here and leave to manage by Docker cmd="$@" @@ -33,23 +45,7 @@ mkdir -p -m 777 /var/crashes rm -f '/run/.rte_config' # set maximum socket buffer size to (max hold flows entries * 9160 bytes) -sysctl -w net.core.wmem_max=9160000 - -# bind iface to dpdk uio driver before start dpdk agent -bind_dev_to_driver $DPDK_UIO_DRIVER $phys_int - -function background_init() { - wait_dpdk_agent_start - if [[ -n $cmd ]] ; then - local pname=`echo $cmd | cut -d ' ' -f 1` - 
enable_hugepages_to_coredump "$pname" - fi - create_vhost0_dpdk $phys_int $phys_int_mac $vrouter_cidr $VROUTER_GATEWAY -} - -# TODO: tart backgound job to enable hugepase to coredump -# and initialize vhost0 -background_init & +set_ctl net.core.wmem_max 9160000 echo "INFO: exec '$real_cmd'" exec $real_cmd diff --git a/containers/agent/vrouter-init-kernel-dpdk/Dockerfile.centos b/containers/agent/vrouter-init-kernel-dpdk/Dockerfile.centos index 49c2109fe..1530cfb08 100644 --- a/containers/agent/vrouter-init-kernel-dpdk/Dockerfile.centos +++ b/containers/agent/vrouter-init-kernel-dpdk/Dockerfile.centos @@ -2,7 +2,10 @@ ARG CONTRAIL_REGISTRY=localhost:5000 ARG CONTRAIL_CONTAINER_TAG=4.1.0.0-6-centos7-newton FROM ${CONTRAIL_REGISTRY}/contrail-base:${CONTRAIL_CONTAINER_TAG} -RUN yum clean all -y && \ +RUN yum install -y \ + ethtool pciutils sysvinit-tools \ + contrail-vrouter-utils contrail-vrouter-dpdk-init && \ + yum clean all -y && \ rm -rf /var/cache/yum COPY entrypoint.sh / diff --git a/containers/agent/vrouter-init-kernel-dpdk/entrypoint.sh b/containers/agent/vrouter-init-kernel-dpdk/entrypoint.sh index bfa890a29..50816c5e2 100755 --- a/containers/agent/vrouter-init-kernel-dpdk/entrypoint.sh +++ b/containers/agent/vrouter-init-kernel-dpdk/entrypoint.sh @@ -1,60 +1,15 @@ -#!/bin/bash -x +#!/bin/bash source /common.sh HUGE_PAGES_DIR=${HUGE_PAGES_DIR:-'/dev/hugepages'} -if [[ ! -d "$HUGE_PAGES_DIR" ]] ; then - echo "WARNING: There is no $HUGE_PAGES_DIR mounted from host. Try to create and mount hugetlbfs." - if ! mkdir -p $HUGE_PAGES_DIR ; then - echo "ERROR: failed to create $HUGE_PAGES_DIR" - exit -1 - fi - if ! mount -t hugetlbfs hugetlbfs $HUGE_PAGES_DIR ; then - echo "ERROR: failed to mount hugetlbfs to $HUGE_PAGES_DIR" - exit -1 - fi -fi - -if [[ ! -d "$HUGE_PAGES_DIR" ]] ; then - echo "ERROR: There is no $HUGE_PAGES_DIR. Probably HugeTables are anuvailable on the host." 
- exit -1 -fi - -function set_ctl() { - local var=$1 - local value=$2 - if grep -q "^$var" /etc/sysctl.conf ; then - sed -i "s/^$var.*=.*/$var=$value/g" /etc/sysctl.conf - else - echo "$var=$value" >> /etc/sysctl.conf - fi -} +ensure_hugepages $HUGE_PAGES_DIR set_ctl vm.nr_hugepages ${HUGE_PAGES} set_ctl vm.max_map_count 128960 set_ctl net.ipv4.tcp_keepalive_time 5 set_ctl net.ipv4.tcp_keepalive_probes 5 set_ctl net.ipv4.tcp_keepalive_intvl 1 -sysctl --system - -function load_kernel_module() { - local module=$1 - shift 1 - local opts=$@ - echo "INFO: load $module kernel module" - if ! modprobe -v "$module" $opts ; then - echo "ERROR: failed to load $module driver" - exit -1 - fi -} - -function unload_kernel_module() { - local module=$1 - echo "INFO: unload $module kernel module" - if ! rmmod $module ; then - echo "WARNING: Failed to unload $module driver" - fi -} load_kernel_module uio load_kernel_module "$DPDK_UIO_DRIVER" @@ -64,4 +19,31 @@ if ! is_ubuntu_xenial && ! is_centos; then load_kernel_module rte_kni kthread_mode=multiple fi -exec "$@" +echo "INFO: agent $AGENT_MODE mode" +IFS=' ' read -r phys_int phys_int_mac <<< $(get_physical_nic_and_mac) +pci_address=$(get_pci_address_for_nic $phys_int) +default_gw_metric=`get_default_gateway_for_nic_metric $phys_int` +echo "INFO: Physical interface: $phys_int, mac=$phys_int_mac, pci=$pci_address" + +# save data for next usage in network init container +# TODO: check that data valid for the case if container is re-run again by some reason +addrs=$(get_ips_for_nic $phys_int) +gateway=${VROUTER_GATEWAY:-"$default_gw_metric"} +binding_data_dir='/var/run/vrouter' +mkdir -p $binding_data_dir +echo "INFO: addrs=[$addrs], gateway=$gateway" +echo "$phys_int" > $binding_data_dir/${phys_int}_nic +echo "$phys_int_mac" > $binding_data_dir/${phys_int}_mac +echo "$pci_address" > $binding_data_dir/${phys_int}_pci +echo "$addrs" > $binding_data_dir/${phys_int}_ip_addresses +echo "$gateway" > $binding_data_dir/${phys_int}_gateway + 
+if [[ "$phys_int" == "vhost0" ]] ; then + echo "ERROR: it is not expected the vhost0 is up and running" + exit -1 +fi + +# bind iface to dpdk uio driver before start dpdk agent +bind_devs_to_driver $DPDK_UIO_DRIVER $phys_int + +exec $@ diff --git a/containers/agent/vrouter-init-kernel/Dockerfile.centos b/containers/agent/vrouter-init-kernel/Dockerfile.centos index a53f242d4..0e1234a06 100644 --- a/containers/agent/vrouter-init-kernel/Dockerfile.centos +++ b/containers/agent/vrouter-init-kernel/Dockerfile.centos @@ -2,7 +2,7 @@ ARG CONTRAIL_REGISTRY=localhost:5000 ARG CONTRAIL_CONTAINER_TAG=4.1.0.0-6-centos7-newton FROM ${CONTRAIL_REGISTRY}/contrail-base:${CONTRAIL_CONTAINER_TAG} -RUN yum install -y contrail-vrouter contrail-vrouter-utils initscripts dhclient && \ +RUN yum install -y contrail-vrouter contrail-vrouter-utils && \ yum clean all && \ rm -rf /var/cache/yum diff --git a/containers/agent/vrouter-init-kernel/entrypoint.sh b/containers/agent/vrouter-init-kernel/entrypoint.sh index cafd13028..52e36c84e 100755 --- a/containers/agent/vrouter-init-kernel/entrypoint.sh +++ b/containers/agent/vrouter-init-kernel/entrypoint.sh @@ -55,88 +55,4 @@ else fi fi -# VRouter specific code starts here -function pkt_setup () { - for f in /sys/class/net/$1/queues/rx-* - do - q="$(echo $f | cut -d '-' -f2)" - r=$(($q%32)) - s=$(($q/32)) - ((mask=1<<$r)) - str=(`printf "%x" $mask`) - if [ $s -gt 0 ]; then - for ((i=0; i < $s; i++)) - do - str+=,00000000 - done - fi - echo $str > $f/rps_cpus - done - ip link set dev $1 up -} - -function insert_vrouter() { - if [ -f /sys/class/net/pkt1/queues/rx-0/rps_cpus ]; then - pkt_setup pkt1 - fi - if [ -f /sys/class/net/pkt2/queues/rx-0/rps_cpus ]; then - pkt_setup pkt2 - fi - if [ -f /sys/class/net/pkt3/queues/rx-0/rps_cpus ]; then - pkt_setup pkt3 - fi - vif --create vhost0 --mac $phys_int_mac - vif --add $phys_int --mac $phys_int_mac --vrf 0 --vhost-phys --type physical - vif --add vhost0 --mac $phys_int_mac --vrf 0 --type vhost 
--xconnect $phys_int - return 0 -} - -echo "INFO: ip address show:" -ip address show - -IFS=' ' read -r phys_int phys_int_mac <<< $(get_physical_nic_and_mac) -echo "INFO: Physical interface: $phys_int, mac=$phys_int_mac" - -# Probe vhost0 -vrouter_cidr="$(get_cidr_for_nic vhost0)" - -if [[ -e /etc/sysconfig/network-scripts/ifcfg-${phys_int} && ! -e /etc/sysconfig/network-scripts/ifcfg-vhost0 ]]; then - echo "INFO: creating vhost0" - insert_vrouter - ifdown ${phys_int} - cp -f /etc/sysconfig/network-scripts/ifcfg-${phys_int} /etc/sysconfig/network-scripts/ifcfg-vhost0 - sed -i "s/${phys_int}/vhost0/g" /etc/sysconfig/network-scripts/ifcfg-vhost0 - sed -ri "/(DEVICE|ONBOOT|NM_CONTROLLED)/! s/.*/#commented_by_contrail& /" /etc/sysconfig/network-scripts/ifcfg-${phys_int} - ifup ${phys_int} - ifup vhost0 - while IFS= read -r line - do - ip route del $line - done < <(ip route sh |grep ${phys_int}) -elif [[ "$vrouter_cidr" == '' ]] ; then - echo "INFO: creating vhost0" - addrs=$(ip addr show dev $phys_int | grep "inet" | grep -oP "[0-9a-f\:\.]*/[0-9]* brd [0-9\.]*|[0-9a-f\:\.]*/[0-9]*") - default_gw=`ip route show dev $phys_int | grep default | head -n 1 | awk '{print $3}'` - default_gw_metric=`ip route show dev $phys_int | grep default | head -1 | grep -o "metric [0-9]*"` - VROUTER_GATEWAY=${VROUTER_GATEWAY:-"$default_gw $default_gw_metric"} - insert_vrouter - - # TODO: switch off dhcp on phys_int - echo "INFO: Changing physical interface to vhost in ip table" - ip link set vhost0 up - echo "$addrs" | while IFS= read -r line ; do - echo "Processing $line" - addr_to_del=`echo $line | cut -d ' ' -f 1` - addr_to_add=`echo $line | sed 's/brd/broadcast/'` - ip address delete $addr_to_del dev $phys_int - ip address add $addr_to_add dev vhost0 - if [[ -n "$VROUTER_GATEWAY" ]]; then - echo "INFO: set default gateway" - ip route add default via $VROUTER_GATEWAY - fi - done -else - echo "INFO: vhost0 is already up" -fi - exec $@ diff --git 
a/containers/agent/vrouter/Dockerfile.centos b/containers/agent/vrouter/Dockerfile.centos index ca8893561..5f5380066 100644 --- a/containers/agent/vrouter/Dockerfile.centos +++ b/containers/agent/vrouter/Dockerfile.centos @@ -2,7 +2,8 @@ ARG CONTRAIL_REGISTRY=localhost:5000 ARG CONTRAIL_CONTAINER_TAG=4.1.0.0-6-centos7-newton FROM ${CONTRAIL_REGISTRY}/contrail-base:${CONTRAIL_CONTAINER_TAG} -RUN yum install -y ethtool contrail-vrouter-agent contrail-vrouter-utils \ +RUN yum install -y ethtool \ + contrail-vrouter-agent contrail-vrouter-utils \ sudo python-opencontrail-vrouter-netns python-contrail-vrouter-api && \ yum clean all && \ rm -rf /var/cache/yum && \ diff --git a/containers/agent/vrouter/entrypoint.sh b/containers/agent/vrouter/entrypoint.sh index 5bbd74b52..9de2fe451 100755 --- a/containers/agent/vrouter/entrypoint.sh +++ b/containers/agent/vrouter/entrypoint.sh @@ -1,32 +1,56 @@ #!/bin/bash source /common.sh +source /agent-functions.sh HYPERVISOR_TYPE=${HYPERVISOR_TYPE:-'kvm'} VROUTER_HOSTNAME=${VROUTER_HOSTNAME:-${DEFAULT_HOSTNAME}} echo "INFO: agent started in $AGENT_MODE mode" -IFS=' ' read -r phys_int phys_int_mac <<< $(get_physical_nic_and_mac) -pci_address=$(get_pci_address_for_nic $phys_int) -echo "INFO: Physical interface: $phys_int, mac=$phys_int_mac, pci=$pci_address" -# For dpdk case is is expected vhost0 is down here -# but for regular case it shold be up -if is_dpdk ; then - nic_to_read_net_parameters=$phys_int -else - nic_to_read_net_parameters='vhost0' -fi -VROUTER_GATEWAY=${VROUTER_GATEWAY:-`get_default_gateway_for_nic $nic_to_read_net_parameters`} -vrouter_cidr=$(get_cidr_for_nic $nic_to_read_net_parameters) -echo "INFO: $nic_to_read_net_parameters cidr $vrouter_cidr, gateway $VROUTER_GATEWAY" +# wait vhost0 +while (true) ; do + # TODO: net-watchdog container does init for dpdk case, + # because vhost0 is re-created each time dpdk container + # restarted, so its initialization is needed at runtime, + # not only at init time. 
here is the TODO to remove + # that container after problem be solved at agent level. + # For Non dpdk case jsut init vhost here, + # because net-watchdog is not needed at all. + if ! is_dpdk ; then + init_vhost0 + fi + if ! wait_nic vhost0 ; then + sleep 2 + continue + fi + + # TODO: avoid duplication of reading parameters with init_vhost0 + IFS=' ' read -r phys_int phys_int_mac <<< $(get_physical_nic_and_mac) + if ! is_dpdk ; then + pci_address=$(get_pci_address_for_nic $phys_int) + else + binding_data_dir='/var/run/vrouter' + pci_address=`cat $binding_data_dir/${phys_int}_pci` + fi + + VROUTER_GATEWAY=${VROUTER_GATEWAY:-`get_default_gateway_for_nic 'vhost0'`} + vrouter_cidr=$(get_cidr_for_nic 'vhost0') + echo "INFO: Physical interface: $phys_int, mac=$phys_int_mac, pci=$pci_address" + echo "INFO: vhost0 cidr $vrouter_cidr, gateway $VROUTER_GATEWAY" + + if [[ -n "$vrouter_cidr" ]] ; then + break + fi +done + if [[ -z "$vrouter_cidr" ]] ; then - echo "ERROR: $nic_to_read_net_parameters interface is down or has no assigned IP" + echo "ERROR: vhost0 interface is down or has no assigned IP" exit -1 fi vrouter_ip=${vrouter_cidr%/*} if [[ -z "$VROUTER_GATEWAY" ]] ; then - echo "ERROR: VROUTER_GATEWAY is empty or there is no default route for $nic_to_read_net_parameters" + echo "ERROR: VROUTER_GATEWAY is empty or there is no default route for vhost0" exit -1 fi @@ -36,12 +60,10 @@ if is_dpdk ; then platform=${AGENT_MODE} physical_interface_mac = $phys_int_mac physical_interface_address = $pci_address -physical_uio_driver=${DPDK_UIO_DRIVER} +physical_uio_driver = ${DPDK_UIO_DRIVER} EOM fi -mkdir -p -m 777 /var/crashes - echo "INFO: Preparing /etc/contrail/contrail-vrouter-agent.conf" cat << EOM > /etc/contrail/contrail-vrouter-agent.conf [CONTROL-NODE] @@ -86,16 +108,19 @@ docker_command=/usr/bin/opencontrail-vrouter-docker [HYPERVISOR] type = $HYPERVISOR_TYPE EOM -echo /etc/contrail/contrail-vrouter-agent.conf +echo "INFO: /etc/contrail/contrail-vrouter-agent.conf" cat 
/etc/contrail/contrail-vrouter-agent.conf set_vnc_api_lib_ini +# TODO: move it to special provision container function provision_node_background() { wait_for_contrail_api provision_node provision_vrouter.py $vrouter_ip $VROUTER_HOSTNAME } +mkdir -p -m 777 /var/crashes + provision_node_background & -exec "$@" +exec $@ diff --git a/containers/base/agent-functions.sh b/containers/base/agent-functions.sh new file mode 100644 index 000000000..34918a677 --- /dev/null +++ b/containers/base/agent-functions.sh @@ -0,0 +1,97 @@ +#!/bin/bash + + +function init_vhost0() { + # Probe vhost0 + local vrouter_cidr="$(get_cidr_for_nic vhost0)" + if [[ "$vrouter_cidr" != '' ]] ; then + echo "INFO: vhost0 is already up" + return 0 + fi + + local phys_int='' + local phys_int_mac='' + local addrs='' + local gateway='' + if ! is_dpdk ; then + # NIC case + IFS=' ' read -r phys_int phys_int_mac <<< $(get_physical_nic_and_mac) + if [[ "$vrouter_cidr" == '' ]] ; then + addrs=$(get_ips_for_nic $phys_int) + local default_gw_metric=`get_default_gateway_for_nic_metric $phys_int` + gateway=${VROUTER_GATEWAY:-"$default_gw_metric"} + fi + echo "INFO: creating vhost0 for nic mode: nic: $phys_int, mac=$phys_int_mac" + if ! create_vhost0 $phys_int $phys_int_mac ; then + return 1 + fi + else + # DPDK case + # TODO: rework someow config pathching.. + if ! 
wait_dpdk_agent_start ; then + return 1 + fi + phys_int=`get_default_physical_iface` + local binding_data_dir='/var/run/vrouter' + phys_int_mac=`cat $binding_data_dir/${phys_int}_mac` + local pci_address=`cat $binding_data_dir/${phys_int}_pci` + cat << EOM > /etc/contrail/contrail-vrouter-agent.conf +[DEFAULT] +platform=${AGENT_MODE} +physical_interface_mac = $phys_int_mac +physical_interface_address = $pci_address +physical_uio_driver = ${DPDK_UIO_DRIVER} +EOM + if [[ "$vrouter_cidr" == '' ]] ; then + addrs=`cat $binding_data_dir/${phys_int}_ip_addresses` + gateway=${VROUTER_GATEWAY:-"$(cat $binding_data_dir/${phys_int}_gateway)"} + fi + echo "INFO: creating vhost0 for dpdk mode: nic: $phys_int, mac=$phys_int_mac" + if ! create_vhost0_dpdk $phys_int $phys_int_mac ; then + return 1 + fi + fi + + if [[ -e /etc/sysconfig/network-scripts/ifcfg-${phys_int} ]]; then + echo "INFO: creating ifcfg-vhost0 and initialize it via ifup" + if ! is_dpdk ; then + ifdown ${phys_int} + fi + pushd /etc/sysconfig/network-scripts/ + if [[ ! -f "ifcfg-${phys_int}.contrail.org" ]] ; then + cp -f ifcfg-${phys_int} ifcfg-${phys_int}.contrail.org + sed -ri "/(DEVICE|ONBOOT|NM_CONTROLLED)/! s/.*/#commented_by_contrail& /" ifcfg-${phys_int} + fi + if [[ ! -f ifcfg-vhost0 ]] ; then + sed "s/${phys_int}/vhost0/g" ifcfg-${phys_int}.contrail.org > ifcfg-vhost0 + if is_dpdk ; then + sed -ri "/NM_CONTROLLED/ s/.*/#commented_by_contrail& /" ifcfg-vhost0 + echo 'NM_CONTROLLED="no"' >> ifcfg-vhost0 + fi + fi + popd + if ! is_dpdk ; then + ifup ${phys_int} + fi + ifup vhost0 + while IFS= read -r line ; do + ip route del $line + done < <(ip route sh | grep ${phys_int}) + else + echo "INFO: there is no ifcfg-$phys_int, so initialize vhost0 manually" + # TODO: switch off dhcp on phys_int + echo "INFO: Changing physical interface to vhost in ip table" + echo "$addrs" | while IFS= read -r line ; do + if !
is_dpdk ; then + addr_to_del=`echo $line | cut -d ' ' -f 1` + ip address delete $addr_to_del dev $phys_int + fi + local addr_to_add=`echo $line | sed 's/brd/broadcast/'` + ip address add $addr_to_add dev vhost0 + done + if [[ -n "$gateway" ]]; then + echo "INFO: set default gateway" + ip route add default via $gateway + fi + fi +} diff --git a/containers/base/common.sh b/containers/base/common.sh index 011eed9f6..de315e8a3 100644 --- a/containers/base/common.sh +++ b/containers/base/common.sh @@ -159,7 +159,7 @@ sandesh_certfile=${SANDESH_CERTFILE} sandesh_ca_cert=${SANDESH_CA_CERT} EOM -AGENT_MODE=${AGENT_MODE:-'vrouter'} +AGENT_MODE=${AGENT_MODE:-'nic'} DPDK_UIO_DRIVER=${DPDK_UIO_DRIVER:-'uio_pci_generic'} CPU_CORE_MASK=${CPU_CORE_MASK:-'0x01'} HUGE_PAGES=${HUGE_PAGES:-1024} diff --git a/containers/general-base/functions.sh b/containers/general-base/functions.sh index edb7d4562..c00205f3b 100644 --- a/containers/general-base/functions.sh +++ b/containers/general-base/functions.sh @@ -62,6 +62,11 @@ function get_cidr_for_nic() { ip addr show dev $nic | grep "inet .*/.* brd " | awk '{print $2}' } +function get_ips_for_nic() { + local nic=$1 + ip addr show dev $nic | grep "inet" | grep -oP "[0-9a-f\:\.]*/[0-9]* brd [0-9\.]*|[0-9a-f\:\.]*/[0-9]*" +} + function get_default_ip() { local nic=$(get_default_nic) get_cidr_for_nic $nic | cut -d '/' -f 1 @@ -72,6 +77,13 @@ function get_default_gateway_for_nic() { ip route show dev $nic | grep default | head -n 1 | awk '{print $3}' } +function get_default_gateway_for_nic_metric() { + local nic=$1 + local default_gw=`get_default_gateway_for_nic $nic` + local default_gw_metric=`ip route show dev $nic | grep default | head -1 | grep -o "metric [0-9]*"` + echo "$default_gw $default_gw_metric" +} + function find_my_ip_and_order_for_node() { local server_typ=$1_NODES local server_list='' @@ -169,12 +181,18 @@ function get_physical_nic_and_mac() # so try to find physical nic by MAC (which should be # the same as in vhost0) nic=`vif 
--list | grep "Type:Physical HWaddr:${mac}" -B1 | head -1 | awk '{print($3)}'` - local _mac=$(get_iface_mac $nic) - if [[ -n "$_mac" ]] ; then - mac=$_mac + if [[ -n "$nic" && ! "$nic" =~ ^[0-9] ]] ; then + # NIC case, for DPDK case nic is number, so use mac from vhost0 there + local _mac=$(get_iface_mac $nic) + if [[ -n "$_mac" ]] ; then + mac=$_mac + else + echo "ERROR: unsupported agent mode" + exit -1 + fi else - # TODO: remove it after checking DPDK case - echo "INFO: DPDK case: there is no appropriate net-device for $nic" + # DPDK case, nic name is not exist, so set it to default + nic=$(get_default_physical_iface) fi else # there is no vhost0 device, so set to default @@ -208,12 +226,13 @@ function enable_hugepages_to_coredump() { function wait_nic () { local nic=$1 - local i=0 - for i in {1..60} ; do - echo "INFO: Waiting for ${nic}... $i" + local probes=${2:-60} + while (( probes > 0 )) ; do + echo "INFO: Waiting for ${nic}... tries left $probes" if [[ -L /sys/class/net/${nic} ]] ; then return 0 fi + (( probes -= 1)) sleep 2 done return 1 @@ -235,7 +254,7 @@ function wait_device_for_driver () { # TODO: move to agent specific file function is_dpdk_agent_running() { - lsof -ni:20914 > /dev/null 2>&1 + netstat -ntl | awk '{print($4)}' | grep -q ':20914' } function wait_dpdk_agent_start() { @@ -243,47 +262,84 @@ function wait_dpdk_agent_start() { for i in {1..60} ; do echo "INFO: wait DPDK agent to run... 
$i" if is_dpdk_agent_running ; then - return + return 0 fi sleep 5 done - echo "ERROR: DPDK agent is not started during timeout" - exit -1 + return 1 +} + +# VRouter specific code starts here +function pkt_setup () { + for f in /sys/class/net/$1/queues/rx-* + do + q="$(echo $f | cut -d '-' -f2)" + r=$(($q%32)) + s=$(($q/32)) + ((mask=1<<$r)) + str=(`printf "%x" $mask`) + if [ $s -gt 0 ]; then + for ((i=0; i < $s; i++)) + do + str+=,00000000 + done + fi + echo $str > $f/rps_cpus + done + ip link set dev $1 up +} + +function create_vhost0() { + local phys_int=$1 + local phys_int_mac=$2 + if [ -f /sys/class/net/pkt1/queues/rx-0/rps_cpus ]; then + pkt_setup pkt1 + fi + if [ -f /sys/class/net/pkt2/queues/rx-0/rps_cpus ]; then + pkt_setup pkt2 + fi + if [ -f /sys/class/net/pkt3/queues/rx-0/rps_cpus ]; then + pkt_setup pkt3 + fi + vif --create vhost0 --mac $phys_int_mac + vif --add $phys_int --mac $phys_int_mac --vrf 0 --vhost-phys --type physical + vif --add vhost0 --mac $phys_int_mac --vrf 0 --type vhost --xconnect $phys_int + ip link set dev vhost0 address $phys_int_mac + ip link set dev vhost0 up } function create_vhost0_dpdk() { - local nic='vhost0' local phys_int=$1 local phys_int_mac=$2 - local vrouter_cidr=$3 - local gateway=$4 - # Wait nic to be configured by agent - if ! wait_nic $nic ; then - echo "WARNING: interface $nic does not exist.. try tro create" + # Check nic is not configured by agent + if ! wait_nic vhost0 1 ; then + echo "INFO: interface vhost0 does not exist.. try tro create" # vhost0 is not present, so create vhost0 and $dev echo "INFO: Creating ${phys_int} interface with mac $phys_int_mac via vif utility..." if ! vif --add 0 --mac ${phys_int_mac} --vrf 0 --vhost-phys --type physical --pmd --id 0 ; then echo "ERROR: Failed to adding ${phys_int} interface" - exit -1 + return 1 fi - echo "INFO: Adding ${nic} interface with vif utility..." + echo "INFO: Adding vhost0 interface with vif utility..." 
# TODO: vif --xconnect seems does not work without --id parameter? - if ! vif --add ${nic} --mac ${phys_int_mac} --vrf 0 --type vhost --xconnect 0 --pmd --id 1 ; then - echo "ERROR: Failed to add ${nic} interface" - exit -1 + if ! vif --add vhost0 --mac ${phys_int_mac} --vrf 0 --type vhost --xconnect 0 --pmd --id 1 ; then + echo "ERROR: Failed to add vhost0 interface" + return 1 fi fi - - ip link set dev $nic address $phys_int_mac - ip link set dev $nic up - ip address add $vrouter_cidr dev $nic - if [[ -n "$gateway" ]] ; then - ip route add default via $gateway dev $nic + if ! ip link set dev vhost0 address $phys_int_mac ; then + echo "ERROR: Failed to set vhost0 address $phys_int_mac" + return 1 + fi + if ! ip link set dev vhost0 up ; then + echo "ERROR: Failed to up vhost0 interface" + return 1 fi } function save_pci_info() { - local pci_address=$1 + local nic=$1 + local pci_address=$2 local binding_data_dir='/var/run/vrouter' mkdir -p ${binding_data_dir} local binding_data_file="${binding_data_dir}/${pci_address}" @@ -297,7 +353,7 @@ function save_pci_info() { fi } -function bind_dev_to_driver() { +function bind_devs_to_driver() { local driver=$1 shift 1 local nics=( $@ ) @@ -307,7 +363,7 @@ function bind_dev_to_driver() { for n in ${nics[@]} ; do echo "INFO: Binding device $n to driver $driver ..." local pci_address=`get_pci_address_for_nic $n` - save_pci_info $pci_address + save_pci_info $n $pci_address if ! /opt/contrail/bin/dpdk_nic_bind.py --force --bind="$driver" $n ; then echo "ERROR: Failed to bind $n to driver $driver" exit -1 @@ -320,6 +376,7 @@ function bind_dev_to_driver() { } function restore_bindinds() { + # TODO: most probably remove this function since it is not used local binding_data_dir='/var/run/vrouter' if [[ ! 
-d "$binding_data_dir" ]] ; then ehoc "INFO: there is no local data with devs bound to dpdk uio" @@ -346,4 +403,54 @@ function restore_bindinds() { ip link set dev $nic down || echo "INFO: $nic is already down" ip link set dev $nic up || echo "WARNING: failed to up interface $nic" done -} \ No newline at end of file +} + +function ensure_hugepages() { + local hp_dir=${1:?} + if [[ ! -d "$hp_dir" ]] ; then + echo "WARNING: There is no $hp_dir mounted from host. Try to create and mount hugetlbfs." + if ! mkdir -p $hp_dir ; then + echo "ERROR: failed to create $hp_dir" + exit -1 + fi + if ! mount -t hugetlbfs hugetlbfs $hp_dir ; then + echo "ERROR: failed to mount hugetlbfs to $hp_dir" + exit -1 + fi + fi + + if [[ ! -d "$hp_dir" ]] ; then + echo "ERROR: There is no $hp_dir. Probably HugeTables are anuvailable on the host." + exit -1 + fi +} + +function set_ctl() { + local var=$1 + local value=$2 + if grep -q "^${var}[[:space:]]*=" /etc/sysctl.conf ; then + sed -i "s/^${var}[[:space:]]*=.*/$var=$value/g" /etc/sysctl.conf + else + echo "$var=$value" >> /etc/sysctl.conf + fi + sysctl -w ${var}=${value} +} + +function load_kernel_module() { + local module=$1 + shift 1 + local opts=$@ + echo "INFO: load $module kernel module" + if ! modprobe -v "$module" $opts ; then + echo "ERROR: failed to load $module driver" + exit -1 + fi +} + +function unload_kernel_module() { + local module=$1 + echo "INFO: unload $module kernel module" + if !
rmmod $module ; then + echo "WARNING: Failed to unload $module driver" + fi +} diff --git a/docker-compose/docker-compose-dpdk.yaml b/docker-compose/docker-compose-dpdk.yaml index 25e97aba5..db64ad698 100644 --- a/docker-compose/docker-compose-dpdk.yaml +++ b/docker-compose/docker-compose-dpdk.yaml @@ -318,10 +318,11 @@ services: - /dev:/dev - /lib/modules:/lib/modules - /var/log/contrail:/var/log/contrail - vrouter-agent: - image: "${CONTRAIL_REGISTRY}/contrail-agent-vrouter:${CONTRAIL_CONTAINER_TAG}" + vrouter-agent-dpdk: + image: "${CONTRAIL_REGISTRY}/contrail-agent-vrouter-dpdk:${CONTRAIL_CONTAINER_TAG}" env_file: ${ENV_FILE} network_mode: host + pid: host privileged: true volumes: - /dev:/dev @@ -331,18 +332,30 @@ services: restart: on-failure depends_on: - vrouter-init-kernel - vrouter-agent-dpdk: - image: "${CONTRAIL_REGISTRY}/contrail-agent-vrouter-dpdk:${CONTRAIL_CONTAINER_TAG}" + vrouter-agent-net-watchdog: + image: "${CONTRAIL_REGISTRY}/contrail-agent-net-watchdog:${CONTRAIL_CONTAINER_TAG}" env_file: ${ENV_FILE} network_mode: host - pid: host privileged: true volumes: - /dev:/dev + - /etc/sysconfig/network-scripts:/etc/sysconfig/network-scripts - /lib/modules:/lib/modules - /var/run:/var/run - /var/log/contrail:/var/log/contrail restart: on-failure depends_on: - - vrouter-init-kernel - - vrouter-agent + - vrouter-agent-dpdk + vrouter-agent: + image: "${CONTRAIL_REGISTRY}/contrail-agent-vrouter:${CONTRAIL_CONTAINER_TAG}" + env_file: ${ENV_FILE} + network_mode: host + privileged: true + volumes: + - /dev:/dev + - /lib/modules:/lib/modules + - /var/run:/var/run + - /var/log/contrail:/var/log/contrail + restart: on-failure + depends_on: + - vrouter-agent-net-watchdog diff --git a/kubernetes/manifests/contrail-template-dpdk.yaml b/kubernetes/manifests/contrail-template-dpdk.yaml index 100914924..cc80259c8 100644 --- a/kubernetes/manifests/contrail-template-dpdk.yaml +++ b/kubernetes/manifests/contrail-template-dpdk.yaml @@ -807,7 +807,38 @@ spec: - mountPath: 
/var/log/contrail name: agent-logs containers: - - name: contrail-agent + - name: contrail-agent-dpdk + image: "{{CONTRAIL_REGISTRY}}/contrail-agent-vrouter-dpdk:{{CONTRAIL_CONTAINER_TAG}}" + imagePullPolicy: "" + # TODO: Priveleged mode is requied because w/o it the device /dev/net/tun + # is not present in the container. The mounting it into container + # doesnt help because of permissions are not enough syscalls, + # e.g. https://github.com/Juniper/contrail-controller/blob/master/src/vnsw/agent/contrail/linux/pkt0_interface.cc: 48. + securityContext: + privileged: true + envFrom: + - configMapRef: + name: env + volumeMounts: + # TODO: need for hugepages & communication with agent + - mountPath: /dev + name: dev + # TODO: it is for lspci -vmmks works, since it uses libkmod + - mountPath: /lib/modules + name: lib-modules + # TODO: looks like a shared storage for agent and dpdk agent + # there are sockets and pci dev info + - mountPath: /var/run + name: var-run + - mountPath: /var/log/contrail + name: agent-logs + - mountPath: /var/lib/contrail + name: var-lib-contrail + - mountPath: /var/crashes + name: var-crashes + - mountPath: /tmp/serviceaccount + name: pod-secret + - name: contrail-agent-net-watchdog - image: "{{CONTRAIL_REGISTRY}}/contrail-agent-vrouter:{{CONTRAIL_CONTAINER_TAG}}" + image: "{{CONTRAIL_REGISTRY}}/contrail-agent-net-watchdog:{{CONTRAIL_CONTAINER_TAG}}" imagePullPolicy: "" # TODO: Priveleged mode is requied because w/o it the device /dev/net/tun @@ -820,7 +851,8 @@ spec: - configMapRef: name: env volumeMounts: - # TODO: need for hugepages & communication with dpdk agent + - mountPath: /etc/sysconfig/network-scripts + name: etc-net-scripts - mountPath: /dev name: dev - mountPath: /lib/modules @@ -829,8 +861,6 @@ spec: # there are sockets and pci dev info - mountPath: /var/run name: var-run - - mountPath: /usr/src - name: usr-src - mountPath: /var/log/contrail name: agent-logs - mountPath: /var/lib/contrail @@ -839,8 +869,8 @@ spec: name: var-crashes - mountPath: /tmp/serviceaccount name: pod-secret - - name: contrail-agent-dpdk -
image: "{{CONTRAIL_REGISTRY}}/contrail-agent-vrouter-dpdk:{{CONTRAIL_CONTAINER_TAG}}" + - name: contrail-agent + image: "{{CONTRAIL_REGISTRY}}/contrail-agent-vrouter:{{CONTRAIL_CONTAINER_TAG}}" imagePullPolicy: "" # TODO: Priveleged mode is requied because w/o it the device /dev/net/tun # is not present in the container. The mounting it into container @@ -852,16 +882,17 @@ spec: - configMapRef: name: env volumeMounts: - # TODO: need for hugepages & communication with agent + # TODO: need for hugepages & communication with dpdk agent - mountPath: /dev name: dev - # TODO: it is for lspci -vmmks works, since it uses libkmod - mountPath: /lib/modules name: lib-modules # TODO: looks like a shared storage for agent and dpdk agent # there are sockets and pci dev info - mountPath: /var/run name: var-run + - mountPath: /usr/src + name: usr-src - mountPath: /var/log/contrail name: agent-logs - mountPath: /var/lib/contrail @@ -893,6 +924,9 @@ spec: - name: etc hostPath: path: /etc + - name: etc-net-scripts + hostPath: + path: /etc/sysconfig/network-scripts - name: var-run hostPath: path: /var/run