From 35a8d52c25c5e25bc5816e5ebd1b37034f999ff7 Mon Sep 17 00:00:00 2001
From: Paul Holzinger <pholzing@redhat.com>
Date: Tue, 17 Oct 2023 16:26:56 +0200
Subject: [PATCH] libpod: restart+userns cleanup netns correctly

When a userns and netns is used we need to let the runtime create the
netns othwerwise the netns is not owned by the right userns and thus
the capabilities would not be correct.

The current restart logic tries to reuse the netns which is fine if no
userns is used but when one is used we setup a new netns (which is
correct) but forgot to cleanup the old netns. This resulted in leaked
network namespaces and because no teardown was ever called leaked ipam
assignments, thus a quickly restarting contianer will run out of ip
space very fast.

Fixes #18615

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
---
 libpod/container_internal.go    | 11 +++++++++
 test/system/500-networking.bats | 40 +++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 6e8306a24096..4e3bd8e9e92d 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -288,6 +288,17 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
 			}
 		}
 	}()
+
+	// Now this is a bit of a mess, normally we try to reuse the netns but if a userns
+	// is used this is not possible as it must be owned by the userns which is created
+	// by the oci runtime. Thus we need to teardown the netns so that the runtime
+	// creates the users+netns and then we setup in completeNetworkSetup() again.
+	if c.config.PostConfigureNetNS {
+		if err := c.cleanupNetwork(); err != nil {
+			return false, err
+		}
+	}
+
 	if err := c.prepare(); err != nil {
 		return false, err
 	}
diff --git a/test/system/500-networking.bats b/test/system/500-networking.bats
index 6b6a86251d09..a34d854244b2 100644
--- a/test/system/500-networking.bats
+++ b/test/system/500-networking.bats
@@ -862,4 +862,44 @@ EOF
      fi
 }
 
+# Test for https://github.com/containers/podman/issues/18615
+@test "podman network cleanup --userns + --restart" {
+    skip_if_cgroupsv1 "run --uidmap fails on cgroups v1 (issue 15025, wontfix)"
+    userns="--userns=keep-id"
+    if ! is_rootless; then
+        userns="--uidmap=0:1111111:65536 --gidmap=0:1111111:65536"
+    fi
+
+    local net1=a-$(random_string 10)
+    # use /29 subnet to limt available ip space, a 29 gives 5 useable addresses (6 - 1 for the gw)
+    local subnet="$(random_rfc1918_subnet).0/29"
+    run_podman network create --subnet $subnet $net1
+    local cname=con-$(random_string 10)
+
+    local netns_count=
+    if ! is_rootless; then
+        netns_count=$(ls /run/netns | wc -l)
+    fi
+
+    # This will cause 7 containers runs with the restart policy (one more than the on failure limit)
+    # Since they run sequentially they should run fine without allocating all ips.
+    run_podman 1 run --name $cname --network $net1 --restart on-failure:6 --userns keep-id $IMAGE false
+
+    # Previously this would fail as the container would run out of ips after 5 restarts.
+    run_podman inspect --format "{{.RestartCount}}" $cname
+    assert "$output" == "6" "RestartCount for failing container"
+
+    # Now make sure we can still run a contianer with free ips.
+    run_podman run --rm --network $net1 $IMAGE true
+
+    if ! is_rootless; then
+        # This is racy if other programs modify /run/netns while the test is running.
+        # However I think the risk is minimal and I think checking for this is important.
+        assert "$(ls /run/netns | wc -l)" == "$netns_count" "/run/netns has no leaked netns files"
+    fi
+
+    run_podman rm -f -t0 $cname
+    run_podman network rm $net1
+}
+
 # vim: filetype=sh