From 91a201afb812321892a1cab7133b5315c75cb337 Mon Sep 17 00:00:00 2001 From: Mathew Merrick Date: Thu, 28 Sep 2023 16:33:33 +0000 Subject: [PATCH 1/4] cilium configmap --- .../manifests/cilium/cilium-config.yaml | 12 +++++++++++- .../manifests/cilium/hubble/hubble-svc.yaml | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 test/integration/manifests/cilium/hubble/hubble-svc.yaml diff --git a/test/integration/manifests/cilium/cilium-config.yaml b/test/integration/manifests/cilium/cilium-config.yaml index 9e7d7becfd..42ea460c77 100644 --- a/test/integration/manifests/cilium/cilium-config.yaml +++ b/test/integration/manifests/cilium/cilium-config.yaml @@ -24,7 +24,7 @@ data: enable-health-check-nodeport: "true" enable-health-checking: "true" enable-host-legacy-routing: "true" - enable-hubble: "false" + enable-hubble: "true" enable-ipv4: "true" enable-ipv4-masquerade: "false" enable-ipv6: "false" @@ -42,6 +42,16 @@ data: enable-vtep: "false" enable-well-known-identities: "false" enable-xt-socket-fallback: "true" + hubble-metrics: flow:sourceContext=pod-short;destinationContext=pod-short + tcp:sourceContext=pod-short;destinationContext=pod-short + dns:flow:sourceContext=pod-short;destinationContext=pod-short + hubble-metrics-server: :9965 + hubble-disable-tls: "false" + hubble-listen-address: "" + hubble-socket-path: /dev/null + hubble-tls-cert-file: /var/lib/cilium/tls/hubble/server.crt + hubble-tls-client-ca-files: /var/lib/cilium/tls/hubble/client-ca.crt + hubble-tls-key-file: /var/lib/cilium/tls/hubble/server.key identity-allocation-mode: crd install-iptables-rules: "true" install-no-conntrack-iptables-rules: "false" diff --git a/test/integration/manifests/cilium/hubble/hubble-svc.yaml b/test/integration/manifests/cilium/hubble/hubble-svc.yaml new file mode 100644 index 0000000000..6ba733885c --- /dev/null +++ b/test/integration/manifests/cilium/hubble/hubble-svc.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: 
+ labels: + k8s-app: cilium + name: hubble-peer + namespace: kube-system +spec: + internalTrafficPolicy: Cluster + ports: + - name: peer-service + port: 443 + protocol: TCP + targetPort: 4244 + selector: + k8s-app: cilium + sessionAffinity: None + type: ClusterIP From d846d9fc78b2907514f2ee2f6dd645cd03c3c199 Mon Sep 17 00:00:00 2001 From: Mathew Merrick Date: Thu, 28 Sep 2023 22:43:01 +0000 Subject: [PATCH 2/4] update hubble configs and add metrics test --- .gitignore | 3 + test/integration/hubble/hubble_test.go | 160 ++++++++++++++++++ .../{hubble-svc.yaml => hubble-peer-svc.yaml} | 0 3 files changed, 163 insertions(+) create mode 100644 test/integration/hubble/hubble_test.go rename test/integration/manifests/cilium/hubble/{hubble-svc.yaml => hubble-peer-svc.yaml} (100%) diff --git a/.gitignore b/.gitignore index 6ecb9304d9..e55c1f21fd 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,6 @@ go.work* # scale-test test/scale/generated/* + +# test env file +*.env diff --git a/test/integration/hubble/hubble_test.go b/test/integration/hubble/hubble_test.go new file mode 100644 index 0000000000..1f0162995a --- /dev/null +++ b/test/integration/hubble/hubble_test.go @@ -0,0 +1,160 @@ +package main + +import ( + "context" + "flag" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "testing" + "time" + + k8s "github.com/Azure/azure-container-networking/test/integration" + "github.com/Azure/azure-container-networking/test/internal/retry" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/util/homedir" +) + +const ( + retryAttempts = 10 + retryDelay = 5 * time.Second + promAddress = "http://localhost:9965/metrics" +) + +var ( + defaultRetrier = retry.Retrier{Attempts: retryAttempts, Delay: retryDelay} + requiredMetrics = []string{ + "hubble_flows_processed_total", + "hubble_tcp_flags_total", + } +) + +func TestEndpoints(t *testing.T) { + var kubeconfigPath string + flag.StringVar(&kubeconfigPath, "kubeconfig", 
getDefaultKubeconfigPath(), "Path to the kubeconfig file") + flag.Parse() + + config, err := getClientConfig(kubeconfigPath) + if err != nil { + fmt.Printf("Error creating Kubernetes client config: %v\n", err) + os.Exit(1) + } + + ctx := context.Background() + clusterCtx, cancel := context.WithTimeout(ctx, 100000*time.Second) + defer cancel() + pingCheckFn := func() error { + var pf *k8s.PortForwarder + pf, err = k8s.NewPortForwarder(config, t, k8s.PortForwardingOpts{ + Namespace: "kube-system", + LabelSelector: "k8s-app=cilium", + LocalPort: 9965, + DestPort: 9965, + }) + if err != nil { + t.Error(err) + } + pctx := context.Background() + + portForwardCtx, cancel := context.WithTimeout(pctx, 100000*time.Second) + defer cancel() + + portForwardFn := func() error { + t.Log("attempting port forward") + if err := pf.Forward(portForwardCtx); err != nil { + return fmt.Errorf("could not start port forward: %w", err) + } + return nil + } + + if err := defaultRetrier.Do(portForwardCtx, portForwardFn); err != nil { + t.Fatalf("could not start port forward within %d: %v", 100000*time.Second, err) + } + defer pf.Stop() + + // scrape the hubble metrics + metrics, err := getPrometheusMetrics(promAddress) + if err != nil { + return err + } + + // verify that the response contains the required metrics + for _, reqMetric := range requiredMetrics { + if val, exists := metrics[reqMetric]; !exists { + return fmt.Errorf("scraping %s, did not find metric %s", val, promAddress) + } + } + return nil + } + + if err := defaultRetrier.Do(clusterCtx, pingCheckFn); err != nil { + t.Fatalf("metrics check failed with error: %v", err) + } + +} + +func getPrometheusMetrics(url string) (map[string]struct{}, error) { + resp, err := http.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP request failed with status: %v", resp.Status) + } + + metricsData, err := io.ReadAll(resp.Body) + if err != nil { + 
return nil, err + } + + metrics := parseMetrics(string(metricsData)) + return metrics, nil +} + +func parseMetrics(metricsData string) map[string]struct{} { + // Create a map to store the strings before the first '{'. + metrics := make(map[string]struct{}) + + // sample metrics + // hubble_tcp_flags_total{destination="",family="IPv4",flag="RST",source="kube-system/metrics-server"} 980 + // hubble_tcp_flags_total{destination="",family="IPv4",flag="SYN",source="kube-system/ama-metrics"} 1777 + // we only want the metric name for the time being + // label order/parsing can happen later + lines := strings.Split(metricsData, "\n") + // Iterate through each line. + for _, line := range lines { + // Find the index of the first '{' character. + index := strings.Index(line, "{") + if index >= 0 { + // Extract the string before the first '{'. + str := strings.TrimSpace(line[:index]) + // Store the string in the map. + metrics[str] = struct{}{} + } + } + + return metrics +} + +func getDefaultKubeconfigPath() string { + home := homedir.HomeDir() + return filepath.Join(home, ".kube", "config") +} + +func getClientConfig(kubeconfigPath string) (*rest.Config, error) { + config, err := rest.InClusterConfig() + if err != nil { + // If running outside a Kubernetes cluster, use the kubeconfig file. 
+ config, err = clientcmd.BuildConfigFromFlags("", kubeconfigPath) + if err != nil { + return nil, err + } + } + return config, nil +} diff --git a/test/integration/manifests/cilium/hubble/hubble-svc.yaml b/test/integration/manifests/cilium/hubble/hubble-peer-svc.yaml similarity index 100% rename from test/integration/manifests/cilium/hubble/hubble-svc.yaml rename to test/integration/manifests/cilium/hubble/hubble-peer-svc.yaml From 23593f8fd071b7c6caba2ccb66ce323c6082b7e4 Mon Sep 17 00:00:00 2001 From: Mathew Merrick Date: Thu, 28 Sep 2023 22:54:03 +0000 Subject: [PATCH 3/4] update pipeline yaml --- .../cilium/cilium-e2e-step-template.yaml | 8 ++++++++ hack/toolbox/server/Dockerfile.heavy | 6 +++--- test/integration/hubble/hubble_test.go | 15 +++++++++------ 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml b/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml index 914129241c..02c7a0f6c3 100644 --- a/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml +++ b/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml @@ -41,6 +41,7 @@ steps: echo "deploy Cilium ConfigMap" kubectl apply -f cilium/configmap.yaml kubectl apply -f test/integration/manifests/cilium/cilium-config.yaml + kubectl apply -f test/integration/manifests/cilium/hubble/hubble-peer-svc.yaml echo "install Cilium ${CILIUM_VERSION_TAG}" envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/daemonset.yaml | kubectl apply -f - envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/deployment.yaml | kubectl apply -f - @@ -177,6 +178,13 @@ steps: name: "WireserverMetadataConnectivityTests" displayName: "Run Wireserver and Metadata Connectivity Tests" + - script: | + echo "verify hubble metrics endpoint is usable" + go test ./test/integration/hubble/ -count=1 -v + retryCountOnTaskFailure: 3 + name: "HubbleConnectivityTests" + 
displayName: "Run Hubble Connectivity Tests" + - script: | ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/test-output/ echo $ARTIFACT_DIR diff --git a/hack/toolbox/server/Dockerfile.heavy b/hack/toolbox/server/Dockerfile.heavy index ee9aea25d2..fbcee1c15d 100644 --- a/hack/toolbox/server/Dockerfile.heavy +++ b/hack/toolbox/server/Dockerfile.heavy @@ -3,7 +3,7 @@ ADD ./ / WORKDIR / RUN CGO_ENABLED=0 GOOS=linux go build -o server . -FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04 +FROM mcr.microsoft.com/mirror/docker/library/ubuntu:22.04 RUN apt-get update RUN apt-get install -y \ axel \ @@ -21,14 +21,14 @@ RUN apt-get install -y \ net-tools \ netcat \ nmap \ - python \ python3 \ ssh \ sudo \ tcpdump \ traceroute \ + unzip \ vim \ - wget + wget RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" RUN curl -LO "https://dl.k8s.io/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl.sha256" diff --git a/test/integration/hubble/hubble_test.go b/test/integration/hubble/hubble_test.go index 1f0162995a..d98e434932 100644 --- a/test/integration/hubble/hubble_test.go +++ b/test/integration/hubble/hubble_test.go @@ -23,6 +23,8 @@ const ( retryAttempts = 10 retryDelay = 5 * time.Second promAddress = "http://localhost:9965/metrics" + labelSelector = "k8s-app=cilium" + namespace = "kube-system" ) var ( @@ -45,13 +47,13 @@ func TestEndpoints(t *testing.T) { } ctx := context.Background() - clusterCtx, cancel := context.WithTimeout(ctx, 100000*time.Second) + clusterCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) defer cancel() pingCheckFn := func() error { var pf *k8s.PortForwarder pf, err = k8s.NewPortForwarder(config, t, k8s.PortForwardingOpts{ - Namespace: "kube-system", - LabelSelector: "k8s-app=cilium", + Namespace: namespace, + LabelSelector: labelSelector, LocalPort: 9965, DestPort: 9965, }) @@ -60,11 +62,11 @@ func TestEndpoints(t *testing.T) { } pctx := 
context.Background() - portForwardCtx, cancel := context.WithTimeout(pctx, 100000*time.Second) + portForwardCtx, cancel := context.WithTimeout(pctx, (retryAttempts+1)*retryDelay) defer cancel() portForwardFn := func() error { - t.Log("attempting port forward") + t.Logf("attempting port forward to a pod with label %s, in namespace %s...", labelSelector, namespace) if err := pf.Forward(portForwardCtx); err != nil { return fmt.Errorf("could not start port forward: %w", err) } @@ -72,7 +74,7 @@ func TestEndpoints(t *testing.T) { } if err := defaultRetrier.Do(portForwardCtx, portForwardFn); err != nil { - t.Fatalf("could not start port forward within %d: %v", 100000*time.Second, err) + t.Fatalf("could not start port forward within %d: %v", (retryAttempts+1)*retryDelay, err) } defer pf.Stop() @@ -88,6 +90,7 @@ func TestEndpoints(t *testing.T) { return fmt.Errorf("scraping %s, did not find metric %s", val, promAddress) } } + t.Logf("all metrics validated: %+v", requiredMetrics) return nil } From 09b044708b2a95473acfc68b70fb8154c82d0fbc Mon Sep 17 00:00:00 2001 From: Mathew Merrick Date: Thu, 5 Oct 2023 15:04:43 -0700 Subject: [PATCH 4/4] separate cilium+hubble config --- test/integration/hubble/hubble_test.go | 16 +-- .../cilium/cilium-config-hubble.yaml | 98 +++++++++++++++++++ .../manifests/cilium/cilium-config.yaml | 12 +-- 3 files changed, 107 insertions(+), 19 deletions(-) create mode 100644 test/integration/manifests/cilium/cilium-config-hubble.yaml diff --git a/test/integration/hubble/hubble_test.go b/test/integration/hubble/hubble_test.go index d98e434932..1f7a57a2f2 100644 --- a/test/integration/hubble/hubble_test.go +++ b/test/integration/hubble/hubble_test.go @@ -81,13 +81,13 @@ func TestEndpoints(t *testing.T) { // scrape the hubble metrics metrics, err := getPrometheusMetrics(promAddress) if err != nil { - return err + return fmt.Errorf("scraping %s, failed with error: %w", promAddress, err) } // verify that the response contains the required metrics for _, 
reqMetric := range requiredMetrics { if val, exists := metrics[reqMetric]; !exists { - return fmt.Errorf("scraping %s, did not find metric %s", val, promAddress) + return fmt.Errorf("scraping %s, did not find metric %s", val, promAddress) //nolint:goerr113,gocritic } } t.Logf("all metrics validated: %+v", requiredMetrics) @@ -97,23 +97,23 @@ func TestEndpoints(t *testing.T) { if err := defaultRetrier.Do(clusterCtx, pingCheckFn); err != nil { t.Fatalf("metrics check failed with error: %v", err) } - } func getPrometheusMetrics(url string) (map[string]struct{}, error) { - resp, err := http.Get(url) + client := http.Client{} + resp, err := client.Get(url) //nolint if err != nil { - return nil, err + return nil, fmt.Errorf("HTTP request failed: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("HTTP request failed with status: %v", resp.Status) + return nil, fmt.Errorf("HTTP request failed with status: %v", resp.Status) //nolint:goerr113,gocritic } metricsData, err := io.ReadAll(resp.Body) if err != nil { - return nil, err + return nil, fmt.Errorf("reading HTTP response body failed: %w", err) } metrics := parseMetrics(string(metricsData)) @@ -156,7 +156,7 @@ func getClientConfig(kubeconfigPath string) (*rest.Config, error) { // If running outside a Kubernetes cluster, use the kubeconfig file. 
config, err = clientcmd.BuildConfigFromFlags("", kubeconfigPath) if err != nil { - return nil, err + return nil, fmt.Errorf("error creating Kubernetes client config: %w", err) } } return config, nil diff --git a/test/integration/manifests/cilium/cilium-config-hubble.yaml b/test/integration/manifests/cilium/cilium-config-hubble.yaml new file mode 100644 index 0000000000..42ea460c77 --- /dev/null +++ b/test/integration/manifests/cilium/cilium-config-hubble.yaml @@ -0,0 +1,98 @@ +apiVersion: v1 +data: + agent-not-ready-taint-key: node.cilium.io/agent-not-ready + arping-refresh-period: 30s + auto-direct-node-routes: "false" + bpf-lb-external-clusterip: "false" + bpf-lb-map-max: "65536" + bpf-lb-mode: snat + bpf-map-dynamic-size-ratio: "0.0025" + bpf-policy-map-max: "16384" + bpf-root: /sys/fs/bpf + cgroup-root: /run/cilium/cgroupv2 + cilium-endpoint-gc-interval: 5m0s + cluster-id: "0" + cluster-name: default + debug: "false" + disable-cnp-status-updates: "true" + disable-endpoint-crd: "false" + enable-auto-protect-node-port-range: "true" + enable-bgp-control-plane: "false" + enable-bpf-clock-probe: "true" + enable-endpoint-health-checking: "false" + enable-endpoint-routes: "true" + enable-health-check-nodeport: "true" + enable-health-checking: "true" + enable-host-legacy-routing: "true" + enable-hubble: "true" + enable-ipv4: "true" + enable-ipv4-masquerade: "false" + enable-ipv6: "false" + enable-ipv6-masquerade: "false" + enable-k8s-terminating-endpoint: "true" + enable-l2-neigh-discovery: "true" + enable-l7-proxy: "false" + enable-local-node-route: "false" + enable-local-redirect-policy: "false" + enable-metrics: "true" + enable-policy: default + enable-remote-node-identity: "true" + enable-session-affinity: "true" + enable-svc-source-range-check: "true" + enable-vtep: "false" + enable-well-known-identities: "false" + enable-xt-socket-fallback: "true" + hubble-metrics: flow:sourceContext=pod-short;destinationContext=pod-short + 
tcp:sourceContext=pod-short;destinationContext=pod-short + dns:flow:sourceContext=pod-short;destinationContext=pod-short + hubble-metrics-server: :9965 + hubble-disable-tls: "false" + hubble-listen-address: "" + hubble-socket-path: /dev/null + hubble-tls-cert-file: /var/lib/cilium/tls/hubble/server.crt + hubble-tls-client-ca-files: /var/lib/cilium/tls/hubble/client-ca.crt + hubble-tls-key-file: /var/lib/cilium/tls/hubble/server.key + identity-allocation-mode: crd + install-iptables-rules: "true" + install-no-conntrack-iptables-rules: "false" + ipam: delegated-plugin + kube-proxy-replacement: strict + kube-proxy-replacement-healthz-bind-address: "0.0.0.0:10256" + local-router-ipv4: 169.254.23.0 + metrics: +cilium_bpf_map_pressure + monitor-aggregation: medium + monitor-aggregation-flags: all + monitor-aggregation-interval: 5s + node-port-bind-protection: "true" + nodes-gc-interval: 5m0s + operator-api-serve-addr: 127.0.0.1:9234 + operator-prometheus-serve-addr: :9963 + preallocate-bpf-maps: "false" + procfs: /host/proc + prometheus-serve-addr: :9962 + remove-cilium-node-taints: "true" + set-cilium-is-up-condition: "true" + sidecar-istio-proxy-image: cilium/istio_proxy + synchronize-k8s-nodes: "true" + tofqdns-dns-reject-response-code: refused + tofqdns-enable-dns-compression: "true" + tofqdns-endpoint-max-ip-per-hostname: "50" + tofqdns-idle-connection-grace-period: 0s + tofqdns-max-deferred-connection-deletes: "10000" + tofqdns-min-ttl: "3600" + tofqdns-proxy-response-max-delay: 100ms + tunnel: disabled + unmanaged-pod-watcher-interval: "15" + vtep-cidr: "" + vtep-endpoint: "" + vtep-mac: "" + vtep-mask: "" +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: cilium + meta.helm.sh/release-namespace: kube-system + labels: + app.kubernetes.io/managed-by: Helm + name: cilium-config + namespace: kube-system diff --git a/test/integration/manifests/cilium/cilium-config.yaml b/test/integration/manifests/cilium/cilium-config.yaml index 
42ea460c77..9e7d7becfd 100644 --- a/test/integration/manifests/cilium/cilium-config.yaml +++ b/test/integration/manifests/cilium/cilium-config.yaml @@ -24,7 +24,7 @@ data: enable-health-check-nodeport: "true" enable-health-checking: "true" enable-host-legacy-routing: "true" - enable-hubble: "true" + enable-hubble: "false" enable-ipv4: "true" enable-ipv4-masquerade: "false" enable-ipv6: "false" @@ -42,16 +42,6 @@ data: enable-vtep: "false" enable-well-known-identities: "false" enable-xt-socket-fallback: "true" - hubble-metrics: flow:sourceContext=pod-short;destinationContext=pod-short - tcp:sourceContext=pod-short;destinationContext=pod-short - dns:flow:sourceContext=pod-short;destinationContext=pod-short - hubble-metrics-server: :9965 - hubble-disable-tls: "false" - hubble-listen-address: "" - hubble-socket-path: /dev/null - hubble-tls-cert-file: /var/lib/cilium/tls/hubble/server.crt - hubble-tls-client-ca-files: /var/lib/cilium/tls/hubble/client-ca.crt - hubble-tls-key-file: /var/lib/cilium/tls/hubble/server.key identity-allocation-mode: crd install-iptables-rules: "true" install-no-conntrack-iptables-rules: "false"