Skip to content

Commit

Permalink
Fix the issue of local probe bypassing flows on Windows (#3510)
Browse files Browse the repository at this point in the history
When proxyAll is enabled, kube-proxy can be replaced by AntreaProxy, then
Service traffic and non-Service traffic can be distinguished by ServiceCTMark
and NotServiceCTMark. Service traffic with ServiceCTMark should not bypass
Network Policies, and non-Service traffic generated by kubelet with
NotServiceCTMark should bypass Network Policies.

Signed-off-by: Hongliang Liu <lhongliang@vmware.com>
  • Loading branch information
hongliangl committed Apr 22, 2022
1 parent 5905e98 commit b93b566
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 23 deletions.
3 changes: 2 additions & 1 deletion pkg/agent/openflow/client.go
Expand Up @@ -697,7 +697,8 @@ func (c *client) generatePipelines() {
c.networkConfig,
c.ovsDatapathType,
c.connectUplinkToBridge,
c.enableMulticast)
c.enableMulticast,
c.proxyAll)
c.activatedFeatures = append(c.activatedFeatures, c.featurePodConnectivity)
c.traceableFeatures = append(c.traceableFeatures, c.featurePodConnectivity)

Expand Down
9 changes: 5 additions & 4 deletions pkg/agent/openflow/fields.go
Expand Up @@ -173,11 +173,12 @@ var (
FromGatewayCTMark = binding.NewCTMark(ConnSourceCTMarkField, gatewayVal)
FromBridgeCTMark = binding.NewCTMark(ConnSourceCTMarkField, bridgeVal)

// CTMark[4]: Mark to indicate DNAT is performed on the connection for Service.
// This CT mark is used in CtZone / CtZoneV6 and SNATCtZone / SNATCtZoneV6.
ServiceCTMark = binding.NewOneBitCTMark(4)
// CTMark[4]: Marks to indicate whether DNAT is performed on the connection for Service.
// These CT marks are used in CtZone / CtZoneV6 and SNATCtZone / SNATCtZoneV6.
ServiceCTMark = binding.NewOneBitCTMark(4)
NotServiceCTMark = binding.NewOneBitZeroCTMark(4)

// CTMark[5]: Mark to indicate SNAT should be performed on the connection for Service.
// CTMark[5]: Mark to indicate SNAT is performed on the connection for Service.
// This CT mark is only used in CtZone / CtZoneV6.
ConnSNATCTMark = binding.NewOneBitCTMark(5)

Expand Down
44 changes: 29 additions & 15 deletions pkg/agent/openflow/pipeline.go
Expand Up @@ -2094,34 +2094,48 @@ func (f *featureNetworkPolicy) dnsPacketInFlow(conjunctionID uint32) binding.Flo
Done()
}

// localProbeFlow generates the flow to forward locally generated packets to ConntrackCommitTable, bypassing ingress
// rules of Network Policies. The packets are sent by kubelet to probe the liveness/readiness of local Pods.
// On Linux and when OVS kernel datapath is used, it identifies locally generated packets by matching the
// HostLocalSourceMark, otherwise it matches the source IP. The difference is because:
// 1. On Windows, kube-proxy userspace mode is used, and currently there is no way to distinguish kubelet generated
// traffic from kube-proxy proxied traffic.
// localProbeFlows generates the flows to forward locally generated request packets to stageConntrack directly, bypassing
// ingress rules of Network Policies. The packets are sent by kubelet to probe the liveness/readiness of local Pods.
// On Linux and when OVS kernel datapath is used, the probe packets are identified by matching the HostLocalSourceMark.
// On Windows or when OVS userspace (netdev) datapath is used, we need a different approach because:
// 1. On Windows, kube-proxy userspace mode is used, and currently there is no way to distinguish kubelet generated traffic
// from kube-proxy proxied traffic.
// 2. pkt_mark field is not properly supported for OVS userspace (netdev) datapath.
// Note that there is a defect in the latter way that NodePort Service access by external clients will be masqueraded as
// a local gateway IP to bypass Network Policies. See https://github.com/antrea-io/antrea/issues/280.
// TODO: Fix it after replacing kube-proxy with AntreaProxy.
func (f *featurePodConnectivity) localProbeFlow() []binding.Flow {
// When proxyAll is disabled, the probe packets are identified by matching the source IP is the Antrea gateway IP;
// otherwise, the packets are identified by matching both the Antrea gateway IP and NotServiceCTMark. Note that, when
// proxyAll is disabled, currently there is no way to distinguish kubelet generated traffic from kube-proxy proxied traffic
// only by matching the Antrea gateway IP. There is a defect that NodePort Service access by external clients will be
// masqueraded as the Antrea gateway IP to bypass NetworkPolicies. See https://github.com/antrea-io/antrea/issues/280.
func (f *featurePodConnectivity) localProbeFlows() []binding.Flow {
cookieID := f.cookieAllocator.Request(f.category).Raw()
var flows []binding.Flow
if runtime.IsWindowsPlatform() || f.ovsDatapathType == ovsconfig.OVSDatapathNetdev {
var ctMarksToMatch []*binding.CtMark
if f.proxyAll {
ctMarksToMatch = append(ctMarksToMatch, NotServiceCTMark)
}
for ipProtocol, gatewayIP := range f.gatewayIPs {
flows = append(flows, IngressSecurityClassifierTable.ofTable.BuildFlow(priorityHigh).
Cookie(cookieID).
MatchProtocol(ipProtocol).
MatchCTStateRpl(false).
MatchCTStateTrk(true).
MatchSrcIP(gatewayIP).
MatchCTMark(ctMarksToMatch...).
Action().GotoStage(stageConntrack).
Done())
}
} else {
flows = append(flows, IngressSecurityClassifierTable.ofTable.BuildFlow(priorityHigh).
Cookie(cookieID).
MatchPktMark(types.HostLocalSourceMark, &types.HostLocalSourceMark).
Action().GotoStage(stageConntrack).
Done())
for _, ipProtocol := range f.ipProtocols {
flows = append(flows, IngressSecurityClassifierTable.ofTable.BuildFlow(priorityHigh).
Cookie(cookieID).
MatchProtocol(ipProtocol).
MatchCTStateRpl(false).
MatchCTStateTrk(true).
MatchPktMark(types.HostLocalSourceMark, &types.HostLocalSourceMark).
Action().GotoStage(stageConntrack).
Done())
}
}
return flows
}
Expand Down
7 changes: 5 additions & 2 deletions pkg/agent/openflow/pod_connectivity.go
Expand Up @@ -44,6 +44,7 @@ type featurePodConnectivity struct {
ctZoneSrcField *binding.RegField
ipCtZoneTypeRegMarks map[binding.Protocol]*binding.RegMark
enableMulticast bool
proxyAll bool

category cookie.Category
}
Expand All @@ -59,7 +60,8 @@ func newFeaturePodConnectivity(
networkConfig *config.NetworkConfig,
ovsDatapathType ovsconfig.OVSDatapathType,
connectUplinkToBridge bool,
enableMulticast bool) *featurePodConnectivity {
enableMulticast bool,
proxyAll bool) *featurePodConnectivity {
ctZones := make(map[binding.Protocol]int)
gatewayIPs := make(map[binding.Protocol]net.IP)
localCIDRs := make(map[binding.Protocol]net.IPNet)
Expand Down Expand Up @@ -101,6 +103,7 @@ func newFeaturePodConnectivity(
ipCtZoneTypeRegMarks: ipCtZoneTypeRegMarks,
ctZoneSrcField: getZoneSrcField(connectUplinkToBridge),
enableMulticast: enableMulticast,
proxyAll: proxyAll,
category: cookie.PodConnectivity,
}
}
Expand Down Expand Up @@ -140,7 +143,7 @@ func (f *featurePodConnectivity) initFlows() []binding.Flow {
flows = append(flows, f.gatewayIPSpoofGuardFlows()...)
flows = append(flows, f.l3FwdFlowToGateway()...)
// Add flow to ensure the liveliness check packet could be forwarded correctly.
flows = append(flows, f.localProbeFlow()...)
flows = append(flows, f.localProbeFlows()...)

if f.networkConfig.TrafficEncapMode.SupportsEncap() {
flows = append(flows, f.tunnelClassifierFlow(config.DefaultTunOFPort))
Expand Down
3 changes: 3 additions & 0 deletions pkg/ovs/openflow/ofctrl_builder.go
Expand Up @@ -244,6 +244,9 @@ func (b *ofFlowBuilder) MatchCTStateSNAT(set bool) FlowBuilder {
}

func (b *ofFlowBuilder) MatchCTMark(marks ...*CtMark) FlowBuilder {
if len(marks) == 0 {
return b
}
var value, mask uint32
for _, mark := range marks {
value |= mark.GetValue()
Expand Down
2 changes: 1 addition & 1 deletion test/integration/agent/openflow_test.go
Expand Up @@ -1222,7 +1222,7 @@ func prepareGatewayFlows(gwIPs []net.IP, gwMAC net.HardwareAddr, vMAC net.Hardwa
tableName: "IngressSecurityClassifier",
flows: []*ofTestUtils.ExpectFlow{
{
MatchStr: fmt.Sprintf("priority=210,%s,%s=%s", ipProtoStr, nwSrcStr, gwIP.String()),
MatchStr: fmt.Sprintf("priority=210,ct_state=-rpl+trk,%s,%s=%s", ipProtoStr, nwSrcStr, gwIP.String()),
ActStr: "goto_table:ConntrackCommit",
},
},
Expand Down

0 comments on commit b93b566

Please sign in to comment.