diff --git a/cni/azure-linux-multitenancy.conflist b/cni/azure-linux-multitenancy.conflist new file mode 100644 index 0000000000..c3218f3b71 --- /dev/null +++ b/cni/azure-linux-multitenancy.conflist @@ -0,0 +1,23 @@ +{ + "cniVersion":"0.3.0", + "name":"azure", + "plugins":[ + { + "type":"azure-vnet", + "mode":"bridge", + "bridge":"azure0", + "multiTenancy":true, + "enableSnatOnHost":true, + "ipam":{ + "type":"azure-vnet-ipam" + } + }, + { + "type":"portmap", + "capabilities":{ + "portMappings":true + }, + "snat":true + } + ] +} \ No newline at end of file diff --git a/cni/netconfig.go b/cni/netconfig.go index 33252f27a5..7ce5057bcd 100644 --- a/cni/netconfig.go +++ b/cni/netconfig.go @@ -24,15 +24,17 @@ type KVPair struct { // NetworkConfig represents Azure CNI plugin network configuration. type NetworkConfig struct { - CNIVersion string `json:"cniVersion"` - Name string `json:"name"` - Type string `json:"type"` - Mode string `json:"mode"` - Master string `json:"master"` - Bridge string `json:"bridge,omitempty"` - LogLevel string `json:"logLevel,omitempty"` - LogTarget string `json:"logTarget,omitempty"` - Ipam struct { + CNIVersion string `json:"cniVersion"` + Name string `json:"name"` + Type string `json:"type"` + Mode string `json:"mode"` + Master string `json:"master"` + Bridge string `json:"bridge,omitempty"` + LogLevel string `json:"logLevel,omitempty"` + LogTarget string `json:"logTarget,omitempty"` + MultiTenancy bool `json:"multiTenancy,omitempty"` + EnableSnatOnHost bool `json:"enableSnatOnHost,omitempty"` + Ipam struct { Type string `json:"type"` Environment string `json:"environment,omitempty"` AddrSpace string `json:"addressSpace,omitempty"` diff --git a/cni/network/mutlitenancy.go b/cni/network/mutlitenancy.go new file mode 100644 index 0000000000..e3d26a2e08 --- /dev/null +++ b/cni/network/mutlitenancy.go @@ -0,0 +1,126 @@ +package network + +import ( + "encoding/json" + "fmt" + "net" + "strings" + + "github.com/Azure/azure-container-networking/cni" + "github.com/Azure/azure-container-networking/cns" + "github.com/Azure/azure-container-networking/cns/cnsclient" + "github.com/Azure/azure-container-networking/common" + "github.com/Azure/azure-container-networking/log" + "github.com/Azure/azure-container-networking/network" + cniTypes "github.com/containernetworking/cni/pkg/types" + cniTypesCurr "github.com/containernetworking/cni/pkg/types/current" +) + +func SetupRoutingForMultitenancy(nwCfg *cni.NetworkConfig, cnsNetworkConfig *cns.GetNetworkContainerResponse, epInfo *network.EndpointInfo, result *cniTypesCurr.Result) { + // Adding default gateway + if nwCfg.MultiTenancy { + // if snat enabled, add 169.254.0.1 as default gateway + if nwCfg.EnableSnatOnHost { + log.Printf("add default route for multitenancy.snat on host enabled") + addDefaultRoute(cnsNetworkConfig.LocalIPConfiguration.GatewayIPAddress, epInfo, result) + } else { + _, defaultIPNet, _ := net.ParseCIDR("0.0.0.0/0") + dstIP := net.IPNet{IP: net.ParseIP("0.0.0.0"), Mask: defaultIPNet.Mask} + gwIP := net.ParseIP(cnsNetworkConfig.IPConfiguration.GatewayIPAddress) + epInfo.Routes = append(epInfo.Routes, network.RouteInfo{Dst: dstIP, Gw: gwIP}) + result.Routes = append(result.Routes, &cniTypes.Route{Dst: dstIP, GW: gwIP}) + } + } +} + +func GetContainerNetworkConfiguration(multiTenancy bool, address string, podName string, podNamespace string) (*cniTypesCurr.Result, *cns.GetNetworkContainerResponse, net.IPNet, error) { + if multiTenancy { + podNameWithoutSuffix := getPodNameWithoutSuffix(podName) + log.Printf("Podname without suffix %v", podNameWithoutSuffix) + return getContainerNetworkConfiguration(address, podNamespace, podNameWithoutSuffix) + } + + return nil, nil, net.IPNet{}, nil +} + +func getContainerNetworkConfiguration(address string, namespace string, podName string) (*cniTypesCurr.Result, *cns.GetNetworkContainerResponse, net.IPNet, error) { + cnsClient, err := cnsclient.NewCnsClient(address) + if err != nil { + log.Printf("Initializing CNS client error %v", err) + return nil, nil, net.IPNet{}, err + } + + podInfo := cns.KubernetesPodInfo{PodName: podName, PodNamespace: namespace} + orchestratorContext, err := json.Marshal(podInfo) + if err != nil { + log.Printf("Marshalling KubernetesPodInfo failed with %v", err) + return nil, nil, net.IPNet{}, err + } + + networkConfig, err := cnsClient.GetNetworkConfiguration(orchestratorContext) + if err != nil { + log.Printf("GetNetworkConfiguration failed with %v", err) + return nil, nil, net.IPNet{}, err + } + + log.Printf("Network config received from cns %+v", networkConfig) + + subnetPrefix := common.GetInterfaceSubnetWithSpecificIp(networkConfig.PrimaryInterfaceIdentifier) + if subnetPrefix == nil { + errBuf := fmt.Sprintf("Interface not found for this ip %v", networkConfig.PrimaryInterfaceIdentifier) + log.Printf(errBuf) + return nil, nil, net.IPNet{}, fmt.Errorf(errBuf) + } + + return convertToCniResult(networkConfig), networkConfig, *subnetPrefix, nil +} + +func convertToCniResult(networkConfig *cns.GetNetworkContainerResponse) *cniTypesCurr.Result { + result := &cniTypesCurr.Result{} + resultIpconfig := &cniTypesCurr.IPConfig{} + + ipconfig := networkConfig.IPConfiguration + ipAddr := net.ParseIP(ipconfig.IPSubnet.IPAddress) + + if ipAddr.To4() != nil { + resultIpconfig.Version = "4" + resultIpconfig.Address = net.IPNet{IP: ipAddr, Mask: net.CIDRMask(int(ipconfig.IPSubnet.PrefixLength), 32)} + } else { + resultIpconfig.Version = "6" + resultIpconfig.Address = net.IPNet{IP: ipAddr, Mask: net.CIDRMask(int(ipconfig.IPSubnet.PrefixLength), 128)} + } + + resultIpconfig.Gateway = net.ParseIP(ipconfig.GatewayIPAddress) + result.IPs = append(result.IPs, resultIpconfig) + result.DNS.Nameservers = ipconfig.DNSServers + + if networkConfig.Routes != nil && len(networkConfig.Routes) > 0 { + for _, route := range networkConfig.Routes { + _, routeIPnet, _ := net.ParseCIDR(route.IPAddress) + gwIP := net.ParseIP(route.GatewayIPAddress) + result.Routes = append(result.Routes, &cniTypes.Route{Dst: *routeIPnet, GW: gwIP}) + } + } + + for _, ipRouteSubnet := range networkConfig.CnetAddressSpace { + log.Printf("Adding cnetAddressspace routes %v %v", ipRouteSubnet.IPAddress, ipRouteSubnet.PrefixLength) + routeIPnet := net.IPNet{IP: net.ParseIP(ipRouteSubnet.IPAddress), Mask: net.CIDRMask(int(ipRouteSubnet.PrefixLength), 32)} + gwIP := net.ParseIP(ipconfig.GatewayIPAddress) + result.Routes = append(result.Routes, &cniTypes.Route{Dst: routeIPnet, GW: gwIP}) + } + + return result +} + +func getPodNameWithoutSuffix(podName string) string { + nameSplit := strings.Split(podName, "-") + log.Printf("namesplit %v", nameSplit) + if len(nameSplit) > 2 { + nameSplit = nameSplit[:len(nameSplit)-2] + } else { + return podName + } + + log.Printf("Pod name after splitting based on - : %v", nameSplit) + return strings.Join(nameSplit, "-") +} diff --git a/cni/network/network.go b/cni/network/network.go index cd173609c4..63be90e394 100644 --- a/cni/network/network.go +++ b/cni/network/network.go @@ -9,12 +9,12 @@ import ( "strings" "github.com/Azure/azure-container-networking/cni" + "github.com/Azure/azure-container-networking/cns" "github.com/Azure/azure-container-networking/common" "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/network" "github.com/Azure/azure-container-networking/platform" "github.com/Azure/azure-container-networking/telemetry" - cniSkel "github.com/containernetworking/cni/pkg/skel" cniTypes "github.com/containernetworking/cni/pkg/types" cniTypesCurr "github.com/containernetworking/cni/pkg/types/current" @@ -22,7 +22,8 @@ import ( const ( // Plugin name. - name = "azure-vnet" + name = "azure-vnet" + dockerNetworkOption = "com.docker.network.generic" // Supported IP version. Currently support only IPv4 ipVersion = "4" @@ -92,6 +93,7 @@ func (plugin *netPlugin) Stop() { plugin.nm.Uninitialize() plugin.Uninitialize() log.Printf("[cni-net] Plugin stopped.") + log.Close() } // FindMasterInterface returns the name of the master interface. @@ -136,12 +138,13 @@ func GetEndpointID(args *cniSkel.CmdArgs) string { // Add handles CNI add commands. func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error { var ( - result *cniTypesCurr.Result - err error - nwCfg *cni.NetworkConfig - ipconfig *cniTypesCurr.IPConfig - epInfo *network.EndpointInfo - iface *cniTypesCurr.Interface + result *cniTypesCurr.Result + err error + nwCfg *cni.NetworkConfig + epInfo *network.EndpointInfo + iface *cniTypesCurr.Interface + subnetPrefix net.IPNet + cnsNetworkConfig *cns.GetNetworkContainerResponse ) log.Printf("[cni-net] Processing ADD command with args {ContainerID:%v Netns:%v IfName:%v Args:%v Path:%v}.", @@ -149,11 +152,18 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error { defer func() { // Add Interfaces to result. + if result == nil { + result = &cniTypesCurr.Result{} + } + iface = &cniTypesCurr.Interface{ Name: args.IfName, } + result.Interfaces = append(result.Interfaces, iface) + addSnatInterface(nwCfg, result) + // Convert result to the requested CNI version. res, err := result.GetAsVersion(nwCfg.CNIVersion) if err != nil { @@ -199,43 +209,58 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error { networkId := nwCfg.Name endpointId := GetEndpointID(args) + result, cnsNetworkConfig, subnetPrefix, err = GetContainerNetworkConfiguration(nwCfg.MultiTenancy, "", k8sPodName, k8sNamespace) + if err != nil { + log.Printf("GetContainerNetworkConfiguration failed for podname %v namespace %v with error %v", k8sPodName, k8sNamespace, err) + return err + } + + log.Printf("PrimaryInterfaceIdentifier :%v", subnetPrefix.IP.String()) + + policies := cni.GetPoliciesFromNwCfg(nwCfg.AdditionalArgs) + + // Check whether the network already exists. nwInfo, nwInfoErr := plugin.nm.GetNetworkInfo(networkId) - /* Handle consecutive ADD calls for infrastructure containers. - * This is a temporary work around for issue #57253 of Kubernetes. - * We can delete this if statement once they fix it. - * Issue link: https://github.com/kubernetes/kubernetes/issues/57253 - */ - epInfo, _ = plugin.nm.GetEndpointInfo(networkId, endpointId) - if epInfo != nil { - result, err = handleConsecutiveAdd(args.ContainerID, endpointId, nwInfo, nwCfg) - if err != nil { - return err - } + if nwInfoErr == nil { + /* Handle consecutive ADD calls for infrastructure containers. + * This is a temporary work around for issue #57253 of Kubernetes. + * We can delete this if statement once they fix it. + * Issue link: https://github.com/kubernetes/kubernetes/issues/57253 + */ + epInfo, _ := plugin.nm.GetEndpointInfo(networkId, endpointId) + if epInfo != nil { + result, err = handleConsecutiveAdd(args.ContainerID, endpointId, nwInfo, nwCfg) + if err != nil { + log.Printf("handleConsecutiveAdd failed with error %v", err) + return err + } - if result != nil { - return nil + if result != nil { + return nil + } } } - policies := cni.GetPoliciesFromNwCfg(nwCfg.AdditionalArgs) - - // Check whether the network already exists. if nwInfoErr != nil { // Network does not exist. + log.Printf("[cni-net] Creating network %v.", networkId) - // Call into IPAM plugin to allocate an address pool for the network. - result, err = plugin.DelegateAdd(nwCfg.Ipam.Type, nwCfg) - if err != nil { - err = plugin.Errorf("Failed to allocate pool: %v", err) - return err + if !nwCfg.MultiTenancy { + // Call into IPAM plugin to allocate an address pool for the network. + result, err = plugin.DelegateAdd(nwCfg.Ipam.Type, nwCfg) + if err != nil { + err = plugin.Errorf("Failed to allocate pool: %v", err) + return err + } + + // Derive the subnet prefix from allocated IP address. + subnetPrefix = result.IPs[0].Address } - // Derive the subnet prefix from allocated IP address. - ipconfig = result.IPs[0] - subnetPrefix := ipconfig.Address - subnetPrefix.IP = subnetPrefix.IP.Mask(subnetPrefix.Mask) + ipconfig := result.IPs[0] + gateway := ipconfig.Gateway // On failure, call into IPAM plugin to release the address and address pool. defer func() { @@ -249,6 +274,7 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error { } }() + subnetPrefix.IP = subnetPrefix.IP.Mask(subnetPrefix.Mask) // Find the master interface. masterIfName := plugin.findMasterInterface(nwCfg, &subnetPrefix) if masterIfName == "" { @@ -272,10 +298,11 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error { network.SubnetInfo{ Family: platform.AfINET, Prefix: subnetPrefix, - Gateway: ipconfig.Gateway, + Gateway: gateway, }, }, - BridgeName: nwCfg.Bridge, + BridgeName: nwCfg.Bridge, + EnableSnatOnHost: nwCfg.EnableSnatOnHost, DNS: network.DNSInfo{ Servers: nwCfg.DNS.Nameservers, Suffix: k8sNamespace + "." + strings.Join(nwCfg.DNS.Search, ","), @@ -283,6 +310,9 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error { Policies: policies, } + nwInfo.Options = make(map[string]interface{}) + setNetworkOptions(cnsNetworkConfig, &nwInfo) + err = plugin.nm.CreateNetwork(&nwInfo) if err != nil { err = plugin.Errorf("Failed to create network: %v", err) @@ -291,30 +321,43 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error { log.Printf("[cni-net] Created network %v with subnet %v.", networkId, subnetPrefix.String()) } else { - // Network already exists. - subnetPrefix := nwInfo.Subnets[0].Prefix.String() - log.Printf("[cni-net] Found network %v with subnet %v.", networkId, subnetPrefix) + if !nwCfg.MultiTenancy { + // Network already exists. + subnetPrefix := nwInfo.Subnets[0].Prefix.String() + log.Printf("[cni-net] Found network %v with subnet %v.", networkId, subnetPrefix) + + // Call into IPAM plugin to allocate an address for the endpoint. + nwCfg.Ipam.Subnet = subnetPrefix + result, err = plugin.DelegateAdd(nwCfg.Ipam.Type, nwCfg) + if err != nil { + err = plugin.Errorf("Failed to allocate address: %v", err) + return err + } - // Call into IPAM plugin to allocate an address for the endpoint. - nwCfg.Ipam.Subnet = subnetPrefix - result, err = plugin.DelegateAdd(nwCfg.Ipam.Type, nwCfg) - if err != nil { - err = plugin.Errorf("Failed to allocate address: %v", err) - return err - } + ipconfig := result.IPs[0] - ipconfig = result.IPs[0] + // On failure, call into IPAM plugin to release the address. + defer func() { + if err != nil { + nwCfg.Ipam.Address = ipconfig.Address.IP.String() + plugin.DelegateDel(nwCfg.Ipam.Type, nwCfg) + } + }() + } + } - // On failure, call into IPAM plugin to release the address. - defer func() { - if err != nil { - nwCfg.Ipam.Address = ipconfig.Address.IP.String() - plugin.DelegateDel(nwCfg.Ipam.Type, nwCfg) - } - }() + epInfo = &network.EndpointInfo{ + Id: endpointId, + ContainerID: args.ContainerID, + NetNsPath: args.Netns, + IfName: args.IfName, + EnableSnatOnHost: nwCfg.EnableSnatOnHost, } + epInfo.Data = make(map[string]interface{}) + + vethName := fmt.Sprintf("%s.%s", k8sNamespace, k8sPodName) + setEndpointOptions(cnsNetworkConfig, epInfo, vethName) - // Initialize endpoint info. var dns network.DNSInfo if (len(nwCfg.DNS.Search) == 0) != (len(nwCfg.DNS.Nameservers) == 0) { err = plugin.Errorf("Wrong DNS configuration: %+v", nwCfg.DNS) @@ -333,14 +376,8 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error { } } - epInfo = &network.EndpointInfo{ - Id: endpointId, - ContainerID: args.ContainerID, - NetNsPath: args.Netns, - IfName: args.IfName, - DNS: dns, - Policies: policies, - } + epInfo.DNS = dns + epInfo.Policies = policies // Populate addresses. for _, ipconfig := range result.IPs { @@ -352,8 +389,7 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error { epInfo.Routes = append(epInfo.Routes, network.RouteInfo{Dst: route.Dst, Gw: route.GW}) } - epInfo.Data = make(map[string]interface{}) - epInfo.Data[network.OptVethName] = fmt.Sprintf("%s.%s", k8sNamespace, k8sPodName) + SetupRoutingForMultitenancy(nwCfg, cnsNetworkConfig, epInfo, result) // Create the endpoint. log.Printf("[cni-net] Creating endpoint %v.", epInfo.Id) diff --git a/cni/network/network_linux.go b/cni/network/network_linux.go index aa8110ec52..e1b9fb5af4 100644 --- a/cni/network/network_linux.go +++ b/cni/network/network_linux.go @@ -1,12 +1,61 @@ package network import ( + "net" + "strconv" + "github.com/Azure/azure-container-networking/cni" + "github.com/Azure/azure-container-networking/cns" + "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/network" + cniTypes "github.com/containernetworking/cni/pkg/types" cniTypesCurr "github.com/containernetworking/cni/pkg/types/current" ) +const ( + snatInterface = "eth1" +) + // handleConsecutiveAdd is a dummy function for Linux platform. func handleConsecutiveAdd(containerId, endpointId string, nwInfo *network.NetworkInfo, nwCfg *cni.NetworkConfig) (*cniTypesCurr.Result, error) { return nil, nil } + +func addDefaultRoute(gwIPString string, epInfo *network.EndpointInfo, result *cniTypesCurr.Result) { + _, defaultIPNet, _ := net.ParseCIDR("0.0.0.0/0") + dstIP := net.IPNet{IP: net.ParseIP("0.0.0.0"), Mask: defaultIPNet.Mask} + gwIP := net.ParseIP(gwIPString) + epInfo.Routes = append(epInfo.Routes, network.RouteInfo{Dst: dstIP, Gw: gwIP, DevName: snatInterface}) + result.Routes = append(result.Routes, &cniTypes.Route{Dst: dstIP, GW: gwIP}) +} + +func setNetworkOptions(cnsNwConfig *cns.GetNetworkContainerResponse, nwInfo *network.NetworkInfo) { + if cnsNwConfig != nil && cnsNwConfig.MultiTenancyInfo.ID != 0 { + log.Printf("Setting Network Options") + vlanMap := make(map[string]interface{}) + vlanMap[network.VlanIDKey] = strconv.Itoa(cnsNwConfig.MultiTenancyInfo.ID) + vlanMap[network.SnatBridgeIPKey] = cnsNwConfig.LocalIPConfiguration.GatewayIPAddress + "/" + strconv.Itoa(int(cnsNwConfig.LocalIPConfiguration.IPSubnet.PrefixLength)) + nwInfo.Options[dockerNetworkOption] = vlanMap + } +} + +func setEndpointOptions(cnsNwConfig *cns.GetNetworkContainerResponse, epInfo *network.EndpointInfo, vethName string) { + if cnsNwConfig != nil && cnsNwConfig.MultiTenancyInfo.ID != 0 { + log.Printf("Setting Endpoint Options") + epInfo.Data[network.VlanIDKey] = cnsNwConfig.MultiTenancyInfo.ID + epInfo.Data[network.LocalIPKey] = cnsNwConfig.LocalIPConfiguration.IPSubnet.IPAddress + "/" + strconv.Itoa(int(cnsNwConfig.LocalIPConfiguration.IPSubnet.PrefixLength)) + epInfo.Data[network.SnatBridgeIPKey] = cnsNwConfig.LocalIPConfiguration.GatewayIPAddress + "/" + strconv.Itoa(int(cnsNwConfig.LocalIPConfiguration.IPSubnet.PrefixLength)) + } + + epInfo.Data[network.OptVethName] = vethName +} + +func addSnatInterface(nwCfg *cni.NetworkConfig, result *cniTypesCurr.Result) { + if nwCfg != nil && nwCfg.MultiTenancy { + snatIface := &cniTypesCurr.Interface{ + Name: snatInterface, + } + + result.Interfaces = append(result.Interfaces, snatIface) + } +} diff --git a/cni/network/network_windows.go b/cni/network/network_windows.go index d98be65894..ed2634ade1 100644 --- a/cni/network/network_windows.go +++ b/cni/network/network_windows.go @@ -4,6 +4,7 @@ import ( "net" "github.com/Azure/azure-container-networking/cni" + "github.com/Azure/azure-container-networking/cns" "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/network" "github.com/Microsoft/hcsshim" @@ -56,3 +57,15 @@ func handleConsecutiveAdd(containerId, endpointId string, nwInfo *network.Networ return nil, nil } + +func addDefaultRoute(gwIPString string, epInfo *network.EndpointInfo, result *cniTypesCurr.Result) { +} + +func setNetworkOptions(cnsNwConfig *cns.GetNetworkContainerResponse, nwInfo *network.NetworkInfo) { +} + +func setEndpointOptions(cnsNwConfig *cns.GetNetworkContainerResponse, epInfo *network.EndpointInfo, vethName string) { +} + +func addSnatInterface(nwCfg *cni.NetworkConfig, result *cniTypesCurr.Result) { +} diff --git a/cnm/network/network.go b/cnm/network/network.go index 29a0b0aa59..f51c86fe3a 100644 --- a/cnm/network/network.go +++ b/cnm/network/network.go @@ -231,6 +231,8 @@ func (plugin *netPlugin) createEndpoint(w http.ResponseWriter, r *http.Request) IPAddresses: []net.IPNet{*ipv4Address}, } + epInfo.Data = make(map[string]interface{}) + err = plugin.nm.CreateEndpoint(req.NetworkID, &epInfo) if err != nil { plugin.SendErrorResponse(w, err) diff --git a/cnm/plugin/main.go b/cnm/plugin/main.go index 07d5d91315..995b957545 100644 --- a/cnm/plugin/main.go +++ b/cnm/plugin/main.go @@ -214,4 +214,6 @@ func main() { if ipamPlugin != nil { ipamPlugin.Stop() } + + log.Close() } diff --git a/cns/dnccontract.go b/cns/NetworkContainerContract.go similarity index 73% rename from cns/dnccontract.go rename to cns/NetworkContainerContract.go index 863255b0cc..117012b9e4 100644 --- a/cns/dnccontract.go +++ b/cns/NetworkContainerContract.go @@ -4,15 +4,18 @@ import "encoding/json" // Container Network Service DNC Contract const ( - CreateOrUpdateNetworkContainer = "/network/createorupdatenetworkcontainer" - DeleteNetworkContainer = "/network/deletenetworkcontainer" - GetNetworkContainerStatus = "/network/getnetworkcontainerstatus" - GetInterfaceForContainer = "/network/getinterfaceforcontainer" + SetOrchestratorType = "/network/setorchestratortype" + CreateOrUpdateNetworkContainer = "/network/createorupdatenetworkcontainer" + DeleteNetworkContainer = "/network/deletenetworkcontainer" + GetNetworkContainerStatus = "/network/getnetworkcontainerstatus" + GetInterfaceForContainer = "/network/getinterfaceforcontainer" + GetNetworkContainerByOrchestratorContext = "/network/getnetworkcontainerbyorchestratorcontext" ) // NetworkContainer Types const ( AzureContainerInstance = "AzureContainerInstance" + WebApps = "WebApps" ) // Orchestrator Types @@ -20,6 +23,12 @@ const ( Kubernetes = "Kubernetes" ) +// Encap Types +const ( + Vlan = "Vlan" + Vxlan = "Vxlan" +) + // CreateNetworkContainerRequest specifies request to create a network container or network isolation boundary. type CreateNetworkContainerRequest struct { Version string @@ -27,19 +36,14 @@ type CreateNetworkContainerRequest struct { NetworkContainerid string // Mandatory input. PrimaryInterfaceIdentifier string // Primary CA. AuthorizationToken string - OrchestratorInfo OrchestratorInfo + LocalIPConfiguration IPConfiguration + OrchestratorContext json.RawMessage IPConfiguration IPConfiguration MultiTenancyInfo MultiTenancyInfo - VnetAddressSpace []IPSubnet // To setup SNAT (should include service endpoint vips). + CnetAddressSpace []IPSubnet // To setup SNAT (should include service endpoint vips). Routes []Route } -// OrchestratorInfo contains orchestrator type which is used to cast OrchestratorContext. -type OrchestratorInfo struct { - OrchestratorType string - OrchestratorContext json.RawMessage -} - // KubernetesPodInfo is an OrchestratorContext that holds PodName and PodNamespace. type KubernetesPodInfo struct { PodName string @@ -72,6 +76,11 @@ type Route struct { InterfaceToUse string } +// SetOrchestratorTypeRequest specifies the orchestrator type for the node. +type SetOrchestratorTypeRequest struct { + OrchestratorType string +} + // CreateNetworkContainerResponse specifies response of creating a network container. type CreateNetworkContainerResponse struct { Response Response @@ -92,11 +101,19 @@ type GetNetworkContainerStatusResponse struct { // GetNetworkContainerRequest specifies the details about the request to retrieve a specifc network container. type GetNetworkContainerRequest struct { + NetworkContainerid string + OrchestratorContext json.RawMessage } // GetNetworkContainerResponse describes the response to retrieve a specifc network container. type GetNetworkContainerResponse struct { - Response Response + IPConfiguration IPConfiguration + Routes []Route + CnetAddressSpace []IPSubnet + MultiTenancyInfo MultiTenancyInfo + PrimaryInterfaceIdentifier string + LocalIPConfiguration IPConfiguration + Response Response } // DeleteNetworkContainerRequest specifies the details about the request to delete a specifc network container. @@ -117,7 +134,7 @@ type GetInterfaceForContainerRequest struct { // GetInterfaceForContainerResponse specifies the interface for a given container ID. type GetInterfaceForContainerResponse struct { NetworkInterface NetworkInterface - VnetAddressSpace []IPSubnet + CnetAddressSpace []IPSubnet Response Response } diff --git a/cns/cnsclient/cnsclient.go b/cns/cnsclient/cnsclient.go new file mode 100644 index 0000000000..2a24c50134 --- /dev/null +++ b/cns/cnsclient/cnsclient.go @@ -0,0 +1,79 @@ +package cnsclient + +import ( + "bytes" + "encoding/json" + "fmt" + "net/http" + + "github.com/Azure/azure-container-networking/cns" + "github.com/Azure/azure-container-networking/log" +) + +// CNSClient specifies a client to connect to Ipam Plugin. +type CNSClient struct { + connectionURL string +} + +const ( + defaultCnsURL = "http://localhost:10090" +) + +// NewCnsClient create a new cns client. +func NewCnsClient(url string) (*CNSClient, error) { + if url == "" { + url = defaultCnsURL + } + + return &CNSClient{ + connectionURL: url, + }, nil +} + +// GetNetworkConfiguration Request to get network config. +func (cnsClient *CNSClient) GetNetworkConfiguration(orchestratorContext []byte) (*cns.GetNetworkContainerResponse, error) { + var body bytes.Buffer + + httpc := &http.Client{} + url := cnsClient.connectionURL + cns.GetNetworkContainerByOrchestratorContext + log.Printf("GetNetworkConfiguration url %v", url) + + payload := &cns.GetNetworkContainerRequest{ + OrchestratorContext: orchestratorContext, + } + + err := json.NewEncoder(&body).Encode(payload) + if err != nil { + log.Printf("encoding json failed with %v", err) + return nil, err + } + + res, err := httpc.Post(url, "application/json", &body) + if err != nil { + log.Printf("[Azure CNSClient] HTTP Post returned error %v", err.Error()) + return nil, err + } + + defer res.Body.Close() + + if res.StatusCode != http.StatusOK { + errMsg := fmt.Sprintf("[Azure CNSClient] GetNetworkConfiguration invalid http status code: %v", res.StatusCode) + log.Printf(errMsg) + return nil, fmt.Errorf(errMsg) + } + + var resp cns.GetNetworkContainerResponse + + err = json.NewDecoder(res.Body).Decode(&resp) + if err != nil { + log.Printf("[Azure CNSClient] Error received while parsing GetNetworkConfiguration response resp:%v err:%v", res.Body, err.Error()) + return nil, err + } + + if resp.Response.ReturnCode != 0 { + log.Printf("[Azure CNSClient] GetNetworkConfiguration received error response :%v", resp.Response.Message) + return nil, fmt.Errorf(resp.Response.Message) + } + + return &resp, nil +} diff --git a/cns/common/utils.go b/cns/common/utils.go deleted file mode 100644 index b807929e51..0000000000 --- a/cns/common/utils.go +++ /dev/null @@ -1,29 +0,0 @@ -package common - -import ( - "fmt" - "os/exec" - - "github.com/Azure/azure-container-networking/log" -) - -func ExecuteShellCommand(command string) error { - log.Printf("[Azure-CNS] %s", command) - cmd := exec.Command("sh", "-c", command) - err := cmd.Start() - if err != nil { - return err - } - return cmd.Wait() -} - -func SetOutboundSNAT(subnet string) error { - cmd := fmt.Sprintf("iptables -t nat -A POSTROUTING -m iprange ! --dst-range 168.63.129.16 -m addrtype ! --dst-type local ! -d %v -j MASQUERADE", - subnet) - err := ExecuteShellCommand(cmd) - if err != nil { - log.Printf("SNAT Iptable rule was not set") - return err - } - return nil -} diff --git a/cns/dockerclient/dockerclient.go b/cns/dockerclient/dockerclient.go index 86384d848c..eba45b5780 100644 --- a/cns/dockerclient/dockerclient.go +++ b/cns/dockerclient/dockerclient.go @@ -9,7 +9,7 @@ import ( "fmt" "net/http" - "github.com/Azure/azure-container-networking/cns/common" + "github.com/Azure/azure-container-networking/platform" "github.com/Azure/azure-container-networking/cns/imdsclient" "github.com/Azure/azure-container-networking/log" ) @@ -140,7 +140,7 @@ func (dockerClient *DockerClient) CreateNetwork(networkName string, nicInfo *imd } if enableSnat { - err = common.SetOutboundSNAT(nicInfo.Subnet) + err = platform.SetOutboundSNAT(nicInfo.Subnet) if err != nil { log.Printf("[Azure CNS] Error setting up SNAT outbound rule %v", err) } @@ -179,7 +179,7 @@ func (dockerClient *DockerClient) DeleteNetwork(networkName string) error { cmd := fmt.Sprintf("iptables -t nat -D POSTROUTING -m iprange ! --dst-range 168.63.129.16 -m addrtype ! --dst-type local ! -d %v -j MASQUERADE", primaryNic.Subnet) - err = common.ExecuteShellCommand(cmd) + _, err = platform.ExecuteCommand(cmd) if err != nil { log.Printf("[Azure CNS] Error Removing Outbound SNAT rule %v", err) } diff --git a/cns/restserver/api.go b/cns/restserver/api.go index 1b802515db..6db8f6f5f5 100644 --- a/cns/restserver/api.go +++ b/cns/restserver/api.go @@ -19,5 +19,6 @@ const ( NetworkContainerNotSpecified = 16 CallToHostFailed = 17 UnknownContainerID = 18 + UnsupportedOrchestratorType = 19 UnexpectedError = 99 ) diff --git a/cns/restserver/restserver.go b/cns/restserver/restserver.go index 23353ad454..542e5dfe99 100644 --- a/cns/restserver/restserver.go +++ b/cns/restserver/restserver.go @@ -4,6 +4,7 @@ package restserver import ( + "encoding/json" "fmt" "net" "net/http" @@ -51,12 +52,14 @@ type containerstatus struct { // httpRestServiceState contains the state we would like to persist. type httpRestServiceState struct { - Location string - NetworkType string - Initialized bool - ContainerStatus map[string]containerstatus - Networks map[string]*networkInfo - TimeStamp time.Time + Location string + NetworkType string + OrchestratorType string + Initialized bool + ContainerIDByOrchestratorContext map[string]string // OrchestratorContext is key and value is NetworkContainerID. + ContainerStatus map[string]containerstatus // NetworkContainerID is key. + Networks map[string]*networkInfo + TimeStamp time.Time } type networkInfo struct { @@ -143,6 +146,8 @@ func (service *httpRestService) Start(config *common.ServiceConfig) error { listener.AddHandler(cns.DeleteNetworkContainer, service.deleteNetworkContainer) listener.AddHandler(cns.GetNetworkContainerStatus, service.getNetworkContainerStatus) listener.AddHandler(cns.GetInterfaceForContainer, service.getInterfaceForContainer) + listener.AddHandler(cns.SetOrchestratorType, service.setOrchestratorType) + listener.AddHandler(cns.GetNetworkContainerByOrchestratorContext, service.getNetworkContainerByOrchestratorContext) // handlers for v0.2 listener.AddHandler(cns.V2Prefix+cns.SetEnvironmentPath, service.setEnvironment) @@ -157,6 +162,8 @@ func (service *httpRestService) Start(config *common.ServiceConfig) error { listener.AddHandler(cns.V2Prefix+cns.DeleteNetworkContainer, service.deleteNetworkContainer) listener.AddHandler(cns.V2Prefix+cns.GetNetworkContainerStatus, service.getNetworkContainerStatus) listener.AddHandler(cns.V2Prefix+cns.GetInterfaceForContainer, service.getInterfaceForContainer) + listener.AddHandler(cns.V2Prefix+cns.SetOrchestratorType, service.setOrchestratorType) + listener.AddHandler(cns.V2Prefix+cns.GetNetworkContainerByOrchestratorContext, service.getNetworkContainerByOrchestratorContext) log.Printf("[Azure CNS] Listening.") return nil @@ -798,17 +805,104 @@ func (service *httpRestService) restoreState() error { return nil } +func (service *httpRestService) setOrchestratorType(w http.ResponseWriter, r *http.Request) { + log.Printf("[Azure CNS] setOrchestratorType") + + var req cns.SetOrchestratorTypeRequest + returnMessage := "" + returnCode := 0 + + err := service.Listener.Decode(w, r, &req) + if err != nil { + return + } + + service.lock.Lock() + + switch req.OrchestratorType { + case cns.Kubernetes: + service.state.OrchestratorType = cns.Kubernetes + service.saveState() + break + case cns.WebApps: + service.state.OrchestratorType = cns.WebApps + service.saveState() + break + default: + returnMessage = fmt.Sprintf("Invalid Orchestrator type %v", req.OrchestratorType) + returnCode = UnsupportedOrchestratorType + } + + service.lock.Unlock() + + resp := cns.Response{ + ReturnCode: returnCode, + Message: returnMessage, + } + + err = service.Listener.Encode(w, &resp) + log.Response(service.Name, resp, err) +} + +func (service *httpRestService) saveNetworkContainerGoalState(req cns.CreateNetworkContainerRequest) (int, string) { + // we don't want to overwrite what other calls may have written + service.lock.Lock() + defer service.lock.Unlock() + + existing, ok := service.state.ContainerStatus[req.NetworkContainerid] + var hostVersion string + if ok { + hostVersion = existing.HostVersion + } + + if service.state.ContainerStatus == nil { + service.state.ContainerStatus = make(map[string]containerstatus) + } + + service.state.ContainerStatus[req.NetworkContainerid] = + containerstatus{ + ID: req.NetworkContainerid, + VMVersion: req.Version, + CreateNetworkContainerRequest: req, + HostVersion: hostVersion} + + if req.NetworkContainerType == cns.AzureContainerInstance { + switch service.state.OrchestratorType { + case cns.Kubernetes: + var podInfo cns.KubernetesPodInfo + err := json.Unmarshal(req.OrchestratorContext, &podInfo) + if err != nil { + errBuf := fmt.Sprintf("Unmarshalling AzureContainerInstanceInfo failed with error %v", err) + return UnexpectedError, errBuf + } + + log.Printf("Pod info %v", podInfo) + + if service.state.ContainerIDByOrchestratorContext == nil { + service.state.ContainerIDByOrchestratorContext = make(map[string]string) + } + + service.state.ContainerIDByOrchestratorContext[podInfo.PodName+podInfo.PodNamespace] = req.NetworkContainerid + break + + default: + log.Printf("Invalid orchestrator type %v", service.state.OrchestratorType) + } + } + + service.saveState() + return 0, "" +} + func (service *httpRestService) createOrUpdateNetworkContainer(w http.ResponseWriter, r *http.Request) { log.Printf("[Azure CNS] createOrUpdateNetworkContainer") var req cns.CreateNetworkContainerRequest - returnMessage := "" returnCode := 0 - err := service.Listener.Decode(w, r, &req) + err := service.Listener.Decode(w, r, &req) log.Request(service.Name, &req, err) - if err != nil { return } @@ -820,41 +914,19 @@ func (service *httpRestService) createOrUpdateNetworkContainer(w http.ResponseWr switch r.Method { case "POST": - nc := service.networkContainer - err := nc.Create(req) - - if err != nil { - returnMessage = fmt.Sprintf("[Azure CNS] Error. CreateOrUpdateNetworkContainer failed %v", err.Error()) - returnCode = UnexpectedError - break - } - - // we don't want to overwrite what other calls may have written - service.lock.Lock() - defer service.lock.Unlock() - - existing, ok := service.state.ContainerStatus[req.NetworkContainerid] - var hostVersion string - if ok { - hostVersion = existing.HostVersion - } - - if service.state.ContainerStatus == nil { - service.state.ContainerStatus = make(map[string]containerstatus) + if req.NetworkContainerType == cns.WebApps { + nc := service.networkContainer + if err := nc.Create(req); err != nil { + returnMessage = fmt.Sprintf("[Azure CNS] Error. CreateOrUpdateNetworkContainer failed %v", err.Error()) + returnCode = UnexpectedError + break + } } - - service.state.ContainerStatus[req.NetworkContainerid] = - containerstatus{ - ID: req.NetworkContainerid, - VMVersion: req.Version, - CreateNetworkContainerRequest: req, - HostVersion: hostVersion} - service.saveState() + returnCode, returnMessage = service.saveNetworkContainerGoalState(req) default: returnMessage = "[Azure CNS] Error. CreateOrUpdateNetworkContainer did not receive a POST." returnCode = InvalidParameter - } resp := cns.Response{ @@ -868,17 +940,15 @@ func (service *httpRestService) createOrUpdateNetworkContainer(w http.ResponseWr log.Response(service.Name, reserveResp, err) } -func (service *httpRestService) getNetworkContainer(w http.ResponseWriter, r *http.Request) { - log.Printf("[Azure CNS] getNetworkContainer") +func (service *httpRestService) getNetworkContainerByID(w http.ResponseWriter, r *http.Request) { + log.Printf("[Azure CNS] getNetworkContainerByID") var req cns.GetNetworkContainerRequest - returnMessage := "" returnCode := 0 - err := service.Listener.Decode(w, r, &req) + err := service.Listener.Decode(w, r, &req) log.Request(service.Name, &req, err) - if err != nil { return } @@ -890,22 +960,81 @@ func (service *httpRestService) getNetworkContainer(w http.ResponseWriter, r *ht reserveResp := &cns.GetNetworkContainerResponse{Response: resp} err = service.Listener.Encode(w, &reserveResp) - log.Response(service.Name, reserveResp, err) +} + +func (service *httpRestService) getNetworkContainerResponse(req cns.GetNetworkContainerRequest) cns.GetNetworkContainerResponse { + var containerID string + var getNetworkContainerResponse cns.GetNetworkContainerResponse + + switch service.state.OrchestratorType { + case cns.Kubernetes: + var podInfo cns.KubernetesPodInfo + err := json.Unmarshal(req.OrchestratorContext, &podInfo) + if err != nil { + getNetworkContainerResponse.Response.ReturnCode = UnexpectedError + getNetworkContainerResponse.Response.Message = fmt.Sprintf("Unmarshalling orchestrator context failed with error %v", err) + return getNetworkContainerResponse + } + + log.Printf("pod info %+v", podInfo) + containerID = service.state.ContainerIDByOrchestratorContext[podInfo.PodName+podInfo.PodNamespace] + log.Printf("containerid %v", containerID) + break + + default: + getNetworkContainerResponse.Response.ReturnCode = UnsupportedOrchestratorType + getNetworkContainerResponse.Response.Message = fmt.Sprintf("Invalid orchestrator type %v", service.state.OrchestratorType) + return getNetworkContainerResponse + } + + containerStatus := service.state.ContainerStatus + containerDetails, ok := containerStatus[containerID] + if !ok { + getNetworkContainerResponse.Response.ReturnCode = UnknownContainerID + getNetworkContainerResponse.Response.Message = "NetworkContainer doesn't exist." + return getNetworkContainerResponse + } + savedReq := containerDetails.CreateNetworkContainerRequest + getNetworkContainerResponse = cns.GetNetworkContainerResponse{ + IPConfiguration: savedReq.IPConfiguration, + Routes: savedReq.Routes, + CnetAddressSpace: savedReq.CnetAddressSpace, + MultiTenancyInfo: savedReq.MultiTenancyInfo, + PrimaryInterfaceIdentifier: savedReq.PrimaryInterfaceIdentifier, + LocalIPConfiguration: savedReq.LocalIPConfiguration, + } + + return getNetworkContainerResponse +} + +func (service *httpRestService) getNetworkContainerByOrchestratorContext(w http.ResponseWriter, r *http.Request) { + log.Printf("[Azure CNS] getNetworkContainerByOrchestratorContext") + + var req cns.GetNetworkContainerRequest + + err := service.Listener.Decode(w, r, &req) + log.Request(service.Name, &req, err) + if err != nil { + return + } + + getNetworkContainerResponse := service.getNetworkContainerResponse(req) + + err = service.Listener.Encode(w, &getNetworkContainerResponse) + log.Response(service.Name, getNetworkContainerResponse, err) } func (service *httpRestService) deleteNetworkContainer(w http.ResponseWriter, r *http.Request) { log.Printf("[Azure CNS] deleteNetworkContainer") var req cns.DeleteNetworkContainerRequest - returnMessage := "" returnCode := 0 - err := service.Listener.Decode(w, r, &req) + err := service.Listener.Decode(w, r, &req) log.Request(service.Name, &req, err) - if err != nil { return } @@ -917,20 +1046,40 @@ func (service *httpRestService) deleteNetworkContainer(w http.ResponseWriter, r switch r.Method { case "POST": - nc := service.networkContainer - err := nc.Delete(req.NetworkContainerid) + var containerStatus containerstatus + var ok bool - if err != nil { - returnMessage = fmt.Sprintf("[Azure CNS] Error. DeleteNetworkContainer failed %v", err.Error()) - returnCode = UnexpectedError + if containerStatus, ok = service.state.ContainerStatus[req.NetworkContainerid]; !ok { + log.Printf("Not able to retrieve network container details for this container id %v", req.NetworkContainerid) break - } else { - service.lock.Lock() - if service.state.ContainerStatus != nil { - delete(service.state.ContainerStatus, req.NetworkContainerid) + } + + if containerStatus.CreateNetworkContainerRequest.NetworkContainerType == cns.WebApps { + nc := service.networkContainer + if err := nc.Delete(req.NetworkContainerid); err != nil { + returnMessage = fmt.Sprintf("[Azure CNS] Error. DeleteNetworkContainer failed %v", err.Error()) + returnCode = UnexpectedError + break } - service.lock.Unlock() } + + service.lock.Lock() + defer service.lock.Unlock() + + if service.state.ContainerStatus != nil { + delete(service.state.ContainerStatus, req.NetworkContainerid) + } + + if service.state.ContainerIDByOrchestratorContext != nil { + for orchestratorContext, networkContainerID := range service.state.ContainerIDByOrchestratorContext { + if networkContainerID == req.NetworkContainerid { + delete(service.state.ContainerIDByOrchestratorContext, orchestratorContext) + break + } + } + } + + service.saveState() break default: returnMessage = "[Azure CNS] Error. DeleteNetworkContainer did not receive a POST." @@ -954,9 +1103,9 @@ func (service *httpRestService) getNetworkContainerStatus(w http.ResponseWriter, var req cns.GetNetworkContainerStatusRequest returnMessage := "" returnCode := 0 + err := service.Listener.Decode(w, r, &req) log.Request(service.Name, &req, err) - if err != nil { return } @@ -1017,9 +1166,9 @@ func (service *httpRestService) getInterfaceForContainer(w http.ResponseWriter, var req cns.GetInterfaceForContainerRequest returnMessage := "" returnCode := 0 + err := service.Listener.Decode(w, r, &req) log.Request(service.Name, &req, err) - if err != nil { return } @@ -1028,12 +1177,12 @@ func (service *httpRestService) getInterfaceForContainer(w http.ResponseWriter, containerDetails, ok := containerInfo[req.NetworkContainerID] var interfaceName string var ipaddress string - var vnetSpace []cns.IPSubnet + var cnetSpace []cns.IPSubnet if ok { savedReq := containerDetails.CreateNetworkContainerRequest interfaceName = savedReq.NetworkContainerid - vnetSpace = savedReq.VnetAddressSpace + cnetSpace = savedReq.CnetAddressSpace ipaddress = savedReq.IPConfiguration.IPSubnet.IPAddress // it has to exist } else { returnMessage = "[Azure CNS] Never received call to create this container." @@ -1050,7 +1199,7 @@ func (service *httpRestService) getInterfaceForContainer(w http.ResponseWriter, getInterfaceForContainerResponse := cns.GetInterfaceForContainerResponse{ Response: resp, NetworkInterface: cns.NetworkInterface{Name: interfaceName, IPAddress: ipaddress}, - VnetAddressSpace: vnetSpace, + CnetAddressSpace: cnetSpace, } err = service.Listener.Encode(w, &getInterfaceForContainerResponse) @@ -1093,7 +1242,7 @@ func (service *httpRestService) restoreNetworkState() error { } if enableSnat { - err := common.SetOutboundSNAT(nwInfo.NicInfo.Subnet) + err := platform.SetOutboundSNAT(nwInfo.NicInfo.Subnet) if err != nil { log.Printf("[Azure CNS] Error setting up SNAT outbound rule %v", err) return err diff --git a/cns/restserver/restserver_test.go b/cns/restserver/restserver_test.go index a4affa1a3e..1d9c421a62 100644 --- a/cns/restserver/restserver_test.go +++ b/cns/restserver/restserver_test.go @@ -266,7 +266,36 @@ func TestGetUnhealthyIPAddresses(t *testing.T) { } } -func creatOrUpdateWebAppContainerWithName(t *testing.T, name string, ip string) error { +func setOrchestratorType(t *testing.T, orchestratorType string) error { + var body bytes.Buffer + + info := &cns.SetOrchestratorTypeRequest{OrchestratorType: orchestratorType} + + json.NewEncoder(&body).Encode(info) + + req, err := http.NewRequest(http.MethodPost, cns.SetOrchestratorType, &body) + if err != nil { + t.Fatal(err) + } + + w := httptest.NewRecorder() + mux.ServeHTTP(w, req) + + var resp cns.Response + err = decodeResponse(w, &resp) + fmt.Printf("Raw response: %+v", w.Body) + if err != nil || resp.ReturnCode != 0 { + t.Errorf("setOrchestratorType failed with response %+v Err:%+v", resp, err) + t.Fatal(err) + } else { + fmt.Printf("setOrchestratorType passed with response %+v Err:%+v", resp, err) + } + + fmt.Printf("setOrchestratorType succeeded with response %+v\n", resp) + return nil +} + +func creatOrUpdateNetworkContainerWithName(t *testing.T, name string, ip string, containerType string) error { var body bytes.Buffer var ipConfig cns.IPConfiguration ipConfig.DNSServers = []string{"8.8.8.8", "8.8.4.4"} @@ -275,11 +304,14 @@ func creatOrUpdateWebAppContainerWithName(t *testing.T, name string, ip string) ipSubnet.IPAddress = ip ipSubnet.PrefixLength = 24 ipConfig.IPSubnet = ipSubnet + podInfo := cns.KubernetesPodInfo{PodName: "testpod", PodNamespace: "testpodnamespace"} + context, _ := json.Marshal(podInfo) info := &cns.CreateNetworkContainerRequest{ Version: "0.1", - NetworkContainerType: "WebApps", + NetworkContainerType: containerType, NetworkContainerid: name, + OrchestratorContext: context, IPConfiguration: ipConfig, PrimaryInterfaceIdentifier: "11.0.0.7", } @@ -335,6 +367,60 @@ func deleteNetworkAdapterWithName(t *testing.T, name string) error { return nil } +func getNetworkCotnainerByContext(t *testing.T, name string) error { + var body bytes.Buffer + var resp cns.GetNetworkContainerResponse + + podInfo := cns.KubernetesPodInfo{PodName: "testpod", PodNamespace: "testpodnamespace"} + podInfoBytes, err := json.Marshal(podInfo) + getReq := &cns.GetNetworkContainerRequest{OrchestratorContext: podInfoBytes} + + json.NewEncoder(&body).Encode(getReq) + req, err := http.NewRequest(http.MethodPost, cns.GetNetworkContainerByOrchestratorContext, &body) + if err != nil { + t.Fatal(err) + } + + w := httptest.NewRecorder() + mux.ServeHTTP(w, req) + + err = decodeResponse(w, &resp) + if err != nil || resp.Response.ReturnCode != 0 { + t.Errorf("GetNetworkContainerByContext failed with response %+v Err:%+v", resp, err) + t.Fatal(err) + } + + fmt.Printf("**GetNetworkContainerByContext succeded with response %+v, raw:%+v\n", resp, w.Body) + return nil +} + +func getNonExistNetworkCotnainerByContext(t *testing.T, name string) error { + var body bytes.Buffer + var resp cns.GetNetworkContainerResponse + + podInfo := cns.KubernetesPodInfo{PodName: "testpod", PodNamespace: "testpodnamespace"} + podInfoBytes, err := json.Marshal(podInfo) + getReq := &cns.GetNetworkContainerRequest{OrchestratorContext: podInfoBytes} + + json.NewEncoder(&body).Encode(getReq) + req, err := http.NewRequest(http.MethodPost, cns.GetNetworkContainerByOrchestratorContext, &body) + if err != nil { + t.Fatal(err) + } + + w := httptest.NewRecorder() + mux.ServeHTTP(w, req) + + err = decodeResponse(w, &resp) + if err != nil || resp.Response.ReturnCode != UnknownContainerID { + t.Errorf("GetNetworkContainerByContext unexpected response %+v Err:%+v", resp, err) + t.Fatal(err) + } + + fmt.Printf("**GetNonExistNetworkContainerByContext succeded with response %+v, raw:%+v\n", resp, w.Body) + return nil +} + func getNetworkCotnainerStatus(t *testing.T, name string) error { var body bytes.Buffer var resp cns.GetNetworkContainerStatusResponse @@ -389,17 +475,31 @@ func getInterfaceForContainer(t *testing.T, name string) error { return nil } +func TestSetOrchestratorType(t *testing.T) { + fmt.Println("Test: TestSetOrchestratorType") + + setEnv(t) + + err := setOrchestratorType(t, cns.Kubernetes) + if err != nil { + t.Errorf("setOrchestratorType failed Err:%+v", err) + t.Fatal(err) + } +} + func TestCreateNetworkContainer(t *testing.T) { // requires more than 30 seconds to run fmt.Println("Test: TestCreateNetworkContainer") + setEnv(t) - err := creatOrUpdateWebAppContainerWithName(t, "ethWebApp", "11.0.0.5") + + err := creatOrUpdateNetworkContainerWithName(t, "ethWebApp", "11.0.0.5", "WebApps") if err != nil { t.Errorf("creatOrUpdateWebAppContainerWithName failed Err:%+v", err) t.Fatal(err) } - err = creatOrUpdateWebAppContainerWithName(t, "ethWebApp", "11.0.0.6") + err = creatOrUpdateNetworkContainerWithName(t, "ethWebApp", "11.0.0.6", "WebApps") if err != nil { t.Errorf("Updating interface failed Err:%+v", err) t.Fatal(err) @@ -414,11 +514,48 @@ func TestCreateNetworkContainer(t *testing.T) { } } +func TestGetNetworkContainerByOrchestratorContext(t *testing.T) { + // requires more than 30 seconds to run + fmt.Println("Test: TestGetNetworkContainerByOrchestratorContext") + + setEnv(t) + setOrchestratorType(t, cns.Kubernetes) + + err := creatOrUpdateNetworkContainerWithName(t, "ethWebApp", "11.0.0.5", "AzureContainerInstance") + if err != nil { + t.Errorf("creatOrUpdateNetworkContainerWithName failed Err:%+v", err) + t.Fatal(err) + } + + fmt.Println("Now calling getNetworkCotnainerStatus") + err = getNetworkCotnainerByContext(t, "ethWebApp") + if err != nil { + t.Errorf("TestGetNetworkContainerByOrchestratorContext failed Err:%+v", err) + t.Fatal(err) + } + + fmt.Println("Now calling DeleteNetworkContainer") + + err = deleteNetworkAdapterWithName(t, "ethWebApp") + if err != nil { + t.Errorf("Deleting interface failed Err:%+v", err) + t.Fatal(err) + } + + err = getNonExistNetworkCotnainerByContext(t, "ethWebApp") + if err != nil { + t.Errorf("TestGetNetworkContainerByOrchestratorContext failed Err:%+v", err) + t.Fatal(err) + } +} + func TestGetNetworkContainerStatus(t *testing.T) { // requires more than 30 seconds to run fmt.Println("Test: TestCreateNetworkContainer") + setEnv(t) - err := creatOrUpdateWebAppContainerWithName(t, "ethWebApp", "11.0.0.5") + + err := creatOrUpdateNetworkContainerWithName(t, "ethWebApp", "11.0.0.5", "WebApps") if err != nil { t.Errorf("creatOrUpdateWebAppContainerWithName failed Err:%+v", err) t.Fatal(err) @@ -443,8 +580,10 @@ func TestGetNetworkContainerStatus(t *testing.T) { func TestGetInterfaceForNetworkContainer(t *testing.T) { // requires more than 30 seconds to run fmt.Println("Test: TestCreateNetworkContainer") + setEnv(t) - err := creatOrUpdateWebAppContainerWithName(t, "ethWebApp", "11.0.0.5") + + err := creatOrUpdateNetworkContainerWithName(t, "ethWebApp", "11.0.0.5", "WebApps") if err != nil { t.Errorf("creatOrUpdateWebAppContainerWithName failed Err:%+v", err) t.Fatal(err) diff --git a/cns/service/main.go b/cns/service/main.go index 32039631b0..4a4ee0a98d 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -70,6 +70,8 @@ var args = acn.ArgumentList{ acn.OptLogTargetSyslog: log.TargetSyslog, acn.OptLogTargetStderr: log.TargetStderr, acn.OptLogTargetFile: log.TargetLogfile, + acn.OptLogStdout: log.TargetStdout, + acn.OptLogMultiWrite: log.TargetStdOutAndLogFile, }, }, { @@ -100,6 +102,13 @@ var args = acn.ArgumentList{ Type: "string", DefaultValue: "", }, + { + Name: acn.OptStopAzureVnet, + Shorthand: acn.OptStopAzureVnetAlias, + Description: "Stop Azure-CNM if flag is true", + Type: "bool", + DefaultValue: false, + }, { Name: acn.OptVersion, Shorthand: acn.OptVersionAlias, @@ -117,6 +126,7 @@ func printVersion() { // Main is the entry point for CNS. func main() { + var stopcnm = false // Initialize and parse command line arguments. acn.ParseArgs(&args, printVersion) @@ -128,6 +138,7 @@ func main() { logDirectory := acn.GetArg(acn.OptLogLocation).(string) ipamQueryUrl, _ := acn.GetArg(acn.OptIpamQueryUrl).(string) ipamQueryInterval, _ := acn.GetArg(acn.OptIpamQueryInterval).(int) + stopcnm = acn.GetArg(acn.OptStopAzureVnet).(bool) vers := acn.GetArg(acn.OptVersion).(bool) if vers { @@ -143,70 +154,43 @@ func main() { // Create a channel to receive unhandled errors from CNS. config.ErrChan = make(chan error, 1) - // Create the key value store. var err error - config.Store, err = store.NewJsonFileStore(platform.CNMRuntimePath + name + ".json") - if err != nil { - fmt.Printf("Failed to create store: %v\n", err) - return - } - - // Create CNS object. - httpRestService, err := restserver.NewHTTPRestService(&config) - if err != nil { - fmt.Printf("Failed to create CNS object, err:%v.\n", err) - return + // Create logging provider. + log.SetName(name) + log.SetLevel(logLevel) + if logDirectory != "" { + log.SetLogDirectory(logDirectory) } - var pluginConfig acn.PluginConfig - pluginConfig.Version = version - - // Create a channel to receive unhandled errors from the plugins. - pluginConfig.ErrChan = make(chan error, 1) - - // Create network plugin. - netPlugin, err := network.NewPlugin(&pluginConfig) + err = log.SetTarget(logTarget) if err != nil { - fmt.Printf("Failed to create network plugin, err:%v.\n", err) + fmt.Printf("Failed to configure logging: %v\n", err) return } - // Create IPAM plugin. - ipamPlugin, err := ipam.NewPlugin(&pluginConfig) - if err != nil { - fmt.Printf("Failed to create IPAM plugin, err:%v.\n", err) - return - } + // Log platform information. + log.Printf("Running on %v", platform.GetOSInfo()) err = acn.CreateDirectory(platform.CNMRuntimePath) if err != nil { - fmt.Printf("Failed to create File Store directory Error:%v", err.Error()) + log.Printf("Failed to create File Store directory Error:%v", err.Error()) return } // Create the key value store. - pluginConfig.Store, err = store.NewJsonFileStore(platform.CNMRuntimePath + pluginName + ".json") + config.Store, err = store.NewJsonFileStore(platform.CNMRuntimePath + name + ".json") if err != nil { - fmt.Printf("Failed to create store: %v\n", err) + log.Printf("Failed to create store: %v\n", err) return } - // Create logging provider. - log.SetName(name) - log.SetLevel(logLevel) - if logDirectory != "" { - log.SetLogDirectory(logDirectory) - } - - err = log.SetTarget(logTarget) + // Create CNS object. + httpRestService, err := restserver.NewHTTPRestService(&config) if err != nil { - fmt.Printf("Failed to configure logging: %v\n", err) + log.Printf("Failed to create CNS object, err:%v.\n", err) return } - // Log platform information. - log.Printf("Running on %v", platform.GetOSInfo()) - // Set CNS options. httpRestService.SetOption(acn.OptCnsURL, cnsURL) @@ -214,32 +198,56 @@ func main() { if httpRestService != nil { err = httpRestService.Start(&config) if err != nil { - fmt.Printf("Failed to start CNS, err:%v.\n", err) + log.Printf("Failed to start CNS, err:%v.\n", err) return } } - // Set plugin options. - netPlugin.SetOption(acn.OptAPIServerURL, url) + var netPlugin network.NetPlugin + var ipamPlugin ipam.IpamPlugin - ipamPlugin.SetOption(acn.OptEnvironment, environment) - ipamPlugin.SetOption(acn.OptAPIServerURL, url) - ipamPlugin.SetOption(acn.OptIpamQueryUrl, ipamQueryUrl) - ipamPlugin.SetOption(acn.OptIpamQueryInterval, ipamQueryInterval) + if !stopcnm { + var pluginConfig acn.PluginConfig + pluginConfig.Version = version - if netPlugin != nil { - log.Printf("Start netplugin\n") - err = netPlugin.Start(&pluginConfig) + // Create a channel to receive unhandled errors from the plugins. + pluginConfig.ErrChan = make(chan error, 1) + + // Create network plugin. + netPlugin, err = network.NewPlugin(&pluginConfig) if err != nil { - fmt.Printf("Failed to start network plugin, err:%v.\n", err) + log.Printf("Failed to create network plugin, err:%v.\n", err) return } - } - if ipamPlugin != nil { - err = ipamPlugin.Start(&pluginConfig) + // Create IPAM plugin. + ipamPlugin, err = ipam.NewPlugin(&pluginConfig) if err != nil { - fmt.Printf("Failed to start IPAM plugin, err:%v.\n", err) + log.Printf("Failed to create IPAM plugin, err:%v.\n", err) + return + } + + // Create the key value store. + pluginConfig.Store, err = store.NewJsonFileStore(platform.CNMRuntimePath + pluginName + ".json") + if err != nil { + log.Printf("Failed to create store: %v\n", err) + return + } + + // Set plugin options. + netPlugin.SetOption(acn.OptAPIServerURL, url) + log.Printf("Start netplugin\n") + if err := netPlugin.Start(&pluginConfig); err != nil { + log.Printf("Failed to create network plugin, err:%v.\n", err) + return + } + + ipamPlugin.SetOption(acn.OptEnvironment, environment) + ipamPlugin.SetOption(acn.OptAPIServerURL, url) + ipamPlugin.SetOption(acn.OptIpamQueryUrl, ipamQueryUrl) + ipamPlugin.SetOption(acn.OptIpamQueryInterval, ipamQueryInterval) + if err := ipamPlugin.Start(&pluginConfig); err != nil { + log.Printf("Failed to create IPAM plugin, err:%v.\n", err) return } } @@ -261,11 +269,15 @@ func main() { httpRestService.Stop() } - if netPlugin != nil { - netPlugin.Stop() - } + if !stopcnm { + if netPlugin != nil { + netPlugin.Stop() + } - if ipamPlugin != nil { - ipamPlugin.Stop() + if ipamPlugin != nil { + ipamPlugin.Stop() + } } + + log.Close() } diff --git a/common/config.go b/common/config.go index d670c08a5b..c986cc990f 100644 --- a/common/config.go +++ b/common/config.go @@ -29,6 +29,8 @@ const ( OptLogTargetSyslog = "syslog" OptLogTargetStderr = "stderr" OptLogTargetFile = "logfile" + OptLogStdout = "stdout" + OptLogMultiWrite = "stdoutfile" // Logging location OptLogLocation = "log-location" @@ -42,6 +44,10 @@ const ( OptIpamQueryInterval = "ipam-query-interval" OptIpamQueryIntervalAlias = "i" + // Don't Start CNM + OptStopAzureVnet = "stop-azure-cnm" + OptStopAzureVnetAlias = "stopcnm" + // Version. OptVersion = "version" OptVersionAlias = "v" diff --git a/common/utils.go b/common/utils.go index 6b0cde1f21..ba6dcf7d51 100644 --- a/common/utils.go +++ b/common/utils.go @@ -4,6 +4,7 @@ package common import ( + "encoding/binary" "encoding/xml" "net" "os" @@ -74,3 +75,31 @@ func CreateDirectory(dirPath string) error { return err } + +func IpToInt(ip net.IP) uint32 { + if len(ip) == 16 { + return binary.BigEndian.Uint32(ip[12:16]) + } + + return binary.BigEndian.Uint32(ip) +} + +func GetInterfaceSubnetWithSpecificIp(ipAddr string) *net.IPNet { + addrs, err := net.InterfaceAddrs() + if err != nil { + log.Printf("InterfaceAddrs failed with %+v", err) + return nil + } + + for _, a := range addrs { + if ipnet, ok := a.(*net.IPNet); ok && !ipnet.IP.IsLoopback() { + if ipnet.IP.To4() != nil { + if ipnet.IP.String() == ipAddr { + return ipnet + } + } + } + } + + return nil +} diff --git a/log/logger.go b/log/logger.go index 033b5d1232..e5176d6688 100644 --- a/log/logger.go +++ b/log/logger.go @@ -26,6 +26,8 @@ const ( TargetStderr = iota TargetSyslog TargetLogfile + TargetStdout + TargetStdOutAndLogFile ) const ( diff --git a/log/logger_linux.go b/log/logger_linux.go index 78450bc38b..fcd4d6829d 100644 --- a/log/logger_linux.go +++ b/log/logger_linux.go @@ -1,39 +1,54 @@ -// Copyright 2017 Microsoft. All rights reserved. -// MIT License - -package log - -import ( - "fmt" - "log" - "log/syslog" - "os" -) - -const ( - // LogPath is the path where log files are stored. - LogPath = "/var/log/" -) - -// SetTarget sets the log target. -func (logger *Logger) SetTarget(target int) error { - var err error - - switch target { - case TargetStderr: - logger.out = os.Stderr - case TargetSyslog: - logger.out, err = syslog.New(log.LstdFlags, logger.name) - case TargetLogfile: - logger.out, err = os.OpenFile(logger.getLogFileName(), os.O_CREATE|os.O_APPEND|os.O_RDWR, logFilePerm) - default: - err = fmt.Errorf("Invalid log target %d", target) - } - - if err == nil { - logger.l.SetOutput(logger.out) - logger.target = target - } - - return err -} +// Copyright 2017 Microsoft. All rights reserved. +// MIT License + +package log + +import ( + "fmt" + "io" + "log" + "log/syslog" + "os" +) + +const ( + // LogPath is the path where log files are stored. + LogPath = "/var/log/" +) + +// SetTarget sets the log target. +func (logger *Logger) SetTarget(target int) error { + var err error + + switch target { + case TargetStdout: + logger.out = os.Stdout + + case TargetStderr: + logger.out = os.Stderr + + case TargetSyslog: + logger.out, err = syslog.New(log.LstdFlags, logger.name) + + case TargetLogfile: + logger.out, err = os.OpenFile(logger.getLogFileName(), os.O_CREATE|os.O_APPEND|os.O_RDWR, logFilePerm) + + case TargetStdOutAndLogFile: + logger.out, err = os.OpenFile(logger.getLogFileName(), os.O_CREATE|os.O_APPEND|os.O_RDWR, logFilePerm) + if err == nil { + logger.l.SetOutput(io.MultiWriter(os.Stdout, logger.out)) + logger.target = target + return nil + } + + default: + err = fmt.Errorf("Invalid log target %d", target) + } + + if err == nil { + logger.l.SetOutput(logger.out) + logger.target = target + } + + return err +} diff --git a/log/logger_windows.go b/log/logger_windows.go index 815e93faa5..3e5a5bc6ec 100644 --- a/log/logger_windows.go +++ b/log/logger_windows.go @@ -1,35 +1,46 @@ -// Copyright 2017 Microsoft. All rights reserved. -// MIT License - -package log - -import ( - "fmt" - "os" -) - -const ( - // LogPath is the path where log files are stored. - LogPath = "" -) - -// SetTarget sets the log target. -func (logger *Logger) SetTarget(target int) error { - var err error - - switch target { - case TargetStderr: - logger.out = os.Stderr - case TargetLogfile: - logger.out, err = os.OpenFile(logger.getLogFileName(), os.O_CREATE|os.O_APPEND|os.O_RDWR, logFilePerm) - default: - err = fmt.Errorf("Invalid log target %d", target) - } - - if err == nil { - logger.l.SetOutput(logger.out) - logger.target = target - } - - return err -} +// Copyright 2017 Microsoft. All rights reserved. +// MIT License + +package log + +import ( + "fmt" + "io" + "os" +) + +const ( + // LogPath is the path where log files are stored. + LogPath = "" +) + +// SetTarget sets the log target. +func (logger *Logger) SetTarget(target int) error { + var err error + + switch target { + case TargetStderr: + logger.out = os.Stderr + + case TargetLogfile: + logger.out, err = os.OpenFile(logger.getLogFileName(), os.O_CREATE|os.O_APPEND|os.O_RDWR, logFilePerm) + + case TargetStdOutAndLogFile: + logger.out, err = os.OpenFile(logger.getLogFileName(), os.O_CREATE|os.O_APPEND|os.O_RDWR, logFilePerm) + if err == nil { + logger.l.SetOutput(io.MultiWriter(os.Stdout, logger.out)) + logger.target = target + return nil + } + + default: + err = fmt.Errorf("Invalid log target %d", target) + } + + if err == nil { + logger.l.SetOutput(logger.out) + logger.target = target + } + + return err +} diff --git a/network/api.go b/network/api.go index 9ce77c4dde..4b6b93ba81 100644 --- a/network/api.go +++ b/network/api.go @@ -17,6 +17,4 @@ var ( errEndpointNotFound = fmt.Errorf("Endpoint not found") errEndpointInUse = fmt.Errorf("Endpoint is already joined to a sandbox") errEndpointNotInUse = fmt.Errorf("Endpoint is not joined to a sandbox") - - OptVethName = "vethname" ) diff --git a/network/bridge_endpointclient_linux.go b/network/bridge_endpointclient_linux.go new file mode 100644 index 0000000000..9f1d95e363 --- /dev/null +++ b/network/bridge_endpointclient_linux.go @@ -0,0 +1,153 @@ +package network + +import ( + "net" + + "github.com/Azure/azure-container-networking/ebtables" + "github.com/Azure/azure-container-networking/log" + "github.com/Azure/azure-container-networking/netlink" +) + +type LinuxBridgeEndpointClient struct { + bridgeName string + hostPrimaryIfName string + hostVethName string + containerVethName string + hostPrimaryMac net.HardwareAddr + containerMac net.HardwareAddr + mode string +} + +func NewLinuxBridgeEndpointClient( + extIf *externalInterface, + hostVethName string, + containerVethName string, + mode string, +) *LinuxBridgeEndpointClient { + + client := &LinuxBridgeEndpointClient{ + bridgeName: extIf.BridgeName, + hostPrimaryIfName: extIf.Name, + hostVethName: hostVethName, + containerVethName: containerVethName, + hostPrimaryMac: extIf.MacAddress, + mode: mode, + } + + return client +} + +func (client *LinuxBridgeEndpointClient) AddEndpoints(epInfo *EndpointInfo) error { + if err := createEndpoint(client.hostVethName, client.containerVethName); err != nil { + return err + } + + containerIf, err := net.InterfaceByName(client.containerVethName) + if err != nil { + return err + } + + client.containerMac = containerIf.HardwareAddr + return nil +} + +func (client *LinuxBridgeEndpointClient) AddEndpointRules(epInfo *EndpointInfo) error { + var err error + + log.Printf("[net] Setting link %v master %v.", client.hostVethName, client.bridgeName) + if err := netlink.SetLinkMaster(client.hostVethName, client.bridgeName); err != nil { + return err + } + + for _, ipAddr := range epInfo.IPAddresses { + // Add ARP reply rule. + log.Printf("[net] Adding ARP reply rule for IP address %v", ipAddr.String()) + if err = ebtables.SetArpReply(ipAddr.IP, client.getArpReplyAddress(client.containerMac), ebtables.Append); err != nil { + return err + } + + // Add MAC address translation rule. + log.Printf("[net] Adding MAC DNAT rule for IP address %v", ipAddr.String()) + if err := ebtables.SetDnatForIPAddress(client.hostPrimaryIfName, ipAddr.IP, client.containerMac, ebtables.Append); err != nil { + return err + } + } + + return nil +} + +func (client *LinuxBridgeEndpointClient) DeleteEndpointRules(ep *endpoint) { + // Delete rules for IP addresses on the container interface. + for _, ipAddr := range ep.IPAddresses { + // Delete ARP reply rule. + log.Printf("[net] Deleting ARP reply rule for IP address %v on %v.", ipAddr.String(), ep.Id) + err := ebtables.SetArpReply(ipAddr.IP, client.getArpReplyAddress(ep.MacAddress), ebtables.Delete) + if err != nil { + log.Printf("[net] Failed to delete ARP reply rule for IP address %v: %v.", ipAddr.String(), err) + } + + // Delete MAC address translation rule. + log.Printf("[net] Deleting MAC DNAT rule for IP address %v on %v.", ipAddr.String(), ep.Id) + err = ebtables.SetDnatForIPAddress(client.hostPrimaryIfName, ipAddr.IP, ep.MacAddress, ebtables.Delete) + if err != nil { + log.Printf("[net] Failed to delete MAC DNAT rule for IP address %v: %v.", ipAddr.String(), err) + } + } +} + +// getArpReplyAddress returns the MAC address to use in ARP replies. +func (client *LinuxBridgeEndpointClient) getArpReplyAddress(epMacAddress net.HardwareAddr) net.HardwareAddr { + var macAddress net.HardwareAddr + + if client.mode == opModeTunnel { + // In tunnel mode, resolve all IP addresses to the virtual MAC address for hairpinning. + macAddress, _ = net.ParseMAC(virtualMacAddress) + } else { + // Otherwise, resolve to actual MAC address. + macAddress = epMacAddress + } + + return macAddress +} + +func (client *LinuxBridgeEndpointClient) MoveEndpointsToContainerNS(epInfo *EndpointInfo, nsID uintptr) error { + // Move the container interface to container's network namespace. + log.Printf("[net] Setting link %v netns %v.", client.containerVethName, epInfo.NetNsPath) + if err := netlink.SetLinkNetNs(client.containerVethName, nsID); err != nil { + return err + } + + return nil +} + +func (client *LinuxBridgeEndpointClient) SetupContainerInterfaces(epInfo *EndpointInfo) error { + if err := setupContainerInterface(client.containerVethName, epInfo.IfName); err != nil { + return err + } + + client.containerVethName = epInfo.IfName + return nil +} + +func (client *LinuxBridgeEndpointClient) ConfigureContainerInterfacesAndRoutes(epInfo *EndpointInfo) error { + if err := assignIPToInterface(client.containerVethName, epInfo.IPAddresses); err != nil { + return err + } + + if err := addRoutes(client.containerVethName, epInfo.Routes); err != nil { + return err + } + + return nil +} + +func (client *LinuxBridgeEndpointClient) DeleteEndpoints(ep *endpoint) error { + log.Printf("[net] Deleting veth pair %v %v.", ep.HostIfName, ep.IfName) + err := netlink.DeleteLink(ep.HostIfName) + if err != nil { + log.Printf("[net] Failed to delete veth pair %v: %v.", ep.HostIfName, err) + return err + } + + return nil +} diff --git a/network/bridge_networkclient_linux.go b/network/bridge_networkclient_linux.go new file mode 100644 index 0000000000..24ebb0b79d --- /dev/null +++ b/network/bridge_networkclient_linux.go @@ -0,0 +1,107 @@ +package network + +import ( + "net" + + "github.com/Azure/azure-container-networking/ebtables" + "github.com/Azure/azure-container-networking/log" + "github.com/Azure/azure-container-networking/netlink" +) + +type LinuxBridgeClient struct { + bridgeName string + hostInterfaceName string + mode string +} + +func NewLinuxBridgeClient(bridgeName string, hostInterfaceName string, mode string) *LinuxBridgeClient { + client := &LinuxBridgeClient{ + bridgeName: bridgeName, + mode: mode, + hostInterfaceName: hostInterfaceName, + } + + return client +} + +func (client *LinuxBridgeClient) CreateBridge() error { + log.Printf("[net] Creating bridge %v.", client.bridgeName) + + link := netlink.BridgeLink{ + LinkInfo: netlink.LinkInfo{ + Type: netlink.LINK_TYPE_BRIDGE, + Name: client.bridgeName, + }, + } + + return netlink.AddLink(&link) +} + +func (client *LinuxBridgeClient) DeleteBridge() error { + // Disconnect external interface from its bridge. + err := netlink.SetLinkMaster(client.hostInterfaceName, "") + if err != nil { + log.Printf("[net] Failed to disconnect interface %v from bridge, err:%v.", client.hostInterfaceName, err) + } + + // Delete the bridge. + err = netlink.DeleteLink(client.bridgeName) + if err != nil { + log.Printf("[net] Failed to delete bridge %v, err:%v.", client.bridgeName, err) + } + + return nil +} + +func (client *LinuxBridgeClient) AddL2Rules(extIf *externalInterface) error { + hostIf, err := net.InterfaceByName(client.hostInterfaceName) + if err != nil { + return err + } + + // Add SNAT rule to translate container egress traffic. + log.Printf("[net] Adding SNAT rule for egress traffic on %v.", client.hostInterfaceName) + if err := ebtables.SetSnatForInterface(client.hostInterfaceName, hostIf.HardwareAddr, ebtables.Append); err != nil { + return err + } + + // Add ARP reply rule for host primary IP address. + // ARP requests for all IP addresses are forwarded to the SDN fabric, but fabric + // doesn't respond to ARP requests from the VM for its own primary IP address. + primary := extIf.IPAddresses[0].IP + log.Printf("[net] Adding ARP reply rule for primary IP address %v.", primary) + if err := ebtables.SetArpReply(primary, hostIf.HardwareAddr, ebtables.Append); err != nil { + return err + } + + // Add DNAT rule to forward ARP replies to container interfaces. + log.Printf("[net] Adding DNAT rule for ingress ARP traffic on interface %v.", client.hostInterfaceName) + if err := ebtables.SetDnatForArpReplies(client.hostInterfaceName, ebtables.Append); err != nil { + return err + } + + // Enable VEPA for host policy enforcement if necessary. + if client.mode == opModeTunnel { + log.Printf("[net] Enabling VEPA mode for %v.", client.hostInterfaceName) + if err := ebtables.SetVepaMode(client.bridgeName, commonInterfacePrefix, virtualMacAddress, ebtables.Append); err != nil { + return err + } + } + + return nil +} + +func (client *LinuxBridgeClient) DeleteL2Rules(extIf *externalInterface) { + ebtables.SetVepaMode(client.bridgeName, commonInterfacePrefix, virtualMacAddress, ebtables.Delete) + ebtables.SetDnatForArpReplies(extIf.Name, ebtables.Delete) + ebtables.SetArpReply(extIf.IPAddresses[0].IP, extIf.MacAddress, ebtables.Delete) + ebtables.SetSnatForInterface(extIf.Name, extIf.MacAddress, ebtables.Delete) +} + +func (client *LinuxBridgeClient) SetBridgeMasterToHostInterface() error { + return netlink.SetLinkMaster(client.hostInterfaceName, client.bridgeName) +} + +func (client *LinuxBridgeClient) SetHairpinOnHostInterface(enable bool) error { + return netlink.SetLinkHairpin(client.hostInterfaceName, enable) +} diff --git a/network/endpoint.go b/network/endpoint.go index 502eef4f7a..ee0e945573 100644 --- a/network/endpoint.go +++ b/network/endpoint.go @@ -13,39 +13,43 @@ import ( // Endpoint represents a container network interface. type endpoint struct { - Id string - HnsId string `json:",omitempty"` - SandboxKey string - IfName string - HostIfName string - MacAddress net.HardwareAddr - IPAddresses []net.IPNet - Gateways []net.IP - DNS DNSInfo - Routes []RouteInfo + Id string + HnsId string `json:",omitempty"` + SandboxKey string + IfName string + HostIfName string + MacAddress net.HardwareAddr + IPAddresses []net.IPNet + Gateways []net.IP + DNS DNSInfo + Routes []RouteInfo + VlanID int + EnableSnatOnHost bool } // EndpointInfo contains read-only information about an endpoint. type EndpointInfo struct { - Id string - ContainerID string - NetNsPath string - IfName string - SandboxKey string - IfIndex int - MacAddress net.HardwareAddr - DNS DNSInfo - IPAddresses []net.IPNet - Routes []RouteInfo - Policies []policy.Policy - Gateways []net.IP - Data map[string]interface{} + Id string + ContainerID string + NetNsPath string + IfName string + SandboxKey string + IfIndex int + MacAddress net.HardwareAddr + DNS DNSInfo + IPAddresses []net.IPNet + Routes []RouteInfo + Policies []policy.Policy + Gateways []net.IP + EnableSnatOnHost bool + Data map[string]interface{} } // RouteInfo contains information about an IP route. type RouteInfo struct { - Dst net.IPNet - Gw net.IP + Dst net.IPNet + Gw net.IP + DevName string } // ConstructEndpointID constructs endpoint name from netNsPath. @@ -147,13 +151,14 @@ func (nw *network) getEndpoint(endpointId string) (*endpoint, error) { // GetInfo returns information about the endpoint. func (ep *endpoint) getInfo() *EndpointInfo { info := &EndpointInfo{ - Id: ep.Id, - IPAddresses: ep.IPAddresses, - Data: make(map[string]interface{}), - MacAddress: ep.MacAddress, - SandboxKey: ep.SandboxKey, - IfIndex: 0, // Azure CNI supports only one interface - DNS: ep.DNS, + Id: ep.Id, + IPAddresses: ep.IPAddresses, + Data: make(map[string]interface{}), + MacAddress: ep.MacAddress, + SandboxKey: ep.SandboxKey, + IfIndex: 0, // Azure CNI supports only one interface + DNS: ep.DNS, + EnableSnatOnHost: ep.EnableSnatOnHost, } for _, route := range ep.Routes { diff --git a/network/endpoint_common_linux.go b/network/endpoint_common_linux.go new file mode 100644 index 0000000000..d96934d0c5 --- /dev/null +++ b/network/endpoint_common_linux.go @@ -0,0 +1,128 @@ +package network + +import ( + "net" + "strings" + + "github.com/Azure/azure-container-networking/log" + "github.com/Azure/azure-container-networking/netlink" +) + +func createEndpoint(hostVethName string, containerVethName string) error { + log.Printf("[net] Creating veth pair %v %v.", hostVethName, containerVethName) + + link := netlink.VEthLink{ + LinkInfo: netlink.LinkInfo{ + Type: netlink.LINK_TYPE_VETH, + Name: hostVethName, + }, + PeerName: containerVethName, + } + + err := netlink.AddLink(&link) + if err != nil { + log.Printf("[net] Failed to create veth pair, err:%v.", err) + return err + } + + log.Printf("[net] Setting link %v state up.", hostVethName) + err = netlink.SetLinkState(hostVethName, true) + if err != nil { + return err + } + + return nil +} + +func setupContainerInterface(containerVethName string, targetIfName string) error { + // Interface needs to be down before renaming. + log.Printf("[net] Setting link %v state down.", containerVethName) + if err := netlink.SetLinkState(containerVethName, false); err != nil { + return err + } + + // Rename the container interface. + log.Printf("[net] Setting link %v name %v.", containerVethName, targetIfName) + if err := netlink.SetLinkName(containerVethName, targetIfName); err != nil { + return err + } + + // Bring the interface back up. + log.Printf("[net] Setting link %v state up.", targetIfName) + return netlink.SetLinkState(targetIfName, true) +} + +func assignIPToInterface(interfaceName string, ipAddresses []net.IPNet) error { + // Assign IP address to container network interface. + for _, ipAddr := range ipAddresses { + log.Printf("[net] Adding IP address %v to link %v.", ipAddr.String(), interfaceName) + err := netlink.AddIpAddress(interfaceName, ipAddr.IP, &ipAddr) + if err != nil { + return err + } + } + + return nil +} + +func addRoutes(interfaceName string, routes []RouteInfo) error { + ifIndex := 0 + interfaceIf, _ := net.InterfaceByName(interfaceName) + + for _, route := range routes { + log.Printf("[ovs] Adding IP route %+v to link %v.", route, interfaceName) + + if route.DevName != "" { + devIf, _ := net.InterfaceByName(route.DevName) + ifIndex = devIf.Index + } else { + ifIndex = interfaceIf.Index + } + + nlRoute := &netlink.Route{ + Family: netlink.GetIpAddressFamily(route.Gw), + Dst: &route.Dst, + Gw: route.Gw, + LinkIndex: ifIndex, + } + + if err := netlink.AddIpRoute(nlRoute); err != nil { + if !strings.Contains(strings.ToLower(err.Error()), "file exists") { + return err + } else { + log.Printf("route already exists") + } + } + } + + return nil +} + +func deleteRoutes(interfaceName string, routes []RouteInfo) error { + ifIndex := 0 + interfaceIf, _ := net.InterfaceByName(interfaceName) + + for _, route := range routes { + log.Printf("[ovs] Adding IP route %+v to link %v.", route, interfaceName) + + if route.DevName != "" { + devIf, _ := net.InterfaceByName(route.DevName) + ifIndex = devIf.Index + } else { + ifIndex = interfaceIf.Index + } + + nlRoute := &netlink.Route{ + Family: netlink.GetIpAddressFamily(route.Gw), + Dst: &route.Dst, + Gw: route.Gw, + LinkIndex: ifIndex, + } + + if err := netlink.DeleteIpRoute(nlRoute); err != nil { + return err + } + } + + return nil +} diff --git a/network/endpoint_linux.go b/network/endpoint_linux.go index 496b17e7ed..690fab492e 100644 --- a/network/endpoint_linux.go +++ b/network/endpoint_linux.go @@ -11,9 +11,7 @@ import ( "fmt" "net" - "github.com/Azure/azure-container-networking/ebtables" "github.com/Azure/azure-container-networking/log" - "github.com/Azure/azure-container-networking/netlink" ) const ( @@ -41,6 +39,8 @@ func (nw *network) newEndpointImpl(epInfo *EndpointInfo) (*endpoint, error) { var err error var hostIfName string var contIfName string + var epClient EndpointClient + var vlanid int = 0 if nw.Endpoints[epInfo.Id] != nil { log.Printf("[net] Endpoint alreday exists.") @@ -48,6 +48,12 @@ func (nw *network) newEndpointImpl(epInfo *EndpointInfo) (*endpoint, error) { return nil, err } + if epInfo.Data != nil { + if _, ok := epInfo.Data[VlanIDKey]; ok { + vlanid = epInfo.Data[VlanIDKey].(int) + } + } + if _, ok := epInfo.Data[OptVethName]; ok { log.Printf("Generate veth name based on the key provided") key := epInfo.Data[OptVethName].(string) @@ -61,72 +67,53 @@ func (nw *network) newEndpointImpl(epInfo *EndpointInfo) (*endpoint, error) { contIfName = fmt.Sprintf("%s%s-2", hostVEthInterfacePrefix, epInfo.Id[:7]) } - log.Printf("[net] Creating veth pair %v %v.", hostIfName, contIfName) - - link := netlink.VEthLink{ - LinkInfo: netlink.LinkInfo{ - Type: netlink.LINK_TYPE_VETH, - Name: contIfName, - }, - PeerName: hostIfName, - } - - err = netlink.AddLink(&link) - if err != nil { - log.Printf("[net] Failed to create veth pair, err:%v.", err) - return nil, err + if vlanid != 0 { + epClient = NewOVSEndpointClient( + nw.extIf, + epInfo, + hostIfName, + contIfName, + vlanid) + } else { + epClient = NewLinuxBridgeEndpointClient(nw.extIf, hostIfName, contIfName, nw.Mode) } - // On failure, delete the veth pair. + // Cleanup on failure. defer func() { if err != nil { - netlink.DeleteLink(contIfName) - } - }() + log.Printf("CNI error. Delete Endpoint %v and rules that are created.", contIfName) + endpt := &endpoint{ + Id: epInfo.Id, + IfName: contIfName, + HostIfName: hostIfName, + IPAddresses: epInfo.IPAddresses, + Gateways: []net.IP{nw.extIf.IPv4Gateway}, + DNS: epInfo.DNS, + VlanID: vlanid, + EnableSnatOnHost: epInfo.EnableSnatOnHost, + } - // - // Host network interface setup. - // + if containerIf != nil { + endpt.MacAddress = containerIf.HardwareAddr + epClient.DeleteEndpointRules(endpt) + } - // Host interface up. - log.Printf("[net] Setting link %v state up.", hostIfName) - err = netlink.SetLinkState(hostIfName, true) - if err != nil { - return nil, err - } + epClient.DeleteEndpoints(endpt) + } + }() - // Connect host interface to the bridge. - log.Printf("[net] Setting link %v master %v.", hostIfName, nw.extIf.BridgeName) - err = netlink.SetLinkMaster(hostIfName, nw.extIf.BridgeName) - if err != nil { + if err = epClient.AddEndpoints(epInfo); err != nil { return nil, err } - // - // Container network interface setup. - // - - // Query container network interface info. containerIf, err = net.InterfaceByName(contIfName) if err != nil { return nil, err } // Setup rules for IP addresses on the container interface. - for _, ipAddr := range epInfo.IPAddresses { - // Add ARP reply rule. - log.Printf("[net] Adding ARP reply rule for IP address %v on %v.", ipAddr.String(), contIfName) - err = ebtables.SetArpReply(ipAddr.IP, nw.getArpReplyAddress(containerIf.HardwareAddr), ebtables.Append) - if err != nil { - return nil, err - } - - // Add MAC address translation rule. - log.Printf("[net] Adding MAC DNAT rule for IP address %v on %v.", ipAddr.String(), contIfName) - err = ebtables.SetDnatForIPAddress(nw.extIf.Name, ipAddr.IP, containerIf.HardwareAddr, ebtables.Append) - if err != nil { - return nil, err - } + if err = epClient.AddEndpointRules(epInfo); err != nil { + return nil, err } // If a network namespace for the container interface is specified... @@ -139,25 +126,20 @@ func (nw *network) newEndpointImpl(epInfo *EndpointInfo) (*endpoint, error) { } defer ns.Close() - // Move the container interface to container's network namespace. - log.Printf("[net] Setting link %v netns %v.", contIfName, epInfo.NetNsPath) - err = netlink.SetLinkNetNs(contIfName, ns.GetFd()) - if err != nil { + if err := epClient.MoveEndpointsToContainerNS(epInfo, ns.GetFd()); err != nil { return nil, err } // Enter the container network namespace. log.Printf("[net] Entering netns %v.", epInfo.NetNsPath) - err = ns.Enter() - if err != nil { + if err = ns.Enter(); err != nil { return nil, err } // Return to host network namespace. defer func() { log.Printf("[net] Exiting netns %v.", epInfo.NetNsPath) - err = ns.Exit() - if err != nil { + if err := ns.Exit(); err != nil { log.Printf("[net] Failed to exit netns, err:%v.", err) } }() @@ -165,64 +147,26 @@ func (nw *network) newEndpointImpl(epInfo *EndpointInfo) (*endpoint, error) { // If a name for the container interface is specified... if epInfo.IfName != "" { - // Interface needs to be down before renaming. - log.Printf("[net] Setting link %v state down.", contIfName) - err = netlink.SetLinkState(contIfName, false) - if err != nil { - return nil, err - } - - // Rename the container interface. - log.Printf("[net] Setting link %v name %v.", contIfName, epInfo.IfName) - err = netlink.SetLinkName(contIfName, epInfo.IfName) - if err != nil { - return nil, err - } - contIfName = epInfo.IfName - - // Bring the interface back up. - log.Printf("[net] Setting link %v state up.", contIfName) - err = netlink.SetLinkState(contIfName, true) - if err != nil { + if err = epClient.SetupContainerInterfaces(epInfo); err != nil { return nil, err } } - // Assign IP address to container network interface. - for _, ipAddr := range epInfo.IPAddresses { - log.Printf("[net] Adding IP address %v to link %v.", ipAddr.String(), contIfName) - err = netlink.AddIpAddress(contIfName, ipAddr.IP, &ipAddr) - if err != nil { - return nil, err - } - } - - // Add IP routes to container network interface. - for _, route := range epInfo.Routes { - log.Printf("[net] Adding IP route %+v to link %v.", route, contIfName) - - nlRoute := &netlink.Route{ - Family: netlink.GetIpAddressFamily(route.Gw), - Dst: &route.Dst, - Gw: route.Gw, - LinkIndex: containerIf.Index, - } - - err = netlink.AddIpRoute(nlRoute) - if err != nil { - return nil, err - } + if err = epClient.ConfigureContainerInterfacesAndRoutes(epInfo); err != nil { + return nil, err } // Create the endpoint object. ep = &endpoint{ - Id: epInfo.Id, - IfName: contIfName, - HostIfName: hostIfName, - MacAddress: containerIf.HardwareAddr, - IPAddresses: epInfo.IPAddresses, - Gateways: []net.IP{nw.extIf.IPv4Gateway}, - DNS: epInfo.DNS, + Id: epInfo.Id, + IfName: contIfName, + HostIfName: hostIfName, + MacAddress: containerIf.HardwareAddr, + IPAddresses: epInfo.IPAddresses, + Gateways: []net.IP{nw.extIf.IPv4Gateway}, + DNS: epInfo.DNS, + VlanID: vlanid, + EnableSnatOnHost: epInfo.EnableSnatOnHost, } for _, route := range epInfo.Routes { @@ -234,51 +178,24 @@ func (nw *network) newEndpointImpl(epInfo *EndpointInfo) (*endpoint, error) { // deleteEndpointImpl deletes an existing endpoint from the network. func (nw *network) deleteEndpointImpl(ep *endpoint) error { + var epClient EndpointClient + // Delete the veth pair by deleting one of the peer interfaces. // Deleting the host interface is more convenient since it does not require // entering the container netns and hence works both for CNI and CNM. - log.Printf("[net] Deleting veth pair %v %v.", ep.HostIfName, ep.IfName) - err := netlink.DeleteLink(ep.HostIfName) - if err != nil { - log.Printf("[net] Failed to delete veth pair %v: %v.", ep.HostIfName, err) - return err + if ep.VlanID != 0 { + epInfo := ep.getInfo() + epClient = NewOVSEndpointClient(nw.extIf, epInfo, ep.HostIfName, "", ep.VlanID) + } else { + epClient = NewLinuxBridgeEndpointClient(nw.extIf, ep.HostIfName, "", nw.Mode) } - // Delete rules for IP addresses on the container interface. - for _, ipAddr := range ep.IPAddresses { - // Delete ARP reply rule. - log.Printf("[net] Deleting ARP reply rule for IP address %v on %v.", ipAddr.String(), ep.Id) - err = ebtables.SetArpReply(ipAddr.IP, nw.getArpReplyAddress(ep.MacAddress), ebtables.Delete) - if err != nil { - log.Printf("[net] Failed to delete ARP reply rule for IP address %v: %v.", ipAddr.String(), err) - } - - // Delete MAC address translation rule. - log.Printf("[net] Deleting MAC DNAT rule for IP address %v on %v.", ipAddr.String(), ep.Id) - err = ebtables.SetDnatForIPAddress(nw.extIf.Name, ipAddr.IP, ep.MacAddress, ebtables.Delete) - if err != nil { - log.Printf("[net] Failed to delete MAC DNAT rule for IP address %v: %v.", ipAddr.String(), err) - } - } + epClient.DeleteEndpointRules(ep) + epClient.DeleteEndpoints(ep) return nil } -// getArpReplyAddress returns the MAC address to use in ARP replies. -func (nw *network) getArpReplyAddress(epMacAddress net.HardwareAddr) net.HardwareAddr { - var macAddress net.HardwareAddr - - if nw.Mode == opModeTunnel { - // In tunnel mode, resolve all IP addresses to the virtual MAC address for hairpinning. - macAddress, _ = net.ParseMAC(virtualMacAddress) - } else { - // Otherwise, resolve to actual MAC address. - macAddress = epMacAddress - } - - return macAddress -} - // getInfoImpl returns information about the endpoint. func (ep *endpoint) getInfoImpl(epInfo *EndpointInfo) { } diff --git a/network/manager.go b/network/manager.go index 41126615c2..80e2519c9d 100644 --- a/network/manager.go +++ b/network/manager.go @@ -15,9 +15,29 @@ import ( const ( // Network store key. - storeKey = "Network" + storeKey = "Network" + VlanIDKey = "VlanID" ) +type NetworkClient interface { + CreateBridge() error + DeleteBridge() error + AddL2Rules(extIf *externalInterface) error + DeleteL2Rules(extIf *externalInterface) + SetBridgeMasterToHostInterface() error + SetHairpinOnHostInterface(bool) error +} + +type EndpointClient interface { + AddEndpoints(epInfo *EndpointInfo) error + AddEndpointRules(epInfo *EndpointInfo) error + DeleteEndpointRules(ep *endpoint) + MoveEndpointsToContainerNS(epInfo *EndpointInfo, nsID uintptr) error + SetupContainerInterfaces(epInfo *EndpointInfo) error + ConfigureContainerInterfacesAndRoutes(epInfo *EndpointInfo) error + DeleteEndpoints(ep *endpoint) error +} + // NetworkManager manages the set of container networking resources. type networkManager struct { Version string @@ -226,8 +246,11 @@ func (nm *networkManager) GetNetworkInfo(networkId string) (*NetworkInfo, error) Id: networkId, Subnets: nw.Subnets, Mode: nw.Mode, + Options: make(map[string]interface{}), } + getNetworkInfoImpl(nwInfo, nw) + if nw.extIf != nil { nwInfo.BridgeName = nw.extIf.BridgeName } @@ -245,6 +268,13 @@ func (nm *networkManager) CreateEndpoint(networkId string, epInfo *EndpointInfo) return err } + if nw.VlanId != 0 { + if epInfo.Data[VlanIDKey] == nil { + log.Printf("overriding endpoint vlanid with network vlanid") + epInfo.Data[VlanIDKey] = nw.VlanId + } + } + _, err = nw.newEndpoint(epInfo) if err != nil { return err diff --git a/network/network.go b/network/network.go index 433c342dd2..f2def56489 100644 --- a/network/network.go +++ b/network/network.go @@ -33,23 +33,26 @@ type externalInterface struct { // A container network is a set of endpoints allowed to communicate with each other. type network struct { - Id string - HnsId string `json:",omitempty"` - Mode string - Subnets []SubnetInfo - Endpoints map[string]*endpoint - extIf *externalInterface + Id string + HnsId string `json:",omitempty"` + Mode string + VlanId int + Subnets []SubnetInfo + Endpoints map[string]*endpoint + extIf *externalInterface + EnableSnatOnHost bool } // NetworkInfo contains read-only information about a container network. type NetworkInfo struct { - Id string - Mode string - Subnets []SubnetInfo - DNS DNSInfo - Policies []policy.Policy - BridgeName string - Options map[string]interface{} + Id string + Mode string + Subnets []SubnetInfo + DNS DNSInfo + Policies []policy.Policy + BridgeName string + EnableSnatOnHost bool + Options map[string]interface{} } // SubnetInfo contains subnet information for a container network. diff --git a/network/network_linux.go b/network/network_linux.go index 2915d8edf7..65a46bb3f0 100644 --- a/network/network_linux.go +++ b/network/network_linux.go @@ -8,9 +8,9 @@ package network import ( "fmt" "net" + "strconv" "strings" - "github.com/Azure/azure-container-networking/ebtables" "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/netlink" "golang.org/x/sys/unix" @@ -22,6 +22,14 @@ const ( // Virtual MAC address used by Azure VNET. virtualMacAddress = "12:34:56:78:9a:bc" + + genericData = "com.docker.network.generic" + + SnatBridgeIPKey = "snatBridgeIP" + + LocalIPKey = "localIP" + + OptVethName = "vethname" ) // Linux implementation of route. @@ -30,24 +38,35 @@ type route netlink.Route // NewNetworkImpl creates a new container network. func (nm *networkManager) newNetworkImpl(nwInfo *NetworkInfo, extIf *externalInterface) (*network, error) { // Connect the external interface. + var vlanid int + opt, _ := nwInfo.Options[genericData].(map[string]interface{}) + log.Printf("opt %+v options %+v", opt, nwInfo.Options) + switch nwInfo.Mode { case opModeTunnel: fallthrough case opModeBridge: - err := nm.connectExternalInterface(extIf, nwInfo) - if err != nil { + log.Printf("create bridge") + if err := nm.connectExternalInterface(extIf, nwInfo); err != nil { return nil, err } + + if opt != nil && opt[VlanIDKey] != nil { + vlanid, _ = strconv.Atoi(opt[VlanIDKey].(string)) + } + default: return nil, errNetworkModeInvalid } // Create the network object. nw := &network{ - Id: nwInfo.Id, - Mode: nwInfo.Mode, - Endpoints: make(map[string]*endpoint), - extIf: extIf, + Id: nwInfo.Id, + Mode: nwInfo.Mode, + Endpoints: make(map[string]*endpoint), + extIf: extIf, + VlanId: vlanid, + EnableSnatOnHost: nwInfo.EnableSnatOnHost, } return nw, nil @@ -55,9 +74,17 @@ func (nm *networkManager) newNetworkImpl(nwInfo *NetworkInfo, extIf *externalInt // DeleteNetworkImpl deletes an existing container network. func (nm *networkManager) deleteNetworkImpl(nw *network) error { + var networkClient NetworkClient + + if nw.VlanId != 0 { + networkClient = NewOVSClient(nw.extIf.BridgeName, nw.extIf.Name, "", nw.EnableSnatOnHost) + } else { + networkClient = NewLinuxBridgeClient(nw.extIf.BridgeName, nw.extIf.Name, nw.Mode) + } + // Disconnect the interface if this was the last network using it. if len(nw.extIf.Networks) == 1 { - nm.disconnectExternalInterface(nw.extIf) + nm.disconnectExternalInterface(nw.extIf, networkClient) } return nil @@ -141,56 +168,10 @@ func (nm *networkManager) applyIPConfig(extIf *externalInterface, targetIf *net. return nil } -// AddBridgeRules adds bridge frame table rules for container traffic. -func (nm *networkManager) addBridgeRules(extIf *externalInterface, hostIf *net.Interface, bridgeName string, opMode string) error { - // Add SNAT rule to translate container egress traffic. - log.Printf("[net] Adding SNAT rule for egress traffic on %v.", hostIf.Name) - err := ebtables.SetSnatForInterface(hostIf.Name, hostIf.HardwareAddr, ebtables.Append) - if err != nil { - return err - } - - // Add ARP reply rule for host primary IP address. - // ARP requests for all IP addresses are forwarded to the SDN fabric, but fabric - // doesn't respond to ARP requests from the VM for its own primary IP address. - primary := extIf.IPAddresses[0].IP - log.Printf("[net] Adding ARP reply rule for primary IP address %v.", primary) - err = ebtables.SetArpReply(primary, hostIf.HardwareAddr, ebtables.Append) - if err != nil { - return err - } - - // Add DNAT rule to forward ARP replies to container interfaces. - log.Printf("[net] Adding DNAT rule for ingress ARP traffic on interface %v.", hostIf.Name) - err = ebtables.SetDnatForArpReplies(hostIf.Name, ebtables.Append) - if err != nil { - return err - } - - // Enable VEPA for host policy enforcement if necessary. - if opMode == opModeTunnel { - log.Printf("[net] Enabling VEPA mode for %v.", hostIf.Name) - err = ebtables.SetVepaMode(bridgeName, commonInterfacePrefix, virtualMacAddress, ebtables.Append) - if err != nil { - return err - } - } - - return nil -} - -// DeleteBridgeRules deletes bridge rules for container traffic. -func (nm *networkManager) deleteBridgeRules(extIf *externalInterface) { - ebtables.SetVepaMode(extIf.BridgeName, commonInterfacePrefix, virtualMacAddress, ebtables.Delete) - ebtables.SetDnatForArpReplies(extIf.Name, ebtables.Delete) - ebtables.SetArpReply(extIf.IPAddresses[0].IP, extIf.MacAddress, ebtables.Delete) - ebtables.SetSnatForInterface(extIf.Name, extIf.MacAddress, ebtables.Delete) -} - // ConnectExternalInterface connects the given host interface to a bridge. func (nm *networkManager) connectExternalInterface(extIf *externalInterface, nwInfo *NetworkInfo) error { var err error - + var networkClient NetworkClient log.Printf("[net] Connecting interface %v.", extIf.Name) defer func() { log.Printf("[net] Connecting interface %v completed with err:%v.", extIf.Name, err) }() @@ -212,28 +193,33 @@ func (nm *networkManager) connectExternalInterface(extIf *externalInterface, nwI bridgeName = fmt.Sprintf("%s%d", bridgePrefix, hostIf.Index) } + opt, _ := nwInfo.Options[genericData].(map[string]interface{}) + if opt != nil && opt[VlanIDKey] != nil { + snatBridgeIP := "" + + if opt != nil && opt[SnatBridgeIPKey] != nil { + snatBridgeIP, _ = opt[SnatBridgeIPKey].(string) + } + + networkClient = NewOVSClient(bridgeName, extIf.Name, snatBridgeIP, nwInfo.EnableSnatOnHost) + } else { + networkClient = NewLinuxBridgeClient(bridgeName, extIf.Name, nwInfo.Mode) + } + // Check if the bridge already exists. bridge, err := net.InterfaceByName(bridgeName) if err != nil { // Create the bridge. - log.Printf("[net] Creating bridge %v.", bridgeName) - link := netlink.BridgeLink{ - LinkInfo: netlink.LinkInfo{ - Type: netlink.LINK_TYPE_BRIDGE, - Name: bridgeName, - }, - } - - err = netlink.AddLink(&link) - if err != nil { + if err := networkClient.CreateBridge(); err != nil { + log.Printf("Error while creating bridge %+v", err) return err } // On failure, delete the bridge. defer func() { if err != nil { - netlink.DeleteLink(bridgeName) + networkClient.DeleteBridge() } }() @@ -252,12 +238,6 @@ func (nm *networkManager) connectExternalInterface(extIf *externalInterface, nwI log.Printf("[net] Failed to save IP configuration for interface %v: %v.", hostIf.Name, err) } - // Add the bridge rules. - err = nm.addBridgeRules(extIf, hostIf, bridgeName, nwInfo.Mode) - if err != nil { - return err - } - // External interface down. log.Printf("[net] Setting link %v state down.", hostIf.Name) err = netlink.SetLinkState(hostIf.Name, false) @@ -267,8 +247,7 @@ func (nm *networkManager) connectExternalInterface(extIf *externalInterface, nwI // Connect the external interface to the bridge. log.Printf("[net] Setting link %v master %v.", hostIf.Name, bridgeName) - err = netlink.SetLinkMaster(hostIf.Name, bridgeName) - if err != nil { + if err := networkClient.SetBridgeMasterToHostInterface(); err != nil { return err } @@ -279,20 +258,25 @@ func (nm *networkManager) connectExternalInterface(extIf *externalInterface, nwI return err } - // External interface hairpin on. - log.Printf("[net] Setting link %v hairpin on.", hostIf.Name) - err = netlink.SetLinkHairpin(hostIf.Name, true) + // Bridge up. + log.Printf("[net] Setting link %v state up.", bridgeName) + err = netlink.SetLinkState(bridgeName, true) if err != nil { return err } - // Bridge up. - log.Printf("[net] Setting link %v state up.", bridgeName) - err = netlink.SetLinkState(bridgeName, true) + // Add the bridge rules. + err = networkClient.AddL2Rules(extIf) if err != nil { return err } + // External interface hairpin on. + log.Printf("[net] Setting link %v hairpin on.", hostIf.Name) + if err := networkClient.SetHairpinOnHostInterface(true); err != nil { + return err + } + // Apply IP configuration to the bridge for host traffic. err = nm.applyIPConfig(extIf, bridge) if err != nil { @@ -308,29 +292,23 @@ func (nm *networkManager) connectExternalInterface(extIf *externalInterface, nwI } // DisconnectExternalInterface disconnects a host interface from its bridge. -func (nm *networkManager) disconnectExternalInterface(extIf *externalInterface) error { +func (nm *networkManager) disconnectExternalInterface(extIf *externalInterface, networkClient NetworkClient) { log.Printf("[net] Disconnecting interface %v.", extIf.Name) + log.Printf("[net] Deleting bridge rules") // Delete bridge rules set on the external interface. - nm.deleteBridgeRules(extIf) + networkClient.DeleteL2Rules(extIf) - // Disconnect external interface from its bridge. - err := netlink.SetLinkMaster(extIf.Name, "") - if err != nil { - log.Printf("[net] Failed to disconnect interface %v from bridge, err:%v.", extIf.Name, err) - } - - // Delete the bridge. - err = netlink.DeleteLink(extIf.BridgeName) - if err != nil { - log.Printf("[net] Failed to delete bridge %v, err:%v.", extIf.BridgeName, err) - } + log.Printf("[net] Deleting bridge") + // Delete Bridge + networkClient.DeleteBridge() extIf.BridgeName = "" + log.Printf("Restoring ipconfig with primary interface %v", extIf.Name) // Restore IP configuration. hostIf, _ := net.InterfaceByName(extIf.Name) - err = nm.applyIPConfig(extIf, hostIf) + err := nm.applyIPConfig(extIf, hostIf) if err != nil { log.Printf("[net] Failed to apply IP configuration: %v.", err) } @@ -339,6 +317,12 @@ func (nm *networkManager) disconnectExternalInterface(extIf *externalInterface) extIf.Routes = nil log.Printf("[net] Disconnected interface %v.", extIf.Name) +} - return nil +func getNetworkInfoImpl(nwInfo *NetworkInfo, nw *network) { + if nw.VlanId != 0 { + vlanMap := make(map[string]interface{}) + vlanMap[VlanIDKey] = strconv.Itoa(nw.VlanId) + nwInfo.Options[genericData] = vlanMap + } } diff --git a/network/network_windows.go b/network/network_windows.go index 29f23cf84d..19f3bf1895 100644 --- a/network/network_windows.go +++ b/network/network_windows.go @@ -90,3 +90,6 @@ func (nm *networkManager) deleteNetworkImpl(nw *network) error { return err } + +func getNetworkInfoImpl(nwInfo *NetworkInfo, nw *network) { +} diff --git a/network/ovs_endpointclient_linux.go b/network/ovs_endpointclient_linux.go new file mode 100644 index 0000000000..848ab31c25 --- /dev/null +++ b/network/ovs_endpointclient_linux.go @@ -0,0 +1,257 @@ +package network + +import ( + "fmt" + "net" + + "github.com/Azure/azure-container-networking/log" + "github.com/Azure/azure-container-networking/netlink" + "github.com/Azure/azure-container-networking/ovsctl" +) + +type OVSEndpointClient struct { + bridgeName string + hostPrimaryIfName string + hostVethName string + hostPrimaryMac string + containerVethName string + containerMac string + snatVethName string + snatBridgeIP string + localIP string + vlanID int + enableSnatOnHost bool +} + +const ( + snatVethInterfacePrefix = commonInterfacePrefix + "vint" + azureSnatIfName = "eth1" +) + +func NewOVSEndpointClient( + extIf *externalInterface, + epInfo *EndpointInfo, + hostVethName string, + containerVethName string, + vlanid int, +) *OVSEndpointClient { + + client := &OVSEndpointClient{ + bridgeName: extIf.BridgeName, + hostPrimaryIfName: extIf.Name, + hostVethName: hostVethName, + hostPrimaryMac: extIf.MacAddress.String(), + containerVethName: containerVethName, + vlanID: vlanid, + enableSnatOnHost: epInfo.EnableSnatOnHost, + } + + if _, ok := epInfo.Data[LocalIPKey]; ok { + client.localIP = epInfo.Data[LocalIPKey].(string) + } + + if _, ok := epInfo.Data[SnatBridgeIPKey]; ok { + client.snatBridgeIP = epInfo.Data[SnatBridgeIPKey].(string) + } + + return client +} + +func (client *OVSEndpointClient) AddEndpoints(epInfo *EndpointInfo) error { + if err := createEndpoint(client.hostVethName, client.containerVethName); err != nil { + return err + } + + containerIf, err := net.InterfaceByName(client.containerVethName) + if err != nil { + return err + } + + client.containerMac = containerIf.HardwareAddr.String() + + if client.enableSnatOnHost { + if err := createSnatBridge(client.snatBridgeIP, client.bridgeName); err != nil { + log.Printf("creating snat bridge failed with error %v", err) + return err + } + + if err := addMasqueradeRule(client.snatBridgeIP); err != nil { + log.Printf("Adding snat rule failed with error %v", err) + return err + } + + if err := addVlanDropRule(); err != nil { + log.Printf("Adding vlan drop rule failed with error %v", err) + return err + } + + if err := addStaticRoute(imdsIP, client.bridgeName); err != nil { + log.Printf("Adding imds static route failed with error %v", err) + return err + } + + hostIfName := fmt.Sprintf("%s%s", snatVethInterfacePrefix, epInfo.Id[:7]) + contIfName := fmt.Sprintf("%s%s-2", snatVethInterfacePrefix, epInfo.Id[:7]) + + if err := createEndpoint(hostIfName, contIfName); err != nil { + return err + } + + if err := netlink.SetLinkMaster(hostIfName, snatBridgeName); err != nil { + return err + } + + client.snatVethName = contIfName + } + + return nil +} + +func (client *OVSEndpointClient) AddEndpointRules(epInfo *EndpointInfo) error { + log.Printf("[ovs] Setting link %v master %v.", client.hostVethName, client.bridgeName) + if err := ovsctl.AddPortOnOVSBridge(client.hostVethName, client.bridgeName, client.vlanID); err != nil { + return err + } + + log.Printf("[ovs] Get ovs port for interface %v.", client.hostVethName) + containerPort, err := ovsctl.GetOVSPortNumber(client.hostVethName) + if err != nil { + log.Printf("[ovs] Get ofport failed with error %v", err) + return err + } + + log.Printf("[ovs] Get ovs port for interface %v.", client.hostPrimaryIfName) + hostPort, err := ovsctl.GetOVSPortNumber(client.hostPrimaryIfName) + if err != nil { + log.Printf("[ovs] Get ofport failed with error %v", err) + return err + } + + // IP SNAT Rule + log.Printf("[ovs] Adding IP SNAT rule for egress traffic on %v.", containerPort) + if err := ovsctl.AddIpSnatRule(client.bridgeName, containerPort, client.hostPrimaryMac); err != nil { + return err + } + + for _, ipAddr := range epInfo.IPAddresses { + // Add Arp Reply Rules + // Set Vlan id on arp request packet and forward it to table 1 + if err := ovsctl.AddFakeArpReply(client.bridgeName, ipAddr.IP); err != nil { + return err + } + + // Add IP DNAT rule based on dst ip and vlanid + log.Printf("[ovs] Adding MAC DNAT rule for IP address %v on %v.", ipAddr.IP.String(), hostPort) + if err := ovsctl.AddMacDnatRule(client.bridgeName, hostPort, ipAddr.IP, client.containerMac, client.vlanID); err != nil { + return err + } + } + + return nil +} + +func (client *OVSEndpointClient) DeleteEndpointRules(ep *endpoint) { + log.Printf("[ovs] Get ovs port for interface %v.", ep.HostIfName) + containerPort, err := ovsctl.GetOVSPortNumber(client.hostVethName) + if err != nil { + log.Printf("[ovs] Get portnum failed with error %v", err) + } + + log.Printf("[ovs] Get ovs port for interface %v.", client.hostPrimaryIfName) + hostPort, err := ovsctl.GetOVSPortNumber(client.hostPrimaryIfName) + if err != nil { + log.Printf("[ovs] Get portnum failed with error %v", err) + } + + // Delete IP SNAT + log.Printf("[ovs] Deleting IP SNAT for port %v", containerPort) + ovsctl.DeleteIPSnatRule(client.bridgeName, containerPort) + + // Delete Arp Reply Rules for container + log.Printf("[ovs] Deleting ARP reply rule for ip %v vlanid %v for container port", ep.IPAddresses[0].IP.String(), ep.VlanID, containerPort) + ovsctl.DeleteArpReplyRule(client.bridgeName, containerPort, ep.IPAddresses[0].IP, ep.VlanID) + + // Delete MAC address translation rule. + log.Printf("[ovs] Deleting MAC DNAT rule for IP address %v and vlan %v.", ep.IPAddresses[0].IP.String(), ep.VlanID) + ovsctl.DeleteMacDnatRule(client.bridgeName, hostPort, ep.IPAddresses[0].IP, ep.VlanID) + + // Delete port from ovs bridge + log.Printf("[ovs] Deleting interface %v from bridge %v", client.hostVethName, client.bridgeName) + ovsctl.DeletePortFromOVS(client.bridgeName, client.hostVethName) +} + +func (client *OVSEndpointClient) MoveEndpointsToContainerNS(epInfo *EndpointInfo, nsID uintptr) error { + // Move the container interface to container's network namespace. + log.Printf("[ovs] Setting link %v netns %v.", client.containerVethName, epInfo.NetNsPath) + if err := netlink.SetLinkNetNs(client.containerVethName, nsID); err != nil { + return err + } + + if client.enableSnatOnHost { + log.Printf("[ovs] Setting link %v netns %v.", client.snatVethName, epInfo.NetNsPath) + if err := netlink.SetLinkNetNs(client.snatVethName, nsID); err != nil { + return err + } + } + + return nil +} + +func (client *OVSEndpointClient) SetupContainerInterfaces(epInfo *EndpointInfo) error { + + if err := setupContainerInterface(client.containerVethName, epInfo.IfName); err != nil { + return err + } + + client.containerVethName = epInfo.IfName + + if client.enableSnatOnHost { + if err := setupContainerInterface(client.snatVethName, azureSnatIfName); err != nil { + return err + } + client.snatVethName = azureSnatIfName + } + + return nil +} + +func (client *OVSEndpointClient) ConfigureContainerInterfacesAndRoutes(epInfo *EndpointInfo) error { + if err := assignIPToInterface(client.containerVethName, epInfo.IPAddresses); err != nil { + return err + } + + if client.enableSnatOnHost { + log.Printf("[ovs] Adding IP address %v to link %v.", client.localIP, client.snatVethName) + ip, intIpAddr, _ := net.ParseCIDR(client.localIP) + if err := netlink.AddIpAddress(client.snatVethName, ip, intIpAddr); err != nil { + return err + } + } + + if err := addRoutes(client.containerVethName, epInfo.Routes); err != nil { + return err + } + + return nil +} + +func (client *OVSEndpointClient) DeleteEndpoints(ep *endpoint) error { + log.Printf("[ovs] Deleting veth pair %v %v.", ep.HostIfName, ep.IfName) + err := netlink.DeleteLink(ep.HostIfName) + if err != nil { + log.Printf("[ovs] Failed to delete veth pair %v: %v.", ep.HostIfName, err) + return err + } + + if client.enableSnatOnHost { + hostIfName := fmt.Sprintf("%s%s", snatVethInterfacePrefix, ep.Id[:7]) + log.Printf("[ovs] Deleting snat veth pair %v.", hostIfName) + err = netlink.DeleteLink(hostIfName) + if err != nil { + log.Printf("[ovs] Failed to delete veth pair %v: %v.", hostIfName, err) + return err + } + } + + return nil +} diff --git a/network/ovs_networkclient_linux.go b/network/ovs_networkclient_linux.go new file mode 100644 index 0000000000..2e8afd4520 --- /dev/null +++ b/network/ovs_networkclient_linux.go @@ -0,0 +1,326 @@ +package network + +import ( + "bytes" + "fmt" + "net" + "os" + "strings" + + "github.com/Azure/azure-container-networking/log" + "github.com/Azure/azure-container-networking/netlink" + "github.com/Azure/azure-container-networking/ovsctl" + "github.com/Azure/azure-container-networking/platform" +) + +type OVSNetworkClient struct { + bridgeName string + hostInterfaceName string + snatBridgeIP string + enableSnatOnHost bool +} + +const ( + azureSnatVeth0 = "azSnatveth0" + azureSnatVeth1 = "azSnatveth1" + snatBridgeName = "azSnatbr" + imdsIP = "169.254.169.254/32" + ovsConfigFile = "/etc/default/openvswitch-switch" + ovsOpt = "OVS_CTL_OPTS='--delete-bridges'" +) + +func updateOVSConfig(option string) error { + f, err := os.OpenFile(ovsConfigFile, os.O_APPEND|os.O_RDWR, 0666) + if err != nil { + log.Printf("Error while opening ovs config %v", err) + return err + } + + defer f.Close() + + buf := new(bytes.Buffer) + buf.ReadFrom(f) + contents := buf.String() + + conSplit := strings.Split(contents, "\n") + for _, existingOption := range conSplit { + if option == existingOption { + log.Printf("Not updating ovs config. Found option already written") + return nil + } + } + + log.Printf("writing ovsconfig option %v", option) + + if _, err = f.WriteString(option); err != nil { + log.Printf("Error while writing ovs config %v", err) + return err + } + + return nil +} + +func NewOVSClient(bridgeName, hostInterfaceName, snatBridgeIP string, enableSnatOnHost bool) *OVSNetworkClient { + ovsClient := &OVSNetworkClient{ + bridgeName: bridgeName, + hostInterfaceName: hostInterfaceName, + snatBridgeIP: snatBridgeIP, + enableSnatOnHost: enableSnatOnHost, + } + + return ovsClient +} + +func (client *OVSNetworkClient) CreateBridge() error { + if err := ovsctl.CreateOVSBridge(client.bridgeName); err != nil { + return err + } + + if err := updateOVSConfig(ovsOpt); err != nil { + return err + } + + if client.enableSnatOnHost { + if err := createSnatBridge(client.snatBridgeIP, client.bridgeName); err != nil { + log.Printf("[net] Creating snat bridge failed with erro %v", err) + return err + } + + if err := addMasqueradeRule(client.snatBridgeIP); err != nil { + return err + } + + return addVlanDropRule() + } + + return nil +} + +func addVlanDropRule() error { + cmd := "ebtables -t nat -L PREROUTING" + out, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("Error while listing ebtable rules %v", err) + return err + } + + out = strings.TrimSpace(out) + if strings.Contains(out, "-p 802_1Q -j DROP") { + log.Printf("vlan drop rule already exists") + return nil + } + + cmd = "ebtables -t nat -A PREROUTING -p 802_1Q -j DROP" + log.Printf("Adding ebtable rule to drop vlan traffic on snat bridge %v", cmd) + _, err = platform.ExecuteCommand(cmd) + return err +} + +func addMasqueradeRule(snatBridgeIPWithPrefix string) error { + _, ipNet, _ := net.ParseCIDR(snatBridgeIPWithPrefix) + cmd := fmt.Sprintf("iptables -t nat -C POSTROUTING -s %v -j MASQUERADE", ipNet.String()) + _, err := platform.ExecuteCommand(cmd) + if err == nil { + log.Printf("iptable snat rule already exists") + return nil + } + + cmd = fmt.Sprintf("iptables -t nat -A POSTROUTING -s %v -j MASQUERADE", ipNet.String()) + log.Printf("Adding iptable snat rule %v", cmd) + _, err = platform.ExecuteCommand(cmd) + return err +} + +func deleteMasqueradeRule(interfaceName string) error { + snatIf, err := net.InterfaceByName(interfaceName) + if err != nil { + return err + } + + addrs, _ := snatIf.Addrs() + for _, addr := range addrs { + ipAddr, ipNet, err := net.ParseCIDR(addr.String()) + if err != nil { + log.Printf("error %v", err) + continue + } + + if ipAddr.To4() != nil { + cmd := fmt.Sprintf("iptables -t nat -D POSTROUTING -s %v -j MASQUERADE", ipNet.String()) + log.Printf("Deleting iptable snat rule %v", cmd) + _, err = platform.ExecuteCommand(cmd) + return err + } + } + + return nil +} + +func (client *OVSNetworkClient) DeleteBridge() error { + if err := ovsctl.DeleteOVSBridge(client.bridgeName); err != nil { + log.Printf("Deleting ovs bridge failed with error %v", err) + return err + } + + if client.enableSnatOnHost { + deleteMasqueradeRule(snatBridgeName) + + cmd := "ebtables -t nat -D PREROUTING -p 802_1Q -j DROP" + _, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("Deleting ebtable vlan drop rule failed with error %v", err) + } + + if err := ovsctl.DeletePortFromOVS(client.bridgeName, azureSnatVeth1); err != nil { + return err + } + + if err := DeleteSnatBridge(); err != nil { + log.Printf("Deleting snat bridge failed with error %v", err) + return err + } + + return netlink.DeleteLink(azureSnatVeth0) + } + + return nil +} + +func createSnatBridge(snatBridgeIP string, mainInterface string) error { + _, err := net.InterfaceByName(snatBridgeName) + if err == nil { + log.Printf("Snat Bridge already exists") + return nil + } + + log.Printf("[net] Creating Snat bridge %v.", snatBridgeName) + + link := netlink.BridgeLink{ + LinkInfo: netlink.LinkInfo{ + Type: netlink.LINK_TYPE_BRIDGE, + Name: snatBridgeName, + }, + } + + if err := netlink.AddLink(&link); err != nil { + return err + } + + _, err = net.InterfaceByName(azureSnatVeth0) + if err == nil { + log.Printf("Azure snat veth already exists") + return nil + } + + vethLink := netlink.VEthLink{ + LinkInfo: netlink.LinkInfo{ + Type: netlink.LINK_TYPE_VETH, + Name: azureSnatVeth0, + }, + PeerName: azureSnatVeth1, + } + + err = netlink.AddLink(&vethLink) + if err != nil { + log.Printf("[net] Failed to create veth pair, err:%v.", err) + return err + } + + log.Printf("Assigning %v on snat bridge", snatBridgeIP) + + ip, addr, _ := net.ParseCIDR(snatBridgeIP) + err = netlink.AddIpAddress(snatBridgeName, ip, addr) + if err != nil && !strings.Contains(strings.ToLower(err.Error()), "file exists") { + log.Printf("[net] Failed to add IP address %v: %v.", addr, err) + return err + } + + if err := netlink.SetLinkState(snatBridgeName, true); err != nil { + return err + } + + if err := netlink.SetLinkState(azureSnatVeth0, true); err != nil { + return err + } + + if err := netlink.SetLinkMaster(azureSnatVeth0, snatBridgeName); err != nil { + return err + } + + if err := netlink.SetLinkState(azureSnatVeth1, true); err != nil { + return err + } + + if err := ovsctl.AddPortOnOVSBridge(azureSnatVeth1, mainInterface, 0); err != nil { + return err + } + + return nil +} + +func addStaticRoute(ip string, interfaceName string) error { + log.Printf("[ovs] Adding %v static route", ip) + var routes []RouteInfo + _, ipNet, _ := net.ParseCIDR(imdsIP) + gwIP := net.ParseIP("0.0.0.0") + route := RouteInfo{Dst: *ipNet, Gw: gwIP} + routes = append(routes, route) + if err := addRoutes(interfaceName, routes); err != nil { + if err != nil && !strings.Contains(strings.ToLower(err.Error()), "file exists") { + log.Printf("addroutes failed with error %v", err) + return err + } + } + + return nil +} + +func DeleteSnatBridge() error { + // Delete the bridge. + err := netlink.DeleteLink(snatBridgeName) + if err != nil { + log.Printf("[net] Failed to delete bridge %v, err:%v.", snatBridgeName, err) + } + + return err +} + +func (client *OVSNetworkClient) AddL2Rules(extIf *externalInterface) error { + //primary := extIf.IPAddresses[0].IP.String() + mac := extIf.MacAddress.String() + macHex := strings.Replace(mac, ":", "", -1) + + ofport, err := ovsctl.GetOVSPortNumber(client.hostInterfaceName) + if err != nil { + return err + } + + // Arp SNAT Rule + log.Printf("[ovs] Adding ARP SNAT rule for egress traffic on interface %v", client.hostInterfaceName) + if err := ovsctl.AddArpSnatRule(client.bridgeName, mac, macHex, ofport); err != nil { + return err + } + + log.Printf("[ovs] Adding DNAT rule for ingress ARP traffic on interface %v.", client.hostInterfaceName) + if err := ovsctl.AddArpDnatRule(client.bridgeName, ofport, macHex); err != nil { + return err + } + + if client.enableSnatOnHost { + addStaticRoute(imdsIP, client.bridgeName) + } + + return nil +} + +func (client *OVSNetworkClient) DeleteL2Rules(extIf *externalInterface) { + ovsctl.DeletePortFromOVS(client.bridgeName, client.hostInterfaceName) +} + +func (client *OVSNetworkClient) SetBridgeMasterToHostInterface() error { + return ovsctl.AddPortOnOVSBridge(client.hostInterfaceName, client.bridgeName, 0) +} + +func (client *OVSNetworkClient) SetHairpinOnHostInterface(enable bool) error { + return nil +} diff --git a/ovsctl/ovsctl.go b/ovsctl/ovsctl.go new file mode 100644 index 0000000000..7e777121a3 --- /dev/null +++ b/ovsctl/ovsctl.go @@ -0,0 +1,232 @@ +package ovsctl + +import ( + "fmt" + "net" + "strings" + + "github.com/Azure/azure-container-networking/common" + "github.com/Azure/azure-container-networking/log" + "github.com/Azure/azure-container-networking/platform" +) + +const ( + defaultMacForArpResponse = "12:34:56:78:9a:bc" +) + +func CreateOVSBridge(bridgeName string) error { + log.Printf("[ovs] Creating OVS Bridge %v", bridgeName) + + ovsCreateCmd := fmt.Sprintf("ovs-vsctl add-br %s", bridgeName) + _, err := platform.ExecuteCommand(ovsCreateCmd) + if err != nil { + log.Printf("[ovs] Error while creating OVS bridge %v", err) + return err + } + + return nil +} + +func DeleteOVSBridge(bridgeName string) error { + log.Printf("[ovs] Deleting OVS Bridge %v", bridgeName) + + ovsCreateCmd := fmt.Sprintf("ovs-vsctl del-br %s", bridgeName) + _, err := platform.ExecuteCommand(ovsCreateCmd) + if err != nil { + log.Printf("[ovs] Error while deleting OVS bridge %v", err) + return err + } + + return nil +} + +func AddPortOnOVSBridge(hostIfName string, bridgeName string, vlanID int) error { + cmd := "" + + if vlanID == 0 { + cmd = fmt.Sprintf("ovs-vsctl add-port %s %s", bridgeName, hostIfName) + } else { + cmd = fmt.Sprintf("ovs-vsctl add-port %s %s tag=%d", bridgeName, hostIfName, vlanID) + } + _, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[ovs] Error while setting OVS as master to primary interface %v", err) + return err + } + + return nil +} + +func GetOVSPortNumber(interfaceName string) (string, error) { + cmd := fmt.Sprintf("ovs-vsctl get Interface %s ofport", interfaceName) + ofport, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[ovs] Get ofport failed with error %v", err) + return "", err + } + + return strings.Trim(ofport, "\n"), nil +} + +func AddVMIpAcceptRule(bridgeName string, primaryIP string, mac string) error { + cmd := fmt.Sprintf("ovs-ofctl add-flow %s ip,nw_dst=%s,dl_dst=%s,priority=20,actions=normal", bridgeName, primaryIP, mac) + _, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[ovs] Adding SNAT rule failed with error %v", err) + return err + } + + return nil +} + +func AddArpSnatRule(bridgeName string, mac string, macHex string, ofport string) error { + cmd := fmt.Sprintf(`ovs-ofctl add-flow %v table=1,priority=10,arp,arp_op=1,actions='mod_dl_src:%s, + load:0x%s->NXM_NX_ARP_SHA[],output:%s'`, bridgeName, mac, macHex, ofport) + _, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[ovs] Adding ARP SNAT rule failed with error %v", err) + return err + } + + return nil +} + +func AddIpSnatRule(bridgeName string, port string, mac string) error { + cmd := fmt.Sprintf("ovs-ofctl add-flow %v priority=20,ip,in_port=%s,vlan_tci=0,actions=mod_dl_src:%s,strip_vlan,normal", + bridgeName, port, mac) + _, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[ovs] Adding IP SNAT rule failed with error %v", err) + return err + } + + cmd = fmt.Sprintf("ovs-ofctl add-flow %v priority=10,ip,in_port=%s,actions=drop", + bridgeName, port) + _, err = platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[ovs] Dropping vlantag packet rule failed with error %v", err) + return err + } + + return nil +} + +func AddArpDnatRule(bridgeName string, port string, mac string) error { + // Add DNAT rule to forward ARP replies to container interfaces. + cmd := fmt.Sprintf(`ovs-ofctl add-flow %s arp,arp_op=2,in_port=%s,actions='mod_dl_dst:ff:ff:ff:ff:ff:ff, + load:0x%s->NXM_NX_ARP_THA[],normal'`, bridgeName, port, mac) + _, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[ovs] Adding DNAT rule failed with error %v", err) + return err + } + + return nil +} + +func AddFakeArpReply(bridgeName string, ip net.IP) error { + // If arp fields matches, set arp reply rule for the request + macAddrHex := strings.Replace(defaultMacForArpResponse, ":", "", -1) + ipAddrInt := common.IpToInt(ip) + + log.Printf("[ovs] Adding ARP reply rule for IP address %v ", ip.String()) + cmd := fmt.Sprintf(`ovs-ofctl add-flow %s arp,arp_op=1,priority=20,actions='load:0x2->NXM_OF_ARP_OP[], + move:NXM_OF_ETH_SRC[]->NXM_OF_ETH_DST[],mod_dl_src:%s, + move:NXM_NX_ARP_SHA[]->NXM_NX_ARP_THA[],move:NXM_OF_ARP_TPA[]->NXM_OF_ARP_SPA[], + load:0x%s->NXM_NX_ARP_SHA[],load:0x%x->NXM_OF_ARP_TPA[],IN_PORT'`, + bridgeName, defaultMacForArpResponse, macAddrHex, ipAddrInt) + _, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[ovs] Adding ARP reply rule failed with error %v", err) + return err + } + + return nil +} + +func AddArpReplyRule(bridgeName string, port string, ip net.IP, mac string, vlanid int, mode string) error { + ipAddrInt := common.IpToInt(ip) + macAddrHex := strings.Replace(mac, ":", "", -1) + + log.Printf("[ovs] Adding ARP reply rule to add vlan %v and forward packet to table 1 for port %v", vlanid, port) + cmd := fmt.Sprintf(`ovs-ofctl add-flow %s arp,arp_op=1,in_port=%s,actions='mod_vlan_vid:%v,resubmit(,1)'`, + bridgeName, port, vlanid) + _, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[ovs] Adding ARP reply rule failed with error %v", err) + return err + } + + // If arp fields matches, set arp reply rule for the request + log.Printf("[ovs] Adding ARP reply rule for IP address %v and vlanid %v.", ip, vlanid) + cmd = fmt.Sprintf(`ovs-ofctl add-flow %s table=1,arp,arp_tpa=%s,dl_vlan=%v,arp_op=1,priority=20,actions='load:0x2->NXM_OF_ARP_OP[], + move:NXM_OF_ETH_SRC[]->NXM_OF_ETH_DST[],mod_dl_src:%s, + move:NXM_NX_ARP_SHA[]->NXM_NX_ARP_THA[],move:NXM_OF_ARP_SPA[]->NXM_OF_ARP_TPA[], + load:0x%s->NXM_NX_ARP_SHA[],load:0x%x->NXM_OF_ARP_SPA[],strip_vlan,IN_PORT'`, + bridgeName, ip.String(), vlanid, mac, macAddrHex, ipAddrInt) + _, err = platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[ovs] Adding ARP reply rule failed with error %v", err) + return err + } + + return nil +} + +func AddMacDnatRule(bridgeName string, port string, ip net.IP, mac string, vlanid int) error { + cmd := fmt.Sprintf("ovs-ofctl add-flow %s ip,nw_dst=%s,dl_vlan=%v,in_port=%s,actions=mod_dl_dst:%s,normal", + bridgeName, ip.String(), vlanid, port, mac) + _, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[ovs] Adding MAC DNAT rule failed with error %v", err) + return err + } + + return nil +} + +func DeleteArpReplyRule(bridgeName string, port string, ip net.IP, vlanid int) { + cmd := fmt.Sprintf("ovs-ofctl del-flows %s arp,arp_op=1,in_port=%s", + bridgeName, port) + _, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[net] Deleting ARP reply rule failed with error %v", err) + } + + cmd = fmt.Sprintf("ovs-ofctl del-flows %s table=1,arp,arp_tpa=%s,dl_vlan=%v,arp_op=1", + bridgeName, ip.String(), vlanid) + _, err = platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[net] Deleting ARP reply rule failed with error %v", err) + } +} + +func DeleteIPSnatRule(bridgeName string, port string) { + cmd := fmt.Sprintf("ovs-ofctl del-flows %v ip,in_port=%s", + bridgeName, port) + _, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("Error while deleting ovs rule %v error %v", cmd, err) + } +} + +func DeleteMacDnatRule(bridgeName string, port string, ip net.IP, vlanid int) { + cmd := fmt.Sprintf("ovs-ofctl del-flows %s ip,nw_dst=%s,dl_vlan=%v,in_port=%s", + bridgeName, ip.String(), vlanid, port) + _, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[net] Deleting MAC DNAT rule failed with error %v", err) + } +} + +func DeletePortFromOVS(bridgeName string, interfaceName string) error { + // Disconnect external interface from its bridge. + cmd := fmt.Sprintf("ovs-vsctl del-port %s %s", bridgeName, interfaceName) + _, err := platform.ExecuteCommand(cmd) + if err != nil { + log.Printf("[ovs] Failed to disconnect interface %v from bridge, err:%v.", interfaceName, err) + return err + } + + return nil +} diff --git a/platform/os_linux.go b/platform/os_linux.go index fa79a022e8..d07a12e62a 100644 --- a/platform/os_linux.go +++ b/platform/os_linux.go @@ -4,10 +4,13 @@ package platform import ( + "bytes" + "fmt" "io/ioutil" - "log" "os/exec" "time" + + "github.com/Azure/azure-container-networking/log" ) const ( @@ -49,13 +52,30 @@ func GetLastRebootTime() (time.Time, error) { return rebootTime.UTC(), nil } -// ExecuteShellCommand executes a shell command. -func ExecuteShellCommand(command string) error { - //log.Debugf("[shell] %s", command) +func ExecuteCommand(command string) (string, error) { + log.Printf("[Azure-Utils] %s", command) + + var stderr bytes.Buffer + var out bytes.Buffer cmd := exec.Command("sh", "-c", command) - err := cmd.Start() + cmd.Stderr = &stderr + cmd.Stdout = &out + + err := cmd.Run() + if err != nil { + return "", fmt.Errorf("%s:%s", err.Error(), stderr.String()) + } + + return out.String(), nil +} + +func SetOutboundSNAT(subnet string) error { + cmd := fmt.Sprintf("iptables -t nat -A POSTROUTING -m iprange ! --dst-range 168.63.129.16 -m addrtype ! --dst-type local ! -d %v -j MASQUERADE", + subnet) + _, err := ExecuteCommand(cmd) if err != nil { + log.Printf("SNAT Iptable rule was not set") return err } - return cmd.Wait() + return nil } diff --git a/platform/os_windows.go b/platform/os_windows.go index 5d4a467c6c..706dc95b03 100644 --- a/platform/os_windows.go +++ b/platform/os_windows.go @@ -26,3 +26,11 @@ func GetLastRebootTime() (time.Time, error) { var rebootTime time.Time return rebootTime, nil } + +func ExecuteCommand(command string) (string, error) { + return "", nil +} + +func SetOutboundSNAT(subnet string) error { + return nil +}