-
Notifications
You must be signed in to change notification settings - Fork 18.7k
/
interface_linux.go
160 lines (144 loc) · 5.99 KB
/
interface_linux.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
package bridge
import (
"context"
"fmt"
"net"
"net/netip"
"os"
"github.com/containerd/log"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/libnetwork/internal/netiputil"
"github.com/vishvananda/netlink"
)
// DefaultBridgeName is the bridge interface name the driver falls back to
// when the caller does not specify one.
const DefaultBridgeName = "docker0"
// bridgeInterface models the bridge network device.
type bridgeInterface struct {
// Link is the netlink device backing the bridge, as returned by the lookup
// in newInterface. exists() treats a nil Link as "no device found".
Link netlink.Link
// bridgeIPv4 is not assigned anywhere in this part of the file; presumably
// the bridge's IPv4 address and subnet - TODO confirm against the IPv4 setup
// code.
bridgeIPv4 *net.IPNet
// bridgeIPv6 is set by programIPv6Addresses to the configured AddressIPv6.
// Not to be confused with the package-level identifier of the same name that
// getRequiredIPv6Addrs reads for the link-local address.
bridgeIPv6 *net.IPNet
// gatewayIPv4 is not assigned anywhere in this part of the file; presumably
// the IPv4 gateway address - TODO confirm against the IPv4 setup code.
gatewayIPv4 net.IP
// gatewayIPv6 is set by programIPv6Addresses to the IP of the configured
// AddressIPv6.
gatewayIPv6 net.IP
// nlh is the netlink handle used for the link lookup and for listing,
// deleting and replacing addresses on Link.
nlh *netlink.Handle
}
// newInterface builds the bridgeInterface for the bridge named by
// config.BridgeName. An empty name is replaced, in config itself, by
// DefaultBridgeName before the lookup.
//
// The device is only looked up, never created. When the lookup fails, the
// failure is logged at debug level and the returned structure carries
// whatever link value the lookup produced, with a nil error. An error is
// returned only when a device with that name is found but is not a bridge.
func newInterface(nlh *netlink.Handle, config *networkConfiguration) (*bridgeInterface, error) {
	// Fall back to the default bridge name when the caller did not pick one.
	if config.BridgeName == "" {
		config.BridgeName = DefaultBridgeName
	}

	// Look for an existing device carrying that name.
	link, lookupErr := nlh.LinkByName(config.BridgeName)
	iface := &bridgeInterface{nlh: nlh, Link: link}
	if lookupErr != nil {
		log.G(context.TODO()).Debugf("Did not find any interface with name %s: %v", config.BridgeName, lookupErr)
		return iface, nil
	}

	// A device with this name exists; refuse to manage it unless it is a bridge.
	if _, isBridge := link.(*netlink.Bridge); !isBridge {
		return nil, fmt.Errorf("existing interface %s is not a bridge", link.Attrs().Name)
	}
	return iface, nil
}
// exists reports whether a bridge device is attached to this structure, that
// is, whether Link is non-nil.
func (i *bridgeInterface) exists() bool {
return i.Link != nil
}
// addresses returns the addresses assigned to the bridge for the given
// family: IPv4 (family=netlink.FAMILY_V4) or IPv6 (family=netlink.FAMILY_V6).
//
// When no bridge device is attached (exists() is false) it returns nil, nil.
// A failure to list the addresses is returned wrapped with %w, so callers can
// still inspect the underlying error with errors.Is / errors.As.
func (i *bridgeInterface) addresses(family int) ([]netlink.Addr, error) {
	if !i.exists() {
		// A nonexistent interface, by definition, cannot have any addresses.
		return nil, nil
	}
	addrs, err := i.nlh.AddrList(i.Link, family)
	if err != nil {
		// The message text is kept as-is for anything matching on it; only
		// the verb changed, to preserve the error chain.
		return nil, fmt.Errorf("Failed to retrieve addresses: %w", err)
	}
	return addrs, nil
}
// getRequiredIPv6Addrs returns the IPv6 addresses the bridge must carry, keyed
// by address, with each value holding that address together with its prefix
// length.
//
// The configured bridge address (config.AddressIPv6) is always included.
// Unless the environment variable DOCKER_BRIDGE_PRESERVE_KERNEL_LL is "1", the
// package-level link-local bridge address (bridgeIPv6) is included as well.
// An error is returned if either address cannot be converted to a
// netip.Prefix.
func getRequiredIPv6Addrs(config *networkConfiguration) (requiredAddrs map[netip.Addr]netip.Prefix, err error) {
	required := make(map[netip.Addr]netip.Prefix)

	if preserveKernelLL := os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") == "1"; !preserveKernelLL {
		// Every interface is required to have an address in 'fe80::/64'. Linux
		// may assign one itself, but it gets replaced with 'fe80::1'. That way,
		// if the configured prefix is 'fe80::/64', the address assigned from
		// the IPAM pool does not become a second address in the link-local
		// subnet.
		llPrefix, converted := netiputil.ToPrefix(bridgeIPv6)
		if !converted {
			return nil, fmt.Errorf("Failed to convert Link-Local IPv6 address to netip.Prefix")
		}
		required[llPrefix.Addr()] = llPrefix
	}

	// The configured address goes in last, so it wins if it is the same
	// address as the link-local one.
	bridgePrefix, converted := netiputil.ToPrefix(config.AddressIPv6)
	if !converted {
		return nil, fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix", config.AddressIPv6.String())
	}
	required[bridgePrefix.Addr()] = bridgePrefix

	return required, nil
}
// programIPv6Addresses makes the bridge's IPv6 addresses match the set
// required by config (see getRequiredIPv6Addrs).
//
// It records config.AddressIPv6 as the bridge and gateway IPv6 address on i,
// then:
//   - deletes every existing IPv6 address that is not required; a failed
//     deletion is logged as a warning and does not abort the operation;
//   - adds (or replaces) every required address; the first failure aborts
//     and is returned.
//
// When the environment variable DOCKER_BRIDGE_PRESERVE_KERNEL_LL is "1",
// existing addresses in the link-local prefix are never deleted.
//
// All returned errors are wrapped with errdefs.System.
func (i *bridgeInterface) programIPv6Addresses(config *networkConfiguration) error {
	// Get the IPv6 addresses currently assigned to the bridge, if any.
	existingAddrs, err := i.addresses(netlink.FAMILY_V6)
	if err != nil {
		return errdefs.System(err)
	}

	// Get the set of required IPv6 addresses for this bridge.
	requiredAddrs, err := getRequiredIPv6Addrs(config)
	if err != nil {
		return errdefs.System(err)
	}
	i.bridgeIPv6 = config.AddressIPv6
	i.gatewayIPv6 = config.AddressIPv6.IP

	// Read the opt-out once; it does not change between iterations.
	preserveKernelLL := os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") == "1"

	// Remove addresses that aren't required. Indexing into the slice, rather
	// than ranging by value, avoids taking the address of a loop variable.
	for idx := range existingAddrs {
		existingAddr := &existingAddrs[idx]

		ea, ok := netip.AddrFromSlice(existingAddr.IP)
		if !ok {
			// Report the address that actually failed to convert, not the
			// configured bridge address.
			return errdefs.System(fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", existingAddr.IP))
		}

		// Optionally, avoid deleting the kernel-assigned link local address.
		// (Don't delete fe80::1 either - if it was previously assigned to the
		// bridge, and the kernel_ll address was deleted, the bridge won't get
		// a new kernel_ll address.)
		if preserveKernelLL {
			// A Prefix error yields the zero Prefix, which never equals
			// linkLocalPrefix, so ignoring it is safe here.
			if p, _ := ea.Prefix(64); p == linkLocalPrefix {
				continue
			}
		}

		// Ignore the prefix length when comparing addresses, it's
		// informational (RFC-5942 section 4), and removing/re-adding an
		// address that's still valid would disrupt traffic on live-restore.
		if _, required := requiredAddrs[ea]; required {
			continue
		}
		if err := i.nlh.AddrDel(i.Link, existingAddr); err != nil {
			// Best effort: a residual address is not fatal.
			log.G(context.TODO()).WithFields(log.Fields{"error": err, "address": existingAddr.IPNet}).Warnf("Failed to remove residual IPv6 address from bridge")
		}
	}

	// Add or update required addresses.
	for _, addrPrefix := range requiredAddrs {
		// Using AddrReplace(), rather than AddrAdd(). When the subnet is
		// changed for an existing bridge in a way that doesn't affect the
		// bridge's assigned address, the old address has not been removed at
		// this point - because that would be service-affecting for a running
		// container.
		//
		// But if, for example, 'fixed-cidr-v6' is changed from
		// '2000:dbe::/64' to '2000:dbe::/80', the default bridge will still be
		// assigned address '2000:dbe::1'. In the output of 'ip a', the prefix
		// length is displayed - and the user is likely to expect to see it
		// updated from '64' to '80'. Unfortunately, 'netlink.AddrReplace()'
		// ('RTM_NEWADDR' with 'NLM_F_REPLACE') doesn't update the prefix
		// length. This is a cosmetic problem, the prefix length of an assigned
		// address is not used to determine whether an address is "on-link"
		// (RFC-5942).
		if err := i.nlh.AddrReplace(i.Link, &netlink.Addr{IPNet: netiputil.ToIPNet(addrPrefix)}); err != nil {
			// Name the address that failed (it may be the link-local one, not
			// the configured bridge address) and keep the error chain intact.
			return errdefs.System(fmt.Errorf("failed to add IPv6 address %s to bridge: %w", addrPrefix, err))
		}
	}
	return nil
}