-
Notifications
You must be signed in to change notification settings - Fork 260
[NPM] Generate TLS certificates during docker build and bake into image for gRPC secure channels #1262
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NPM] Generate TLS certificates during docker build and bake into image for gRPC secure channels #1262
Changes from all commits
cf50116
adc477b
312fe9c
d9cda76
bbd3f9f
143d399
e971fac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,3 +24,9 @@ ipam-*.xml | |
| controller-gen | ||
| build/tools/bin | ||
| npm/debug/http | ||
|
|
||
| # certificates | ||
| */**/certs/ | ||
| *.crt | ||
| *.pem | ||
| *.srl | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,7 +6,7 @@ import ( | |
|
|
||
| "github.com/Azure/azure-container-networking/npm/pkg/protos" | ||
| "google.golang.org/grpc" | ||
| "google.golang.org/grpc/credentials/insecure" | ||
| "google.golang.org/grpc/credentials" | ||
| "k8s.io/klog/v2" | ||
| ) | ||
|
|
||
|
|
@@ -37,9 +37,18 @@ func NewEventsClient(ctx context.Context, pod, node, addr string) (*EventsClient | |
| } | ||
|
|
||
| klog.Infof("Connecting to NPM controller gRPC server at address %s\n", addr) | ||
| // TODO Make this secure | ||
| // TODO Remove WithBlock option post testing | ||
| cc, err := grpc.DialContext(ctx, addr, grpc.WithTransportCredentials(insecure.NewCredentials())) | ||
|
|
||
| config, err := clientTLSConfig() | ||
| if err != nil { | ||
| klog.Errorf("failed to load client tls config : %s", err) | ||
| return nil, fmt.Errorf("failed to load client tls config : %w", err) | ||
| } | ||
|
|
||
| cc, err := grpc.DialContext( | ||
| ctx, | ||
| addr, | ||
| grpc.WithTransportCredentials(credentials.NewTLS(config)), | ||
| ) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to dial %s: %w", addr, err) | ||
| } | ||
|
|
@@ -81,11 +90,12 @@ func (c *EventsClient) run(ctx context.Context, stopCh <-chan struct{}) error { | |
| default: | ||
| if connectClient == nil { | ||
| klog.Info("Reconnecting to gRPC server controller") | ||
| opts := []grpc.CallOption{grpc.WaitForReady(true)} | ||
| opts := []grpc.CallOption{grpc.WaitForReady(false)} | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. curious what this represents and why we had it true before, false now
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. True blocks the connection until the server is ready to accept the connection. We don't want to block our connection, since the server pod could crash while we are connecting or, in the load-balancing case, reject the connection, in which case the agent would need to block until timeout. Instead we try sending, error out, and try reconnecting. |
||
| connectClient, err = c.Connect(ctx, clientMetadata, opts...) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to connect to dataplane events server: %w", err) | ||
| } | ||
| klog.Info("Successfully connected to gRPC server controller") | ||
| } | ||
| event, err := connectClient.Recv() | ||
| if err != nil { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| package transport | ||
|
|
||
| import ( | ||
| "crypto/tls" | ||
| "crypto/x509" | ||
| "fmt" | ||
| "os" | ||
|
|
||
| "google.golang.org/grpc/credentials" | ||
| ) | ||
|
|
||
| const ( | ||
| serverCertPEMFilename = "tls.crt" | ||
| serverKeyPEMFilename = "tls.key" | ||
| caCertPEMFilename = "ca.crt" | ||
| path = "/usr/local/npm" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we have this path be defined in azure-npm-config ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The certs are baked in through the Dockerfile. Not sure how to make that dynamic. Hence I made this static :/
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you could make it dynamic using a volume mount at runtime for a BYO cert situation; it would be better for these to be configurable imo |
||
| ) | ||
|
|
||
| func serverTLSCreds() (credentials.TransportCredentials, error) { | ||
| certFilepath := path + "/" + serverCertPEMFilename | ||
| keyFilepath := path + "/" + serverKeyPEMFilename | ||
|
|
||
| creds, err := credentials.NewServerTLSFromFile(certFilepath, keyFilepath) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to create creds from cert/key files : %w", err) | ||
| } | ||
| return creds, nil | ||
| } | ||
|
|
||
| func clientTLSConfig() (*tls.Config, error) { | ||
| caCertFilepath := path + "/" + caCertPEMFilename | ||
| // Load certificate of the CA who signed server's certificate | ||
| pemServerCA, err := os.ReadFile(caCertFilepath) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("Failed to read the CA cert : %w", err) | ||
| } | ||
|
|
||
| certPool := x509.NewCertPool() | ||
| if !certPool.AppendCertsFromPEM(pemServerCA) { | ||
| return nil, fmt.Errorf("failed to append ca cert to cert pool : %w", ErrTLSCerts) | ||
| } | ||
|
|
||
| // Create the credentials and return it | ||
| return &tls.Config{ //nolint // setting tls min version to 3 | ||
| RootCAs: certPool, | ||
| InsecureSkipVerify: false, | ||
| }, nil | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
#!/bin/bash
#
# Generates a throw-away CA and a server certificate signed by it:
#   ca.crt   - CA certificate (kept; used by clients as the trust root)
#   tls.key  - server private key
#   tls.crt  - server certificate, carrying the SANs defined in $SAN_CNF_FILE
# The CA private key and the signing request are always deleted on exit.

# Abort on the first failing command, unset variable, or failed pipeline so a
# broken openssl invocation cannot produce a "successful" run without certs.
set -euo pipefail

CERTS_STAGING_DIR=.
SAN_CNF_FILE=san.cnf
CERTIFICATE_VALIDITY_DAYS=3650
CERT_SUBJ="/C=US/ST=Washington/L=Redmond/O=Microsoft/OU=Azure/CN=azure-npm.kube-system.svc.cluster.local"

# Check if openssl is installed
if ! command -v openssl &> /dev/null
then
    echo "openssl could not be found" >&2
    # Exit non-zero: a bare `exit` would report the status of the echo (0).
    exit 1
fi

# Check if SAN_CNF_FILE exists
if [ ! -f "$SAN_CNF_FILE" ]
then
    echo "SAN_CNF_FILE does not exist" >&2
    exit 1
fi

if [ ! -d "$CERTS_STAGING_DIR" ]
then
    echo "Creating $CERTS_STAGING_DIR"
    mkdir -p "$CERTS_STAGING_DIR"
fi

# Remove the secret CA key and signing request whenever the script exits,
# including when one of the openssl steps below fails part-way through.
trap 'rm -f "$CERTS_STAGING_DIR/ca.key" "$CERTS_STAGING_DIR/server-req.pem"' EXIT

# Generate the ca certificate and key
openssl req -x509 -newkey rsa:4096 -days "$CERTIFICATE_VALIDITY_DAYS" -nodes \
    -keyout "$CERTS_STAGING_DIR/ca.key" \
    -out "$CERTS_STAGING_DIR/ca.crt" \
    -subj "$CERT_SUBJ"

# Create a certificate signing request for the server
openssl req -newkey rsa:4096 -nodes \
    -keyout "$CERTS_STAGING_DIR/tls.key" \
    -out "$CERTS_STAGING_DIR/server-req.pem" \
    -config "$SAN_CNF_FILE" -extensions v3_req \
    -subj "$CERT_SUBJ"

# Sign the server certificate with the CA
openssl x509 -req -in "$CERTS_STAGING_DIR/server-req.pem" \
    -CA "$CERTS_STAGING_DIR/ca.crt" -CAkey "$CERTS_STAGING_DIR/ca.key" -CAcreateserial \
    -out "$CERTS_STAGING_DIR/tls.crt" \
    -days "$CERTIFICATE_VALIDITY_DAYS" \
    -extfile "$SAN_CNF_FILE" -extensions v3_req
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
# OpenSSL request configuration for the server certificate.
# The v3_req section is applied both when creating the signing request and
# when the CA signs it, so the subjectAltName entries end up in tls.crt.

[ req ]
default_bits = 2048
distinguished_name = req_distinguished_name
req_extensions = v3_req

[ req_distinguished_name ]
countryName = Country Name (2 letter code)
stateOrProvinceName = State or Province Name (full name)
localityName = Locality Name (eg, city)
organizationName = Organization Name (eg, company)
commonName = Common Name (e.g. server FQDN or YOUR name)

[ v3_req ]
keyUsage = digitalSignature, nonRepudiation, keyEncipherment
subjectAltName = @alt_names

[alt_names]
DNS.1 = azure-npm.kube-system.svc.cluster.local
DNS.2 = azure-npm.kube-system
DNS.3 = azure-npm
# Kept for backward compatibility. An IP literal in a dNSName entry is not
# matched by verifiers that compare IP hosts against iPAddress SANs only
# (Go's crypto/x509 among them), hence the IP.1 entry below.
DNS.4 = 0.0.0.0
IP.1 = 0.0.0.0
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is `npm/npm/` intentional?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep. Since I didn't want to change the WORKDIR in the builder image (here), which copies the host's source to
`/usr/local/src/npm`, it has to be this way.