Skip to content

Commit

Permalink
fix: silent refresh errors on active connection (#219)
Browse files Browse the repository at this point in the history
When an instance already has active connections, it should not throw
errors when trying to refresh cloud instance certificates in the
background. Otherwise these errors will bubble up and stop the end-user
application.

This changeset fixes it by adding a more resilient system to the
CloudSQLInstance refresh logic that silences errors that occur during a
refresh and keeps valid certificate data that can be used if a refresh
is still ongoing or if an error happens during a refresh.

Fixes: #201
Co-authored-by: Jack Wotherspoon <jackwoth@google.com>
  • Loading branch information
ruyadorno and jackwotherspoon committed Sep 26, 2023
1 parent a998db6 commit 41a8e79
Show file tree
Hide file tree
Showing 5 changed files with 497 additions and 86 deletions.
156 changes: 129 additions & 27 deletions src/cloud-sql-instance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import {InstanceMetadata} from './sqladmin-fetcher';
import {generateKeys} from './crypto';
import {RSAKeys} from './rsa-keys';
import {SslCert} from './ssl-cert';
import {getRefreshInterval} from './time';
import {getRefreshInterval, isExpirationTimeValid} from './time';
import {AuthTypes} from './auth-types';

interface Fetcher {
Expand All @@ -43,6 +43,13 @@ interface CloudSQLInstanceOptions {
sqlAdminFetcher: Fetcher;
}

// Set of values produced by a single refresh cycle. performRefresh resolves
// with one of these and updateValues copies it onto the instance.
interface RefreshResult {
  // IP address picked by selectIpAddress from the instance metadata.
  host: string;
  // Private half of the RSA key pair generated for this refresh.
  privateKey: string;
  // Server CA certificate taken from the instance metadata.
  serverCaCert: SslCert;
  // Certificate returned by the fetcher's getEphemeralCertificate call; its
  // expirationTime is what isValid and the refresh scheduling look at.
  ephemeralCert: SslCert;
}

export class CloudSQLInstance {
static async getCloudSQLInstance(
options: CloudSQLInstanceOptions
Expand All @@ -56,8 +63,10 @@ export class CloudSQLInstance {
private readonly authType: AuthTypes;
private readonly sqlAdminFetcher: Fetcher;
private readonly limitRateInterval: number;
private ongoingRefreshPromise?: Promise<void>;
private scheduledRefreshID?: ReturnType<typeof setTimeout>;
private establishedConnection: boolean = false;
// The ongoing refresh promise is referenced by the `next` property
private next?: Promise<RefreshResult>;
private scheduledRefreshID?: ReturnType<typeof setTimeout> | null = undefined;
/* eslint-disable-next-line @typescript-eslint/no-explicit-any */
private throttle?: any;
public readonly instanceInfo: InstanceConnectionInfo;
Expand Down Expand Up @@ -98,60 +107,153 @@ export class CloudSQLInstance {
// Forces a refresh of the instance data. When a refresh cycle is already in
// flight (tracked by `next`) it only waits for that cycle and returns;
// otherwise it cancels the scheduled refresh timer and starts a new cycle
// through refresh().
// NOTE(review): this span looks like a rendered diff; the
// `ongoingRefreshPromise` check below appears to be the pre-change version of
// the `next` check that follows it. Confirm against the actual file.
async forceRefresh(): Promise<void> {
// if a refresh is already ongoing, just await for its promise to fulfill
// so that a new instance info is available before reconnecting
if (this.ongoingRefreshPromise) {
await this.ongoingRefreshPromise;
if (this.next) {
await this.next;
return;
}
// No cycle in flight: drop the pending timer so only the refresh started
// here is active.
this.cancelRefresh();
return this.refresh();
}

// Runs one refresh cycle: remembers the scheduled-refresh id seen at entry,
// stores the promise chain for the cycle in `next`, initializes the rate
// limiter and finally awaits `next`.
// NOTE(review): this span looks like a rendered diff in which the statements
// using `ongoingRefreshPromise` / `_refresh` are the pre-change version of
// the `next` / `performRefresh` statements that follow them. Confirm against
// the actual file before relying on it.
async refresh(): Promise<void> {
// Snapshot of the timer id; compared further down to detect that
// cancelRefresh() replaced it while this cycle was running.
const currentRefreshId = this.scheduledRefreshID;

// Since forceRefresh might be invoked during an ongoing refresh
// we keep track of the ongoing promise in order to be able to await
// for it in the forceRefresh method.
// In case the throttle mechanism is already initialized, we add the
// extra wait time `limitRateInterval` in order to limit the rate of
// requests to Cloud SQL Admin APIs.
this.ongoingRefreshPromise = this.throttle
? this.throttle(this._refresh).call(this)
: this._refresh();

// awaits for the ongoing promise to resolve, since the refresh is
// completed once the promise is resolved, we just free up the reference
// to the promise at this point, ensuring any new call to `forceRefresh`
// is able to trigger a new refresh
await this.ongoingRefreshPromise;
this.ongoingRefreshPromise = undefined;

// Initializing the rate limiter at the end of the function so that the
// first refresh cycle is never rate-limited, ensuring there are 2 calls
// allowed prior to waiting a throttle interval.
// The throttled path is only taken when a throttle exists and a scheduled
// refresh id is currently set; otherwise performRefresh is called directly.
this.next = (
this.throttle && this.scheduledRefreshID
? this.throttle(this.performRefresh).call(this)
: this.performRefresh()
)
// These handlers need to be part of the promise chain referenced by
// next in order to avoid race conditions
.then((nextValues: RefreshResult) => {
// in case the id at the moment of starting this refresh cycle has
// changed, that means that it has been canceled
if (currentRefreshId !== this.scheduledRefreshID) {
// NOTE(review): this early return skips the `this.next = undefined`
// below, so `next` keeps referencing this settled cycle and
// forceRefresh() will only await it; confirm this is intended.
return;
}

// In case the performRefresh method succeeded
// then we go ahead and update values
this.updateValues(nextValues);

// The next cycle is scheduled from the expiration time of the
// certificate that was just stored.
const refreshInterval = getRefreshInterval(
/* c8 ignore next */
String(this.ephemeralCert?.expirationTime)
);
this.scheduleRefresh(refreshInterval);

// This is the end of the successful refresh chain, so now
// we release the reference to the next
this.next = undefined;
})
.catch((err: unknown) => {
// In case there's already an active connection we won't throw
// refresh errors to the final user, scheduling a new
// immediate refresh instead.
if (this.establishedConnection) {
// Only retry when this cycle was not canceled in the meantime.
if (currentRefreshId === this.scheduledRefreshID) {
this.scheduleRefresh(0);
}
} else {
// NOTE(review): rethrowing here skips the `this.next = undefined`
// below, so `next` stays set to the rejected promise and a later
// forceRefresh() would await it and rethrow; confirm this is intended.
throw err as Error;
}

// This refresh cycle has failed, releases ref to next
this.next = undefined;
});

// The rate limiter needs to be initialized _after_ assigning a ref
// to next in order to avoid race conditions with
// the forceRefresh check that ensures a refresh cycle is not ongoing
await this.initializeRateLimiter();

// Resolves once the chain above has settled; rejects only when the catch
// handler rethrows (no established connection).
await this.next;
}

// NOTE(review): this span looks like a rendered diff; the `_refresh`
// signature and the statements assigning directly to `this.*` /
// `this.scheduledRefreshID` appear to be the pre-change version of the
// `performRefresh` statements next to them. Confirm against the actual file.
private async _refresh(): Promise<void> {
// The performRefresh method will perform all the necessary async steps
// in order to get a new set of values for an instance that can then be
// used to create new connections to a Cloud SQL instance. It throws in
// case any of the internal steps fails.
// It does not write to the instance itself: the new values are returned to
// the caller as a RefreshResult.
private async performRefresh(): Promise<RefreshResult> {
// Key generation and the metadata fetch are awaited one after the other.
const rsaKeys: RSAKeys = await generateKeys();
const metadata: InstanceMetadata =
await this.sqlAdminFetcher.getInstanceMetadata(this.instanceInfo);

this.ephemeralCert = await this.sqlAdminFetcher.getEphemeralCertificate(
// The ephemeral certificate is requested for the freshly generated public key.
const ephemeralCert = await this.sqlAdminFetcher.getEphemeralCertificate(
this.instanceInfo,
rsaKeys.publicKey,
this.authType
);
this.host = selectIpAddress(metadata.ipAddresses, this.ipType);
this.privateKey = rsaKeys.privateKey;
this.serverCaCert = metadata.serverCaCert;
const host = selectIpAddress(metadata.ipAddresses, this.ipType);
const privateKey = rsaKeys.privateKey;
const serverCaCert = metadata.serverCaCert;

this.scheduledRefreshID = setTimeout(() => {
this.refresh();
}, getRefreshInterval(this.ephemeralCert.expirationTime));
// Values currently stored on the instance; any of them may still be unset.
const currentValues = {
ephemeralCert: this.ephemeralCert,
host: this.host,
privateKey: this.privateKey,
serverCaCert: this.serverCaCert,
};

// Values obtained by this refresh attempt.
const nextValues = {
ephemeralCert,
host,
privateKey,
serverCaCert,
};

// In the rather odd case that the current ephemeral certificate is still
// valid while we get an invalid result from the API calls, then preserve
// the current metadata.
if (this.isValid(currentValues) && !this.isValid(nextValues)) {
// The cast relies on isValid(currentValues) having just confirmed that
// every field is set.
return currentValues as RefreshResult;
}

return nextValues;
}

// Tells whether a (possibly incomplete) set of refresh values is usable:
// all four fields must be present and the ephemeral certificate's
// expiration time must pass isExpirationTimeValid.
private isValid(candidate: Partial<RefreshResult>): boolean {
  const {ephemeralCert, host, privateKey, serverCaCert} = candidate;

  if (ephemeralCert && host && privateKey && serverCaCert) {
    return isExpirationTimeValid(ephemeralCert.expirationTime);
  }

  // At least one of the values is missing.
  return false;
}

// Copies the four values of a refresh result onto this instance.
private updateValues(nextValues: RefreshResult): void {
  this.ephemeralCert = nextValues.ephemeralCert;
  this.host = nextValues.host;
  this.privateKey = nextValues.privateKey;
  this.serverCaCert = nextValues.serverCaCert;
}

// Arms a timer that calls refresh() after `delay` milliseconds and stores
// the timer handle in scheduledRefreshID.
private scheduleRefresh(delay: number): void {
  const timer = setTimeout(() => {
    this.refresh();
  }, delay);
  this.scheduledRefreshID = timer;
}

// Clears the pending refresh timer, if there is one, and resets
// scheduledRefreshID to null. refresh() compares the id it captured at
// start against this field to notice that it has changed.
cancelRefresh(): void {
  const pendingTimer = this.scheduledRefreshID;
  if (pendingTimer) {
    clearTimeout(pendingTimer);
  }
  this.scheduledRefreshID = null;
}

// Marks this instance as having an established connection. Once the flag is
// set, the error handler in refresh() no longer rethrows refresh failures to
// the caller and instead schedules an immediate new refresh (unless the
// cycle was canceled).
// NOTE(review): nothing in the visible code resets the flag; confirm
// whether it is meant to stay set for the lifetime of the instance.
setEstablishedConnection(): void {
this.establishedConnection = true;
}
}
3 changes: 3 additions & 0 deletions src/connector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,9 @@ export class Connector {
tlsSocket.once('error', async () => {
await cloudSqlInstance.forceRefresh();
});
tlsSocket.once('secureConnect', async () => {
cloudSqlInstance.setEstablishedConnection();
});
return tlsSocket;
}

Expand Down
5 changes: 5 additions & 0 deletions src/time.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,8 @@ export function getNearestExpiration(
}
return new Date(certExp).toISOString();
}

// Returns true when `isoTime` parses to an instant that is still in the
// future. An unparsable string yields NaN, which never compares greater
// than the current time, so it is reported as not valid.
export function isExpirationTimeValid(isoTime: string): boolean {
  const expiresAtMs = new Date(isoTime).getTime();
  return expiresAtMs > Date.now();
}
Loading

0 comments on commit 41a8e79

Please sign in to comment.