Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Modified parameters for BigtableToParquet" #1546

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/actions/setup-env/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,13 @@ runs:
using: 'composite'
steps:
- name: Setup Go
uses: actions/setup-go@44e221478fc6847752e5c574fc7a7b3247b00fbf
uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
with:
go-version: ${{ inputs.go-version }}
cache: false
- name: Get Changed Files
id: changed-files
uses: tj-actions/changed-files@61ee456a9d0f512e7ecfdf28863634c97dae2d16
uses: tj-actions/changed-files@4c5f5d698fbf2d763d5f13815ac7c2ccbef1ff7f # v44.2.0
with:
separator: ','
# It shouldn't take too long to build all of this, and it will at least
Expand Down
4 changes: 2 additions & 2 deletions .github/actions/setup-java-env/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ runs:
using: 'composite'
steps:
- name: Setup Java
uses: actions/setup-java@a12e082d834968c1847f782019214fadd20719f6
uses: actions/setup-java@5896cecc08fd8a1fbdfaf517e29b571164b031f7 # v4.2.0
with:
distribution: 'temurin'
java-version: ${{ inputs.java-version }}
Expand All @@ -57,7 +57,7 @@ runs:
echo "YESTERDAY=$KEY" >> $GITHUB_ENV
fi
- name: Setup Cache
uses: actions/cache@72d1e4fdff0ff7b1b6e86b415f2d4f5941e5c006
uses: actions/cache@ab5e6d0c87105b4c9c2047343972218f562e4319 # v4.0.1
id: setup-cache
with:
path: |
Expand Down
17 changes: 13 additions & 4 deletions .github/scripts/configure-runners.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# For running the script, see go/templates-gitactions-script
# For running the script, see
# https://github.com/GoogleCloudPlatform/DataflowTemplates/blob/main/contributor-docs/maintainers-guide.md#provision-new-runners

# Defaults
NAME_SUFFIX="it"
Expand All @@ -22,6 +23,10 @@ BASE_NAME="gitactions-runner"
REPO_NAME="DataflowTemplates"
REPO_OWNER="GoogleCloudPlatform"
GH_RUNNER_VERSION="2.299.1"

MACHINE_TYPE="n1-highmem-32"
BOOT_DISK_SIZE="200GB"

VERBOSE=0

############################################################
Expand All @@ -44,6 +49,8 @@ Help()
echo "o (optional) Set the owner of the GitHub repo. Default '$REPO_OWNER'"
echo "s (optional) Set the number of runners. Default $SIZE"
echo "v (optional) Set the gitactions runner version. Default $GH_RUNNER_VERSION"
echo "m (optional) Set the machine type for the GCE VM runner. $MACHINE_TYPE"
echo "b (optional) Set the boot disk size for the GCE VM runner. $BOOT_DISK_SIZE"
echo "V Verbose mode."
echo "h Print this Help."
echo
Expand Down Expand Up @@ -79,6 +86,10 @@ while getopts ":h:Vp:a:t:n:S:r:o:s:v:" option; do
SIZE=$OPTARG;;
v) # Enter a version
GH_RUNNER_VERSION=$OPTARG;;
m) # Enter a machine type
MACHINE_TYPE=$OPTARG;;
b) # Enter a boot disk size
BOOT_DISK_SIZE=$OPTARG;;
V) # Verbose
VERBOSE=1;;
\?) # Invalid option
Expand Down Expand Up @@ -163,8 +174,6 @@ gcloud secrets add-iam-policy-binding $SECRET_NAME \
IMAGE_FAMILY="ubuntu-2004-lts"
IMAGE_PROJECT="ubuntu-os-cloud"
BOOT_DISK_TYPE="pd-balanced"
BOOT_DISK_SIZE="200GB"
MACHINE_TYPE="n1-highmem-16"
SCOPE="cloud-platform"
if [ $VERBOSE -eq 1 ]; then echo; echo "Creating instance template: $INSTANCE_TEMPLATE_NAME..."; fi
if [ $VERBOSE -eq 1 ]; then
Expand All @@ -181,7 +190,7 @@ gcloud compute instance-templates create $INSTANCE_TEMPLATE_NAME \
--image-project=$IMAGE_PROJECT \
--boot-disk-type=$BOOT_DISK_TYPE \
--boot-disk-size=$BOOT_DISK_SIZE \
--machine-type="MACHINE_TYPE" \
--machine-type=$MACHINE_TYPE \
--scopes=$SCOPE \
--service-account=${SA_EMAIL} \
--metadata-from-file=startup-script=startup-script-${NAME_SUFFIX}.sh,shutdown-script=shutdown-script-${NAME_SUFFIX}.sh
Expand Down
26 changes: 12 additions & 14 deletions .github/scripts/startup-script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,28 @@ ulimit -n 65536
# increase max virtual memory
sudo sysctl -w vm.max_map_count=262144

# update git
sudo add-apt-repository ppa:git-core/ppa -y
sudo apt update
sudo apt install git -y

# install jq
apt-get update
apt-get -y install jq
sudo apt install jq -y

# install maven
sudo apt update
sudo apt install git maven -y

# update git
sudo add-apt-repository ppa:git-core/ppa -y
sudo apt-get update
sudo apt-get install git -y

# install gh
sudo type -p curl >/dev/null || (sudo apt update && sudo apt install curl -y)
sudo curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
sudo apt install curl -y \
&& sudo curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
&& sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
&& sudo apt update \
&& sudo apt install gh -y

# install docker
sudo apt-get update
sudo apt-get install \
sudo apt update
sudo apt install \
ca-certificates \
curl \
gnupg \
Expand All @@ -55,8 +53,8 @@ curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o
echo \
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
$(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin -y
sudo apt update
sudo apt install docker-ce docker-ce-cli containerd.io docker-compose-plugin -y

# add user to docker group
sudo groupadd docker
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/go-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout Code
uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 # v4.0.0
- name: Setup Go
uses: actions/setup-go@44e221478fc6847752e5c574fc7a7b3247b00fbf
uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
with:
go-version: '1.21'
- name: Run Fmt
Expand All @@ -53,9 +53,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout Code
uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 # v4.0.0
- name: Setup Go
uses: actions/setup-go@44e221478fc6847752e5c574fc7a7b3247b00fbf
uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
with:
go-version: '1.21'
# By nature, this also makes sure that everything builds
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/java-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ on:
- cron: "0 */12 * * *"
workflow_dispatch:

concurrency:
group: java-pr-${{ github.event.issue.number || github.run_id }}
cancel-in-progress: true

env:
MAVEN_OPTS: -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=error

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/prepare-java-cache.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
run: |
echo "CACHE_KEY=''" >> $GITHUB_ENV
- name: Checkout Code
uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 # v4.0.0
- name: Setup Java
id: setup-java
uses: ./.github/actions/setup-java-env
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ permissions:
jobs:
release:
name: Create Release
runs-on: [self-hosted, it]
runs-on: [self-hosted, release]
steps:
- name: Get releaser identity
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/scorecards-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:

steps:
- name: "Checkout code"
uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 # v4.0.0
with:
persist-credentials: false

Expand Down
71 changes: 70 additions & 1 deletion contributor-docs/maintainers-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,73 @@ This repo's code currently is mirrored in Google's internal source control syste
6) Approve the change internally. At this point, the change will get automatically merged internally and externally.

We are actively working to deprecate this process and use GitHub as the only source of truth.
If you encounter unresolvable issues with this flow, please reach out to the Dataflow team directly.
If you encounter unresolvable issues with this flow, please reach out to the Dataflow team directly.

## GitHub actions

There are several workflows that leverage GitHub Actions to keep the repo healthy. Of these workflows, there are
currently 2 that run on self-hosted runners on GCP: [Java PR](../.github/workflows/java-pr.yml), which is used to
test PRs, and [Release](../.github/workflows/release.yml), which is the workflow used for releasing new templates each
week.

### Provision new runners

There are instances where we may need to re-provision self-hosted runners, due to unexpected failures, updating
dependencies, increasing memory, etc. In these cases, there are helper scripts to aid in redeployment of the GitHub
actions runners.

There are 3 scripts: [configure-runners.sh](../.github/scripts/configure-runners.sh),
[startup-script.sh](../.github/scripts/startup-script.sh) and
[shutdown-script.sh](../.github/scripts/shutdown-script.sh). The first is the main script used to provision the runners
themselves. The startup script is invoked by the GCE VM when it boots up for the first time; it installs all
packages needed by integration tests (ITs), unit tests, the Release workflow, etc., and links the machine as a runner
for the repo. Likewise, the shutdown script is run when the VM is shut down.

To provision GitHub Actions runners, there are a couple of prerequisites:
- Must be a maintainer of the repo
- Must have access to GCP project cloud-teleport-testing

Things to remember:
- Running the script will tear down existing runners and provision new ones. This will kill any actions currently
running on any of the runners. Failure to spin up the new runners correctly will block PRs and releases, so use it
carefully.
- After running the script, it is likely the old runners will still be listed under
https://github.com/GoogleCloudPlatform/DataflowTemplates/settings/actions/runners. Simply force-remove these to keep the
repo clean.
- The commands below will demonstrate how to provision runners for use with our workflows as they exist today. If there
arises a need to provision runners in a different manner, feel free to modify the scripts directly and open a PR with
the necessary changes.

To run the configuration script:

1. Set gcloud project to cloud-teleport-testing if not already set
```
gcloud config set project cloud-teleport-testing
```

2. Store the GitHub Actions token in a shell variable
```
GITACTIONS_TOKEN=$(gcloud secrets versions access latest --secret=gitactions-runner-secret)
```

3. Run the script

* For IT runners:

```
./configure-runners.sh \
-p cloud-teleport-testing \
-a 269744978479-compute@developer.gserviceaccount.com \
-t $GITACTIONS_TOKEN
```

* For the performance test runner:
```
./configure-runners.sh \
-p cloud-teleport-testing \
-a 269744978479-compute@developer.gserviceaccount.com \
-t $GITACTIONS_TOKEN \
-S perf \
-s 1
```

**Note**: To see optional configurable parameters, run `./configure-runners.sh -h`
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,18 @@
import com.google.cloud.bigquery.TableId;
import java.time.format.DateTimeFormatter;
import java.util.regex.Pattern;
import org.apache.commons.lang3.RandomStringUtils;

/** Utilities for {@link BigQueryResourceManager} implementations. */
public final class BigQueryResourceManagerUtils {

private static final int MAX_DATASET_ID_LENGTH = 1024;
private static final Pattern ILLEGAL_DATASET_ID_CHARS = Pattern.compile("[^a-zA-Z0-9_]");
private static final String REPLACE_CHAR = "_";
private static final int MIN_TABLE_ID_LENGTH = 1;
private static final int MAX_TABLE_ID_LENGTH = 1024;
private static final Pattern ILLEGAL_TABLE_CHARS = Pattern.compile("[^a-zA-Z0-9-_]");
private static final DateTimeFormatter TIME_FORMAT =
DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss_SSSSSS");
private static final String TIME_FORMAT = "yyyyMMdd_HHmmss";

private BigQueryResourceManagerUtils() {}

Expand All @@ -46,8 +47,31 @@ private BigQueryResourceManagerUtils() {}
* @return a BigQuery compatible dataset name.
*/
static String generateDatasetId(String datasetName) {

// Take substring of datasetName to account for random suffix
// TODO(polber) - remove with Beam 2.57.0
int randomSuffixLength = 6;
datasetName =
datasetName.substring(
0,
Math.min(
datasetName.length(),
MAX_DATASET_ID_LENGTH
- REPLACE_CHAR.length()
- TIME_FORMAT.length()
- REPLACE_CHAR.length()
- randomSuffixLength));

// Add random suffix to avoid collision
// TODO(polber) - remove with Beam 2.57.0
return generateResourceId(
datasetName, ILLEGAL_DATASET_ID_CHARS, "_", MAX_DATASET_ID_LENGTH, TIME_FORMAT);
datasetName,
ILLEGAL_DATASET_ID_CHARS,
REPLACE_CHAR,
MAX_DATASET_ID_LENGTH,
DateTimeFormatter.ofPattern(TIME_FORMAT))
+ REPLACE_CHAR
+ RandomStringUtils.randomAlphanumeric(6).toLowerCase();
}

/**
Expand Down
Loading
Loading