From 13e75aeef42817cfad9a422966ea27b3409850e9 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 17:16:55 +0200 Subject: [PATCH 01/60] Create token_consistency.yaml --- .github/workflows/token_consistency.yaml | 60 ++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 .github/workflows/token_consistency.yaml diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml new file mode 100644 index 00000000..1e8f1ab7 --- /dev/null +++ b/.github/workflows/token_consistency.yaml @@ -0,0 +1,60 @@ +name: Check consistency of tokens.txt file + +on: [push, pull_request] + +jobs: + check_tokens: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Get previous tokens.txt version + run: | + git fetch origin main + git diff origin/main -- chebai/preprocessing/bin/smiles_token/tokens.txt > tokens_diff.txt || echo "No previous tokens.txt found" + + - name: Check for deleted or added lines in tokens.txt + run: | + if [ -f tokens_diff.txt ]; then + + # Check for deleted lines (lines starting with '-') + deleted_lines=$(grep '^-' tokens_diff.txt | grep -v '^---' | sed 's/^-//' || true) + if [ -n "$deleted_lines" ]; then + echo "Error: Lines have been deleted from tokens.txt. file" + echo -e "Deleted Lines: \n$deleted_lines" + exit 1 + fi + + # Check for added lines (lines starting with '+') + added_lines=$(grep '^+' tokens_diff.txt | grep -v '^+++' | sed 's/^+//' || true) + if [ -n "$added_lines" ]; then + + # Count how many lines have been added + num_added_lines=$(echo "$added_lines" | wc -l) + + # Get last `n` lines (equal to num_added_lines) of tokens.tx + last_lines=$(tail -n "$num_added_lines" chebai/preprocessing/bin/smiles_token/tokens.txt) + + # Check if the added lines are at the end of the file + if [ "$added_lines" != "$last_lines" ]; then + + # Find lines that were added but not appended at the end of the file + non_appended_lines=$(diff <(echo "$added_lines") <(echo "$last_lines") | grep '^<' | sed 's/^< //') + + echo "Error: New lines have been added, but they are not at the end of tokens.txt." + echo -e "Added lines that are not at end of file: \n$non_appended_lines" + exit 1 + fi + fi + + if [ "$added_lines" == "" ]; then + echo "tokens.txt validation successful: No lines were deleted, and no new lines were added." + else + echo "tokens.txt validation successful: No lines were deleted, and new lines were correctly appended at the end." + fi + + else + echo "No previous version of tokens.txt found." + fi From 22cfd3e9b76aa9801402fa4467027aa206b08c5a Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 17:19:57 +0200 Subject: [PATCH 02/60] dev branch instead of main --- .github/workflows/token_consistency.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml index 1e8f1ab7..58b12a3c 100644 --- a/.github/workflows/token_consistency.yaml +++ b/.github/workflows/token_consistency.yaml @@ -12,8 +12,8 @@ jobs: - name: Get previous tokens.txt version run: | - git fetch origin main - git diff origin/main -- chebai/preprocessing/bin/smiles_token/tokens.txt > tokens_diff.txt || echo "No previous tokens.txt found" + git fetch origin dev + git diff origin/dev -- chebai/preprocessing/bin/smiles_token/tokens.txt > tokens_diff.txt || echo "No previous tokens.txt found" - name: Check for deleted or added lines in tokens.txt run: | From f40e2d0f6dde8e0414b3a40e36f4cb8a264de992 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 17:22:44 +0200 Subject: [PATCH 03/60] global variable for token file path --- .github/workflows/token_consistency.yaml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml index 58b12a3c..727305f6 100644 --- a/.github/workflows/token_consistency.yaml +++ b/.github/workflows/token_consistency.yaml @@ -10,10 +10,15 @@ jobs: - name: Checkout code uses: actions/checkout@v2 + - name: Set global variable for tokens.txt path + run: | + TOKENS_FILE_PATH="chebai/preprocessing/bin/smiles_token/tokens.txt" + echo "TOKENS_FILE_PATH=$TOKENS_FILE_PATH" >> $GITHUB_ENV + - name: Get previous tokens.txt version run: | git fetch origin dev - git diff origin/dev -- chebai/preprocessing/bin/smiles_token/tokens.txt > tokens_diff.txt || echo "No previous tokens.txt found" + git diff origin/dev -- $TOKENS_FILE_PATH > tokens_diff.txt || echo "No previous tokens.txt found" - name: Check for deleted or added lines in tokens.txt run: | @@ -35,7 +40,7 @@ jobs: num_added_lines=$(echo "$added_lines" | wc -l) # Get last `n` lines (equal to num_added_lines) of tokens.tx - last_lines=$(tail -n "$num_added_lines" chebai/preprocessing/bin/smiles_token/tokens.txt) + last_lines=$(tail -n "$num_added_lines" $TOKENS_FILE_PATH) # Check if the added lines are at the end of the file if [ "$added_lines" != "$last_lines" ]; then From b49476dabfc2499a8a5b13787e387bc73cfe60a2 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 17:25:37 +0200 Subject: [PATCH 04/60] test deletion of token --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 6f4c338e..de4d4b8d 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -20,7 +20,6 @@ O [Mg+2] [Ca++] [Ca+2] -[Na+] [Co++] [Sr++] Cl From 2155bc180452168a32688689f1ddecc3d706cc08 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 17:26:20 +0200 Subject: [PATCH 05/60] Revert "test deletion of token" This reverts commit b49476dabfc2499a8a5b13787e387bc73cfe60a2. --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index de4d4b8d..6f4c338e 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -20,6 +20,7 @@ O [Mg+2] [Ca++] [Ca+2] +[Na+] [Co++] [Sr++] Cl From 24abd493e5bfde4930a14437b00e2ca06bd8518c Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 17:27:11 +0200 Subject: [PATCH 06/60] test adding token in the end of file --- chebai/preprocessing/bin/smiles_token/tokens.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 6f4c338e..222e4dfb 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -769,3 +769,6 @@ p [14CH3] [HH] [CH3-] +token1 +token2 +token3 From 62719badb0ec546619bf61d1bbd2f3154702ba0b Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 17:27:48 +0200 Subject: [PATCH 07/60] Revert "test adding token in the end of file" This reverts commit 24abd493e5bfde4930a14437b00e2ca06bd8518c. --- chebai/preprocessing/bin/smiles_token/tokens.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 222e4dfb..6f4c338e 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -769,6 +769,3 @@ p [14CH3] [HH] [CH3-] -token1 -token2 -token3 From a797a35ad9ad0a7efdbc094dd651846f6c6dcebd Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 17:28:46 +0200 Subject: [PATCH 08/60] tokens added in between the file + end of file --- chebai/preprocessing/bin/smiles_token/tokens.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 6f4c338e..82de0259 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -761,11 +761,14 @@ p [Ta-] [6Li] [19O] +token3 [194Po] [Nb] [45Sc] [Nd] +token1 [Ti+3] [14CH3] [HH] [CH3-] +token 4 From 8c51420ca13e6de1d7d2dcc0216bc9f537d03009 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 17:29:32 +0200 Subject: [PATCH 09/60] Revert "tokens added in between the file + end of file" This reverts commit a797a35ad9ad0a7efdbc094dd651846f6c6dcebd. --- chebai/preprocessing/bin/smiles_token/tokens.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 82de0259..6f4c338e 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -761,14 +761,11 @@ p [Ta-] [6Li] [19O] -token3 [194Po] [Nb] [45Sc] [Nd] -token1 [Ti+3] [14CH3] [HH] [CH3-] -token 4 From 9972ff4a1ddfa0f6c3c10b50e5da6458c89f3f69 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 17:40:40 +0200 Subject: [PATCH 10/60] trigger action only if changes are made to this token file --- .github/workflows/token_consistency.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml index 727305f6..885703d6 100644 --- a/.github/workflows/token_consistency.yaml +++ b/.github/workflows/token_consistency.yaml @@ -1,6 +1,12 @@ name: Check consistency of tokens.txt file -on: [push, pull_request] +on: + push: + paths: + - "chebai/preprocessing/bin/smiles_token/tokens.txt" + pull_request: + paths: + - "chebai/preprocessing/bin/smiles_token/tokens.txt" jobs: check_tokens: From 29809609e5a425f5cffe69b94786ea34aa58a4a4 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 17:42:08 +0200 Subject: [PATCH 11/60] test trigger --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 6f4c338e..bfb6b2a1 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -761,7 +761,6 @@ p [Ta-] [6Li] [19O] -[194Po] [Nb] [45Sc] [Nd] From 15a9dcb4ff77e9bef57df0ba8e53a53a9084ea6a Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 17:42:28 +0200 Subject: [PATCH 12/60] Revert "test trigger" This reverts commit 29809609e5a425f5cffe69b94786ea34aa58a4a4. --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index bfb6b2a1..6f4c338e 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -761,6 +761,7 @@ p [Ta-] [6Li] [19O] +[194Po] [Nb] [45Sc] [Nd] From e2ba5d90c1c2464e9020442ca6e99f3f4ff96cb2 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 19:25:08 +0200 Subject: [PATCH 13/60] code to print name of files changed --- .github/workflows/token_consistency.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml index 885703d6..febb87eb 100644 --- a/.github/workflows/token_consistency.yaml +++ b/.github/workflows/token_consistency.yaml @@ -4,9 +4,21 @@ on: push: paths: - "chebai/preprocessing/bin/smiles_token/tokens.txt" + - "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt" + - "chebai/preprocessing/bin/selfies/tokens.txt" + - "chebai/preprocessing/bin/protein_token/tokens.txt" + - "chebai/preprocessing/bin/graph_properties/tokens.txt" + - "chebai/preprocessing/bin/graph/tokens.txt" + - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" pull_request: paths: - "chebai/preprocessing/bin/smiles_token/tokens.txt" + - "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt" + - "chebai/preprocessing/bin/selfies/tokens.txt" + - "chebai/preprocessing/bin/protein_token/tokens.txt" + - "chebai/preprocessing/bin/graph_properties/tokens.txt" + - "chebai/preprocessing/bin/graph/tokens.txt" + - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" jobs: check_tokens: @@ -16,6 +28,18 @@ jobs: - name: Checkout code uses: actions/checkout@v2 + - name: Get list of changed files + id: changed_files + run: | + # Get the list of changed files and save them to a file + git diff --name-only origin/main > changed_files.txt + + # Print the names of changed files on separate lines + echo "Changed files:" + while read -r line; do + echo "Changed File name : $line" + done < changed_files.txt + - name: Set global variable for tokens.txt path run: | TOKENS_FILE_PATH="chebai/preprocessing/bin/smiles_token/tokens.txt" From 3ffd35989f9038759ba5950fd2ac45cea732c1c6 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 19:26:25 +0200 Subject: [PATCH 14/60] test if print changed files names work --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 6f4c338e..7aa9337e 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -506,7 +506,6 @@ p [Og] [Cr++] [Mo-5] -[W-4] [Re+] [Os+] [Ru-3] From 9295ced7318e599f5695db98ca33da2b7c861062 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 19:28:37 +0200 Subject: [PATCH 15/60] dev branch --- .github/workflows/token_consistency.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml index febb87eb..66069f50 100644 --- a/.github/workflows/token_consistency.yaml +++ b/.github/workflows/token_consistency.yaml @@ -32,7 +32,7 @@ jobs: id: changed_files run: | # Get the list of changed files and save them to a file - git diff --name-only origin/main > changed_files.txt + git diff --name-only origin/dev > changed_files.txt # Print the names of changed files on separate lines echo "Changed files:" From a14cd25aa7ef341be66f227df5082eeffae61a1b Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sun, 20 Oct 2024 19:30:08 +0200 Subject: [PATCH 16/60] Update tokens.txt --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 7aa9337e..3208d056 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -506,7 +506,6 @@ p [Og] [Cr++] [Mo-5] -[Re+] [Os+] [Ru-3] [Ru-4] From bf709d66c8ce2c9219cc4a2a07e5f97134fd7eff Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:18:29 +0200 Subject: [PATCH 17/60] Update token_consistency.yaml --- .github/workflows/token_consistency.yaml | 124 ++++++++++++++--------- 1 file changed, 75 insertions(+), 49 deletions(-) diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml index 66069f50..d50b02ae 100644 --- a/.github/workflows/token_consistency.yaml +++ b/.github/workflows/token_consistency.yaml @@ -27,11 +27,16 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v2 + with: + fetch-depth: 0 # Fetch all history to ensure 'origin/dev' is available - name: Get list of changed files id: changed_files run: | - # Get the list of changed files and save them to a file + # Ensure origin/dev is set as the default branch reference + git fetch origin dev + + # Get the list of changed files compared to origin/dev and save them to a file git diff --name-only origin/dev > changed_files.txt # Print the names of changed files on separate lines @@ -40,56 +45,77 @@ jobs: echo "Changed File name : $line" done < changed_files.txt - - name: Set global variable for tokens.txt path - run: | - TOKENS_FILE_PATH="chebai/preprocessing/bin/smiles_token/tokens.txt" - echo "TOKENS_FILE_PATH=$TOKENS_FILE_PATH" >> $GITHUB_ENV - - - name: Get previous tokens.txt version + - name: Set global variable for multiple tokens.txt paths run: | - git fetch origin dev - git diff origin/dev -- $TOKENS_FILE_PATH > tokens_diff.txt || echo "No previous tokens.txt found" - - - name: Check for deleted or added lines in tokens.txt + # Define an array with all the paths of tokens.txt files + TOKENS_FILES=( + "chebai/preprocessing/bin/smiles_token/tokens.txt" + "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt" + "chebai/preprocessing/bin/selfies/tokens.txt" + "chebai/preprocessing/bin/protein_token/tokens.txt" + "chebai/preprocessing/bin/graph_properties/tokens.txt" + "chebai/preprocessing/bin/graph/tokens.txt" + "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" + ) + echo "TOKENS_FILES=${TOKENS_FILES[*]}" >> $GITHUB_ENV + + - name: Process only changed tokens.txt files run: | - if [ -f tokens_diff.txt ]; then - - # Check for deleted lines (lines starting with '-') - deleted_lines=$(grep '^-' tokens_diff.txt | grep -v '^---' | sed 's/^-//' || true) - if [ -n "$deleted_lines" ]; then - echo "Error: Lines have been deleted from tokens.txt. file" - echo -e "Deleted Lines: \n$deleted_lines" - exit 1 - fi - - # Check for added lines (lines starting with '+') - added_lines=$(grep '^+' tokens_diff.txt | grep -v '^+++' | sed 's/^+//' || true) - if [ -n "$added_lines" ]; then - - # Count how many lines have been added - num_added_lines=$(echo "$added_lines" | wc -l) - - # Get last `n` lines (equal to num_added_lines) of tokens.tx - last_lines=$(tail -n "$num_added_lines" $TOKENS_FILE_PATH) - - # Check if the added lines are at the end of the file - if [ "$added_lines" != "$last_lines" ]; then - - # Find lines that were added but not appended at the end of the file - non_appended_lines=$(diff <(echo "$added_lines") <(echo "$last_lines") | grep '^<' | sed 's/^< //') - - echo "Error: New lines have been added, but they are not at the end of tokens.txt." - echo -e "Added lines that are not at end of file: \n$non_appended_lines" - exit 1 + # Convert the TOKENS_FILES environment variable into an array + TOKENS_FILES=(${TOKENS_FILES}) + + # Iterate over each token file path + for TOKENS_FILE_PATH in "${TOKENS_FILES[@]}"; do + # Check if the current token file path is in the list of changed files + if grep -q "$TOKENS_FILE_PATH" changed_files.txt; then + echo "----------------------- Processing $TOKENS_FILE_PATH -----------------------" + + # Get previous tokens.txt version + git fetch origin dev + git diff origin/dev -- $TOKENS_FILE_PATH > tokens_diff.txt || echo "No previous tokens.txt found for $TOKENS_FILE_PATH" + + # Check for deleted or added lines in tokens.txt + if [ -f tokens_diff.txt ]; then + + # Check for deleted lines (lines starting with '-') + deleted_lines=$(grep '^-' tokens_diff.txt | grep -v '^---' | sed 's/^-//' || true) + if [ -n "$deleted_lines" ]; then + echo "Error: Lines have been deleted from $TOKENS_FILE_PATH." + echo -e "Deleted Lines: \n$deleted_lines" + exit 1 + fi + + # Check for added lines (lines starting with '+') + added_lines=$(grep '^+' tokens_diff.txt | grep -v '^+++' | sed 's/^+//' || true) + if [ -n "$added_lines" ]; then + + # Count how many lines have been added + num_added_lines=$(echo "$added_lines" | wc -l) + + # Get last `n` lines (equal to num_added_lines) of tokens.txt + last_lines=$(tail -n "$num_added_lines" $TOKENS_FILE_PATH) + + # Check if the added lines are at the end of the file + if [ "$added_lines" != "$last_lines" ]; then + + # Find lines that were added but not appended at the end of the file + non_appended_lines=$(diff <(echo "$added_lines") <(echo "$last_lines") | grep '^<' | sed 's/^< //') + + echo "Error: New lines have been added to $TOKENS_FILE_PATH, but they are not at the end of the file." + echo -e "Added lines that are not at the end of the file: \n$non_appended_lines" + exit 1 + fi + fi + + if [ "$added_lines" == "" ]; then + echo "$TOKENS_FILE_PATH validation successful: No lines were deleted, and no new lines were added." + else + echo "$TOKENS_FILE_PATH validation successful: No lines were deleted, and new lines were correctly appended at the end." + fi + else + echo "No previous version of $TOKENS_FILE_PATH found." fi - fi - - if [ "$added_lines" == "" ]; then - echo "tokens.txt validation successful: No lines were deleted, and no new lines were added." else - echo "tokens.txt validation successful: No lines were deleted, and new lines were correctly appended at the end." + echo "$TOKENS_FILE_PATH was not changed, skipping." fi - - else - echo "No previous version of tokens.txt found." - fi + done From 7fdcd4ac500c02aee2dad039045cb6aa93ca9fc7 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:21:07 +0200 Subject: [PATCH 18/60] Update tokens.txt --- chebai/preprocessing/bin/smiles_token/tokens.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 3208d056..6f4c338e 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -506,6 +506,8 @@ p [Og] [Cr++] [Mo-5] +[W-4] +[Re+] [Os+] [Ru-3] [Ru-4] From 50022a61a810ad8928c27931cd694969de915b16 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:34:05 +0200 Subject: [PATCH 19/60] Update token_consistency.yaml --- .github/workflows/token_consistency.yaml | 27 ++++++++++-------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml index d50b02ae..9cfc7c0a 100644 --- a/.github/workflows/token_consistency.yaml +++ b/.github/workflows/token_consistency.yaml @@ -1,24 +1,19 @@ name: Check consistency of tokens.txt file +paths: &TOKENS_FILES + - "chebai/preprocessing/bin/smiles_token/tokens.txt" + - "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt" + - "chebai/preprocessing/bin/selfies/tokens.txt" + - "chebai/preprocessing/bin/protein_token/tokens.txt" + - "chebai/preprocessing/bin/graph_properties/tokens.txt" + - "chebai/preprocessing/bin/graph/tokens.txt" + - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" + on: push: - paths: - - "chebai/preprocessing/bin/smiles_token/tokens.txt" - - "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt" - - "chebai/preprocessing/bin/selfies/tokens.txt" - - "chebai/preprocessing/bin/protein_token/tokens.txt" - - "chebai/preprocessing/bin/graph_properties/tokens.txt" - - "chebai/preprocessing/bin/graph/tokens.txt" - - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" + paths: *TOKENS_FILES pull_request: - paths: - - "chebai/preprocessing/bin/smiles_token/tokens.txt" - - "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt" - - "chebai/preprocessing/bin/selfies/tokens.txt" - - "chebai/preprocessing/bin/protein_token/tokens.txt" - - "chebai/preprocessing/bin/graph_properties/tokens.txt" - - "chebai/preprocessing/bin/graph/tokens.txt" - - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" + paths: *TOKENS_FILES jobs: check_tokens: From 8376098784c9d398c7acef6234acc06907952f19 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:35:26 +0200 Subject: [PATCH 20/60] Update tokens.txt --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 6f4c338e..282e4d20 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -514,7 +514,6 @@ p [63Cu] [Ag++] [V-4] -[V--] [V-] [51V] [Ni+4] From e253b305693f96f69159c7234e5b294822240616 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:37:54 +0200 Subject: [PATCH 21/60] Revert "Update token_consistency.yaml" This reverts commit 50022a61a810ad8928c27931cd694969de915b16. --- .github/workflows/token_consistency.yaml | 27 ++++++++++++++---------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml index 9cfc7c0a..d50b02ae 100644 --- a/.github/workflows/token_consistency.yaml +++ b/.github/workflows/token_consistency.yaml @@ -1,19 +1,24 @@ name: Check consistency of tokens.txt file -paths: &TOKENS_FILES - - "chebai/preprocessing/bin/smiles_token/tokens.txt" - - "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt" - - "chebai/preprocessing/bin/selfies/tokens.txt" - - "chebai/preprocessing/bin/protein_token/tokens.txt" - - "chebai/preprocessing/bin/graph_properties/tokens.txt" - - "chebai/preprocessing/bin/graph/tokens.txt" - - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" - on: push: - paths: *TOKENS_FILES + paths: + - "chebai/preprocessing/bin/smiles_token/tokens.txt" + - "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt" + - "chebai/preprocessing/bin/selfies/tokens.txt" + - "chebai/preprocessing/bin/protein_token/tokens.txt" + - "chebai/preprocessing/bin/graph_properties/tokens.txt" + - "chebai/preprocessing/bin/graph/tokens.txt" + - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" pull_request: - paths: *TOKENS_FILES + paths: + - "chebai/preprocessing/bin/smiles_token/tokens.txt" + - "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt" + - "chebai/preprocessing/bin/selfies/tokens.txt" + - "chebai/preprocessing/bin/protein_token/tokens.txt" + - "chebai/preprocessing/bin/graph_properties/tokens.txt" + - "chebai/preprocessing/bin/graph/tokens.txt" + - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" jobs: check_tokens: From f257e7fea56005650bfcbd318d602e4bddac86eb Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:38:27 +0200 Subject: [PATCH 22/60] Update token_consistency.yaml --- .github/workflows/token_consistency.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml index d50b02ae..ebd654ed 100644 --- a/.github/workflows/token_consistency.yaml +++ b/.github/workflows/token_consistency.yaml @@ -1,5 +1,9 @@ name: Check consistency of tokens.txt file +# **Note** : To add a new token file for checks, include its path in: +# - `on` -> `push` and `pull_request` sections +# - `jobs` -> `check_tokens` -> `steps` -> Set global variable for multiple tokens.txt paths -> `TOKENS_FILES` + on: push: paths: From 6ad06f0a68febc132980ef0056ab30ca23daa5d1 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:39:18 +0200 Subject: [PATCH 23/60] Revert "Update tokens.txt" This reverts commit 8376098784c9d398c7acef6234acc06907952f19. --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 282e4d20..6f4c338e 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -514,6 +514,7 @@ p [63Cu] [Ag++] [V-4] +[V--] [V-] [51V] [Ni+4] From a3ecc7f62fb0d6863a20d8295f8aaf9f73e4b36d Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:39:40 +0200 Subject: [PATCH 24/60] Update tokens.txt --- chebai/preprocessing/bin/protein_token/tokens.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/chebai/preprocessing/bin/protein_token/tokens.txt b/chebai/preprocessing/bin/protein_token/tokens.txt index 72ad1b6d..bd11b8ed 100644 --- a/chebai/preprocessing/bin/protein_token/tokens.txt +++ b/chebai/preprocessing/bin/protein_token/tokens.txt @@ -11,6 +11,7 @@ N D K Y +s P C F From 387ca60c01f6bf9ea2011ff0aaec53e5ceef287e Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:41:32 +0200 Subject: [PATCH 25/60] Update tokens.txt --- chebai/preprocessing/bin/protein_token/tokens.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/chebai/preprocessing/bin/protein_token/tokens.txt b/chebai/preprocessing/bin/protein_token/tokens.txt index bd11b8ed..5f04cd0d 100644 --- a/chebai/preprocessing/bin/protein_token/tokens.txt +++ b/chebai/preprocessing/bin/protein_token/tokens.txt @@ -11,8 +11,6 @@ N D K Y -s -P C F W From da3eb4d4a0fdc9f74795d9c80c579d4037170ea6 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:42:26 +0200 Subject: [PATCH 26/60] Update tokens.txt --- chebai/preprocessing/bin/protein_token/tokens.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/chebai/preprocessing/bin/protein_token/tokens.txt b/chebai/preprocessing/bin/protein_token/tokens.txt index 5f04cd0d..72ad1b6d 100644 --- a/chebai/preprocessing/bin/protein_token/tokens.txt +++ b/chebai/preprocessing/bin/protein_token/tokens.txt @@ -11,6 +11,7 @@ N D K Y +P C F W From 9d69eb950c5f2e304ec6feed55740d774b2af5b2 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:48:50 +0200 Subject: [PATCH 27/60] Update token_consistency.yaml --- .github/workflows/token_consistency.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml index ebd654ed..e899e585 100644 --- a/.github/workflows/token_consistency.yaml +++ b/.github/workflows/token_consistency.yaml @@ -1,5 +1,8 @@ name: Check consistency of tokens.txt file +# Define the file paths under `paths` to trigger this check only when specific files are modified. +# This script will then execute checks only on files that have changed, rather than all files listed in `paths`. + # **Note** : To add a new token file for checks, include its path in: # - `on` -> `push` and `pull_request` sections # - `jobs` -> `check_tokens` -> `steps` -> Set global variable for multiple tokens.txt paths -> `TOKENS_FILES` @@ -31,13 +34,12 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v2 - with: - fetch-depth: 0 # Fetch all history to ensure 'origin/dev' is available +# with: +# fetch-depth: 0 # Fetch all history to ensure 'origin/dev' is available - name: Get list of changed files id: changed_files run: | - # Ensure origin/dev is set as the default branch reference git fetch origin dev # Get the list of changed files compared to origin/dev and save them to a file @@ -51,7 +53,7 @@ jobs: - name: Set global variable for multiple tokens.txt paths run: | - # Define an array with all the paths of tokens.txt files + # All token files that needs to checked must be included here too, same as in `paths`. TOKENS_FILES=( "chebai/preprocessing/bin/smiles_token/tokens.txt" "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt" From dad562ae1f8a9e4311fc040a71c16c098c0dfd91 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:49:24 +0200 Subject: [PATCH 28/60] Update tokens.txt --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 6f4c338e..8089d1ae 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -513,7 +513,6 @@ p [Ru-4] [63Cu] [Ag++] -[V-4] [V--] [V-] [51V] From 66ba760d66342648e36c67fd098c154cb92f9772 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:50:30 +0200 Subject: [PATCH 29/60] Revert "Update tokens.txt" This reverts commit dad562ae1f8a9e4311fc040a71c16c098c0dfd91. --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 8089d1ae..6f4c338e 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -513,6 +513,7 @@ p [Ru-4] [63Cu] [Ag++] +[V-4] [V--] [V-] [51V] From 34792a1fc36f0f45a53dd4b480427de608177bf3 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:50:45 +0200 Subject: [PATCH 30/60] Update token_consistency.yaml --- .github/workflows/token_consistency.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml index e899e585..fd36f8e0 100644 --- a/.github/workflows/token_consistency.yaml +++ b/.github/workflows/token_consistency.yaml @@ -34,8 +34,6 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v2 -# with: -# fetch-depth: 0 # Fetch all history to ensure 'origin/dev' is available - name: Get list of changed files id: changed_files From 30c7b661377f3c97b35dcdc5fca91fb97f134071 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:51:29 +0200 Subject: [PATCH 31/60] Update tokens.txt --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 6f4c338e..81ded0a7 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -517,6 +517,7 @@ p [V--] [V-] [51V] +s [Ni+4] [Rh+3] [Th] From 6b0bcf066728d2002ac1708aaf78c18dc5344cc9 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 26 Oct 2024 11:56:03 +0200 Subject: [PATCH 32/60] add workflow for token files - workflow tested in dummy PR #62 --- .github/workflows/token_consistency.yaml | 125 +++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 .github/workflows/token_consistency.yaml diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml new file mode 100644 index 00000000..fd36f8e0 --- /dev/null +++ b/.github/workflows/token_consistency.yaml @@ -0,0 +1,125 @@ +name: Check consistency of tokens.txt file + +# Define the file paths under `paths` to trigger this check only when specific files are modified. +# This script will then execute checks only on files that have changed, rather than all files listed in `paths`. + +# **Note** : To add a new token file for checks, include its path in: +# - `on` -> `push` and `pull_request` sections +# - `jobs` -> `check_tokens` -> `steps` -> Set global variable for multiple tokens.txt paths -> `TOKENS_FILES` + +on: + push: + paths: + - "chebai/preprocessing/bin/smiles_token/tokens.txt" + - "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt" + - "chebai/preprocessing/bin/selfies/tokens.txt" + - "chebai/preprocessing/bin/protein_token/tokens.txt" + - "chebai/preprocessing/bin/graph_properties/tokens.txt" + - "chebai/preprocessing/bin/graph/tokens.txt" + - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" + pull_request: + paths: + - "chebai/preprocessing/bin/smiles_token/tokens.txt" + - "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt" + - "chebai/preprocessing/bin/selfies/tokens.txt" + - "chebai/preprocessing/bin/protein_token/tokens.txt" + - "chebai/preprocessing/bin/graph_properties/tokens.txt" + - "chebai/preprocessing/bin/graph/tokens.txt" + - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" + +jobs: + check_tokens: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Get list of changed files + id: changed_files + run: | + git fetch origin dev + + # Get the list of changed files compared to origin/dev and save them to a file + git diff --name-only origin/dev > changed_files.txt + + # Print the names of changed files on separate lines + echo "Changed files:" + while read -r line; do + echo "Changed File name : $line" + done < changed_files.txt + + - name: Set global variable for multiple tokens.txt paths + run: | + # All token files that needs to checked must be included here too, same as in `paths`. + TOKENS_FILES=( + "chebai/preprocessing/bin/smiles_token/tokens.txt" + "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt" + "chebai/preprocessing/bin/selfies/tokens.txt" + "chebai/preprocessing/bin/protein_token/tokens.txt" + "chebai/preprocessing/bin/graph_properties/tokens.txt" + "chebai/preprocessing/bin/graph/tokens.txt" + "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" + ) + echo "TOKENS_FILES=${TOKENS_FILES[*]}" >> $GITHUB_ENV + + - name: Process only changed tokens.txt files + run: | + # Convert the TOKENS_FILES environment variable into an array + TOKENS_FILES=(${TOKENS_FILES}) + + # Iterate over each token file path + for TOKENS_FILE_PATH in "${TOKENS_FILES[@]}"; do + # Check if the current token file path is in the list of changed files + if grep -q "$TOKENS_FILE_PATH" changed_files.txt; then + echo "----------------------- Processing $TOKENS_FILE_PATH -----------------------" + + # Get previous tokens.txt version + git fetch origin dev + git diff origin/dev -- $TOKENS_FILE_PATH > tokens_diff.txt || echo "No previous tokens.txt found for $TOKENS_FILE_PATH" + + # Check for deleted or added lines in tokens.txt + if [ -f tokens_diff.txt ]; then + + # Check for deleted lines (lines starting with '-') + deleted_lines=$(grep '^-' tokens_diff.txt | grep -v '^---' | sed 's/^-//' || true) + if [ -n "$deleted_lines" ]; then + echo "Error: Lines have been deleted from $TOKENS_FILE_PATH." + echo -e "Deleted Lines: \n$deleted_lines" + exit 1 + fi + + # Check for added lines (lines starting with '+') + added_lines=$(grep '^+' tokens_diff.txt | grep -v '^+++' | sed 's/^+//' || true) + if [ -n "$added_lines" ]; then + + # Count how many lines have been added + num_added_lines=$(echo "$added_lines" | wc -l) + + # Get last `n` lines (equal to num_added_lines) of tokens.txt + last_lines=$(tail -n "$num_added_lines" $TOKENS_FILE_PATH) + + # Check if the added lines are at the end of the file + if [ "$added_lines" != "$last_lines" ]; then + + # Find lines that were added but not appended at the end of the file + non_appended_lines=$(diff <(echo "$added_lines") <(echo "$last_lines") | grep '^<' | sed 's/^< //') + + echo "Error: New lines have been added to $TOKENS_FILE_PATH, but they are not at the end of the file." + echo -e "Added lines that are not at the end of the file: \n$non_appended_lines" + exit 1 + fi + fi + + if [ "$added_lines" == "" ]; then + echo "$TOKENS_FILE_PATH validation successful: No lines were deleted, and no new lines were added." + else + echo "$TOKENS_FILE_PATH validation successful: No lines were deleted, and new lines were correctly appended at the end." + fi + else + echo "No previous version of $TOKENS_FILE_PATH found." + fi + else + echo "$TOKENS_FILE_PATH was not changed, skipping." + fi + done From ba800d944b4e3e48339631a7430b16460d4d0398 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Wed, 30 Oct 2024 19:37:21 +0100 Subject: [PATCH 33/60] add protein trigram token file to action workflow --- .github/workflows/token_consistency.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/token_consistency.yaml b/.github/workflows/token_consistency.yaml index fd36f8e0..06c3a42e 100644 --- a/.github/workflows/token_consistency.yaml +++ b/.github/workflows/token_consistency.yaml @@ -17,6 +17,7 @@ on: - "chebai/preprocessing/bin/graph_properties/tokens.txt" - "chebai/preprocessing/bin/graph/tokens.txt" - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" + - "chebai/preprocessing/bin/protein_token_3_gram/tokens.txt" pull_request: paths: - "chebai/preprocessing/bin/smiles_token/tokens.txt" @@ -26,6 +27,7 @@ on: - "chebai/preprocessing/bin/graph_properties/tokens.txt" - "chebai/preprocessing/bin/graph/tokens.txt" - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" + - "chebai/preprocessing/bin/protein_token_3_gram/tokens.txt" jobs: check_tokens: @@ -60,6 +62,7 @@ jobs: "chebai/preprocessing/bin/graph_properties/tokens.txt" "chebai/preprocessing/bin/graph/tokens.txt" "chebai/preprocessing/bin/deepsmiles_token/tokens.txt" + "chebai/preprocessing/bin/protein_token_3_gram/tokens.txt" ) echo "TOKENS_FILES=${TOKENS_FILES[*]}" >> $GITHUB_ENV From fca8c30859cfa1ba568e362e3c7cc79472bdca4f Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Wed, 30 Oct 2024 23:43:32 +0100 Subject: [PATCH 34/60] add py script to export constants to json --- .github/workflows/export_constants.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .github/workflows/export_constants.py diff --git a/.github/workflows/export_constants.py b/.github/workflows/export_constants.py new file mode 100644 index 00000000..aac4e647 --- /dev/null +++ b/.github/workflows/export_constants.py @@ -0,0 +1,15 @@ +import json + +import chebai.preprocessing.reader as dr + +# Define the constants you want to export +constants = { + "EMBEDDING_OFFSET": dr.EMBEDDING_OFFSET, + "CLS_TOKEN": dr.CLS_TOKEN, + "PADDING_TOKEN_INDEX": dr.PADDING_TOKEN_INDEX, + "MASK_TOKEN_INDEX": dr.MASK_TOKEN_INDEX, +} + +# Write constants to a JSON file +with open("constants.json", "w") as f: + json.dump(constants, f) From 4e4264d8896f3f326af60c284f38581b093f9f1a Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Wed, 30 Oct 2024 23:59:39 +0100 Subject: [PATCH 35/60] action for verifying constants --- .github/workflows/verify_constants.yml | 66 ++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 .github/workflows/verify_constants.yml diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml new file mode 100644 index 00000000..02059f6d --- /dev/null +++ b/.github/workflows/verify_constants.yml @@ -0,0 +1,66 @@ +name: Verify Constants + +on: + push: + paths: + - "chebai/preprocessing/reader.py" + pull_request: + paths: + - "chebai/preprocessing/reader.py" + +jobs: + verify-constants: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Get list of changed files + id: changed_files + run: | + git fetch origin dev + + # Get the list of changed files compared to origin/dev and save them to a file + git diff --name-only origin/dev > changed_files.txt + + # Print the names of changed files on separate lines + echo "Changed files:" + while read -r line; do + echo "Changed File name : $line" + done < changed_files.txt + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Export constants + run: python export_constants.py + + - name: Load constants into environment variables + id: load_constants + run: | + constants=$(cat constants.json) + echo "$constants" | jq -r 'to_entries|map("export \(.key)=\(.value|tostring)")|.[]' >> $GITHUB_ENV + + - name: Verify constants + run: | + if grep -q "chebai/preprocessing/reader.py" changed_files.txt; then + if [ "$EMBEDDING_OFFSET" != "10" ]; then + echo "EMBEDDING_OFFSET does not match expected value!" + exit 1 + fi + if [ "$CLS_TOKEN" != "2" ]; then + echo "CLS_TOKEN does not match expected value!" + exit 1 + fi + if [ "$PADDING_TOKEN_INDEX" != "0" ]; then + echo "PADDING_TOKEN_INDEX does not match expected value!" + exit 1 + fi + if [ "$MASK_TOKEN_INDEX" != "1" ]; then + echo "MASK_TOKEN_INDEX does not match expected value!" + exit 1 + fi + fi From cacee60a02ae53103e4e7d7d1916ba4e55ca6bad Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 00:01:11 +0100 Subject: [PATCH 36/60] Update tokens.txt --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 81ded0a7..443f2f93 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -48,7 +48,6 @@ N [Pt+2] [Hg] 2 -s [C@@H] [C@H] = From 9a93c0e208331b3c7e7f4c1efd4bcdc3d5fb9f95 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 00:02:04 +0100 Subject: [PATCH 37/60] Revert "Update tokens.txt" This reverts commit cacee60a02ae53103e4e7d7d1916ba4e55ca6bad. --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 443f2f93..81ded0a7 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -48,6 +48,7 @@ N [Pt+2] [Hg] 2 +s [C@@H] [C@H] = From 156634ef3a59431ad74b1b76b4fce6c509fb7e75 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 00:02:58 +0100 Subject: [PATCH 38/60] Update tokens.txt --- chebai/preprocessing/bin/smiles_token/tokens.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/chebai/preprocessing/bin/smiles_token/tokens.txt b/chebai/preprocessing/bin/smiles_token/tokens.txt index 81ded0a7..6f4c338e 100644 --- a/chebai/preprocessing/bin/smiles_token/tokens.txt +++ b/chebai/preprocessing/bin/smiles_token/tokens.txt @@ -517,7 +517,6 @@ p [V--] [V-] [51V] -s [Ni+4] [Rh+3] [Th] From 03041346d8c7e0106bc27518bf77e8dc305527a0 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 00:03:37 +0100 Subject: [PATCH 39/60] Update reader.py --- chebai/preprocessing/reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chebai/preprocessing/reader.py b/chebai/preprocessing/reader.py index e220e1e4..4b06ae7d 100644 --- a/chebai/preprocessing/reader.py +++ b/chebai/preprocessing/reader.py @@ -8,7 +8,7 @@ from chebai.preprocessing.collate import DefaultCollator, RaggedCollator -EMBEDDING_OFFSET = 10 +EMBEDDING_OFFSET = 100 PADDING_TOKEN_INDEX = 0 MASK_TOKEN_INDEX = 1 CLS_TOKEN = 2 From 1f365bf64e9c5b159a81c7ffb3d1c2e1eeaf2b41 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 00:05:33 +0100 Subject: [PATCH 40/60] complete path of export file --- .github/workflows/verify_constants.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index 02059f6d..93cbac4b 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -36,7 +36,7 @@ jobs: python-version: '3.x' - name: Export constants - run: python export_constants.py + run: python .github/workflows/export_constants.py - name: Load constants into environment variables id: load_constants From 37ce96078b0cead72b48fec6301a5c1b51501df7 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 00:08:07 +0100 Subject: [PATCH 41/60] set python path to root --- .github/workflows/verify_constants.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index 93cbac4b..4a3c6735 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -16,6 +16,9 @@ jobs: - name: Checkout code uses: actions/checkout@v3 + - name: Set PYTHONPATH + run: echo "PYTHONPATH=$PWD" >> $GITHUB_ENV + - name: Get list of changed files id: changed_files run: | From aad1bca28a959b158de73d10993348381e742ba1 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 00:13:39 +0100 Subject: [PATCH 42/60] only import the constants instead of submodule --- .github/workflows/export_constants.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/export_constants.py b/.github/workflows/export_constants.py index aac4e647..6546d3a3 100644 --- a/.github/workflows/export_constants.py +++ b/.github/workflows/export_constants.py @@ -1,13 +1,18 @@ import json -import chebai.preprocessing.reader as dr +from chebai.preprocessing.reader import ( + CLS_TOKEN, + EMBEDDING_OFFSET, + MASK_TOKEN_INDEX, + PADDING_TOKEN_INDEX, +) # Define the constants you want to export constants = { - "EMBEDDING_OFFSET": dr.EMBEDDING_OFFSET, - "CLS_TOKEN": dr.CLS_TOKEN, - "PADDING_TOKEN_INDEX": dr.PADDING_TOKEN_INDEX, - "MASK_TOKEN_INDEX": dr.MASK_TOKEN_INDEX, + "EMBEDDING_OFFSET": EMBEDDING_OFFSET, + "CLS_TOKEN": CLS_TOKEN, + "PADDING_TOKEN_INDEX": PADDING_TOKEN_INDEX, + "MASK_TOKEN_INDEX": MASK_TOKEN_INDEX, } # Write constants to a JSON file From b5ffe28dd7e15207a5d24f72bc8fab200850f35c Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 00:16:06 +0100 Subject: [PATCH 43/60] install torch as chebai __init__ dependency --- .github/workflows/verify_constants.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index 4a3c6735..e47fd613 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -38,6 +38,9 @@ jobs: with: python-version: '3.x' + - name: Install dependencies + run: pip install torch + - name: Export constants run: python .github/workflows/export_constants.py From a79423a4508e35a9e408031cd16c25ce75ed2676 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 11:53:29 +0100 Subject: [PATCH 44/60] install project dependencies --- .github/workflows/verify_constants.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index e47fd613..1d7e5586 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -39,7 +39,11 @@ jobs: python-version: '3.x' - name: Install dependencies - run: pip install torch + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade pip setuptools wheel + python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu + python -m pip install -e . - name: Export constants run: python .github/workflows/export_constants.py From 9fdad04d557c456b1a7a3a61c3037087390c3c3b Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 11:57:54 +0100 Subject: [PATCH 45/60] set up right python versions --- .github/workflows/verify_constants.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index 1d7e5586..6b4385e1 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -11,6 +11,10 @@ on: jobs: verify-constants: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [ "pypy3.9", "pypy3.10", "3.9", "3.10", "3.11" ] steps: - name: Checkout code @@ -33,10 +37,10 @@ jobs: echo "Changed File name : $line" done < changed_files.txt - - name: Set up Python - uses: actions/setup-python@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 with: - python-version: '3.x' + python-version: ${{ matrix.python-version }} - name: Install dependencies run: | From d1829682febf4403a6122f28d695a27838249293 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 12:54:51 +0100 Subject: [PATCH 46/60] Update verify_constants.yml --- .github/workflows/verify_constants.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index 6b4385e1..c2d97352 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -18,7 +18,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set PYTHONPATH run: echo "PYTHONPATH=$PWD" >> $GITHUB_ENV From 4bb1c45e65850b9a913594b888fffe8cc9e3ce25 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 13:18:48 +0100 Subject: [PATCH 47/60] fix torch error by setting version ImportError: cannot import name 'T_co' from 'torch.utils.data.dataset' (/opt/hostedtoolcache/Python/3.10.15/x64/lib/python3.10/site-packages/torch/utils/data/dataset.py) torch version for above error: 2.5.1 --- .github/workflows/verify_constants.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index c2d97352..ad124d8a 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -46,7 +46,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install --upgrade pip setuptools wheel - python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu + python -m pip install torch==2.4.1 --index-url https://download.pytorch.org/whl/cpu python -m pip install -e . - name: Export constants From 125d4d63d5fc8a15a303fe20100fe53052b5d5e2 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 13:36:31 +0100 Subject: [PATCH 48/60] minor improvements in workflow --- .github/workflows/verify_constants.yml | 39 +++++++++++++++++++------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index ad124d8a..7ac29e4e 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -14,7 +14,12 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "pypy3.9", "pypy3.10", "3.9", "3.10", "3.11" ] + python-version: [ +# Only use 3.10 as of now +# "3.9", + "3.10", +# "3.11" + ] steps: - name: Checkout code @@ -43,6 +48,8 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies + # Setting a fix version for torch due to an error with latest version (2.5.1) + # ImportError: cannot import name 'T_co' from 'torch.utils.data.dataset' run: | python -m pip install --upgrade pip python -m pip install --upgrade pip setuptools wheel @@ -60,21 +67,33 @@ jobs: - name: Verify constants run: | - if grep -q "chebai/preprocessing/reader.py" changed_files.txt; then - if [ "$EMBEDDING_OFFSET" != "10" ]; then - echo "EMBEDDING_OFFSET does not match expected value!" + file_name="chebai/preprocessing/reader.py" + if grep -q "$file_name" changed_files.txt; then + echo "----------------------- Checking file : $file_name ----------------------- " + + # Define expected values for constants + exp_embedding_offset="10" + exp_cls_token="2" + exp_padding_token_index="0" + exp_mask_token_index="1" + + # Verify constants match expected values + if [ "$EMBEDDING_OFFSET" != "$exp_embedding_offset" ]; then + echo "EMBEDDING_OFFSET ($EMBEDDING_OFFSET) does not match expected value ($exp_embedding_offset)!" exit 1 fi - if [ "$CLS_TOKEN" != "2" ]; then - echo "CLS_TOKEN does not match expected value!" + if [ "$CLS_TOKEN" != "$exp_cls_token" ]; then + echo "CLS_TOKEN ($CLS_TOKEN) does not match expected value ($exp_cls_token)!" exit 1 fi - if [ "$PADDING_TOKEN_INDEX" != "0" ]; then - echo "PADDING_TOKEN_INDEX does not match expected value!" + if [ "$PADDING_TOKEN_INDEX" != "$exp_padding_token_index" ]; then + echo "PADDING_TOKEN_INDEX ($PADDING_TOKEN_INDEX) does not match expected value ($exp_padding_token_index)!" exit 1 fi - if [ "$MASK_TOKEN_INDEX" != "1" ]; then - echo "MASK_TOKEN_INDEX does not match expected value!" + if [ "$MASK_TOKEN_INDEX" != "$exp_mask_token_index" ]; then + echo "MASK_TOKEN_INDEX ($MASK_TOKEN_INDEX) does not match expected value ($exp_mask_token_index)!" exit 1 fi + else + echo "$file_name not found in changed_files.txt; skipping check." fi From debf5b5d2bd706b1896fb909fe3079d4dae8e8f5 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 13:42:41 +0100 Subject: [PATCH 49/60] code to print all env variables --- .github/workflows/verify_constants.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index 7ac29e4e..3f34a3f6 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -65,6 +65,9 @@ jobs: constants=$(cat constants.json) echo "$constants" | jq -r 'to_entries|map("export \(.key)=\(.value|tostring)")|.[]' >> $GITHUB_ENV + - name: Print all environment variables + run: printenv + - name: Verify constants run: | file_name="chebai/preprocessing/reader.py" From 020b040b3d05b17e6723001cf5a4cfc0e0797b85 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 13:51:42 +0100 Subject: [PATCH 50/60] add E_ suffix to each set env variable --- .github/workflows/verify_constants.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index 3f34a3f6..2342c8b0 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -61,9 +61,10 @@ jobs: - name: Load constants into environment variables id: load_constants + # "E_" is appended as suffix to every constant, to protect overwriting other sys env variables with same name run: | constants=$(cat constants.json) - echo "$constants" | jq -r 'to_entries|map("export \(.key)=\(.value|tostring)")|.[]' >> $GITHUB_ENV + echo "$constants" | jq -r 'to_entries|map("E_\(.key)=\(.value|tostring)")|.[]' >> $GITHUB_ENV - name: Print all environment variables run: printenv @@ -81,19 +82,19 @@ jobs: exp_mask_token_index="1" # Verify constants match expected values - if [ "$EMBEDDING_OFFSET" != "$exp_embedding_offset" ]; then + if [ "$E_EMBEDDING_OFFSET" != "$exp_embedding_offset" ]; then echo "EMBEDDING_OFFSET ($EMBEDDING_OFFSET) does not match expected value ($exp_embedding_offset)!" exit 1 fi - if [ "$CLS_TOKEN" != "$exp_cls_token" ]; then + if [ "$E_CLS_TOKEN" != "$exp_cls_token" ]; then echo "CLS_TOKEN ($CLS_TOKEN) does not match expected value ($exp_cls_token)!" exit 1 fi - if [ "$PADDING_TOKEN_INDEX" != "$exp_padding_token_index" ]; then + if [ "$E_PADDING_TOKEN_INDEX" != "$exp_padding_token_index" ]; then echo "PADDING_TOKEN_INDEX ($PADDING_TOKEN_INDEX) does not match expected value ($exp_padding_token_index)!" exit 1 fi - if [ "$MASK_TOKEN_INDEX" != "$exp_mask_token_index" ]; then + if [ "$E_MASK_TOKEN_INDEX" != "$exp_mask_token_index" ]; then echo "MASK_TOKEN_INDEX ($MASK_TOKEN_INDEX) does not match expected value ($exp_mask_token_index)!" exit 1 fi From 3f5a7fef1c5c723e3b3ed0ef0f430d029be83e9a Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 13:52:35 +0100 Subject: [PATCH 51/60] Update verify_constants.yml --- .github/workflows/verify_constants.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index 2342c8b0..4cd6ba08 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -83,19 +83,19 @@ jobs: # Verify constants match expected values if [ "$E_EMBEDDING_OFFSET" != "$exp_embedding_offset" ]; then - echo "EMBEDDING_OFFSET ($EMBEDDING_OFFSET) does not match expected value ($exp_embedding_offset)!" + echo "EMBEDDING_OFFSET ($E_EMBEDDING_OFFSET) does not match expected value ($exp_embedding_offset)!" exit 1 fi if [ "$E_CLS_TOKEN" != "$exp_cls_token" ]; then - echo "CLS_TOKEN ($CLS_TOKEN) does not match expected value ($exp_cls_token)!" + echo "CLS_TOKEN ($E_CLS_TOKEN) does not match expected value ($exp_cls_token)!" exit 1 fi if [ "$E_PADDING_TOKEN_INDEX" != "$exp_padding_token_index" ]; then - echo "PADDING_TOKEN_INDEX ($PADDING_TOKEN_INDEX) does not match expected value ($exp_padding_token_index)!" + echo "PADDING_TOKEN_INDEX ($E_PADDING_TOKEN_INDEX) does not match expected value ($exp_padding_token_index)!" exit 1 fi if [ "$E_MASK_TOKEN_INDEX" != "$exp_mask_token_index" ]; then - echo "MASK_TOKEN_INDEX ($MASK_TOKEN_INDEX) does not match expected value ($exp_mask_token_index)!" + echo "MASK_TOKEN_INDEX ($E_MASK_TOKEN_INDEX) does not match expected value ($exp_mask_token_index)!" exit 1 fi else From ceb4e70cb9c77e870d2ee66333b664a4c787f612 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 14:02:38 +0100 Subject: [PATCH 52/60] Update reader.py --- chebai/preprocessing/reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chebai/preprocessing/reader.py b/chebai/preprocessing/reader.py index 4b06ae7d..bf6bba42 100644 --- a/chebai/preprocessing/reader.py +++ b/chebai/preprocessing/reader.py @@ -8,9 +8,9 @@ from chebai.preprocessing.collate import DefaultCollator, RaggedCollator -EMBEDDING_OFFSET = 100 +EMBEDDING_OFFSET = 10 PADDING_TOKEN_INDEX = 0 -MASK_TOKEN_INDEX = 1 +MASK_TOKEN_INDEX = 10 CLS_TOKEN = 2 From 9f565e1662d3e368123002cde1af7405ea9af537 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 14:04:16 +0100 Subject: [PATCH 53/60] debug embedding offset --- .github/workflows/verify_constants.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index 4cd6ba08..8318bcb6 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -75,6 +75,11 @@ jobs: if grep -q "$file_name" changed_files.txt; then echo "----------------------- Checking file : $file_name ----------------------- " + # Debugging output to check environment variables + echo "Current Environment Variables:" + echo "E_EMBEDDING_OFFSET = $E_EMBEDDING_OFFSET" + echo "Expected: $exp_embedding_offset" + # Define expected values for constants exp_embedding_offset="10" exp_cls_token="2" From e8a28a64ae09641a0ce8ab7dbf31b46daf19863b Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 14:07:30 +0100 Subject: [PATCH 54/60] Update verify_constants.yml --- .github/workflows/verify_constants.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index 8318bcb6..700ea5c1 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -75,17 +75,17 @@ jobs: if grep -q "$file_name" changed_files.txt; then echo "----------------------- Checking file : $file_name ----------------------- " - # Debugging output to check environment variables - echo "Current Environment Variables:" - echo "E_EMBEDDING_OFFSET = $E_EMBEDDING_OFFSET" - echo "Expected: $exp_embedding_offset" - # Define expected values for constants exp_embedding_offset="10" exp_cls_token="2" exp_padding_token_index="0" exp_mask_token_index="1" + # Debugging output to check environment variables + echo "Current Environment Variables:" + echo "E_EMBEDDING_OFFSET = $E_EMBEDDING_OFFSET" + echo "Expected: $exp_embedding_offset" + # Verify constants match expected values if [ "$E_EMBEDDING_OFFSET" != "$exp_embedding_offset" ]; then echo "EMBEDDING_OFFSET ($E_EMBEDDING_OFFSET) does not match expected value ($exp_embedding_offset)!" From c92b9aec00cf16549062939d756c11c752d772f5 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 14:08:09 +0100 Subject: [PATCH 55/60] Update reader.py --- chebai/preprocessing/reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chebai/preprocessing/reader.py b/chebai/preprocessing/reader.py index bf6bba42..63a65a47 100644 --- a/chebai/preprocessing/reader.py +++ b/chebai/preprocessing/reader.py @@ -8,7 +8,7 @@ from chebai.preprocessing.collate import DefaultCollator, RaggedCollator -EMBEDDING_OFFSET = 10 +EMBEDDING_OFFSET = 100 PADDING_TOKEN_INDEX = 0 MASK_TOKEN_INDEX = 10 CLS_TOKEN = 2 From 156a771c1dcfe8b451ff67651e7b7fc1c92e79c0 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 14:10:12 +0100 Subject: [PATCH 56/60] Update export_constants.py --- .github/workflows/export_constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/export_constants.py b/.github/workflows/export_constants.py index 6546d3a3..cbc13f10 100644 --- a/.github/workflows/export_constants.py +++ b/.github/workflows/export_constants.py @@ -8,6 +8,7 @@ ) # Define the constants you want to export +# Any changes in the key names here should also follow the same change in verify_constants.yml code constants = { "EMBEDDING_OFFSET": EMBEDDING_OFFSET, "CLS_TOKEN": CLS_TOKEN, From 0d117f662201d797d8e45936cd5c88cae535162d Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 14:10:23 +0100 Subject: [PATCH 57/60] Update reader.py --- chebai/preprocessing/reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chebai/preprocessing/reader.py b/chebai/preprocessing/reader.py index 63a65a47..e220e1e4 100644 --- a/chebai/preprocessing/reader.py +++ b/chebai/preprocessing/reader.py @@ -8,9 +8,9 @@ from chebai.preprocessing.collate import DefaultCollator, RaggedCollator -EMBEDDING_OFFSET = 100 +EMBEDDING_OFFSET = 10 PADDING_TOKEN_INDEX = 0 -MASK_TOKEN_INDEX = 10 +MASK_TOKEN_INDEX = 1 CLS_TOKEN = 2 From ffeacd25a7604d13502f8e48fa00067f0c557131 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 14:15:36 +0100 Subject: [PATCH 58/60] Update export_constants.py --- .github/workflows/export_constants.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/export_constants.py b/.github/workflows/export_constants.py index cbc13f10..6421498a 100644 --- a/.github/workflows/export_constants.py +++ b/.github/workflows/export_constants.py @@ -16,6 +16,7 @@ "MASK_TOKEN_INDEX": MASK_TOKEN_INDEX, } -# Write constants to a JSON file -with open("constants.json", "w") as f: - json.dump(constants, f) +if __name__ == "__main__": + # Write constants to a JSON file + with open("constants.json", "w") as f: + json.dump(constants, f) From b62c254908081ae10c8e1832d7f47f49a3fe434d Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 14:16:17 +0100 Subject: [PATCH 59/60] Update reader.py --- chebai/preprocessing/reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chebai/preprocessing/reader.py b/chebai/preprocessing/reader.py index e220e1e4..e1f17fb9 100644 --- a/chebai/preprocessing/reader.py +++ b/chebai/preprocessing/reader.py @@ -8,7 +8,7 @@ from chebai.preprocessing.collate import DefaultCollator, RaggedCollator -EMBEDDING_OFFSET = 10 +EMBEDDING_OFFSET = 102 PADDING_TOKEN_INDEX = 0 MASK_TOKEN_INDEX = 1 CLS_TOKEN = 2 From dd5ba41fe34dfd0ea73060d726057b31f87ff4ff Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 31 Oct 2024 14:19:52 +0100 Subject: [PATCH 60/60] Update verify_constants.yml --- .github/workflows/verify_constants.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/verify_constants.yml b/.github/workflows/verify_constants.yml index 700ea5c1..3246f64d 100644 --- a/.github/workflows/verify_constants.yml +++ b/.github/workflows/verify_constants.yml @@ -1,5 +1,13 @@ name: Verify Constants +# Define the file paths under `paths` to trigger this check only when specific files are modified. +# This script will then execute checks only on files that have changed, rather than all files listed in `paths`. + +# **Note** : To add a new file for checks, include its path in: +# - `on` -> `push` and `pull_request` sections +# - `jobs` -> `verify-constants` -> `steps` -> Verify constants -> Add a new if else for your file, with check logic inside it. + + on: push: paths: