diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 04ae609ba..da4cc9f28 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -53,40 +53,12 @@ jobs: done exit ${n} - # # # git workflow + # # # ml_model integrity - - name: Get latest release tag name + - name: Check ml_model.onnx integrity if: ${{ always() && steps.code_checkout.conclusion == 'success' }} run: | - if [ "pull_request" == "${{ github.event_name }}" ]; then - API_RELEASE_URL=$(echo "${{ github.event.pull_request.base.repo.releases_url }}") - else - API_RELEASE_URL=$(echo "${{ github.event.repository.releases_url }}") - fi - echo "'${API_RELEASE_URL}'" # dbg - API_RELEASE_URL=$(echo "${API_RELEASE_URL}" | sed 's|.....$||') - echo "'${API_RELEASE_URL}'" # dbg - API_RELEASE_URL=$(echo "${API_RELEASE_URL}/latest") - echo "'${API_RELEASE_URL}'" # dbg - LATEST_RELEASE_TAG=$( - curl \ - --silent \ - --header "Accept: application/vnd.github.v3+json" \ - --header "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ - ${API_RELEASE_URL} \ - | \ - jq \ - --raw-output \ - '.tag_name' \ - ) - echo "LATEST_RELEASE_TAG='${LATEST_RELEASE_TAG}'" - export GIT_ANCESTOR=${LATEST_RELEASE_TAG} - if bash cicd/git_workflow.sh; then - echo "GIT workflow OK" - else - echo "Please, rebase the branch after ${LATEST_RELEASE_TAG}" - exit 1 - fi + md5sum --binary credsweeper/ml_model/ml_model.onnx | grep 8cb870a200d7bc07893aacec38f54033 # # # Python setup diff --git a/cicd/README.md b/cicd/README.md index 436346de9..c26c01276 100644 --- a/cicd/README.md +++ b/cicd/README.md @@ -2,4 +2,3 @@ The following files are used for: * **benchmark.txt** - template scores to compare it with benchmark - * **git_workflow.sh** - checks whether branch in is pure rebased after latest release diff --git a/cicd/git_workflow.sh b/cicd/git_workflow.sh deleted file mode 100755 index e5a264e16..000000000 --- a/cicd/git_workflow.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash -set -e - -if [ -z 
"${GIT_ANCESTOR}" ]; then - echo "GIT_ANCESTOR is empty!" - exit 1 -fi - -head_hash=$(git log --pretty=%H -n 1 HEAD) -ancestor_hash=$(git log --pretty=%H -n 1 ${GIT_ANCESTOR}) - -if ! git merge-base --is-ancestor ${ancestor_hash} ${head_hash}; then - echo "${ancestor_hash} is not ancestor of ${head_hash}" - exit 1 -fi - -declare -A commits - -function git_test() -{ - echo -e -n "\ntest for ${1}" - - if [ "${ancestor_hash}" == "${1}" ]; then - echo "This commit is searched ${ancestor_hash}" - return 0 - fi - - if [ -v commits[${1}] ]; then - echo -n " - already checked" - return 0 - else - echo -n " - need investigation" - commits[${1}]+=1 - fi - - local has_parents=false - for commit in $(git log --pretty=%P -n 1 ${1}); do - has_parents=true - echo -n " - parent: ${commit}" - if ! git_test ${commit}; then - echo " - commit ${commit} fail" - return 1 - fi - done - - if ! ${has_parents}; then - echo " - the end. Commit ${1} has no parents" - return 1 - fi - - echo "end" - return 0 -} - -if ! 
git_test ${head_hash}; then - echo "FAIL: ${head_hash} is not pure rebased to ${ancestor_hash}" - exit 1 -fi - -echo "OK" - -exit 0 diff --git a/credsweeper/ml_model/ml_model.onnx b/credsweeper/ml_model/ml_model.onnx index c22d4c879..bd64a7734 100644 Binary files a/credsweeper/ml_model/ml_model.onnx and b/credsweeper/ml_model/ml_model.onnx differ diff --git a/experiment/tf2onnx/requirements.txt b/experiment/tf2onnx/requirements.txt new file mode 100644 index 000000000..e83f5bc8f --- /dev/null +++ b/experiment/tf2onnx/requirements.txt @@ -0,0 +1,8 @@ +h5py==3.10.0 +keras==2.13.1 +numpy==1.23.5 +onnx==1.15.0 +protobuf==3.20.3 +tensorflow==2.13.1 +tf2onnx==1.16.0 +wrapt==1.14.1 diff --git a/experiment/tf2onnx/tf2onnx.sh b/experiment/tf2onnx/tf2onnx.sh new file mode 100755 index 000000000..f7d659a68 --- /dev/null +++ b/experiment/tf2onnx/tf2onnx.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +set -e + +# tensorflow model may be obtained like this: git restore -s be06d6059f0def4f0fdb50444c08db4ce542173e -- ml_model.h5 +# use a virtual environment and the requirements.txt - it is a very specific, lucky combination of package versions +# python -m venv .venv +# . .venv/bin/activate +# python -m pip install --upgrade pip +# python -m pip install --requirement requirements.txt + +# [optional] transform the model from h5 to a saved-model directory +python -c 'import tensorflow as tf;model=tf.keras.models.load_model("ml_model.h5");model.save("ml_model")' +# transform the model +python -m tf2onnx.convert --saved-model ml_model --output ml_model.onnx --verbose --rename-inputs feature_input,line_input +# md5sum for integrity +md5sum --binary ml_model.onnx