Skip to content

Commit

Permalink
Merge branch 'master' into softmax
Browse files Browse the repository at this point in the history
  • Loading branch information
StrikerRUS committed Sep 13, 2020
2 parents a0afb47 + 91ef2ef commit e742507
Show file tree
Hide file tree
Showing 68 changed files with 1,159 additions and 525 deletions.
54 changes: 0 additions & 54 deletions .ci/setup.sh

This file was deleted.

2 changes: 0 additions & 2 deletions .ci/test.sh
Expand Up @@ -2,8 +2,6 @@

set -e

cd $BUILD_DIRECTORY

if [[ $TEST == "API" ]]; then
flake8 .
pytest -v tests/ --cov=m2cgen/ --ignore=tests/e2e/
Expand Down
38 changes: 8 additions & 30 deletions .github/workflows/main.yml
Expand Up @@ -24,19 +24,10 @@ jobs:
uses: actions/checkout@v1
with:
fetch-depth: 5
- name: Setup Python ${{ matrix.python }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
- name: Setup dependencies and run API tests
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
TEST: API
PYTHON: ${{ matrix.python }}
BUILD_DIRECTORY: ${{ github.workspace }}
run: |
bash $GITHUB_WORKSPACE/.ci/setup.sh
bash $GITHUB_WORKSPACE/.ci/test.sh
- name: Build Docker image
run: docker build . --file Dockerfile -t m2cgen-docker --build-arg python=${{ matrix.python }}
- name: Run API tests
run: docker run -v "$GITHUB_WORKSPACE":"/m2cgen" -e TEST=API -e GITHUB_ACTIONS -e GITHUB_RUN_ID -e GITHUB_REF -e GITHUB_REPOSITORY -e GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} m2cgen-docker bash /m2cgen/.ci/test.sh

e2e-test:
name: "Python ${{ matrix.python }} E2E tests"
Expand All @@ -58,20 +49,7 @@ jobs:
uses: actions/checkout@v1
with:
fetch-depth: 5
- name: Setup Python ${{ matrix.python }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
- name: Setup dependencies and run E2E tests
env:
TEST: E2E
LANG: ${{ matrix.lang }}
PYTHON: ${{ matrix.python }}
BUILD_DIRECTORY: ${{ github.workspace }}
LC_ALL: en_US.UTF-8
run: |
sudo locale-gen $LC_ALL
sudo update-locale
bash $GITHUB_WORKSPACE/.ci/setup.sh
bash $GITHUB_WORKSPACE/.ci/test.sh
- name: Build Docker image
run: docker build . --file Dockerfile -t m2cgen-docker --build-arg python=${{ matrix.python }}
- name: Run E2E tests
run: docker run -v "$GITHUB_WORKSPACE":"/m2cgen" -e TEST=E2E -e LANG="${{ matrix.lang }}" m2cgen-docker bash /m2cgen/.ci/test.sh
44 changes: 32 additions & 12 deletions Dockerfile
@@ -1,26 +1,41 @@
FROM ubuntu:xenial
FROM ubuntu:bionic

ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
ARG python=3.8

ENV JAVA_HOME /usr/lib/jvm/zulu-8-amd64
ENV LC_ALL en_US.UTF-8
ENV TZ Etc/UTC

RUN apt-get update && \
apt-get install -y software-properties-common wget apt-transport-https && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get install --no-install-recommends -y \
gpg-agent \
dirmngr \
locales \
software-properties-common \
wget \
apt-transport-https && \
ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && \
echo $TZ > /etc/timezone && \
locale-gen $LC_ALL && \
update-locale && \
add-apt-repository ppa:deadsnakes/ppa -y && \
wget -q https://packages.microsoft.com/config/ubuntu/$(lsb_release -rs)/packages-microsoft-prod.deb -O packages-microsoft-prod.deb && \
dpkg -i packages-microsoft-prod.deb && \
wget -qO- https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \
wget -qO- https://storage.googleapis.com/download.dartlang.org/linux/debian/dart_stable.list > /etc/apt/sources.list.d/dart_stable.list && \
apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 0xB1998361219BD9C9 && \
add-apt-repository "deb http://repos.azulsystems.com/ubuntu stable main" -y && \
apt-get update && \
apt-get install --no-install-recommends -y \
git \
gcc \
g++ \
libc-dev \
libgomp1 \
python3.7 \
python${python}-dev \
python3-setuptools \
python3-pip \
python3.7-dev \
openjdk-8-jdk \
zulu-8 \
golang-go \
dotnet-sdk-3.1 \
powershell \
Expand All @@ -34,9 +49,14 @@ RUN apt-get update && \
WORKDIR /m2cgen

COPY requirements-test.txt ./
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1 && \
pip3 install --upgrade pip && \
pip3 install --no-cache-dir Cython numpy && \
pip3 install --no-cache-dir -r requirements-test.txt
RUN update-alternatives --install /usr/bin/python python /usr/bin/python${python} 1 && \
python -m pip install --upgrade pip && \
pip install --no-cache-dir Cython numpy && \
pip install --no-cache-dir -r requirements-test.txt

CMD python3 setup.py develop && pytest -v -x --fast
ENV MKL_NUM_THREADS=2
ENV NUMEXPR_NUM_THREADS=2
ENV OMP_NUM_THREADS=2
ENV OPENBLAS_NUM_THREADS=2
ENV VECLIB_MAXIMUM_THREADS=2
ENV BLIS_NUM_THREADS=2
1 change: 1 addition & 0 deletions MANIFEST.in
Expand Up @@ -3,4 +3,5 @@ recursive-include m2cgen VERSION.txt
recursive-include m2cgen linear_algebra.*
recursive-include m2cgen log1p.*
recursive-include m2cgen tanh.*
recursive-include m2cgen atan.*
global-exclude *.py[cod]
2 changes: 1 addition & 1 deletion Makefile
Expand Up @@ -12,7 +12,7 @@ docker-test-unit:
$(DOCKER_RUN_ARGS) bash -c "pytest -v --fast tests/ --ignore=tests/e2e/"

docker-generate-examples:
$(DOCKER_RUN_ARGS) bash -c "python3 setup.py develop && python3 tools/generate_code_examples.py generated_code_examples"
$(DOCKER_RUN_ARGS) bash -c "python setup.py develop && python tools/generate_code_examples.py generated_code_examples"

docker-flake8:
$(DOCKER_RUN_ARGS) bash -c "flake8 ."
Expand Down
4 changes: 4 additions & 0 deletions README.md
Expand Up @@ -131,3 +131,7 @@ A: If this error occurs while generating code using an ensemble model, try to re
**Q: Generation fails with `ImportError: No module named <module_name_here>` error while transpiling model from a serialized model object.**

A: This error indicates that pickle cannot deserialize the model object. To unpickle a serialized model object, its class must be defined at the top level of an importable module in the unpickling environment. Installing the package that provides the model's class definition should therefore solve the problem.

**Q: Code generated by m2cgen produces different results for some inputs compared to the original Python model from which the code was obtained.**

A: Some models force input data to be of a particular type during the prediction phase in their native Python libraries. Currently, m2cgen works only with the ``float64`` (``double``) data type. You can try casting your input data to another type manually and checking the results again. Also, some small differences can occur due to the specific implementation of floating-point arithmetic in the target language.
13 changes: 9 additions & 4 deletions m2cgen/assemblers/boosting.py
Expand Up @@ -151,7 +151,7 @@ def __init__(self, model):

def _assemble_tree(self, tree):
if "leaf" in tree:
return ast.NumVal(tree["leaf"])
return ast.NumVal(tree["leaf"], dtype=np.float32)

threshold = ast.NumVal(tree["split_condition"], dtype=np.float32)
split = tree["split"]
Expand Down Expand Up @@ -317,9 +317,14 @@ def _assemble_tree(self, tree):
op = ast.CompOpType.from_str_op(tree["decision_type"])
assert op == ast.CompOpType.LTE, "Unexpected comparison op"

# Make sure that if the "default_left" is true the left tree branch
# ends up in the "else" branch of the ast.IfExpr.
if tree["default_left"]:
missing_type = tree['missing_type']

if missing_type not in {"NaN", "None"}:
raise ValueError(f"Unknown missing_type: {missing_type}")

reverse_condition = missing_type == "NaN" and tree["default_left"]
reverse_condition |= missing_type == "None" and tree["threshold"] >= 0
if reverse_condition:
op = ast.CompOpType.GT
true_child = tree["right_child"]
false_child = tree["left_child"]
Expand Down
122 changes: 116 additions & 6 deletions m2cgen/assemblers/fallback_expressions.py
Expand Up @@ -40,18 +40,16 @@ def tanh(expr):
tanh_expr))


def sqrt(expr, to_reuse=False):
def sqrt(expr):
return ast.PowExpr(
base_expr=expr,
exp_expr=ast.NumVal(0.5),
to_reuse=to_reuse)
exp_expr=ast.NumVal(0.5))


def exp(expr, to_reuse=False):
def exp(expr):
return ast.PowExpr(
base_expr=ast.NumVal(math.e),
exp_expr=expr,
to_reuse=to_reuse)
exp_expr=expr)


def log1p(expr):
Expand All @@ -66,6 +64,118 @@ def log1p(expr):
utils.div(utils.mul(expr, ast.LogExpr(expr1p)), expr1pm1))


def atan(expr):
    """Build an expression tree that approximates ``atan(expr)``.

    The tree is assembled from ``IdExpr``, ``AbsExpr``, ``IfExpr`` and
    ``NumVal`` nodes plus the arithmetic/comparison helpers in ``utils``.

    The argument ``a = |expr|`` is first mapped to a reduced value ``r``:

    * ``a > 2.414...`` (numerically ``tan(3*pi/8)``): ``r = 1 / a``, using
      ``atan(a) = pi/2 - atan(1 / a)``;
    * ``0.66 < a <= 2.414...``: ``r = (a - 1) / (a + 1)``, using
      ``atan(a) = pi/4 + atan((a - 1) / (a + 1))``;
    * otherwise: ``r = a``.

    ``atan(r)`` is then approximated as ``r + r * z * P(z) / Q(z)`` with
    ``z = r * r`` and both polynomials evaluated in Horner form. Finally
    the range-reduction offset is added back and the sign of the original
    argument is restored.

    NOTE(review): the coefficients look like the Cephes ``atan`` rational
    approximation - confirm against the upstream reference.

    :param expr: AST expression whose arctangent should be computed.
    :return: AST expression evaluating to an approximation of ``atan(expr)``.
    """
    # The argument and its absolute value are referenced several times
    # below, so both nodes are created with ``to_reuse=True``.
    arg = ast.IdExpr(expr, to_reuse=True)
    arg_abs = ast.AbsExpr(arg, to_reuse=True)

    # --- Range reduction -------------------------------------------------
    above_upper = utils.gt(arg_abs, ast.NumVal(2.4142135623730950488))
    reciprocal = utils.div(ast.NumVal(1.0), arg_abs)
    above_lower = utils.gt(arg_abs, ast.NumVal(0.66))
    shifted = utils.div(
        utils.sub(arg_abs, ast.NumVal(1.0)),
        utils.add(arg_abs, ast.NumVal(1.0)))
    reduced = ast.IdExpr(
        ast.IfExpr(
            above_upper,
            reciprocal,
            ast.IfExpr(above_lower, shifted, arg_abs)),
        to_reuse=True)

    # --- Rational approximation -------------------------------------------
    # Magnitudes of the numerator coefficients; the first one carries its
    # own sign and the remaining ones are subtracted in the Horner loop.
    p_coeffs = [ast.NumVal(value) for value in (
        -8.750608600031904122785e-01,
        1.615753718733365076637e+01,
        7.500855792314704667340e+01,
        1.228866684490136173410e+02,
        6.485021904942025371773e+01,
    )]
    # Denominator coefficients; the leading term of Q has coefficient 1,
    # which is why the innermost Horner step is simply ``Q0 + z``.
    q_coeffs = [ast.NumVal(value) for value in (
        2.485846490142306297962e+01,
        1.650270098316988542046e+02,
        4.328810604912902668951e+02,
        4.853903996359136964868e+02,
        1.945506571482613964425e+02,
    )]

    squared = utils.mul(reduced, reduced, to_reuse=True)

    # Numerator: (((z * P0 - P1) * z - P2) * z - P3) * z - P4
    numerator = p_coeffs[0]
    for coeff in p_coeffs[1:]:
        numerator = utils.sub(utils.mul(squared, numerator), coeff)

    # Denominator: Q4 + z * (Q3 + z * (Q2 + z * (Q1 + z * (Q0 + z))))
    denominator = utils.add(q_coeffs[0], squared)
    for coeff in q_coeffs[1:]:
        denominator = utils.add(coeff, utils.mul(squared, denominator))

    correction = utils.mul(squared, utils.div(numerator, denominator))
    approx = utils.add(utils.mul(reduced, correction), reduced)

    # --- Undo the range reduction -----------------------------------------
    # In the reciprocal branch the approximation enters with a minus sign.
    reduction_sign = ast.IfExpr(
        utils.gt(arg_abs, ast.NumVal(2.4142135623730950488)),
        ast.NumVal(-1.0),
        ast.NumVal(1.0))
    result = utils.mul(approx, reduction_sign)

    # Offset per branch: 0 (no reduction), pi/2 (reciprocal) or pi/4.
    reduction_offset = ast.IfExpr(
        utils.lte(arg_abs, ast.NumVal(0.66)),
        ast.NumVal(0.0),
        ast.IfExpr(
            utils.gt(arg_abs, ast.NumVal(2.4142135623730950488)),
            ast.NumVal(1.570796326794896680463661649),
            ast.NumVal(0.7853981633974483402318308245)))
    result = utils.add(result, reduction_offset)

    # --- Restore the sign of the original argument ------------------------
    input_sign = ast.IfExpr(
        utils.lt(arg, ast.NumVal(0.0)),
        ast.NumVal(-1.0),
        ast.NumVal(1.0))
    return utils.mul(result, input_sign)


def sigmoid(expr, to_reuse=False):
neg_expr = ast.BinNumExpr(ast.NumVal(0.0), expr, ast.BinNumOpType.SUB)
exp_expr = ast.ExpExpr(neg_expr)
Expand Down

0 comments on commit e742507

Please sign in to comment.