diff --git a/.github/ISSUE_TEMPLATE/1-bug.yml b/.github/ISSUE_TEMPLATE/1-bug.yml
new file mode 100644
index 0000000..5b156ca
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/1-bug.yml
@@ -0,0 +1,96 @@
+name: "🐛 Bug report"
+description: Report errors or unexpected behavior
+title: "[Bug]: "
+labels: ["type/bug", "triage-needed"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to fill out this bug report! Please make sure to [search for existing issues](https://github.com/InternRobotics/InternUtopia/issues) before filing a new one.
+
+  - type: textarea
+    id: bug-description
+    attributes:
+      label: Bug Description
+      placeholder: |
+        A clear and concise description of what the bug is.
+        Try to isolate the issue to help the community reproduce it easily and increase the chances of a fast fix.
+    validations:
+      required: true
+
+  - type: textarea
+    id: steps-to-reproduce
+    attributes:
+      label: Steps to Reproduce
+      placeholder: |
+        Please try to provide a minimal example to reproduce the bug. Error messages and stack traces are also helpful.
+
+
+      value: |
+        Please try to provide a minimal example to reproduce the bug. Error messages and stack traces are also helpful.
+
+
+    validations:
+      required: true
+
+  - type: textarea
+    id: expected-behavior
+    attributes:
+      label: Expected Behavior
+      placeholder: "A clear and concise description of what you expected to happen."
+    validations:
+      required: true
+
+  - type: textarea
+    id: screenshots-videos
+    attributes:
+      label: Screenshots/Videos
+      placeholder: "If applicable, add screenshots and/or a video to help explain your problem."
+
+  - type: textarea
+    id: desktop-device
+    attributes:
+      label: Environment
+      placeholder: |
+        - OS: [e.g. Ubuntu 22.04]
+        - GPU/CPU: [e.g. A100, RTX 4090, i9-14900K]
+        - GPU-driver version
+      value: |
+        - OS: [e.g. Ubuntu 22.04]
+        - GPU/CPU: [e.g. A100, RTX 4090, i9-14900K]
+        - GPU-driver version
+    validations:
+      required: true
+
+  - type: textarea
+    id: version
+    attributes:
+      label: Release version or Commit ID
+      placeholder: |
+        Please provide:
+        - a) the **version number** of the release causing the issue, OR
+        - b) the **SHA/hash** of the latest commit if working from git. You can get this by running the `git rev-parse HEAD` command on your current branch.
+    validations:
+      required: true
+
+  - type: textarea
+    id: additional-context
+    attributes:
+      label: Additional Context
+      placeholder: "Add any other context about the problem here."
diff --git a/.github/ISSUE_TEMPLATE/2-enhancement.yml b/.github/ISSUE_TEMPLATE/2-enhancement.yml
new file mode 100644
index 0000000..3752c0f
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/2-enhancement.yml
@@ -0,0 +1,39 @@
+name: "🚀 Enhancement"
+description: Suggest a new feature or an improvement to the project
+title: '[Enhancement]: '
+labels:
+  - type/enhancement
+  - triage-needed
+
+body:
+  - type: markdown
+    attributes:
+      value: |
+        A clear and concise description of the new feature or behavior you would like to see. If applicable, please describe the current behavior as well.
+
+  - type: textarea
+    id: suggestion
+    attributes:
+      label: What feature or enhancement are you proposing?
+    validations:
+      required: true
+
+  - type: textarea
+    id: motivation
+    attributes:
+      label: Motivation
+      description: What is your motivation for adding / enhancing this feature? Ideally, describe it as a concrete user story or use case.
+      value: |
+
+    validations:
+      required: false
+
+  - type: textarea
+    id: additionalinfo
+    attributes:
+      label: Additional information
+      description: If any additional information would be useful, please provide it here.
diff --git a/.github/ISSUE_TEMPLATE/3-question.yml b/.github/ISSUE_TEMPLATE/3-question.yml
new file mode 100644
index 0000000..0b17ba7
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/3-question.yml
@@ -0,0 +1,16 @@
+name: "🙏 Question"
+description: Ask a question
+title: "[Question]: "
+labels: ["type/question", "triage-needed"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Please make sure to [search for existing issues](https://github.com/InternRobotics/InternUtopia/issues) before filing a new one!
+
+  - type: textarea
+    attributes:
+      label: Question
+      description: Describe your question in detail.
+    validations:
+      required: true
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..cd17d8e
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,69 @@
+name: PR CI
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    branches: [ main, master, develop ]
+    paths-ignore:
+      - '**.md'
+      - 'docs/**'
+
+concurrency:
+  group: pr-${{ github.event.pull_request.number }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: self-hosted
+    if: ${{ github.event.pull_request.draft == false }}  # no tests on draft PRs
+
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ['3.10']
+        os: [linux]
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          submodules: recursive
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Run pre-commit on diff only
+        if: ${{ github.event_name == 'pull_request' }}
+        shell: bash -l {0}
+        run: |
+          export PATH=/root/miniconda3/bin:$PATH
+          source /root/miniconda3/etc/profile.d/conda.sh
+          conda activate internutopia
+
+          git fetch origin ${{ github.base_ref }}
+          pre-commit run --from-ref origin/${{ github.base_ref }} --to-ref HEAD
+
+      - name: Run tests
+        shell: bash -l {0}
+        run: |
+          # conda
+          export PATH=/root/miniconda3/bin:$PATH
+          source /root/miniconda3/etc/profile.d/conda.sh
+          conda activate internutopia
+
+          # link data
+          mkdir data
+          ln -s /cpfs/user/wangyukai/mp3d_data/vln_pe data/vln_pe
+          ln -s /cpfs/user/wangyukai/mp3d_data/Embodiments data/Embodiments
+          ln -s /cpfs/user/wangyukai/mp3d_data/scene_data data/scene_data
+          ln -s /cpfs/user/wangyukai/checkpoints checkpoints
+
+          # run tests
+          /root/miniconda3/envs/internutopia/bin/python -c "import torch,sys;print(sys.executable);print('cuda:',torch.cuda.is_available())"
+          /root/miniconda3/envs/internutopia/bin/python -m pytest -q -W ignore --timeout=900 --timeout-method=signal
diff --git a/.gitignore b/.gitignore
index d202a63..73cfe4b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -148,4 +148,5 @@ logs/
 *.png
 *.ckpt
 /results/
-checkpoints
\ No newline at end of file
+checkpoints
+internnav/model/basemodel/LongCLIP/
diff --git a/.gitmodules b/.gitmodules
index fdc4a9d..e307b6b 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -5,3 +5,5 @@
 [submodule "internnav/model/basemodel/LongCLIP"]
 	path = internnav/model/basemodel/LongCLIP
 	url = https://github.com/beichenzbc/Long-CLIP
+	commit = 3966af9ae9331666309a22128468b734db4672a7
+	ignore = untracked
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9dee0f4..0e05562 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,3 +1,6 @@
+exclude: |
+  ^internnav/model/basemodel/LongCLIP/
+
 repos:
   - repo: https://github.com/PyCQA/autoflake
     rev: v2.2.0
@@ -26,69 +29,15 @@ repos:
     rev: v2.2.1
     hooks:
       - id: codespell
-        exclude: |
-          (?x)(
-            ^toolkits/grscenes_scripts/README.md|
-            ^toolkits/indoor_scenes_generation/infinigen/infinigen_examples/constraints
-          )
-  # - repo: https://github.com/gitleaks/gitleaks
-  #   rev: v8.24.0
-  #   hooks:
-  #     - id: gitleaks
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v3.1.0
     hooks:
       - id: trailing-whitespace
      - id: check-yaml
      - id: end-of-file-fixer
-        exclude: '^(.*/lcmtypes/.*)'
      - id: requirements-txt-fixer
-      - id: double-quote-string-fixer
-        exclude: '^(.*/lcmtypes/.*)'
      - id: check-merge-conflict
      - id: fix-encoding-pragma
        args: ["--remove"]
      - id: mixed-line-ending
        args: ["--fix=lf"]
-
-  # - repo: https://github.com/PyCQA/isort
-  #   rev: 5.11.5
-  #   hooks:
-  #     - id: isort
-  # - repo: https://github.com/psf/black
-  #   rev: 22.10.0
-  #   hooks:
-  #     - id: black
-  #       args: [--line-length=79]
-  # - repo: https://github.com/PyCQA/flake8
-  #   rev: 4.0.1
-  #   hooks:
-  #     - id: flake8
-  # - repo: https://github.com/codespell-project/codespell
-  #   rev: v2.2.1
-  #   hooks:
-  #     - id: codespell
-  #       exclude: |
-  #         (?x)(
-  #           ^toolkits/grscenes_scripts/README.md|
-  #           ^toolkits/indoor_scenes_generation/infinigen/infinigen_examples/constraints
-  #         )
-  # - repo: https://github.com/gitleaks/gitleaks
-  #   rev: v8.24.0
-  #   hooks:
-  #     - id: gitleaks
-  # - repo: https://github.com/pre-commit/pre-commit-hooks
-  #   rev: v3.1.0
-  #   hooks:
-  #     - id: trailing-whitespace
-  #     - id: check-yaml
-  #     - id: end-of-file-fixer
-  #       exclude: '^(.*/lcmtypes/.*)'
-  #     - id: requirements-txt-fixer
-  #     - id: double-quote-string-fixer
-  #       exclude: '^(.*/lcmtypes/.*)'
-  #     - id: check-merge-conflict
-  #     - id: fix-encoding-pragma
-  #       args: ["--remove"]
-  #     - id: mixed-line-ending
-  #       args: ["--fix=lf"]
diff --git a/pyproject.toml b/pyproject.toml
index c6b12da..10baee6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,3 +8,13 @@ lcmtypes
 [tool.isort]
 profile = "black"
 skip_glob = '**/lcmtypes/**'
+
+[tool.pytest.ini_options]
+testpaths = [
+    "tests"
+]
+addopts = "-ra --color=yes --maxfail=1"
+markers = [
+    "slow: marks tests as slow",
+    "gpu: requires GPU"
+]
diff --git a/requirements/agent.txt b/requirements/agent.txt
deleted file mode 100644
index 49135e2..0000000
--- a/requirements/agent.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-flask
-pydantic
diff --git a/requirements/eval.txt b/requirements/eval.txt
index 3521b85..0d8028c 100644
--- a/requirements/eval.txt
+++ b/requirements/eval.txt
@@ -1,3 +1,3 @@
+ansi2txt==0.2.0
 pydantic>2.0
-requests
-ansi2txt
\ No newline at end of file
+requests==2.32.3
diff --git a/requirements/test.txt b/requirements/test.txt
new file mode 100644
index 0000000..11f4ffc
--- /dev/null
+++ b/requirements/test.txt
@@ -0,0 +1,5 @@
+coverage==7.5.4
+pytest==7.3.1
+pytest-cov==4.1.0
+pytest-timeout==2.4.0
+tomli==2.0.1
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..a1c6517
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,28 @@
+import pytest
+
+
+@pytest.fixture
+def tmp_cfg(tmp_path):
+    p = tmp_path / "config.yaml"
+    p.write_text("hello: world\n")
+    return p
+
+
+# global hook: skip mark
+def pytest_runtest_setup(item):
+    if "gpu" in item.keywords:
+        try:
+            import torch
+
+            if not torch.cuda.is_available():
+                pytest.skip("No CUDA for gpu-marked test")
+        except Exception:
+            pytest.skip("Torch not available")
+    if "ray" in item.keywords:
+        try:
+            import ray
+
+            ray.init()
+            assert ray.is_initialized()
+        except Exception:
+            pytest.skip("ray not available")
diff --git a/tests/function_test/e2e_test.py b/tests/function_test/e2e_test.py
new file mode 100644
index 0000000..67b48b8
--- /dev/null
+++ b/tests/function_test/e2e_test.py
@@ -0,0 +1,66 @@
+import json
+import os
+import subprocess
+import sys
+
+import pytest
+
+
+def common_body(cmd_line):
+    with subprocess.Popen(
+        cmd_line,
+        stdin=subprocess.PIPE,
+        stderr=sys.stderr,
+        close_fds=True,
+        stdout=sys.stdout,
+        universal_newlines=True,
+        shell=True,
+        bufsize=1,
+    ) as cmd:
+        cmd.communicate()
+        assert cmd.returncode == 0, f'real exit code is {cmd.returncode}'
+
+
+def update_jsonl_from_json(json_file_path, jsonl_file_path, update_item):
+    with open(json_file_path, 'r', encoding='utf-8') as json_file:
+        data = json.load(json_file)
+    data = {**update_item, **data}
+    if not isinstance(data, list):
+        data = [data]
+    with open(jsonl_file_path, 'a', encoding='utf-8') as jsonl_file:
+        for item in data:
+            json_line = json.dumps(item, ensure_ascii=False)
+            jsonl_file.write(json_line + '\n')
+
+
+def teardown_function(function):
+    if os.path.exists('./test_result.json'):
+        case_info = {}
+        test_name = function.__name__
+        case_info['case_info'] = test_name + '_' + os.environ.get('JOB_ID')
+        update_jsonl_from_json('./test_result.json', '../total_result.jsonl', case_info)
+    else:
+        print('Warning! There is no test_result.json')
+
+
+"""""" """""" """""" """""" """""" """""" """""" """
+Test
+""" """""" """""" """""" """""" """""" """""" """"""
+
+
+@pytest.mark.cpu
+def test_server():
+    start_command = 'python ./tests/function_test/test_server.py'
+    common_body(start_command)
+
+
+@pytest.mark.gpu
+def test_challenge():
+    start_command = 'python ./tests/function_test/test_challenge.py'
+    common_body(start_command)
+
+
+@pytest.mark.ray
+def test_challenge_ray():
+    start_command = 'python ./tests/function_test/test_challenge_ray.py'
+    common_body(start_command)
diff --git a/tests/function_test/test_challenge.py b/tests/function_test/test_challenge.py
new file mode 100644
index 0000000..ab492a5
--- /dev/null
+++ b/tests/function_test/test_challenge.py
@@ -0,0 +1,104 @@
+'''
+Test the evaluator eval logic without a model involved.
+The main process:
+    Init => warm up => fake one action
+'''
+
+import importlib.util
+import subprocess
+import sys
+import time
+
+import numpy as np
+
+from internnav.configs.evaluator.default_config import get_config
+from internnav.evaluator import Evaluator
+from internnav.utils import progress_log_multi_util
+
+
+def main():
+    from enum import Enum
+
+    class runner_status_code(Enum):
+        NORMAL = 0
+        WARM_UP = 1
+        NOT_RESET = 3
+        TERMINATED = 2
+        STOP = 4
+
+    def load_eval_cfg(config_path, attr_name='eval_cfg'):
+        spec = importlib.util.spec_from_file_location("eval_config_module", config_path)
+        config_module = importlib.util.module_from_spec(spec)
+        sys.modules["eval_config_module"] = config_module
+        spec.loader.exec_module(config_module)
+        return getattr(config_module, attr_name)
+
+    evaluator_cfg = load_eval_cfg('scripts/eval/configs/challenge_cfg.py', attr_name='eval_cfg')
+    cfg = get_config(evaluator_cfg)
+    evaluator = Evaluator.init(cfg)
+
+    print('--- VlnPeEvaluator start ---')
+    obs, reset_info = evaluator.env.reset()
+    for info in reset_info:
+        if info is None:
+            continue
+        progress_log_multi_util.trace_start(
+            trajectory_id=evaluator.now_path_key(info),
+        )
+
+    obs = evaluator.warm_up()
+    evaluator.fake_obs = obs[0][evaluator.robot_name]
+    action = [{evaluator.robot_name: {'stand_still': []}} for _ in range(evaluator.env_num * evaluator.proc_num)]
+    obs = evaluator._obs_remove_robot_name(obs)
+    evaluator.runner_status = np.full(
+        (evaluator.env_num * evaluator.proc_num),
+        runner_status_code.NORMAL,
+        runner_status_code,
+    )
+    evaluator.runner_status[[info is None for info in reset_info]] = runner_status_code.TERMINATED
+
+    while evaluator.env.is_running():
+        obs, action = evaluator.get_action(obs, action)
+        obs, terminated = evaluator.env_step(action)
+        env_term, reset_info = evaluator.terminate_ops(obs, reset_info, terminated)
+        break
+
+    evaluator.env.close()
+
+
+def start_server():
+    server_cmd = [
+        sys.executable,
+        "internnav/agent/utils/server.py",
+        "--config",
+        "scripts/eval/configs/challenge_cfg.py",
+    ]
+
+    proc = subprocess.Popen(
+        server_cmd,
+        stdout=None,
+        stderr=None,
+    )
+    return proc
+
+
+if __name__ == '__main__':
+    try:
+        proc = start_server()
+        time.sleep(3)
+        main()
+    except Exception as e:
+        print(f'exception is {e}')
+        import traceback
+
+        traceback.print_exc()
+        sys.exit(1)
+    finally:
+        if proc and proc.poll() is None:
+            print("Shutting down server...")
+            proc.terminate()
+            try:
+                proc.wait(timeout=10)
+            except subprocess.TimeoutExpired:
+                print("Force killing server...")
+                proc.kill()
diff --git a/tests/function_test/test_challenge_ray.py b/tests/function_test/test_challenge_ray.py
new file mode 100644
index 0000000..2b35afb
--- /dev/null
+++ b/tests/function_test/test_challenge_ray.py
@@ -0,0 +1,106 @@
+'''
+Test the evaluator eval logic with ray, set proc_num = 4.
+The main process:
+    Init => warm up => one action
+'''
+
+import importlib.util
+import subprocess
+import sys
+import time
+
+import numpy as np
+
+from internnav.configs.evaluator.default_config import get_config
+from internnav.evaluator import Evaluator
+from internnav.utils import progress_log_multi_util
+
+
+def main():
+    from enum import Enum
+
+    class runner_status_code(Enum):
+        NORMAL = 0
+        WARM_UP = 1
+        NOT_RESET = 3
+        TERMINATED = 2
+        STOP = 4
+
+    def load_eval_cfg(config_path, attr_name='eval_cfg'):
+        spec = importlib.util.spec_from_file_location("eval_config_module", config_path)
+        config_module = importlib.util.module_from_spec(spec)
+        sys.modules["eval_config_module"] = config_module
+        spec.loader.exec_module(config_module)
+        return getattr(config_module, attr_name)
+
+    evaluator_cfg = load_eval_cfg('scripts/eval/configs/challenge_cfg.py', attr_name='eval_cfg')
+    evaluator_cfg.task.task_settings["use_distributed"] = True
+    evaluator_cfg.task.task_settings["proc_num"] = 4
+    cfg = get_config(evaluator_cfg)
+    evaluator = Evaluator.init(cfg)
+
+    print('--- VlnPeEvaluator start ---')
+    obs, reset_info = evaluator.env.reset()
+    for info in reset_info:
+        if info is None:
+            continue
+        progress_log_multi_util.trace_start(
+            trajectory_id=evaluator.now_path_key(info),
+        )
+
+    obs = evaluator.warm_up()
+    evaluator.fake_obs = obs[0][evaluator.robot_name]
+    action = [{evaluator.robot_name: {'stand_still': []}} for _ in range(evaluator.env_num * evaluator.proc_num)]
+    obs = evaluator._obs_remove_robot_name(obs)
+    evaluator.runner_status = np.full(
+        (evaluator.env_num * evaluator.proc_num),
+        runner_status_code.NORMAL,
+        runner_status_code,
+    )
+    evaluator.runner_status[[info is None for info in reset_info]] = runner_status_code.TERMINATED
+
+    while evaluator.env.is_running():
+        obs, action = evaluator.get_action(obs, action)
+        obs, terminated = evaluator.env_step(action)
+        env_term, reset_info = evaluator.terminate_ops(obs, reset_info, terminated)
+        break
+
+    evaluator.env.close()
+
+
+def start_server():
+    server_cmd = [
+        sys.executable,
+        "internnav/agent/utils/server.py",
+        "--config",
+        "scripts/eval/configs/challenge_cfg.py",
+    ]
+
+    proc = subprocess.Popen(
+        server_cmd,
+        stdout=None,
+        stderr=None,
+    )
+    return proc
+
+
+if __name__ == '__main__':
+    try:
+        proc = start_server()
+        time.sleep(3)
+        main()
+    except Exception as e:
+        print(f'exception is {e}')
+        import traceback
+
+        traceback.print_exc()
+        sys.exit(1)
+    finally:
+        if proc and proc.poll() is None:
+            print("Shutting down server...")
+            proc.terminate()
+            try:
+                proc.wait(timeout=10)
+            except subprocess.TimeoutExpired:
+                print("Force killing server...")
+                proc.kill()
diff --git a/tests/function_test/test_server.py b/tests/function_test/test_server.py
new file mode 100644
index 0000000..5bdfcee
--- /dev/null
+++ b/tests/function_test/test_server.py
@@ -0,0 +1,49 @@
+"""
+Test if the server starts successfully and is still alive after sleep.
+"""
+import subprocess
+import sys
+import time
+
+
+def start_server():
+    server_cmd = [
+        sys.executable,
+        "internnav/agent/utils/server.py",
+        "--config",
+        "scripts/eval/configs/challenge_cfg.py",
+    ]
+
+    proc = subprocess.Popen(
+        server_cmd,
+        stdout=None,
+        stderr=None,
+        start_new_session=True,
+    )
+    return proc
+
+
+if __name__ == '__main__':
+    try:
+        proc = start_server()
+        time.sleep(5)
+
+        # Raise if process exited
+        if proc.poll() is not None:
+            raise RuntimeError(f"❌ Server exited too early with code {proc.returncode}")
+        print("✅ Server is still alive after 5 seconds.")
+
+        if proc and proc.poll() is None:
+            print("Shutting down server...")
+            proc.terminate()
+            try:
+                proc.wait(timeout=10)
+            except subprocess.TimeoutExpired:
+                raise RuntimeError("❌ Server failed to shut down within 10 seconds.")
+
+    except Exception as e:
+        print(f'exception is {e}')
+        import traceback
+
+        traceback.print_exc()
+        sys.exit(1)
diff --git a/tests/unit_test/test_basic.py b/tests/unit_test/test_basic.py
new file mode 100644
index 0000000..cf7cd49
--- /dev/null
+++ b/tests/unit_test/test_basic.py
@@ -0,0 +1,32 @@
+import math
+
+import pytest
+
+
+def add(a, b):
+    return a + b
+
+
+def test_add_works():
+    assert add(1, 2) == 3
+
+
+@pytest.mark.parametrize("x,expected", [(0, 0.0), (math.pi, 0.0)])
+def test_sin(x, expected):
+    assert math.isclose(math.sin(x), expected, abs_tol=1e-9)
+
+
+@pytest.mark.slow
+def test_slow_example():
+    assert sum(range(10000)) > 0
+
+
+@pytest.mark.gpu
+def test_gpu_feature():
+    pytest.importorskip("torch")
+    import torch
+
+    if not torch.cuda.is_available():
+        pytest.skip("No CUDA available")
+    x = torch.tensor([1.0], device="cuda")
+    assert float(x.item()) == 1.0
diff --git a/tests/unit_test/test_evaluator_unit.py b/tests/unit_test/test_evaluator_unit.py
new file mode 100644
index 0000000..829da0a
--- /dev/null
+++ b/tests/unit_test/test_evaluator_unit.py
@@ -0,0 +1,22 @@
+import numpy as np
+import pytest
+
+from internnav.evaluator.vln_pe_evaluator import transform_action_batch
+
+
+@pytest.mark.slow
+def test_transform_action_batch_discrete():
+    origin = [(np.array([0]),), (np.array([-1]),), (np.array([3]),)]
+    out = transform_action_batch(origin, flash=False)
+    assert out == [
+        {'h1': {'stop': []}},
+        {'h1': {'stand_still': []}},
+        {'h1': {'move_by_discrete': [3]}},
+    ]
+
+
+@pytest.mark.slow
+def test_transform_action_batch_flash():
+    origin = [(np.array([5]),)]
+    out = transform_action_batch(origin, flash=True)
+    assert out == [{'h1': {'move_by_flash': [5]}}]