TabbyML · yan91083 · Nov 15, 2023 · Nov 15, 2023 · Nov 17, 2023 · Nov 18, 2023
diff --git a/python/tabby-eval/.gitignore b/python/tabby-eval/.gitignore
@@ -0,0 +1,3 @@
+tmp*
+tabby_data_pipeline.egg-info
+log.txt
diff --git a/python/tabby-eval/README.md b/python/tabby-eval/README.md
@@ -0,0 +1,48 @@
+# tabby_data_pipeline
+
+This is a [Dagster](https://dagster.io/) project scaffolded with [`dagster project scaffold`](https://docs.dagster.io/getting-started/create-new-project).
+
+## Getting started
+
+First, install your Dagster code location as a Python package. With the `--editable` (`-e`) flag, pip installs the package in ["editable mode"](https://pip.pypa.io/en/latest/topics/local-project-installs/#editable-installs), so local code changes apply automatically as you develop.
+
+```bash
+pip install -e ".[dev]"
+```
+
+Then, start the Dagster UI web server:
+
+```bash
+dagster dev
+```
+
+Open http://localhost:3000 with your browser to see the project.
+
+You can start writing assets in `tabby_data_pipeline/assets.py`. The assets are automatically loaded into the Dagster code location as you define them.
+
+## Development
+
+
+### Adding new Python dependencies
+
+You can specify new Python dependencies in `setup.py`.
+
+### Unit testing
+
+Tests are in the `tabby_data_pipeline_tests` directory and you can run tests using `pytest`:
+
+```bash
+pytest tabby_data_pipeline_tests
+```
+
+### Schedules and sensors
+
+If you want to enable Dagster [Schedules](https://docs.dagster.io/concepts/partitions-schedules-sensors/schedules) or [Sensors](https://docs.dagster.io/concepts/partitions-schedules-sensors/sensors) for your jobs, the [Dagster Daemon](https://docs.dagster.io/deployment/dagster-daemon) process must be running. This is done automatically when you run `dagster dev`.
+
+Once your Dagster Daemon is running, you can start turning on schedules and sensors for your jobs.
+
+## Deploy on Dagster Cloud
+
+The easiest way to deploy your Dagster project is to use Dagster Cloud.
+
+Check out the [Dagster Cloud Documentation](https://docs.dagster.cloud) to learn more.
diff --git a/python/tabby-eval/edit_distance_analysis.ipynb b/python/tabby-eval/edit_distance_analysis.ipynb
diff --git a/python/tabby-eval/pyproject.toml b/python/tabby-eval/pyproject.toml
@@ -0,0 +1,6 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[tool.dagster]
+module_name = "tabby_data_pipeline"
diff --git a/python/tabby-eval/setup.cfg b/python/tabby-eval/setup.cfg
@@ -0,0 +1,2 @@
+[metadata]
+name = tabby_data_pipeline
diff --git a/python/tabby-eval/setup.py b/python/tabby-eval/setup.py
@@ -0,0 +1,17 @@
+from setuptools import find_packages, setup
+
+# Package definition for the Dagster code location.
+# Runtime dependencies go in ``install_requires``; tools that are only needed
+# during development are grouped in the ``dev`` extra (``pip install -e ".[dev]"``).
+setup(
+    name="tabby_data_pipeline",
+    packages=find_packages(exclude=["tabby_data_pipeline_tests"]),
+    install_requires=[
+        "dagster",
+        "dagster-cloud",
+        "dagstermill",
+        "papermill-origami>=0.0.8",
+        "pandas",
+        "matplotlib",
+        "seaborn",
+        "scikit-learn",
+    ],
+    extras_require={"dev": ["dagster-webserver", "pytest"]},
+)
diff --git a/python/tabby-eval/tabby_data_pipeline/__init__.py b/python/tabby-eval/tabby_data_pipeline/__init__.py
@@ -0,0 +1,17 @@
+from dagster import Definitions, load_assets_from_modules
+
+from dagstermill import ConfigurableLocalOutputNotebookIOManager
+
+
+from . import assets, create_csv
+
+# Collect every asset defined in the ``assets`` and ``create_csv`` modules.
+all_assets = load_assets_from_modules([assets, create_csv])
+
+# Definitions object for this code location.
+# NOTE(review): the ``output_notebook_io_manager`` resource is presumably the
+# one consumed by the dagstermill notebook asset in ``create_csv`` -- confirm.
+defs = Definitions(
+    assets=all_assets,
+    resources = {
+        "output_notebook_io_manager": ConfigurableLocalOutputNotebookIOManager()
+    }
+)
+
+
diff --git a/python/tabby-eval/tabby_data_pipeline/analyze.py b/python/tabby-eval/tabby_data_pipeline/analyze.py
@@ -0,0 +1,78 @@
+import json
+
+
+def get_bracket_lang_statement(completion):
+    end_idx = None
+    for i in range(len(completion)):
+        if completion[i] in [";", "{", "}"]:
+            end_idx = i
+            break
+    return completion[:end_idx+1] if end_idx else completion
+
+
+def postprocess_code_lines(prompt, target, language):
+    try:
+        if language in ["java", "csharp", "typescript"]:
+            return get_bracket_lang_statement(target)
+        elif language == "python":
+            return target.split("\n")[0]
+    except Exception:
+        return target
+
+
+def analyze(model, language, file):
+
+    line_match = 0
+    statement_match = 0
+
+    input_file = f"./data/{model}/{language}/{file}"
+    output_file = f"./data/{model}/{language}/result_{file}"
+
+    with open(output_file, 'w') as fout:
+        with open(input_file) as fin:
+            for line in fin:
+                obj = json.loads(line)
+                result = {}
+                prediction = ""
+
+                for k in obj.keys():
+                    if k == "prediction":
+                        prediction = str(obj[k])
+                        break
+                    elif k == "error":
+                        break
+                    else:
+                        result[k] = obj[k]
+
+                tabby_eval = {}
+                if file == "line_completion.jsonl":
+                    tabby_eval["raw_prompt"] = obj["prompt"]
+                else:
+                    tabby_eval["raw_prompt"] = obj["crossfile_context"]["text"] + obj["prompt"]
+
+                tabby_eval["prediction"] = prediction
+
+                groundtruth = obj["groundtruth"]
+
+                tabby_eval["first_line_prediction"] = prediction.split("\n")[0]
+                tabby_eval["first_line_groundtruth"] = groundtruth.split("\n")[0]
+                if tabby_eval["first_line_prediction"] == tabby_eval["first_line_groundtruth"]:
+                    tabby_eval["first_line_matched"] = True
+                    line_match += 1
+                else:
+                    tabby_eval["first_line_matched"] = False
+
+                tabby_eval["first_statement_prediction"] = postprocess_code_lines(tabby_eval["raw_prompt"], prediction, language)
+                tabby_eval["first_statement_groundtruth"] = postprocess_code_lines(tabby_eval["raw_prompt"], groundtruth, language)
+                if tabby_eval["first_statement_prediction"] == tabby_eval["first_statement_groundtruth"]:
+                    tabby_eval["first_statement_matched"] = True
+                    statement_match += 1
+                else:
+                    tabby_eval["first_statement_matched"] = False
+
+                result["tabby_eval"] = tabby_eval
+
+                json.dump(result, fout)
+                fout.write("\n")
+
+
diff --git a/python/tabby-eval/tabby_data_pipeline/assets.py b/python/tabby-eval/tabby_data_pipeline/assets.py
@@ -0,0 +1,145 @@
+import os
+import subprocess
+
+from dagster import (
+    AssetExecutionContext,
+    MetadataValue,
+    asset,
+    StaticPartitionsDefinition,
+    MultiPartitionsDefinition,
+)
+from . import analyze
+
+
+@asset
+def baseline() -> str:
+    """Return the file name of the baseline line-completion dataset."""
+    return "line_completion.jsonl"
+
+@asset
+def bm25() -> str:
+    """Return the file name of the BM25 variant of the line-completion dataset."""
+    return "line_completion_rg1_bm25.jsonl"
+
+@asset
+def oracle() -> str:
+    """Return the file name of the oracle variant of the line-completion dataset."""
+    return "line_completion_oracle_bm25.jsonl"
+
+@asset(
+    partitions_def=MultiPartitionsDefinition(
+        {
+            "model_id" : StaticPartitionsDefinition(['TabbyML/StarCoder-1B', 'TabbyML/StarCoder-3B', 'TabbyML/StarCoder-7B', 'TabbyML/WizardCoder-1B', 'TabbyML/WizardCoder-3B', 'TabbyML/CodeLlama-7B', 'TabbyML/CodeLlama-13B']),
+            "language" : StaticPartitionsDefinition(["python", "java", "csharp", "typescript"]),
+
+        }
+    ))
+def predict_baseline(context: AssetExecutionContext, baseline: str) -> None:
+    model_id = context.partition_key.keys_by_dimension["model_id"]
+    language = context.partition_key.keys_by_dimension["language"]
+
+    my_env = os.environ.copy()
+    my_env["MODEL_ID"] = model_id
+
+    context.add_output_metadata(metadata={"model_id": MetadataValue.md(model_id)})
+
+    files = baseline
+
+    p = subprocess.Popen(["modal", "run", "./modal/predict.py","--language", language, "--files", files], env=my_env)
+    p.wait()
+    context.add_output_metadata(metadata={'modal run': MetadataValue.md("success!")})
+
+@asset(
+    partitions_def=MultiPartitionsDefinition(
+        {
+            "model_id" : StaticPartitionsDefinition(['TabbyML/StarCoder-1B', 'TabbyML/StarCoder-3B', 'TabbyML/StarCoder-7B', 'TabbyML/WizardCoder-1B', 'TabbyML/WizardCoder-3B', 'TabbyML/CodeLlama-7B', 'TabbyML/CodeLlama-13B']),
+            "language" : StaticPartitionsDefinition(["python", "java", "csharp", "typescript"]),
+
+        }
+    ))
+def predict_bm25(context: AssetExecutionContext, bm25: str) -> None:
+    model_id = context.partition_key.keys_by_dimension["model_id"]
+    language = context.partition_key.keys_by_dimension["language"]
+
+    my_env = os.environ.copy()
+    my_env["MODEL_ID"] = model_id
+
+    context.add_output_metadata(metadata={"model_id": MetadataValue.md(model_id)})
+
+    files = bm25
+
+    p = subprocess.Popen(["modal", "run", "./modal/predict.py","--language", language, "--files", files], env=my_env)
+    p.wait()
+    context.add_output_metadata(metadata={'modal run': MetadataValue.md("success!")})
+
+
+@asset(
+    partitions_def=MultiPartitionsDefinition(
+        {
+            "model_id" : StaticPartitionsDefinition(['TabbyML/StarCoder-1B', 'TabbyML/StarCoder-3B', 'TabbyML/StarCoder-7B', 'TabbyML/WizardCoder-1B', 'TabbyML/WizardCoder-3B', 'TabbyML/CodeLlama-7B', 'TabbyML/CodeLlama-13B']),
+            "language" : StaticPartitionsDefinition(["python", "java", "csharp", "typescript"]),
+
+        }
+    ))
+def predict_oracle(context: AssetExecutionContext, oracle: str) -> None:
+    model_id = context.partition_key.keys_by_dimension["model_id"]
+    language = context.partition_key.keys_by_dimension["language"]
+
+    my_env = os.environ.copy()
+    my_env["MODEL_ID"] = model_id
+
+    context.add_output_metadata(metadata={"model_id": MetadataValue.md(model_id)})
+
+    files = oracle
+
+    p = subprocess.Popen(["modal", "run", "./modal/predict.py","--language", language, "--files", files], env=my_env)
+    p.wait()
+    context.add_output_metadata(metadata={'modal run': MetadataValue.md("success!")})
+
+
+
+@asset(
+    partitions_def=MultiPartitionsDefinition(
+        {
+            "model_id" : StaticPartitionsDefinition(['TabbyML/StarCoder-1B', 'TabbyML/StarCoder-3B', 'TabbyML/StarCoder-7B', 'TabbyML/WizardCoder-1B', 'TabbyML/WizardCoder-3B', 'TabbyML/CodeLlama-7B', 'TabbyML/CodeLlama-13B']),
+            "language" : StaticPartitionsDefinition(["python", "java", "csharp", "typescript"]),       
+        }
+    ), deps=[predict_baseline])
+def matching_baseline(context) -> None:
+    model_id = context.partition_key.keys_by_dimension["model_id"]
+    language = context.partition_key.keys_by_dimension["language"]
+
+
+    model = model_id.split("/")[-1]
+    analyze.analyze(model, language, 'line_completion.jsonl')
+
+
+
+@asset(
+    partitions_def=MultiPartitionsDefinition(
+        {
+            "model_id" : StaticPartitionsDefinition(['TabbyML/StarCoder-1B', 'TabbyML/StarCoder-3B', 'TabbyML/StarCoder-7B', 'TabbyML/WizardCoder-1B', 'TabbyML/WizardCoder-3B', 'TabbyML/CodeLlama-7B', 'TabbyML/CodeLlama-13B']),
+            "language" : StaticPartitionsDefinition(["python", "java", "csharp", "typescript"]),       
+        }
+    ), deps=[predict_bm25])
+def matching_bm25(context) -> None:
+    model_id = context.partition_key.keys_by_dimension["model_id"]
+    language = context.partition_key.keys_by_dimension["language"]
+
+
+    model = model_id.split("/")[-1]
+    analyze.analyze(model, language, 'line_completion_rg1_bm25.jsonl')
+
+
+
+@asset(
+    partitions_def=MultiPartitionsDefinition(
+        {
+            "model_id" : StaticPartitionsDefinition(['TabbyML/StarCoder-1B', 'TabbyML/StarCoder-3B', 'TabbyML/StarCoder-7B', 'TabbyML/WizardCoder-1B', 'TabbyML/WizardCoder-3B', 'TabbyML/CodeLlama-7B', 'TabbyML/CodeLlama-13B']),
+            "language" : StaticPartitionsDefinition(["python", "java", "csharp", "typescript"]),       
+        }
+    ), deps=[predict_oracle])
+def matching_oracle(context) -> None:
+    model_id = context.partition_key.keys_by_dimension["model_id"]
+    language = context.partition_key.keys_by_dimension["language"]
+
+
+    model = model_id.split("/")[-1]
+    analyze.analyze(model, language, 'line_completion_oracle_bm25.jsonl')
diff --git a/python/tabby-eval/tabby_data_pipeline/create_csv.py b/python/tabby-eval/tabby_data_pipeline/create_csv.py
@@ -0,0 +1,55 @@
+import json
+import pandas as pd
+
+from dagster import (
+    asset,
+    AssetIn,
+    file_relative_path
+    )
+from dagstermill import define_dagstermill_asset
+
+
+
+# Model directory names under ./data whose result files are aggregated.
+models = ["StarCoder-1B", "StarCoder-3B", "StarCoder-7B", "CodeLlama-7B", "CodeLlama-13B", "WizardCoder-1B", "WizardCoder-3B", "DeepseekCoder-1.3B", "DeepseekCoder-6.7B"]
+# Language directory name -> label written to the "Dataset" column.
+languages = {"csharp": "C#", "java": "Java", "python": "Python", "typescript": "Typescript"}
+# Prediction files; their order lines up with the last three entries of `headers`.
+files = ["line_completion.jsonl", 'line_completion_rg1_bm25.jsonl', 'line_completion_oracle_bm25.jsonl']
+# Value written to the "Records" column for each language.
+# NOTE(review): presumably the number of examples per language dataset -- confirm.
+total_records = {'python': 2665, 'java': 2139, 'typescript': 3356, 'csharp': 1768}
+
+# Column names of the generated CSV.
+headers = ['Model', 'Dataset', 'Records', 'baseline', 'bm25', 'oracle']
+
+# Rows collected by `create_csv`, one per (model, language) pair.
+stat = []
+def get_match(model, language, file):
+    count = 0
+    with open(f"./data/{model}/{language}/result_{file}") as f:
+        for line in f:
+            obj = json.loads(line)
+            if obj["tabby_eval"]["first_line_matched"]:
+                count += 1
+
+    return count
+
+@asset
+def create_csv():
+    for model in models:
+        for language in languages.keys():
+            x = [model, languages[language], total_records[language]]
+            for f in files:
+                x.append(get_match(model, language, f))
+
+            stat.append(x)
+
+    df = pd.DataFrame(stat, columns=headers)
+    print(df)
+
+    df.to_csv('./tabby_data_pipeline/tabby.csv', index=False)
+
+
+@asset(deps=[create_csv])
+def tabby_dataset():
+    return pd.read_csv(file_relative_path(__file__,'tabby.csv'))
+
+# Notebook-backed asset: runs ``tabby_eval.ipynb`` (located next to this
+# module) with the ``tabby_dataset`` asset supplied as the input ``df``.
+tabby_jupyter_notebook = define_dagstermill_asset(
+    name = 'tabby_jupyter',
+    notebook_path = file_relative_path(__file__, "tabby_eval.ipynb"),
+    ins={"df": AssetIn("tabby_dataset")},
+)