# Imports

In [1]:
import logging
import os

from dotenv import load_dotenv

from core.enums import EnvironmentKey
from cmg.evaluators import CommitMessageGenerator
from cmg import evaluator
from core import (
    few_shot_high_level_cmg_chain,
    low_level_cmg_chain,
    zero_shot_high_level_cmg_chain,
    high_level_context_chain
)
from core.models import CommitDataModel
from datapreparation import context_generator, example_generator

# Initialization

In [2]:
# Inputs: the evaluation commit set and the context data directory.
COMMIT_DATA_JSON_FILE_PATH = os.path.join("data", "cmg", "commits.evaluation.json")
CONTEXT_DATA_PATH = os.path.join("data", "context")

# Fallback output locations, used when the matching environment variable is unset.
# All evaluation artefacts share the same root directory.
_EVALUATION_OUTPUT_ROOT = os.path.join("out", "evaluation")

# By default, generated context is written to the same directory as CONTEXT_DATA_PATH.
DEFAULT_CONTEXT_GENERATION_OUTPUT_PATH = CONTEXT_DATA_PATH
DEFAULT_HIGH_LEVEL_CONTEXT_OUTPUT_PATH = os.path.join(
    _EVALUATION_OUTPUT_ROOT, "highlevelcontext"
)
DEFAULT_CMG_OUTPUT_PATH = os.path.join(_EVALUATION_OUTPUT_ROOT, "cmg")
DEFAULT_DIFF_CLASSIFICATION_OUTPUT_PATH = os.path.join(
    _EVALUATION_OUTPUT_ROOT, "diffclassification"
)

# The generators handed to the evaluator, built from (label, chain) pairs.
GENERATORS = [
    CommitMessageGenerator(label, chain)
    for label, chain in (
        ("Zero-Shot High-Level Generator", zero_shot_high_level_cmg_chain),
        ("Few-Shot High-Level Generator", few_shot_high_level_cmg_chain),
        ("Low-Level Generator", low_level_cmg_chain),
    )
]

# Load the evaluation-specific settings file; override=True lets its values replace
# variables that are already set in the process environment.
load_dotenv(
    dotenv_path=".env.evaluation",
    verbose=True,
    override=True,
)

# NOTE: configuring the root logger at DEBUG also surfaces third-party request logs
# (urllib3, openai) in the outputs of the cells below.
logging.basicConfig(level=logging.DEBUG)

In [3]:
# Resolve each output directory from the environment, falling back to the
# DEFAULT_* constants when the corresponding variable is not set.
CONTEXT_GENERATION_OUTPUT_PATH = os.environ.get(
    EnvironmentKey.CONTEXT_GENERATION_OUTPUT_PATH.value,
    DEFAULT_CONTEXT_GENERATION_OUTPUT_PATH,
)

HIGH_LEVEL_CONTEXT_OUTPUT_PATH = os.environ.get(
    EnvironmentKey.HIGH_LEVEL_CONTEXT_OUTPUT_PATH.value,
    DEFAULT_HIGH_LEVEL_CONTEXT_OUTPUT_PATH,
)

CMG_OUTPUT_PATH = os.environ.get(
    EnvironmentKey.CMG_OUTPUT_PATH.value,
    DEFAULT_CMG_OUTPUT_PATH,
)

DIFF_CLASSIFICATION_OUTPUT_PATH = os.environ.get(
    EnvironmentKey.DIFF_CLASSIFICATION_OUTPUT_PATH.value,
    DEFAULT_DIFF_CLASSIFICATION_OUTPUT_PATH,
)

In [4]:
def get_commits(path: str) -> list[CommitDataModel]:
    """Load the commit records stored in a JSON file.

    Args:
        path: Location of the UTF-8 encoded JSON file to read.

    Returns:
        The result of ``CommitDataModel.from_json`` applied to the file's
        full text (annotated as a list of ``CommitDataModel``).
    """
    with open(path, encoding="utf-8") as commits_file:
        raw_json = commits_file.read()

    return CommitDataModel.from_json(raw_json)

# Loaded once here and reused by every evaluation cell below.
COMMITS = get_commits(path=COMMIT_DATA_JSON_FILE_PATH)

# Classify Diff

In [9]:
# Run diff classification over the evaluation commits with the zero-shot
# high-level chain, passing the configured classification output path.
evaluator.classify_diffs(
    zero_shot_high_level_cmg_chain,
    COMMITS,
    DIFF_CLASSIFICATION_OUTPUT_PATH,
)

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': 'Classify the Git diff into one of the following six software maintenance activities: feat, fix, perf, test, refactor, or chore. Return the activity that best matches the code changes. Refer to the definitions below for each activity.\n\nfeat: introducing new features into the system.\nfix: fixing existing bugs or issues in the system.\nperf: improving the performance of the system.\ntest: adding, modifying, or deleting test cases.\nrefactor: changes made to the internal structure of software to make it easier to understand and cheaper to modify without changing its observable behavior, including code styling.\nchore: regular maintenance tasks, such as updating dependencies or build tasks.\n\nAvoid adding any additional comments or annotations to the classification.\n\n> Git diff: diff --git a/clients/src/main/java/org/apache/kafka/common/secur

# Generate Context

In [5]:
# Generate context for the evaluation commits, passing the configured
# context-generation output path.
context_generator.generate_context(
    COMMITS,
    CONTEXT_GENERATION_OUTPUT_PATH,
)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): issues.apache.org:443
DEBUG:urllib3.connectionpool:https://issues.apache.org:443 "GET /jira/rest/api/2/serverInfo HTTP/11" 200 229
DEBUG:urllib3.connectionpool:https://issues.apache.org:443 "GET /jira/rest/api/2/issue/DRILL-8400 HTTP/11" 200 None
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): issues.apache.org:443
DEBUG:urllib3.connectionpool:https://issues.apache.org:443 "GET /jira/rest/api/2/serverInfo HTTP/11" 200 None
DEBUG:urllib3.connectionpool:https://issues.apache.org:443 "GET /jira/rest/api/2/issue/ZOOKEEPER-3160 HTTP/11" 200 None
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): issues.apache.org:443
DEBUG:urllib3.connectionpool:https://issues.apache.org:443 "GET /jira/rest/api/2/serverInfo HTTP/11" 200 None
DEBUG:urllib3.connectionpool:https://issues.apache.org:443 "GET /jira/rest/api/2/issue/OPENNLP-1620 HTTP/11" 200 None


# Get High-Level Context

In [6]:
# Produce high-level contexts for the evaluation commits using the context
# data directory as input and the configured high-level context output path.
evaluator.get_high_level_contexts(
    high_level_context_chain,
    COMMITS,
    CONTEXT_DATA_PATH,
    HIGH_LEVEL_CONTEXT_OUTPUT_PATH,
)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.smith.langchain.com:443
DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': 'Given a Git diff and the relevant source code, write a concise summary of the code changes in a way that a non-technical person can understand. The query text must summarize the code changes in two very brief sentences.\n\nGit diff:\ndiff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveStoragePlugin.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveStoragePlugin.java\nindex c021ebca1..1ce138c0e 100644\n--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveStoragePlugin.java\n+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveStoragePlugin.java\n@@ -192,7 +192,7 @@ public class HiveStoragePlugin extends AbstractStoragePlugin

DEBUG:urllib3.connectionpool:https://api.smith.langchain.com:443 "POST /runs/multipart HTTP/11" 202 33


# Generate Commit Message

In [5]:
# Run every configured generator over the evaluation commits, passing the
# context data directory and the configured commit-message output path.
evaluator.evaluate(
    GENERATORS,
    COMMITS,
    CONTEXT_DATA_PATH,
    CMG_OUTPUT_PATH,
)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.smith.langchain.com:443
DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': 'Classify the Git diff into one of the following six software maintenance activities: feat, fix, perf, test, refactor, or chore. Return the activity that best matches the code changes. Refer to the definitions below for each activity.\n\nfeat: introducing new features into the system.\nfix: fixing existing bugs or issues in the system.\nperf: improving the performance of the system.\ntest: adding, modifying, or deleting test cases.\nrefactor: changes made to the internal structure of software to make it easier to understand and cheaper to modify without changing its observable behavior, including code styling.\nchore: regular maintenance tasks, such as updating dependencies or build tasks.\n\nAvoid adding any additional comments or annotations to the cla

DEBUG:urllib3.connectionpool:https://api.smith.langchain.com:443 "POST /runs/multipart HTTP/11" 202 33
