In [67]:
import os
from pathlib import Path


def generate_github_actions_workflow(dockerfiles: list[Path], language_model_id: str):
    """Build a GitHub Actions workflow that builds and pushes one image per Dockerfile.

    Args:
        dockerfiles: Paths of the Dockerfiles to build. Each path must live below
            the current working directory, and the name of its fourth ancestor
            directory is used as the commit hash
            (``.../<commit_hash>/<a>/<b>/<c>/Dockerfile``). Duplicates are ignored.
        language_model_id: Identifier embedded in the push path filter and in
            every image tag.

    Returns:
        The workflow as a nested ``dict`` (key order is the intended YAML order),
        or ``None`` when ``dockerfiles`` is empty or ``None``.

    Raises:
        ValueError: If a Dockerfile is not located below the current working
            directory (raised by ``Path.relative_to``).
    """
    if not dockerfiles:
        return None

    # Sort the de-duplicated paths: iterating a bare set yields a different step
    # order on every run, which would churn the generated YAML for no reason.
    unique_dockerfiles = sorted(set(dockerfiles))

    # Workflow paths must be relative to the repository root, which is assumed
    # to be the current working directory.
    project_root = os.path.abspath("")

    steps = [
        {'name': 'Checkout code', 'uses': 'actions/checkout@v4', "with": {"sparse-checkout": "dataset"}},
        {'name': 'Set up Docker Buildx', 'uses': 'docker/setup-buildx-action@v2'},
        {
            'name': 'Login to GitHub Container Registry',
            'uses': 'docker/login-action@v1',
            'with': {
                'registry': 'ghcr.io',
                'username': '${{ github.actor }}',
                'password': '${{ secrets.GITHUB_TOKEN }}'
            }
        }
    ]

    for dockerfile in unique_dockerfiles:
        commit_hash = dockerfile.parent.parent.parent.parent.name
        # The quadruple braces render as a literal "${{ ... }}" Actions expression.
        image_name = f"ghcr.io/${{{{ github.repository_owner }}}}/{language_model_id}-{commit_hash}-reproduction"

        steps.append({
            'name': f'Build and push {commit_hash}',
            'uses': 'docker/build-push-action@v2',
            'with': {
                'context': str(dockerfile.parent.relative_to(project_root)),
                'file': str(dockerfile.relative_to(project_root)),
                'push': True,
                'tags': image_name
            }
        })

    return {
        'name': 'Build and Publish Docker Images',
        # Path filters are matched against changed *file* paths, so the trailing
        # "/**" is required to trigger on anything inside the model directory.
        'on': {'push': {'paths': [f'dataset/*/out/reproduction/{language_model_id}/**']}},
        'jobs': {
            'build-and-push': {
                'runs-on': 'ubuntu-latest',
                'steps': steps
            }
        }
    }

In [68]:
from collections import defaultdict
import json
import os
from pathlib import Path
import shutil

import yaml


from masterthesis.agent.GitAgent import GitAgent
from masterthesis.agent.aider.AdvancedDiffAgent import UnifiedDiffCoder
import subprocess


# for trial_no in range(0, 12):


# hash = "14fc5fa696f499cac48401b3a86882b3bf7d9b82"

# For every "<language_model_id>-<trial_no>-execution-errors.json" result file in
# the dataset, regenerate the reproduction artefacts (patch.txt, the patched file,
# a Dockerfile and replicate.sh) of each fix that compiled without error, then
# rewrite the GitHub Actions workflow of the corresponding language model.
data_path = Path(os.path.abspath("")) / "dataset"

dockerfile_paths_by_language_model = defaultdict(list)
for errors_path in data_path.rglob("out/*-execution-errors.json"):
    experiment_number = errors_path.parts[-1].split("-execution-errors.json")[0]
    language_model_id = "-".join(experiment_number.split("-")[:-1])
    trial_no = experiment_number.split("-")[-1]
    # The directory that contains "out/" is treated as the commit hash.
    commit_hash_path = errors_path.parts[-3]

    with open(errors_path, "r") as f:
        data = json.load(f)

    # Number of reproductions successfully written for this result file.
    count = 0

    # A result file is either keyed by commit hash or is the bare result item;
    # normalise to the keyed form.
    if commit_hash_path not in data.keys():
        data = {commit_hash_path: data}

    for commit_hash, result_item in data.items():
        if commit_hash != commit_hash_path:
            continue

        compiled_without_error = (
            result_item["error"] is None and "compiled" in result_item and result_item["compiled"]
        )
        if not compiled_without_error:
            continue

        count += 1
        print(commit_hash)

        commit_dir = data_path / commit_hash
        reproduction_path = commit_dir / "out" / "reproduction" / language_model_id
        # Only wipe this model's directory: removing the shared "reproduction"
        # parent would delete other models' Dockerfiles that their workflows
        # still reference.
        if reproduction_path.exists():
            shutil.rmtree(reproduction_path)
        os.makedirs(reproduction_path, exist_ok=True)

        repo_path = commit_dir / "repo"
        with open(commit_dir / "repo_slug.txt", "r") as f:
            repo_slug = f.read().strip()
        # Slug is expected to look like "<owner>/<project>".
        project = repo_slug.split("/")[1]

        # NOTE(review): presumably resets the checkout to a clean state before
        # the patch is applied -- confirm against GitAgent.
        git_agent = GitAgent(repo_path, commit_hash, repo_slug)
        git_agent.discard_changes()

        diffAgent = UnifiedDiffCoder(repo_path)

        # Start from None so a result without "output"/"raw_output" is reported
        # as "No patch" instead of reusing the previous iteration's patch (or
        # raising NameError on the first one).
        patch = None
        if "output" in result_item and result_item["output"] is not None:
            print(result_item["output"])
            assert len(result_item["output"]) < 3
            if result_item["output"]:
                patch = result_item["output"][0]
        elif "raw_output" in result_item:
            patch = result_item["raw_output"]

        if not patch:
            print("No patch")
            count -= 1
            shutil.rmtree(reproduction_path)
            continue

        # The patch may be wrapped in up to two levels of list nesting.
        if isinstance(patch, list):
            patch = patch[0]
            if isinstance(patch, list):
                patch = patch[0]
        print(patch)
        assert isinstance(patch, str)

        paths = diffAgent.get_paths(patch)
        # Only single-file patches are supported.
        assert len(paths) == 1
        patched_file = paths[0]

        with open(reproduction_path / "patch.txt", "w") as f:
            f.write(patch)

        # Apply the patch before opening the target so a failure never leaves
        # an open handle on a file inside the directory being removed.
        success, content = diffAgent.apply_edits(patch)
        if not success:
            print("Failed to apply patch")
            count -= 1
            shutil.rmtree(reproduction_path)
            continue

        os.makedirs(reproduction_path / Path(patched_file).parent, exist_ok=True)
        with open(reproduction_path / patched_file, "w") as f:
            f.write(content)

        docker_file = f"""FROM ghcr.io/chains-project/breaking-updates:{commit_hash}-breaking
COPY {patched_file} /{project}/{patched_file}"""

        dockerfile_path: Path = reproduction_path / "Dockerfile"
        dockerfile_paths_by_language_model[language_model_id].append(dockerfile_path)
        with open(dockerfile_path, "w") as f:
            f.write(docker_file)

        replicate_script = f"""#!/bin/bash
docker build -t {commit_hash}-reproduction {reproduction_path}
docker run ghcr.io/chains-project/breaking-updates:{commit_hash}-pre > pre.txt
docker run {commit_hash}-reproduction > post.txt
"""
        with open(reproduction_path / "replicate.sh", "w") as f:
            f.write(replicate_script)
        os.chmod(reproduction_path / "replicate.sh", 0o755)

    # A later failed trial can remove a directory whose Dockerfile was recorded
    # by an earlier trial; only Dockerfiles still on disk go into the workflow.
    existing_dockerfiles = [
        dockerfile
        for dockerfile in dockerfile_paths_by_language_model[language_model_id]
        if dockerfile.exists()
    ]
    workflow = generate_github_actions_workflow(existing_dockerfiles, language_model_id)
    if workflow is None:
        continue
    with open(f".github/workflows/build-images-{language_model_id}.yml", "w") as f:
        yaml.dump(workflow, f, sort_keys=False)
    # Output the count
    print(f"TrialNumber {trial_no} with count:", count)




6c53cd904bd66fc79af8687571e607c259226b81
['```diff\n--- extensions/struts2/test/com/google/inject/struts2/Struts2FactoryTest.java\n+++ extensions/struts2/test/com/google/inject/struts2/Struts2FactoryTest.java\n@@ -5,7 +5,7 @@\n package com.google.inject.struts2;\n \n-import org.apache.struts2.dispatcher.ng.filter.StrutsPrepareAndExecuteFilter;\n+import org.apache.struts2.dispatcher.filter.StrutsPrepareAndExecuteFilter;\n import com.google.inject.AbstractModule;\n import com.google.inject.Guice;\n import com.google.inject.Injector;\n```']
```diff
--- extensions/struts2/test/com/google/inject/struts2/Struts2FactoryTest.java
+++ extensions/struts2/test/com/google/inject/struts2/Struts2FactoryTest.java
@@ -5,7 +5,7 @@
 package com.google.inject.struts2;
 
-import org.apache.struts2.dispatcher.ng.filter.StrutsPrepareAndExecuteFilter;
+import org.apache.struts2.dispatcher.filter.StrutsPrepareAndExecuteFilter;
 import com.google.inject.AbstractModule;
 import com.google.inject.Guice;
 import 