In [None]:
import os
import sys
# Put the parent of the current working directory at the front of sys.path.
# Presumably this lets the project-local imports below (`utils`,
# `create_dataset`) resolve when the notebook is run from a subdirectory of
# the repository -- confirm against the repo layout.
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.abspath(""))))

import json
import utils as common_utils
import random
import logging
import create_dataset.utils as utils
import create_dataset.break_code.v1 as break_code_v1
import create_dataset.break_code.v2 as break_code_v2
from dotenv import load_dotenv
from pathlib import Path

In [None]:
# Load variables from a .env file (if python-dotenv finds one) into the
# process environment; NIXPKGS is read with os.getenv() afterwards.
load_dotenv()

In [None]:
# Locate the local Nixpkgs checkout from the NIXPKGS environment variable.
# Explicit raises are used instead of `assert` so that the validation still
# runs when Python is started with -O (which strips assert statements).
nixpkgs = os.getenv("NIXPKGS")
if not nixpkgs:
    raise RuntimeError("You need to define the environment variable NIXPKGS as the file path to a checkout of Nixpkgs")
nixpkgs = Path(nixpkgs)
if not nixpkgs.is_dir():
    raise NotADirectoryError(f"NIXPKGS does not point to an existing directory: {nixpkgs}")
print(f"Using Nixpkgs checkout at {nixpkgs}")

In [None]:
# Handle to the dataset store; "dataset.db" is passed as a relative path.
# It is used further down for the package lookup and to save the final record.
db = utils.DB("dataset.db")

In [None]:
# Reset the Nixpkgs checkout before doing anything else. Presumably this
# discards modifications left behind by a previous run (a later cell writes
# broken code into this checkout) -- confirm in create_dataset.utils.
utils.reset_nixpkgs_git(nixpkgs)

In [None]:
# Revision of the Nixpkgs checkout as a string (presumably the git commit
# hash -- confirm in create_dataset.utils).
nixpkgs_rev_str = utils.read_nixpkgs_rev(nixpkgs)

In [None]:
# Binary form of the revision string; this is the value handed to the package
# lookup and stored with the dataset record at the end of the notebook.
nixpkgs_rev_bin = utils.nixpkgs_rev_str_to_bin(nixpkgs_rev_str)

In [None]:
# Mapping from package attribute to that package's info for this revision.
# NOTE(review): the `_cached` suffix and the `db` argument suggest the result
# is cached in the database per revision -- confirm in create_dataset.utils.
packages = utils.get_all_pkgs_cached(db, nixpkgs_rev_bin, nixpkgs)

Metadata for every package:
- Package name
- Package attribute path
- File path
- Nixpkgs revision
- Platform
- Code before
- Code after
- Error message

TODO:
- Think about garbage collection
- Parse code to allow more intelligently modifying it
- Post-Process the data:
    - Reduce the number of examples with syntax errors
    - If a single file is being edited many times, perhaps the file path is not accurate and we should ignore these examples

In [None]:
# Sanity check: report how many packages were found.
print(f"There are {len(packages)} packages")

### Find a random package in Nixpkgs

In [None]:
# random.choice() needs an indexable sequence, so materialize the package
# attribute names (iterating the mapping yields its keys) into a tuple first.
package_list = tuple(packages)
random_package_attr = random.choice(package_list)
print(f"Random package: {random_package_attr}")

In [None]:
# Build the metadata for the chosen package from the checkout, its attribute
# path and its entry in `packages`.
package_metadata = utils.extract_metadata(nixpkgs, random_package_attr, packages[random_package_attr])

In [None]:
# Show the metadata (a notebook displays the value of a cell's last expression).
package_metadata

In [None]:
# Baseline build: the untouched package has to compile, otherwise a failure
# after breaking the code could not be attributed to our modification.
build_result, compile_time_ns = utils.nix_build(nixpkgs, package_metadata)

# TODO: pick another package instead of aborting
if build_result.returncode != 0:
    raise Exception("The package does not currently compile!")

In [None]:
# Keep the original, unmodified Nix code of the package; it is stored next to
# the broken version in the dataset record.
nix_code = utils.read_nix_code(nixpkgs, package_metadata)
#print(nix_code)

In [None]:
# Version of the code-breaking algorithm to apply; the number is also stored
# with the dataset record.
code_breaking_algorithm = 2

# Produce a broken variant of the original Nix code with the selected version.
if code_breaking_algorithm == 1:
    broken_nix_code = break_code_v1.execute(nix_code)
elif code_breaking_algorithm == 2:
    broken_nix_code = break_code_v2.execute(nix_code)
else:
    raise Exception(f"Invalid code_breaking_algorithm: {code_breaking_algorithm}")


In [None]:
# Write the broken code into the checkout in place of the package's original
# code so that the next build uses it. The checkout stays modified afterwards.
utils.write_nix_code(nixpkgs, package_metadata, broken_nix_code)

In [None]:
# Rebuild with the broken code in place and capture the resulting error.
# Note that this overwrites build_result and compile_time_ns from the
# baseline build; the values from this run are the ones saved to the dataset.
build_result, compile_time_ns = utils.nix_build(nixpkgs, package_metadata)

# A successful build means the modification did not break anything, so there
# is no error message to record.
if build_result.returncode == 0:
    raise Exception("We did not manage to break the code")

print("The nix-build command failed, let's see what the error was.")
error_message = common_utils.parse_error_from_nix_output(build_result.stderr)
print(error_message)

In [None]:
#import importlib
#importlib.reload(utils)

In [None]:
# Disabled debugging helper, kept as a bare string literal so it does not run.
# It would decode each line of the build's stderr as JSON and print the
# entries whose "action" is "msg".
# NOTE(review): if this is re-enabled, `a.lstrip("@nix ")` strips any leading
# run of the characters "@", "n", "i", "x" and " " rather than the literal
# prefix; `a.removeprefix("@nix ")` (Python 3.9+) removes only the prefix.
"""
for a in build_result.stderr.splitlines():
    a = a.lstrip("@nix ")
    b = json.loads(a)
    if b["action"] != "msg":
        continue
    print(b)
    print(b["msg"])
    print(b["raw_msg"])
"""

In [None]:
# Assemble the dataset record (field order matters: it is passed positionally
# as one tuple) and persist it.
results = (
    package_metadata,
    nixpkgs_rev_bin,
    code_breaking_algorithm,
    nix_code,
    broken_nix_code,
    build_result.stderr,
    error_message,
    compile_time_ns,
)
db.save_to_dataset(results)