In [1]:
import json

In [2]:
def txt_to_dzn(path, sl=None):
    """Convert a JSON instance file into a MiniZinc ``.dzn`` data file.

    The instance at ``path`` is parsed as JSON and must provide the keys
    ``"alphabet"``, ``"num_strings"``, ``"str_length"`` and ``"strings"``.
    The data file is written next to the input, with the input's extension
    replaced by ``.dzn``.

    Parameters
    ----------
    path : str
        Path of the JSON instance file (the notebook uses ``.txt`` files).
    sl : int, optional
        Value written as ``sl``. Defaults to the largest entry of
        ``data["str_length"]`` when omitted.

    Raises
    ------
    OSError, json.JSONDecodeError, KeyError, ValueError
        Propagated from reading/parsing the instance or from a missing key /
        empty ``str_length`` list.
    """
    # Local import: the notebook only imports ``os`` in a later cell.
    import os

    # splitext strips only the final extension, so dots in directory names
    # (or a leading "./") no longer truncate the output path.
    name, _ = os.path.splitext(path)
    with open(path) as f:
        data = json.load(f)

    # MiniZinc set literal; join() also yields a valid "{}" for an empty alphabet.
    symbols = ",".join(str(char) for char in data["alphabet"])
    alphabet = f"Alphabet = {{{symbols}}};\n"

    num_strings = data["num_strings"]
    ns = f"ns = {num_strings};\n"

    max_length = max(data["str_length"])
    nc = f"nc = {max_length};\n"

    sol_length = max_length if sl is None else sl
    sl_line = f"sl = {sol_length};\n"

    # One row per string: every character wrapped as I(<char>), padded with
    # Null up to max_length so rows have equal width.
    rows = []
    for s in data["strings"]:
        cells = [f"I({char})" for char in s]
        cells += ["Null"] * (max_length - len(s))  # no-op when len(s) >= max_length
        rows.append(",".join(cells))
    neighbours = "neighbours = [| " + " | ".join(rows) + " |];\n"

    with open(f'{name}.dzn', 'w') as f_new:
        f_new.writelines([alphabet, ns, nc, sl_line, neighbours])

In [3]:
import os
def all_txt_to_dzn(path_to_instances, sl=None):
    """Run ``txt_to_dzn`` on every ``.txt`` file found under a directory tree.

    Parameters
    ----------
    path_to_instances : str
        Root directory that is walked recursively.
    sl : int, optional
        Forwarded unchanged to ``txt_to_dzn`` for each file.
    """
    for directory, _subdirs, filenames in os.walk(path_to_instances):
        # Only the .txt instance files are converted; everything else is skipped.
        txt_names = (fname for fname in filenames if fname.endswith(".txt"))
        for txt_name in txt_names:
            txt_to_dzn(os.path.join(directory, txt_name), sl=sl)

In [4]:
import subprocess
import time
import pandas as pd
import os

In [5]:
def test(path_to_instances, timelimit = 60*1000, solvers = ["Gecode", "Chuffed"]):

    results = pd.DataFrame(columns=["file", "best", "score", "time", "solver", "max_str_length", "n_str"])
    for solver in solvers:
        for root, dirs, files in os.walk(path_to_instances):
            for file in sorted(files):
                filename = file.split(".")[0]
                if file.endswith(".dzn"):
                    print(root + "/" +  filename)

                    with open(root + "/" +  filename + ".txt") as f:
                        data = json.load(f)

                    t0 = time.time()
                    out = subprocess.check_output([
                        "minizinc",
                        "Minimum_editing_distance.mzn",
                        root + "/" +  file,
                        "--all-solutions",
                        "--time-limit", str(timelimit),
                        "--solver", solver,
                        "--random-seed", "0"
                    ])

                    t1 = time.time()

                    parsed_out = str(out).split("Results: {")[-1].split("}")[0].split(" ") # get last solution
                    best = parsed_out[1]
                    score = int(parsed_out[3])
                    print(f"Found {best} with score {score} in {t1-t0:.2f}s")

                    results = results.append({"file": filename,
                                              "best": best,
                                              "score": score,
                                              "time": t1-t0,
                                              "solver": solver,
                                              "max_str_length": max(data["str_length"]),
                                              "n_str": data["num_strings"]
                                             }, ignore_index=True) 
                    
    return results

In [None]:
# Benchmark 1: instances stored under "benchmark_str_length".
path_to_instances = "benchmark_str_length"
# (Re)generate the .dzn data files, writing sl = 10 into each of them.
all_txt_to_dzn(path_to_instances, sl=10)
# Run every solver on every instance; this calls minizinc once per instance
# and solver, so the cell can take a long time.
results1 = test(path_to_instances)
# Persist the raw measurements so the plots can be rebuilt without re-solving.
results1.to_csv("benchmark_str_length.csv")

benchmark_str_length/p1_10_10-0
Found AAAAAAACA with score 42 in 1.58s
benchmark_str_length/p1_10_10-1
Found AAACACAACA with score 50 in 2.10s
benchmark_str_length/p1_10_10-2
Found AAAACAAAAA with score 40 in 1.40s
benchmark_str_length/p1_10_10-3
Found ACAAACAAA with score 38 in 1.39s
benchmark_str_length/p1_10_10-4
Found CAAAAAAA with score 36 in 1.54s
benchmark_str_length/p1_10_11-0
Found ACAAAAAACA with score 44 in 1.51s
benchmark_str_length/p1_10_11-1
Found ACAAAAACA with score 46 in 1.74s
benchmark_str_length/p1_10_11-2
Found ACAAAACAC with score 54 in 2.30s
benchmark_str_length/p1_10_11-3
Found AAAAACAAAA with score 42 in 1.62s
benchmark_str_length/p1_10_11-4
Found CAAACAAACA with score 48 in 1.85s
benchmark_str_length/p1_10_12-0
Found AACAAACAAA with score 44 in 1.53s
benchmark_str_length/p1_10_12-1
Found CAAAAACAAA with score 42 in 1.41s
benchmark_str_length/p1_10_12-2
Found AAAAACAAAA with score 42 in 1.41s
benchmark_str_length/p1_10_12-3
Found AACAAACAAA with score 40 in 1.48

Found  with score 14 in 0.47s
benchmark_str_length/p2_10_4-0
Found AT with score 20 in 0.96s
benchmark_str_length/p2_10_4-1
Found A with score 18 in 1.07s
benchmark_str_length/p2_10_4-2
Found GA with score 25 in 2.14s
benchmark_str_length/p2_10_4-3
Found A with score 23 in 6.09s
benchmark_str_length/p2_10_4-4
Found CG with score 24 in 0.91s
benchmark_str_length/p2_10_5-0
Found GAC with score 21 in 0.95s
benchmark_str_length/p2_10_5-1
Found GC with score 27 in 2.29s
benchmark_str_length/p2_10_5-2
Found A with score 21 in 1.05s
benchmark_str_length/p2_10_5-3
Found A with score 18 in 0.67s
benchmark_str_length/p2_10_5-4
Found TAC with score 27 in 1.53s
benchmark_str_length/p2_10_6-0
Found TC with score 30 in 6.49s
benchmark_str_length/p2_10_6-1
Found CTA with score 26 in 0.94s
benchmark_str_length/p2_10_6-2
Found TCG with score 33 in 10.17s
benchmark_str_length/p2_10_6-3
Found AG with score 26 in 1.34s
benchmark_str_length/p2_10_6-4
Found TA with score 31 in 2.41s
benchmark_str_length/p2_

In [None]:
# Display the string-length benchmark table as the cell output.
results1

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Mean solve time per max string length for the "p1" instances, one line per solver.
# The mask is built from results1 itself; the original referenced the undefined
# global `results`.
p1 = results1[results1.file.str.contains("p1")]

# Average only the "time" column: the y-axis is time, and averaging the whole
# frame fails on the string columns with pandas >= 2.
gc = p1[p1.solver == "Gecode"].groupby("max_str_length")["time"].mean()
ch = p1[p1.solver == "Chuffed"].groupby("max_str_length")["time"].mean()

plt.plot(gc, label="Gecode")
plt.plot(ch, label="Chuffed")
plt.suptitle("Same string length")
plt.xlabel("max string length")
plt.ylabel("time (s)")
plt.legend()
plt.savefig("str_length_p1.pdf")

In [None]:
# Mean solve time per max string length for the "p2" instances, one line per solver.
# The mask is built from results1 itself; the original referenced the undefined
# global `results`.
p2 = results1[results1.file.str.contains("p2")]

# Average only the "time" column: the y-axis is time, and averaging the whole
# frame fails on the string columns with pandas >= 2.
gc = p2[p2.solver == "Gecode"].groupby("max_str_length")["time"].mean()
ch = p2[p2.solver == "Chuffed"].groupby("max_str_length")["time"].mean()

plt.plot(gc, label="Gecode")
plt.plot(ch, label="Chuffed")
plt.suptitle("Mixed string length")
plt.xlabel("max string length")
plt.ylabel("time (s)")
plt.legend()
plt.savefig("str_length_p2.pdf")

In [None]:
# Benchmark 2: instances stored under "benchmark_num_str".
path_to_instances = "benchmark_num_str"
# `sl` is a parameter of the .dzn conversion, not of test(); passing it to
# test() raised a TypeError.
# NOTE(review): sl=10 was moved here from the test() call -- confirm that a
# fixed value of 10 is intended for this benchmark.
all_txt_to_dzn(path_to_instances, sl=10)
results2 = test(path_to_instances)
# Persist the raw measurements so the plots can be rebuilt without re-solving.
results2.to_csv("benchmark_num_str.csv")

In [None]:
# Mean solve time per number of strings for the "p1" instances, one line per solver.
# The mask is built from results2 itself; the original referenced the undefined
# global `results`.
p1 = results2[results2.file.str.contains("p1")]

# Average only the "time" column: the y-axis is time, and averaging the whole
# frame fails on the string columns with pandas >= 2.
gc = p1[p1.solver == "Gecode"].groupby("n_str")["time"].mean()
ch = p1[p1.solver == "Chuffed"].groupby("n_str")["time"].mean()

plt.plot(gc, label="Gecode")
plt.plot(ch, label="Chuffed")
plt.suptitle("Same string length")
plt.xlabel("Number of strings")  # fixed typo ("Numer") in the axis label
plt.ylabel("time (s)")
plt.legend()
plt.savefig("num_str.pdf")

In [None]:
# Sweep sl from 1 to 20 on a single instance and record the score and the
# wall-clock time of each Gecode run.
problem = "instances/p2/p2_15_20-0"
timelimit = 60*1000  # passed to --time-limit (MiniZinc expects milliseconds)

# Rows are collected in a list and converted once: the original appended to the
# undefined name `results` (leaving results3 empty) via DataFrame.append, which
# was removed in pandas 2.0.
sweep_records = []
for sol_length in range(1,21):
    # Regenerate the .dzn file with the current sl value.
    txt_to_dzn(problem + ".txt", sl=sol_length)

    t0 = time.time()
    out = subprocess.check_output([
                            "minizinc",
                            "Minimum_editing_distance.mzn",
                            problem + ".dzn",
                            "--all-solutions",
                            "--time-limit", str(timelimit),
                            "--solver", "Gecode",
                            "--random-seed", "0"
                        ])

    t1 = time.time()
    parsed_out = str(out).split("Results: {")[-1].split("}")[0].split(" ") # get last solution
    best = parsed_out[1]
    score = int(parsed_out[3])
    print(f"Solution length: {sol_length}. Found {best} with score {score} in {t1-t0:.2f}s")
    sweep_records.append({"sol_length": sol_length, "best": best, "score": score, "time": t1-t0})

results3 = pd.DataFrame(sweep_records, columns=["sol_length", "best", "score", "time"])

In [None]:
# Score (left axis) and solve time (right axis) against sl, both read from
# results3, the frame defined for the solution-length sweep.
fig, ax1 = plt.subplots()

color = 'tab:blue'
ax1.set_xlabel("Max solution length")
ax1.set_ylabel("score", color=color)
# Read from results3 like the time axis below; the original used the undefined
# global `results` here.
ax1.plot(results3.sol_length, results3.score, color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:orange'
ax2.set_ylabel("time (s)", color=color)  # we already handled the x-label with ax1
ax2.plot(results3.sol_length, results3.time, color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.savefig("score_vs_time.pdf")