In [None]:
import matplotlib as mpl
from matplotlib import pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
import seaborn as sns

import multiprocessing
import os
from pathlib import Path

# Every figure written by this notebook goes under ./figures; exist_ok keeps
# "Restart & Run All" from failing when the directory is already there.
os.makedirs("figures", exist_ok=True)

In [None]:
# Shared Matplotlib styling
fontweight = 500
# plt.rcParams and mpl.rcParams are the same global settings object, so both
# keys are set in a single update.
mpl.rcParams.update({"font.size": 14, "font.weight": fontweight})

# Legend appearance shared by all figures (read by add_legend)
legend_face_color = "#B5B5B5"
legend_framealpha = 0.7
legend_edge_color = "black"
legend_padding = 0.5

# NOTE(review): not referenced by any executable code visible in this notebook
# (only by a commented-out condition in the plotting loop).
dimensions_to_plot = [2, 3, 5, 9]

# Original colors (length=100)
# Kept for reference under their own name: previously this list was assigned
# to `dimension_colors` and immediately overwritten, i.e. a dead assignment.
legacy_dimension_colors = [
    "#4d9221",
    "#2b8cbe",
    "#FFFF00",
    "#FFA500",
    "#d73027",
    "#FF69B4",
    "#b2abd2",
    "#222222",
    "#800080",
]
# New colors: seaborn's "colorblind" palette as hex strings. The plotting loop
# indexes this list with `dimension - 2`.
dimension_colors = list(sns.color_palette("colorblind").as_hex())


def add_legend(ax=None, loc="upper right"):
    """Attach the notebook's styled legend to an axes.

    The legend is anchored a fixed 10 px away from the axes edges named in
    ``loc``; the pixel margin is converted to axes-fraction coordinates using
    the axes size at call time.

    Args:
        ax: Axes to decorate. Defaults to the current axes (``plt.gca()``).
        loc: A Matplotlib legend location string. The extra value
            ``"outside"`` places the legend just right of the axes,
            vertically centred.
    """
    axes = plt.gca() if ax is None else ax

    margin_px = 10
    width_px, height_px = np.array([axes.bbox.width, axes.bbox.height])
    offset_x = margin_px / width_px
    offset_y = margin_px / height_px

    if loc == "outside":
        # Anchor the legend's centre-left point just beyond the right edge.
        loc = "center left"
        anchor = (1 + offset_x, 0.5)
    else:
        # Hug the left/lower edge when requested, otherwise the right/upper one.
        anchor = (
            offset_x if "left" in loc else 1 - offset_x,
            offset_y if "lower" in loc else 1 - offset_y,
        )

    figure = axes.get_figure()
    legend = axes.legend(
        loc=loc,
        bbox_to_anchor=anchor,
        facecolor=legend_face_color,
        ncols=1,
        framealpha=legend_framealpha,
        edgecolor=legend_edge_color,
        borderpad=legend_padding,
    )
    # Request a redraw before touching the frame style (original note:
    # "make sure the style can be copied").
    figure.canvas.draw_idle()
    frame_style = legend.get_frame().get_boxstyle()
    frame_style.pad = legend_padding

In [None]:
# Raw measurements; the cells below read the "lattice", "dimension", "length"
# and "samples_required" columns.
df = pd.read_csv("required_samples.csv")

# Column names used as x and y for the fit and the scatter plot
col_x, col_y = "length", "samples_required"

# Per-dimension fit results, stored column-wise:
# d = dimension, m = slope, b = intercept, r2 = coefficient of determination
fitting_params = {"d": [], "m": [], "b": [], "r2": []}


def r2_score(y, y_hat):
    """Return the coefficient of determination R^2 of ``y_hat`` w.r.t. ``y``.

    R^2 = 1 - RSS / TSS, with RSS the residual sum of squares and TSS the
    total sum of squares of ``y`` around its mean. Values are paired by
    position.

    Args:
        y: Observed values (any 1-D array-like of numbers).
        y_hat: Predicted values, same length as ``y``.

    Returns:
        ``1.0`` for a perfect prediction (including empty input), ``0.0`` when
        ``y`` is constant but the prediction is not perfect (R^2 is undefined
        there; this follows scikit-learn's finite-value convention), and
        ``1 - RSS / TSS`` otherwise.
    """
    y = np.asarray(y, dtype=float)
    y_hat = np.asarray(y_hat, dtype=float)

    rss = np.sum((y - y_hat) ** 2)
    # A perfect fit means *squared* residuals vanish. The plain residual sum
    # is not a valid test: residuals of opposite sign cancel (and do so for
    # any least-squares line with an intercept), which would report R^2 = 1
    # for an arbitrarily bad fit.
    if rss == 0:
        return 1.0

    tss = np.sum((y - y.mean()) ** 2)
    if tss == 0:
        # Constant observations: avoid dividing by zero.
        return 0.0
    return 1 - rss / tss


def fit(df, c="k", ax=None, plot=False):
    """Fit an exponential ``samples = exp(m * length + b)`` via a log-linear fit.

    A straight line is fitted to ``log(df[col_y])`` against ``df[col_x]``
    (both column names are module-level settings). The fitted log-values are
    clamped at 0, i.e. the fitted curve never drops below exp(0) = 1, and R^2
    is computed in log space against that clamped fit.

    Args:
        df: Frame holding the ``col_x`` and ``col_y`` columns.
        c: Matplotlib color of the fitted curve.
        ax: Axes to draw on when ``plot`` is true. Defaults to the current
            axes, looked up at call time. (The previous default
            ``ax=plt.gca()`` was evaluated once at definition time, creating
            a figure as a side effect, and the argument was then ignored.)
        plot: Whether to draw the fitted curve as a dashed line.

    Returns:
        Tuple ``(m, b, r2)``: slope, intercept and R^2 of the log-linear fit.
    """
    x = df[col_x]
    y_log = np.log(df[col_y])
    m, b = np.polyfit(x, y_log, 1)

    def fitted(values):
        # Clamp at 0 in log space (at least one sample).
        return np.maximum(m * values + b, 0)

    r2 = r2_score(y_log, fitted(x))

    if plot:
        if ax is None:
            ax = plt.gca()
        x_dense = np.linspace(x.min(), x.max(), 100)
        ax.plot(x_dense, np.exp(fitted(x_dense)), "--", c=c)
    return m, b, r2

# Give the figure its final size *before* anything is laid out: add_legend()
# converts a pixel margin using the axes size at call time, and tight_layout()
# is a one-shot adjustment that is not repeated when the figure is resized.
fig = plt.gcf()
fig.set_size_inches((10, 5.5))

# Only primitive lattices are part of this paper, so filter once up front
# instead of copying and re-filtering the full frame for every dimension.
df_primitive = df[df["lattice"] == "primitive"]

for dimension in range(2, 10):
    df_plot = df_primitive[df_primitive["dimension"] == dimension]
    if df_plot.empty:
        continue

    median = (
        df_plot.groupby(col_x, as_index=False)
        .agg(
            # Remove all lengths with any NaN values to avoid fitting on
            # a skewed (too low) median.
            {col_y: lambda y: np.nan if y.isna().any() else y.median()}
        )
        .dropna()
    )
    if len(median) < 2:
        # A straight-line fit needs at least two distinct lengths.
        continue

    c = dimension_colors[dimension - 2]
    plot = True  # dimension in dimensions_to_plot
    m, b, r2 = fit(median, plot=plot)

    # Record the parameters only after a successful fit so the four lists
    # always stay the same length.
    fitting_params["d"].append(dimension)
    fitting_params["m"].append(m)
    fitting_params["b"].append(b)
    fitting_params["r2"].append(r2)

    if plot:
        plt.scatter(
            median[col_x],
            median[col_y],
            c=c,
            label=f"{dimension}D",
            zorder=100,
        )

plt.xlabel("Length")
plt.ylabel("Required samples")
# Empty dashed line: a single legend entry standing for all fitted curves.
plt.plot([], [], "k--", label="fit")
plt.yscale("log")
plt.tight_layout()
# Place the legend last so its pixel margin refers to the final axes size.
add_legend(loc="upper left")
plt.savefig("figures/required_samples.pdf", dpi=300, bbox_inches="tight")

In [None]:
# Tabulate the fit parameters gathered in `fitting_params` (one row per
# dimension). NOTE: this rebinds `df`, which held the raw measurements before.
df = pd.DataFrame(data=fitting_params)
print(df)
# LaTeX rendering of the same table:
# print(df.to_latex(index=False))


def fit(col_y):
    """Fit ``df[col_y] = m * (1 / d) + b`` over the dimensions in ``df["d"]``.

    Reads the module-level parameter table ``df`` and draws the fitted curve
    as a black dashed line on the current axes. Note that this definition
    replaces the earlier ``fit(df, c, ax, plot)`` of the same name.

    Args:
        col_y: Name of the column in ``df`` to model.

    Returns:
        Tuple ``(m, b, r2)``: slope, intercept and R^2 of the fit in 1/d.
    """
    dims = df["d"]
    target = df[col_y]

    # Linear least squares in the transformed variable 1/d
    slope, intercept = np.polyfit(1 / dims, target, 1)
    score = r2_score(target, slope / dims + intercept)

    # Dense grid between the smallest and largest dimension for a smooth curve
    grid = np.linspace(dims.min(), dims.max(), 100)
    plt.plot(grid, slope / grid + intercept, "k--")
    return slope, intercept, score


# Create the figure at its final size up front. tight_layout() and
# add_legend() both work from the figure/axes size at call time, so resizing
# afterwards (as was done before) leaves the layout and legend margin tuned
# for the wrong size. An explicit new figure also avoids drawing onto
# whatever figure happened to be current.
fig, ax = plt.subplots(figsize=(4.75, 3.5))

for param in ["m", "b"]:
    # fit() draws the dashed 1/d curve on the current axes (the one above).
    m, b, r2 = fit(param)
    # "+.4f" always prints the sign, so the intercept stays separated from
    # the preceding term even when it is exactly zero.
    print(f"{param}={m:.4f}*1/d{b:+.4f}", f"R^2={r2:.4f}")
    ax.scatter(
        df["d"],
        df[param],
        label=f"${param}={m:.4f}\\frac{{1}}{{d}}{b:+.4f}$",
        c="blue" if param == "m" else "red",
        marker="s" if param == "m" else "^",
        zorder=100,
    )

ax.set_xlabel("Dimension")
# Fixed y-range; presumably hand-tuned to show both parameter series.
ax.set_ylim(-1.6, 0.2)
fig.tight_layout()
# Legend last, so its pixel margin is computed from the final axes size.
add_legend(ax=ax, loc="lower right")
fig.savefig("figures/required_samples_parameters.pdf", dpi=300)

# Valid Conformations

In [None]:
input_file = "required_samples.csv"
df = pd.read_csv(input_file).dropna()
df = df[df["lattice"] == "primitive"]
for dimension in [2, 3]:
    row = df[df["dimension"] == dimension].sort_values("length", ascending=False)
    i = row.index[0]
    output_file = f"figures/longest_valid_{dimension}d.png"
    !./view_conformation.py --input {input_file} --conformation {i} --output {output_file}
    !convert -trim {output_file} {output_file} || echo "Could not crop image (Install ImageMagick!)"
    print(
        "Saved image of conformation of",
        row["length"].values[0],
        "amino acids to",
        output_file,
    )

# Valid and Invalid Conformations

In [None]:
dimensions_lengths_conformations = [
    # 2D
    (2, 20, 0),  # valid
    (2, 20, 10),  # 1 collision
    (2, 20, 88),  # 3 collisions
    # 3D
    (3, 20, 0),  # valid
    (3, 20, 34),  # 1 collision
    (3, 20, 164),  # 3 collisions
]

for i, (dimension, length, conformation) in enumerate(dimensions_lengths_conformations):
    input_file = f"conformations/{dimension}D/{length}.csv"
    output_file = f"figures/conformation_{i+1}_{dimension}d.png"
    !./view_conformation.py --input {input_file} --conformation {conformation} --output {output_file}
    !convert -trim {output_file} {output_file} || echo "Could not crop image (Install ImageMagick!)"
    print(
        "Saved image of conformation",
        conformation,
        "of",
        length,
        "amino acids in",
        dimension,
        "to",
        output_file,
    )