In [None]:
import os
import copy
import re
import glob
import string
from collections import namedtuple
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import gaussian_kde

%matplotlib inline

# Hex colour string for every three-letter residue name handled below.
AMINO_ACID_COLORS = {
    "ALA": "#c8c8c8",
    "LEU": "#0f820f",
    "ASH": "#e60a0a",
    "ARG": "#145aff",
    "LYS": "#145aff",
    "GLH": "#e60a0a",
    "ASN": "#00dcdc",
    "MET": "#e6e600",
    "HIP": "#145aff",
    "ASP": "#e60a0a",
    "PHE": "#3232aa",
    "SEP": "#fa9600",
    "CYS": "#e6e600",
    "PRO": "#c8c8c8",
    "S1P": "#fa9600",
    "GLN": "#00dcdc",
    "SER": "#00dcdc",
    "TPO": "#fa9600",
    "GLU": "#e60a0a",
    "THR": "#00dcdc",
    "T1P": "#fa9600",
    "GLY": "#c8c8c8",
    "TRP": "#b45ab4",
    "PTR": "#fa9600",
    "HIS": "#8282d2",
    "TYR": "#3232aa",
    "Y1P": "#fa9600",
    "ILE": "#0f820f",
    "VAL": "#0f820f",
}

# Residue names in the order used for the matrix rows/columns further down.
residues = [
    "ALA", "ARG", "ASN", "ASP", "ASH",
    "CYS", "GLN", "GLU", "GLH", "GLY",
    "HIS", "HIP", "ILE", "LEU", "LYS",
    "MET", "PHE", "PRO", "TRP", "VAL",
    "SER", "S1P", "SEP",
    "THR", "T1P", "TPO",
    "TYR", "Y1P", "PTR",
]

def suppressPandasChainAssignmentWarning(func):
    """Decorator that runs ``func`` with pandas' chained-assignment check off.

    The wrapped call is executed inside
    ``pd.option_context("mode.chained_assignment", None)``; the option is
    restored when the call returns or raises.  The wrapper forwards all
    positional and keyword arguments and returns whatever ``func`` returns.
    ``functools.wraps`` keeps the name and docstring of ``func`` on the
    wrapper so that ``help()`` and tracebacks show the decorated function.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)
    def _inner_(*args, **kwargs):
        with pd.option_context("mode.chained_assignment", None):
            return func(*args, **kwargs)

    return _inner_

@suppressPandasChainAssignmentWarning
def read_constava(*input_files, filter_by_method: str = None, delta_neighbor: int = 0):
    """Read one or more Constava CSV files into a single DataFrame.

    Parameters
    ----------
    *input_files
        Paths of CSV files (header in the first row).  Each file must provide
        the columns ``Method`` and ``ResName``.
    filter_by_method
        If given, only rows whose ``Method`` equals this value are kept.
    delta_neighbor
        If greater than zero, rows are additionally restricted to
        ``Method == "window(3)"`` and two columns are added:
        ``ResName-<delta>`` and ``ResName+<delta>``, holding the ``ResName``
        of the row ``delta_neighbor`` positions before / after the current
        one (NaN where no such row exists).

    Returns
    -------
    pandas.DataFrame
        The rows of all files stacked with a fresh 0..n-1 index, plus a
        ``sequence`` column containing the file's residue names joined by
        ``"-"``.  An empty DataFrame is returned when no file is given.

    Notes
    -----
    Neighbours are looked up through the *row labels of the CSV file* (the
    index is not reset after filtering).  This presumably relies on the rows
    of one method being stored contiguously in residue order -- verify
    against the files if the layout changes.
    """
    frames = []
    for fpath in map(os.path.abspath, input_files):
        df = pd.read_csv(fpath, header=0)
        if filter_by_method is not None:
            df = df.loc[df["Method"] == filter_by_method]
        # Sequence of the peptide: all (remaining) residue names of this file
        df = df.assign(sequence="-".join(df["ResName"]))
        if delta_neighbor > 0:
            # NOTE: the neighbour lookup is hard-wired to the "window(3)" rows
            df = df.loc[df["Method"] == "window(3)"]
            # Shifting the labels by +delta aligns every row with the residue
            # delta positions *before* it, shifting by -delta with the one
            # delta positions *after* it.
            for suffix, shift in (("-", delta_neighbor), ("+", -delta_neighbor)):
                # Work on a copy so the column held by `df` keeps its index
                neighbor = df["ResName"].copy()
                neighbor.index = neighbor.index + shift
                df = df.join(neighbor, how="left", rsuffix=f"{suffix}{delta_neighbor}")
        frames.append(df)
    if not frames:
        return pd.DataFrame()
    # One concatenation at the end instead of growing a frame inside the loop
    return pd.concat(frames, ignore_index=True)


# Data columns that are analysed, in processing order.
attributes = [
    "ConStaVa",
    "coreSheet",
    "surrSheet",
    "Other",
    "Turn",
    "surrHelix",
    "coreHelix",
]

# Axis label used for each analysed column.
ylabels = dict(
    ConStaVa="Conf. state variability",
    surrSheet="Sheet(surr) propensity",
    coreSheet="Sheet(core) propensity",
    Other="Coil propensity",
    Turn="Turn propensity",
    surrHelix="Helix(surr) propensity",
    coreHelix="Helix(core) propensity",
)

In [None]:
def load_glycine_pairs(peptide_length: int, delta_neighbor: int) -> pd.DataFrame:
    """Load the glycine-containing peptides of one length.

    Reads every ``../<peptide_length>peptides/gg*gg/analy/constava.csv`` whose
    path contains ``"GLY"`` (method ``"window(3)"``, neighbour columns at
    distance ``delta_neighbor``) and keeps only the rows with ``#ResIndex``
    equal to 4 or ``4 + delta_neighbor``.
    """
    pattern = f"../{peptide_length}peptides/gg*gg/analy/constava.csv"
    csv_files = [fn for fn in glob.glob(pattern) if "GLY" in fn]
    frame = read_constava(*csv_files, filter_by_method="window(3)",
                          delta_neighbor=delta_neighbor)
    return frame.loc[frame["#ResIndex"].isin([4, 4 + delta_neighbor])]


# NOTE: the next cell assigns df6..df9 again, without the "GLY" path filter.
df6 = load_glycine_pairs(6, delta_neighbor=1)
df7 = load_glycine_pairs(7, delta_neighbor=2)
df8 = load_glycine_pairs(8, delta_neighbor=3)
df9 = load_glycine_pairs(9, delta_neighbor=4)

# Effect on adjacent residues

In [None]:
def load_adjacent_pairs(peptide_length: int, delta_neighbor: int) -> pd.DataFrame:
    """Load all peptides of one length and keep the two residues of interest.

    Reads every ``../<peptide_length>peptides/gg*gg/analy/constava.csv``
    (method ``"window(3)"``, neighbour columns at distance ``delta_neighbor``)
    and keeps only the rows with ``#ResIndex`` equal to 4 or
    ``4 + delta_neighbor``.
    """
    csv_files = glob.glob(f"../{peptide_length}peptides/gg*gg/analy/constava.csv")
    frame = read_constava(*csv_files, filter_by_method="window(3)",
                          delta_neighbor=delta_neighbor)
    return frame.loc[frame["#ResIndex"].isin([4, 4 + delta_neighbor])]


df6 = load_adjacent_pairs(6, delta_neighbor=1)
df7 = load_adjacent_pairs(7, delta_neighbor=2)
df8 = load_adjacent_pairs(8, delta_neighbor=3)
df9 = load_adjacent_pairs(9, delta_neighbor=4)

In [None]:
def adjacency_matrix(frame, attr, res_index, neighbor_col, residue_names):
    """Collect ``attr`` for every (neighbour, observed) residue combination.

    Only rows of ``frame`` with ``#ResIndex == res_index`` are considered.
    Element ``[i, j]`` of the returned square array is the ``attr`` value of
    the row whose ``neighbor_col`` equals ``residue_names[i]`` (axis 0: the
    effector residue) and whose ``ResName`` equals ``residue_names[j]``
    (axis 1: the observed residue).

    Raises
    ------
    ValueError
        If a combination does not match exactly one row.
    """
    rows = frame.loc[frame["#ResIndex"] == res_index]
    size = len(residue_names)
    matrix = np.empty((size, size))
    for i, effector in enumerate(residue_names):
        of_effector = rows.loc[rows[neighbor_col] == effector]
        for j, observed in enumerate(residue_names):
            values = of_effector.loc[of_effector["ResName"] == observed, attr]
            if len(values) != 1:
                raise ValueError(
                    f"expected exactly one '{attr}' value for {neighbor_col}={effector}, "
                    f"ResName={observed}, #ResIndex={res_index}; found {len(values)}")
            # Store the scalar itself rather than a one-element Series
            matrix[i, j] = values.iloc[0]
    return matrix


df, k = df6, 1

dynamics_adjacency = {}
gly_row = residues.index("GLY")

for attr in attributes:
    # head1: residue 4 observed, its neighbour at +k is the effector
    # tail1: residue 4+k observed, its neighbour at -k is the effector
    head1 = adjacency_matrix(df, attr, 4, f"ResName+{k}", residues)
    tail1 = adjacency_matrix(df, attr, 4 + k, f"ResName-{k}", residues)

    # Normalize against effect of glycine
    head1 = head1 - head1[gly_row]
    tail1 = tail1 - tail1[gly_row]

    dynamics_adjacency[attr] = (head1, tail1)

In [None]:
# Panels from top to bottom with their y-axis labels.  Raw strings pass the
# mathtext commands (\it, \:) to matplotlib unchanged instead of having Python
# parse them as (invalid) escape sequences.
panel_labels = {
    "coreHelix": r"$\it{Core \: helix}$ propensity",
    "surrHelix": r"$\it{Surr. \: helix}$ propensity",
    "Turn": r"$\it{Turn}$ propensity",
    "coreSheet": r"$\it{Core \: sheet}$ propensity",
    "surrSheet": r"$\it{Surr. \: sheet}$ propensity",
    "Other": r"$\it{Other}$ propensity",
}
n_res = len(residues)


def draw_half_violins(ax, matrix, x_start, direction, caret, face_color, edge_color):
    """Draw one half violin per row of ``matrix`` plus quartile markers.

    ``x_start`` is the position of the first violin (the following ones are
    spaced by 1).  ``direction`` is -1 to keep the left half of each violin
    and +1 to keep the right half; the quartile bar, the caret marker
    (``caret``, an integer matplotlib marker code) and the median diamond are
    offset to the same side.
    """
    xvals = np.arange(matrix.shape[0]) + x_start
    quants = np.quantile(matrix, [.25, .5, .75], axis=1)
    parts = ax.violinplot(matrix.tolist(), positions=xvals, widths=.8,
                          showmedians=False, showextrema=False)
    for body in parts["bodies"]:
        verts = body.get_paths()[0].vertices
        middle = np.mean(verts[:, 0])
        # Collapse the unwanted half onto the centre line of the violin
        if direction < 0:
            verts[:, 0] = np.clip(verts[:, 0], -np.inf, middle)
        else:
            verts[:, 0] = np.clip(verts[:, 0], middle, np.inf)
        body.set_facecolor(face_color)
        body.set_edgecolor(edge_color)
        body.set_linewidth(.5)
        body.set_alpha(1)
        body.set_zorder(3)
    ax.vlines(xvals + direction * .02, quants[0], quants[2], color=edge_color,
              linewidth=2., zorder=6)
    ax.scatter(xvals + direction * .04, quants[1], s=10, color=edge_color,
               marker=caret, linewidths=0, zorder=7)
    ax.scatter(xvals + direction * .02, quants[1], s=2, marker="D", color="w",
               edgecolors=[edge_color], linewidths=.2, zorder=7)


fig, axs = plt.subplots(len(panel_labels), 1, figsize=(7,10), dpi=100, gridspec_kw={
    "left": 0., "right": 1., "bottom": 0., "top": 1., "hspace":.15})

for i, ((attr, label), ax) in enumerate(zip(panel_labels.items(), axs.flatten())):
    head1, tail1 = dynamics_adjacency[attr]

    ax.set_facecolor("0.95")
    ax.grid(True, "major", "both", color="w", linewidth=1)
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    ax.spines['bottom'].set_position('zero')
    ax.xaxis.set_tick_params(length=0)
    ax.set_xticks(np.arange(n_res + 1))
    ax.set_xticklabels([])
    ax.set_xlim([-.5, n_res + .5])
    ax.set_ylabel(label)

    # Left halves: head matrix; right halves: tail matrix
    draw_half_violins(ax, head1, .47, -1, 5, mpl.cm.Pastel1(.2), mpl.cm.Set1(.2))
    draw_half_violins(ax, tail1, .53, +1, 4, mpl.cm.Pastel1(0), mpl.cm.Set1(0))

    # Residue name just above the highest value of each violin pair.  The
    # labels are placed directly in data coordinates, so their position does
    # not depend on the axis limits at the time this line runs.
    label_x = np.arange(n_res) + .5
    label_y = np.max([head1, tail1], axis=(0,2))
    for x, y, resn in zip(label_x, label_y, residues):
        ax.text(x, y, resn, size=10, color="0.5", ha="center", va="bottom", rotation=60)

    # Panel letter (A, B, ...)
    ax.text(-.12, 1, string.ascii_uppercase[i], size=16, va="center", transform=ax.transAxes)

# fig.savefig(f"figures/adjAA_6_violinplot_dpi600.pdf", dpi=600, bbox_inches="tight")
# fig.savefig(f"figures/adjAA_6_violinplot_dpi600.png", dpi=600, bbox_inches="tight")

In [None]:
# Box-plot styling shared by both series (black outlines, small "_" fliers)
praekw = {
    "patch_artist": True,
    "boxprops": {
        "color": "k",
        "linewidth": .8},
    "whiskerprops": {
        "color": "k",
        "linewidth": .8},
    "flierprops": {
        "color": "k",
        "marker": "_",
        "markersize": 3},
    "capprops": {
        "color": "k",
        "linewidth": .8},
    "medianprops": {"color": "k"}
}
postkw = copy.deepcopy(praekw)

n_res = len(residues)


def draw_effect_heatmap(ax, matrix, norm, cause_label):
    """Show ``matrix.T`` as a heat map with residue names on both axes.

    After the transpose the x-axis runs over the first axis of ``matrix``
    (labelled ``cause_label``) and the y-axis over the second one (labelled
    as the observed residue).  Returns the image for use with a colorbar.
    """
    image = ax.imshow(matrix.T, origin="upper", cmap="RdBu_r", norm=norm)
    ax.set_xlabel(cause_label)
    ax.set_xticks(np.arange(len(residues)))
    ax.set_xticklabels(residues, rotation=-90, size=8, ha="center")
    ax.set_ylabel("Residue $i$ (observation)")
    ax.set_yticks(np.arange(len(residues)))
    ax.set_yticklabels(residues, size=8, ha="right")
    return image


for attr in attributes:
    ylabel = ylabels[attr]
    praeMtx, postMtx = dynamics_adjacency[attr]

    # Symmetric colour scale shared by both heat maps
    vmax = max(np.max(np.abs(praeMtx)), np.max(np.abs(postMtx)))
    norma = mpl.colors.Normalize(-vmax, vmax)

    fig, axs = plt.subplots(2,2, figsize=(12, 8), dpi=96,
                           gridspec_kw={"wspace": .3, "hspace":.3, "height_ratios": [3,2]})

    draw_effect_heatmap(axs[0,0], praeMtx, norma, "Residue $i+1$ (cause)")
    im = draw_effect_heatmap(axs[0,1], postMtx, norma, "Residue $i-1$ (cause)")

    # Replace the two bottom axes by a single one spanning the full width
    gs = axs[0,1].get_gridspec()
    axs[1,0].remove()
    axs[1,1].remove()

    ax = fig.add_subplot(gs[1,:])
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    ax.spines['bottom'].set_position('zero')
    ax.grid(True, "major", "both", linewidth=.3, linestyle="-", color="grey")

    # One pair of boxes per row of the matrices, drawn side by side
    bplot1 = ax.boxplot(praeMtx.T, positions=np.arange(n_res) - .13, widths=.3, **praekw, zorder=5)
    bplot2 = ax.boxplot(postMtx.T, positions=np.arange(n_res) + .13, widths=.3, **postkw, zorder=4)
    for bplot, colo in [(bplot1, "0.98"),
                        (bplot2, "0.8")]:
        for patch in bplot["boxes"]:
            patch.set_facecolor(colo)

    # Residue name above the highest value of each pair of boxes
    label_y = np.maximum(np.max(praeMtx, axis=1), np.max(postMtx, axis=1))
    for x, (y, resn) in enumerate(zip(label_y, residues)):
        ax.text(x, y, resn+"\n", ha="center", va="bottom", color="grey", size=9)

    # Raw string: "\pm" is a mathtext command, not a Python escape sequence
    ax.set_title(r"Residue $i \pm 1$")
    ax.set_xticks(np.arange(-.5, n_res, 1), labels=[])
    ax.xaxis.set_tick_params(length=0)
    ax.set_ylabel(ylabel)
    ax.set_xlim([-1, n_res])

    plt.colorbar(im, ax=fig.axes, use_gridspec=False, shrink=.4)
    #fig.savefig(f"figures/dynamics/adjXaa_{attr}_dpi600.pdf", dpi=600, bbox_inches="tight")