In [None]:
import collections
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Define the color palette (color-vision-deficiency friendly).
# In the figure cell: blue marks the `positive` species, red the `negative`
# species, and gray is used for the full set drawn underneath.
blue = '#005AB5'
red = '#DC3220'
gray = '#D0D0D0'

In [None]:
# Figure sizes are specified in centimetres here; matplotlib's `figsize`
# expects inches, hence this converter.
def cm2inch(*tupl):
    """Convert lengths from centimetres to inches.

    Accepts either separate numbers (``cm2inch(10, 15)``) or a single
    tuple/list of numbers (``cm2inch((10, 15))``).

    Returns:
        tuple of floats, one per input value, each divided by 2.54.
        An empty call returns an empty tuple.
    """
    cm_per_inch = 2.54

    # Nothing to convert: avoid indexing into an empty argument tuple.
    if not tupl:
        return ()

    # A single sequence argument is unpacked; otherwise use the varargs.
    first = tupl[0]
    values = first if isinstance(first, (tuple, list)) else tupl

    return tuple(value / cm_per_inch for value in values)

In [None]:
# Species names: scientific name -> common name.
# The common names are used as the y tick labels of the figure.
namedict = {
    'Homo sapiens': 'Human',
    'Anas platyrhynchos': 'Duck',
    'Bos taurus': 'Cow',
    'Camelus dromedarius': 'Dromedary',
    'Canis lupus familiaris': 'Dog',
    'Capra hircus': 'Goat',
    'Carassius auratus': 'Goldfish',
    'Cavia porcellus': 'Guinea pig',
    'Columba livia': 'Pigeon',
    'Crocodylus porosus': 'Crocodile',
    'Equus asinus': 'Donkey',
    'Equus caballus': 'Horse',
    'Erinaceus europaeus': 'Hedgehog',
    'Felis catus': 'Cat',
    'Gallus gallus': 'Chicken',
    'Macaca mulatta': 'Macaque',
    'Manis javanica': 'Pangolin',
    'Mesocricetus auratus': 'Hamster',
    'Mus musculus': 'Mouse',
    'Mustela putorius furo': 'Ferret',
    'Oryctolagus cuniculus': 'Rabbit',
    'Ovis aries': 'Sheep',
    'Paguma larvata': 'Civet',
    'Pan troglodytes': 'Chimpanzee',
    'Panthera tigris altaica': 'Siberian Tiger',
    'Pongo abelii': 'Orangutan',
    'Rattus norvegicus': 'Rat',
    'Rhinolophus sinicus': 'Horseshoe Bat',
    'Serinus canaria': 'Canary',
    'Sus scrofa': 'Pig'
}

In [None]:
# Input locations, all derived from the current working directory:
#   <cwd>/../refinement                  -> sequence analysis table
#   <cwd>/../refinement/best_models_10   -> model PDB files
rootdir = pathlib.Path('.').resolve(strict=True)
datadir = rootdir.parent.joinpath('refinement')
modeldir = datadir.joinpath('best_models_10')

In [None]:
# Load the per-species sequence analysis table (indexed by the 'Species'
# column) and discard the 'ACE2_HUMAN_6M17' row.
# NOTE(review): presumably that row is the template structure entry - confirm.
df = (
    pd.read_csv(datadir / 'seq.analysis', index_col='Species')
    .drop(index='ACE2_HUMAN_6M17')
)

In [None]:
# Collect HADDOCK scores from models of each species
def calc_haddock_score(model):
    """Calculates the HADDOCK score from a model's header info.

    Two header lines are read from the file:

    * ``REMARK energies:`` - comma-separated floats; fields 5 and 6
      (0-based) are taken as the van der Waals and electrostatic energies.
    * ``REMARK Desolvation energy:`` - a single float.

    If a header occurs more than once, the last occurrence is used.

    Args:
        model: object exposing ``open('rt')`` (e.g. a ``pathlib.Path``).

    Returns:
        float: ``1.0*e_vdw + 0.2*e_elec + 1.0*e_desolv``.

    Raises:
        ValueError: if either header line is absent from the file.
    """

    # Sentinels so a missing header can be reported explicitly instead of
    # surfacing as an UnboundLocalError in the score expression below.
    e_vdw = e_elec = e_desolv = None

    with model.open('rt') as structure:
        for line in structure:
            line = line.strip()

            if line.startswith('REMARK energies:'):
                sep = line.index(':') + 1
                fields = list(map(float, line[sep:].split(',')))
                e_vdw = fields[5]
                e_elec = fields[6]

            elif line.startswith('REMARK Desolvation energy:'):
                sep = line.index(':') + 1
                e_desolv = float(line[sep:])

    missing = []
    if e_vdw is None:
        missing.append('REMARK energies:')
    if e_desolv is None:
        missing.append('REMARK Desolvation energy:')
    if missing:
        raise ValueError(
            'Cannot compute HADDOCK score for {}: missing header line(s) {}'
            .format(model, ', '.join(repr(m) for m in missing))
        )

    # Calculate HS
    score = 1.0*e_vdw + \
            0.2*e_elec + \
            1.0*e_desolv

    return score

# species label -> list of HADDOCK scores, one per model file found
score_dict = collections.defaultdict(list)

for pdbfile in modeldir.rglob('*w.pdb'):
    # The species label is built from the underscore-separated tokens of the
    # file name, excluding the first and last token, joined by spaces and
    # capitalized (first letter upper case, remainder lower case).
    name_tokens = pdbfile.name.split('_')[1:-1]
    species = ' '.join(name_tokens).capitalize()
    score_dict[species].append(calc_haddock_score(pdbfile))

# Per-species summary statistics of the collected scores
score_average, score_max, score_min = {}, {}, {}
for name, scores in score_dict.items():
    score_average[name] = sum(scores)/len(scores)
    score_max[name] = max(scores)
    score_min[name] = min(scores)

# Attach the statistics to df, matching on the species index
for column, per_species in (
    ('HS_average', score_average),
    ('HS_max', score_max),
    ('HS_min', score_min),
):
    df[column] = df.index.map(per_species)

# The original 'HS' column (if the table has one) is no longer needed.
df.drop(columns=['HS'], inplace=True, errors='ignore')

df.head(2)

In [None]:
# Order rows by decreasing average HADDOCK score (largest value first);
# the figure draws species in this row order.
df = df.sort_values(by='HS_average', ascending=False)

In [None]:
# Species subsets (lists of index labels) and the matching slices of df.
# The figure highlights `positive` in blue and `negative` in red.
# NOTE(review): what "positive"/"negative" stand for is not stated in this
# notebook - document the criterion/source here.
human = ['Homo sapiens']
human_df = df.loc[human]

positive = [
    'Homo sapiens', 'Felis catus', 'Manis javanica',
    'Mesocricetus auratus', 'Mustela putorius furo', 'Paguma larvata',
    'Panthera tigris altaica', 'Rhinolophus sinicus', 'Bos taurus',
    'Ovis aries', 'Camelus dromedarius', 'Oryctolagus cuniculus',
    'Equus caballus',
]
positive_df = df.loc[positive]

negative = [
    'Anas platyrhynchos', 'Gallus gallus', 'Mus musculus',
    'Cavia porcellus', 'Rattus norvegicus',
]
negative_df = df.loc[negative]

## Figure

In [None]:
# Quick look at the column that the figure below plots on its x axis.
df['Seq Sim Full']

In [None]:
def _draw_lollipops(ax, frame, color):
    """Draw one horizontal "lollipop" per row of ``frame`` on ``ax``.

    Each row is drawn as an x error bar at the y position of its index
    label. The lower error equals the 'Seq Sim Full' value itself, so the
    bar spans from 0 to the value; the upper error is 0. The lower cap is
    hidden and the upper cap is turned into a circle.

    Args:
        ax: matplotlib Axes to draw on.
        frame: DataFrame with a 'Seq Sim Full' column, indexed by species.
        color: color used for both the stems and the circles.

    Returns:
        The container returned by ``ax.errorbar``.
    """
    similarity = frame['Seq Sim Full']
    container = ax.errorbar(
        similarity,
        frame.index,
        xerr=[similarity, [0] * len(similarity)],  # [lower, upper]
        xlolims=False,
        color=color,
        marker=None,
        capsize=2,
        capthick=2,
        linewidth=0,
        elinewidth=2
    )

    # Remove lower error caps / swap upper error caps to circles.
    # '' is the explicit "no marker" style; passing None to set_marker is
    # deprecated in recent Matplotlib releases.
    _, (lcaps, ucaps), _ = container
    lcaps.set_marker('')
    ucaps.set_marker('o')

    return container


fig, ax1 = plt.subplots(
    nrows=1, ncols=1,
    figsize=cm2inch(10, 15),  # w,h
    constrained_layout=True,
    dpi=600  # comment out for PDF output
)

# Panel: every species in gray first (this also fixes the order of the
# categorical y axis to the row order of df), then the positive and
# negative subsets are drawn again on top in their highlight colors.
for frame, color in ((df, gray), (positive_df, blue), (negative_df, red)):
    _draw_lollipops(ax1, frame, color)

ax1.set_xlabel(
    'Global ACE2 Seq. Similarity (%)',
)

# Adjust x labels for similarity
# ax1.set_xticks(np.arange(0, 105, 10))

# Rename and color y labels.
# Pin the tick positions (one per species, in df row order) before replacing
# the labels, so each label is guaranteed to sit on its own row.
ax1.set_yticks(np.arange(len(df.index)))

# Fall back to the scientific name if a species has no common name entry.
ylabels = [namedict.get(species, species) for species in df.index]
ax1.set_yticklabels(
    ylabels,
    ha='center',
    position=(-0.15, 0),
    fontsize=8
)

ticklabels = ax1.get_yticklabels()
for group, color in ((positive, blue), (negative, red)):
    for s in group:
        ticklabels[df.index.get_loc(s)].set_color(color)

# Adjust margins
# ax1.margins(x=0)

# Add grids
ax1.grid(
    axis='x',
    which='major',
    color='lightgray',
    linestyle=':',
    linewidth=0.5
)

In [None]:
#fig.savefig('Figure_S1-new.pdf');