In [15]:
########################################################################################################
# Hydrophobicity computation functions. From Jeremie Alexander.
########################################################################################################
from helperfunction import const

def calc_base_h(seq: str) -> float:
    """Return the unweighted hydrophobicity score of a residue string.

    The score is accumulated in three passes over the upper-cased sequence:

    1. Add each residue's coefficient from ``const.hydrophobicity_info``,
       picking the column by position: ``"Rc1"`` for the first residue,
       ``"Rc2"`` for the second, ``"Rn"`` for the last, ``"Rn1"`` for the
       second-to-last and ``"Rc"`` everywhere else. When a short sequence
       makes these positions coincide, the first/second-position columns win.
    2. For every H, R or K, subtract the ``const.nn_penalty`` value of each
       adjacent residue that has an entry in that table.
    3. Subtract a fixed amount for each run of consecutive prolines
       (1.2 for two, 3.5 for three, 5.0 for four or more).

    Args:
        seq: One-letter residue codes; case is ignored.

    Returns:
        The accumulated score, or ``0.0`` for an empty string.

    Raises:
        ValueError: If a residue is not a key of ``const.hydrophobicity_info``.
    """
    residues = seq.upper()
    length = len(residues)
    if not length:
        return 0.0

    # Position -> coefficient column. On key collisions the later entry of the
    # literal wins, which gives the leading positions priority in short inputs.
    column_at = {length - 2: "Rn1", length - 1: "Rn", 1: "Rc2", 0: "Rc1"}

    # Pass 1: position-specific coefficients.
    score = 0.0
    for pos, aa in enumerate(residues):
        if aa not in const.hydrophobicity_info:
            raise ValueError(f"Unknown residue '{aa}' in '{seq}'")
        score += const.hydrophobicity_info[aa][column_at.get(pos, "Rc")]

    # Pass 2: nearest-neighbour penalties around H/R/K.
    triggers = ("H", "R", "K")
    last = length - 1
    for pos, aa in enumerate(residues):
        if aa not in triggers:
            continue
        for near in (pos - 1, pos + 1):
            if near < 0 or near > last:
                continue  # no neighbour beyond either end of the sequence
            flank = residues[near]
            if flank in const.nn_penalty:
                score -= const.nn_penalty[flank]

    # Pass 3: proline-run penalties. The trailing non-proline sentinel closes
    # a run that extends to the end of the sequence.
    run = 0
    for aa in residues + "-":
        if aa == "P":
            run += 1
            continue
        if run >= 4:
            score -= 5.0
        elif run == 3:
            score -= 3.5
        elif run == 2:
            score -= 1.2
        run = 0
    return score


def apply_length_weight(H: float, n: int) -> float:
    """Scale a hydrophobicity score by a length-dependent factor.

    Lengths from 8 to 20 inclusive leave the score unchanged. Shorter
    sequences are scaled by ``1 - 0.055 * (8 - n)`` and longer ones by
    ``1 / (1 + 0.027 * (n - 20))``.

    Args:
        H: Score to scale.
        n: Sequence length.

    Returns:
        ``H`` multiplied by the factor for ``n``.
    """
    if n < 8:
        return H * (1.0 - 0.055 * (8 - n))
    if n > 20:
        return H * (1.0 / (1.0 + 0.027 * (n - 20)))
    # Neutral band: still multiply by 1.0 so the result type matches the
    # other branches (e.g. an int score becomes a float).
    return H * 1.0


def overall_penalty(H: float) -> float:
    """Compress scores above 20 by removing a fraction of their excess over 18.

    Scores up to and including 20 are returned unchanged. Above that, the
    result is ``H - fraction * (H - 18.0)`` where the fraction depends on the
    band ``H`` falls in: 0.27 up to 30, 0.33 up to 40, 0.38 up to 50 and
    0.447 beyond.

    Args:
        H: Score to adjust.

    Returns:
        The adjusted score.
    """
    if H <= 20:
        return H

    # (inclusive upper bound of the band, fraction applied inside it)
    bands = ((30, 0.27), (40, 0.33), (50, 0.38))
    fraction = 0.447  # used when H lies above every band
    for upper, band_fraction in bands:
        if H <= upper:
            fraction = band_fraction
            break
    return H - fraction * (H - 18.0)


def calc_hydrophobicity(seq: str) -> float:
    """Compute the final hydrophobicity score for a residue string.

    The input is stripped and upper-cased, then passed through
    ``calc_base_h``, ``apply_length_weight`` and ``overall_penalty`` in that
    order; the result is rounded to four decimal places.

    Args:
        seq: One-letter residue codes. ``None`` or an empty/blank string is
            accepted and treated as "no sequence".

    Returns:
        The rounded score, or NaN when the cleaned sequence is empty or
        contains an ``X``.
    """
    cleaned = seq.strip().upper() if seq else ""
    if len(cleaned) == 0 or "X" in cleaned:
        return float("nan")
    length_weighted = apply_length_weight(calc_base_h(cleaned), len(cleaned))
    return round(overall_penalty(length_weighted), 4)





In [17]:
# Read the pocket residues and calculate hydrophobicity for the pocket and
# for the whole sequence.
import json

# `cotimedutils` is used below; import it in this cell so the cell runs on a
# fresh kernel instead of relying on an import executed in a different cell.
from helperfunction import cotimedutils

with open("pocket_residues.json", "r") as f:
    pocket_residues = json.load(f)

print(pocket_residues)

# Flatten the per-chain residue lists into one list, keeping the file's order.
residue_index = [
    f"{resi}"
    for chain_id in pocket_residues
    for resi in pocket_residues[chain_id]
]

# extract sequence from pdb file
pdb_file = "/home/hdwang/sensor_hd/TransF_biosensor/TF_AcrR_redesign/pocket_hydrophobicity/WT_AcrR_EST_model_0.pdb"
residue_dict = cotimedutils.generate_fasta(pdb_file)
print(residue_dict)

# Build the pocket pseudo-sequence from the residue dict.
# NOTE(review): residues are concatenated in JSON order (not sorted by residue
# number) and entries from every chain are looked up in the same dict, while
# calc_hydrophobicity uses position-specific coefficients and neighbour terms.
# Confirm this ordering is what the analysis intends.
pocket_letters = []
for idx in residue_index:
    # adjust format e.g. "12" to 12
    idx = int(idx)

    if idx in residue_dict:
        pocket_letters.append(residue_dict[idx])
    else:
        # Missing residues are reported and skipped rather than aborting.
        print(f"Residue {idx} not found in sequence dict.")
pocket_sequence = "".join(pocket_letters)

print("Pocket sequence:", pocket_sequence)

hydrophobicity = calc_hydrophobicity(pocket_sequence)
print("Pocket hydrophobicity:", hydrophobicity)

# whole sequence hydrophobicity, residues taken in ascending key order
full_sequence = "".join(residue_dict[idx] for idx in sorted(residue_dict))
full_hydrophobicity = calc_hydrophobicity(full_sequence)
print("Full sequence hydrophobicity:", full_hydrophobicity)

{'WT_AcrR_EST_model_0_A': [128, 97, 98, 131, 132, 70, 105, 108, 109, 115, 152, 153, 155, 156, 94], 'WT_AcrR_EST_model_0_B': [352]}
{1: 'M', 2: 'S', 3: 'S', 4: 'S', 5: 'A', 6: 'P', 7: 'E', 8: 'R', 9: 'V', 10: 'R', 11: 'P', 12: 'G', 13: 'R', 14: 'G', 15: 'G', 16: 'I', 17: 'L', 18: 'D', 19: 'A', 20: 'A', 21: 'T', 22: 'R', 23: 'L', 24: 'F', 25: 'A', 26: 'T', 27: 'H', 28: 'G', 29: 'V', 30: 'S', 31: 'G', 32: 'T', 33: 'S', 34: 'L', 35: 'Q', 36: 'Q', 37: 'I', 38: 'A', 39: 'G', 40: 'A', 41: 'A', 42: 'G', 43: 'I', 44: 'T', 45: 'K', 46: 'A', 47: 'A', 48: 'V', 49: 'Y', 50: 'H', 51: 'H', 52: 'F', 53: 'P', 54: 'T', 55: 'K', 56: 'E', 57: 'E', 58: 'V', 59: 'V', 60: 'A', 61: 'A', 62: 'V', 63: 'L', 64: 'A', 65: 'P', 66: 'A', 67: 'L', 68: 'E', 69: 'A', 70: 'I', 71: 'D', 72: 'A', 73: 'L', 74: 'V', 75: 'R', 76: 'T', 77: 'A', 78: 'E', 79: 'A', 80: 'H', 81: 'D', 82: 'E', 83: 'P', 84: 'R', 85: 'T', 86: 'R', 87: 'T', 88: 'E', 89: 'A', 90: 'A', 91: 'I', 92: 'I', 93: 'G', 94: 'L', 95: 'A', 96: 'D', 97: 'Q', 98: 

In [23]:
# Collect pocket residues with the project helper; the bare call on the last
# line makes its return value this cell's displayed output.
# NOTE(review): this cell does not compute hydrophobicity itself.

from helperfunction import cotimedutils

# NOTE(review): pdb_file is assigned here but not passed to the call below —
# confirm whether get_pocket_residues needs it or this line can be removed.
pdb_file = "/home/hdwang/sensor_hd/TransF_biosensor/TF_AcrR_redesign/pocket_hydrophobicity/WT_AcrR_EST_model_0.pdb"


# NOTE(review): the meaning of the argument 6 is defined in cotimedutils and is
# not visible here; document it once confirmed.
cotimedutils.get_pocket_residues(6)


{'pose2_A': [128,
  97,
  67,
  132,
  131,
  70,
  105,
  108,
  109,
  115,
  118,
  152,
  153,
  155,
  94,
  127],
 'pose2_B': [352],
 'pose8_A': [128,
  97,
  98,
  131,
  132,
  70,
  105,
  108,
  109,
  115,
  118,
  119,
  152,
  153,
  155,
  156,
  125,
  94],
 'pose5_A': [128, 131, 70, 105, 108, 109, 110, 115, 119, 153, 155, 156],
 'pose5_B': [301, 351],
 'pose1_A': [128,
  131,
  70,
  73,
  105,
  108,
  109,
  115,
  118,
  119,
  152,
  153,
  155,
  94],
 'pose6_A': [128, 131, 132, 70, 105, 108, 109, 115, 119, 152, 153, 155, 94],
 'pose6_B': [353, 301, 351],
 'pose3_A': [128,
  67,
  131,
  132,
  70,
  105,
  108,
  109,
  110,
  115,
  118,
  152,
  153,
  155,
  156,
  94],
 'pose3_B': [301, 351],
 'pose7_A': [128, 131, 132, 70, 105, 108, 109, 115, 118, 152, 153, 155, 94],
 'pose7_B': [352, 301, 351]}