In [1]:
from pymol import cmd
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from matplotlib.colors import Normalize
from matplotlib.cm import ScalarMappable
import matplotlib.colors as mcolors
import os
import glob
from pathlib import Path

In [2]:
def read_contact_data(csv_file: str, cut_off: float) -> pd.DataFrame:
    """Load a cpptraj contact table and keep the rows above a TotalFrac threshold.

    Parameters
    ----------
    csv_file : str
        Path to a whitespace-delimited text table with a header row. The
        columns '#Res1', '#Res2' and 'TotalFrac' must be present.
    cut_off : float
        Exclusive lower bound on 'TotalFrac'; rows at or below it are dropped.

    Returns
    -------
    pd.DataFrame
        An independent copy of the surviving rows, with '#Res1' and '#Res2'
        cast to int32 and 'TotalFrac' cast to float32. The original row index
        is kept (it is not reset).

    Raises
    ------
    KeyError
        If one of the three required columns is missing from the file.
    """
    # sep=r'\s+' is the supported spelling of the deprecated
    # ``delim_whitespace=True`` (deprecated since pandas 2.2).
    contacts = pd.read_csv(csv_file, sep=r'\s+')
    contacts = contacts.astype({'#Res1': 'int32',
                                '#Res2': 'int32',
                                'TotalFrac': 'float32'})
    # Return an explicit copy so callers can add columns to the result
    # without triggering SettingWithCopyWarning on a filtered frame.
    return contacts.query(f'`TotalFrac` > {cut_off}').copy()

def normalize_contact_scale(df: pd.DataFrame, min: float = 2.5, max: float = 15) -> pd.DataFrame:  # controls the line thickness
    """Add a 'NormCont' column holding 'Contacts' linearly rescaled to [min, max].

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a numeric 'Contacts' column. It is not modified.
    min, max : float
        Lower and upper bounds of the output range, passed to MinMaxScaler as
        ``feature_range``. (The names shadow the builtins inside this function
        only; they are kept because they are part of the signature.)

    Returns
    -------
    pd.DataFrame
        A new frame with every column of ``df`` plus 'NormCont'. For an empty
        ``df`` the new column is empty as well.

    Raises
    ------
    KeyError
        If ``df`` has no 'Contacts' column.
    """
    # MinMaxScaler cannot be fitted on zero samples, which happens whenever the
    # upstream cut-off filters out every contact; return an empty column instead.
    if len(df) == 0:
        return df.assign(NormCont=df['Contacts'].astype('float64'))

    # The scaler expects a 2-D (n_samples, n_features) array.
    contact_counts = df['Contacts'].to_numpy().reshape(-1, 1)
    scaled = MinMaxScaler(feature_range=(min, max)).fit_transform(contact_counts)
    # Flatten back to 1-D and build a new frame rather than writing into the
    # caller's (possibly filtered) frame.
    return df.assign(NormCont=scaled.ravel())

def get_color_from_value(value: float, min: float = 0.2, max: float = 20) -> str:  # controls the color scale
    """Return the viridis color for ``value`` as a '0x'-prefixed hex string.

    ``value`` is normalized linearly with ``min`` and ``max`` as the ends of the
    scale, looked up in matplotlib's viridis colormap, converted to a hex
    string, and the leading character of that string is replaced by '0x'.
    """
    to_unit_interval = Normalize(vmin=min, vmax=max)
    rgba = plt.cm.viridis(to_unit_interval(value))
    hex_code = mcolors.to_hex(rgba)
    # Drop the first character of the hex string and prepend '0x'.
    return "0x" + hex_code[1:]

def process_pdb_file(df: pd.DataFrame, pdb_file: str,
                     *, save_session: bool = True) -> None:
    """Draw the contacts listed in ``df`` onto a structure loaded in PyMOL.

    The PDB file is loaded into a reinitialized PyMOL, and for every row of
    ``df`` a distance object is created between the CA atoms of residues
    '#Res1' and '#Res2'. Its dash width is taken from 'NormCont' and its color
    from 'TotalFrac' (via ``get_color_from_value``). All distance objects are
    grouped, labels are hidden, and the session is optionally saved.

    Parameters
    ----------
    df : pd.DataFrame
        Needs the columns '#Res1', '#Res2', 'NormCont' and 'TotalFrac'.
    pdb_file : str
        Path to the PDB file. Its stem is used as the object name in every
        selection and in the output file name.
    save_session : bool, keyword-only
        When True, write 'HF_contacts_<stem>.pse' to the current directory.
    """
    entry = Path(pdb_file).stem

    # Scene set-up.
    cmd.reinitialize()
    cmd.load(pdb_file)
    cmd.set('cartoon_flat_sheets', '0')
    cmd.bg_color('white')
    cmd.set('dash_gap', '0')
    cmd.set('cartoon_transparency', '0.5')

    for _, contact in df.iterrows():
        first = str(int(contact['#Res1']))
        second = str(int(contact['#Res2']))
        thickness = contact['NormCont']
        hex_color = get_color_from_value(contact['TotalFrac'])
        dash_name = f'interaction{first}-{second}_{entry}'

        # Named selections for the two alpha carbons; overwritten on each row.
        for sele_name, resi in (('CA1', first), ('CA2', second)):
            cmd.select(sele_name, f'resi {resi} and name CA and {entry}')

        cmd.distance(dash_name, 'CA1', 'CA2')
        cmd.set('dash_width', f'{thickness}', dash_name)
        cmd.color(f'{hex_color}', dash_name)

    cmd.group(f'hf_interactions_{entry}', 'interaction*')
    cmd.hide('label')

    if not save_session:
        return
    cmd.save(f'HF_contacts_{entry}.pse')

In [3]:
## Load every resout table and every PDB file from the working directory.
## The PDB files must already be stripped of water and salt!
# glob.glob() returns matches in arbitrary order; sort both lists so the
# position-based pairing of PDB and resout files is deterministic.
resout_files = sorted(glob.glob('resout*'))
pdb_files = sorted(glob.glob('*.pdb'))

In [4]:
## Generate one PyMOL session per protein.
# Files are paired by position, so both lists are sorted first (glob order is
# arbitrary) and their lengths are checked (zip would silently drop extras).
# NOTE(review): assumes the resout and PDB names sort in the same order —
# confirm against the naming scheme used for the inputs.
pdb_sorted = sorted(pdb_files)
resout_sorted = sorted(resout_files)
if len(pdb_sorted) != len(resout_sorted):
    raise ValueError(
        f'Found {len(pdb_sorted)} PDB files but {len(resout_sorted)} resout '
        'files; each PDB needs exactly one resout file.'
    )

for pdb, resout in zip(pdb_sorted, resout_sorted):
    df = read_contact_data(resout, 0.2)  # keep contacts with TotalFrac > 0.2
    df = normalize_contact_scale(df)
    process_pdb_file(df, pdb)
        

 PyMOL not running, entering library mode (experimental)
