In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import MDAnalysis as mda

In [None]:
def plot_distributions(df, u, ATOM_NAMES, ATOM_RESNAMES, ATOM_RESNUMBERS, xlim=None, title='Distribution of pred for filtered atom indices'):
    """Overlay one KDE curve of ``df['pred']`` per atom filter on the current axes.

    The three filter lists are read in parallel: entry ``i`` of each list
    describes one curve. A ``None`` entry disables that particular criterion.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'batch'`` and ``'pred'``. The position of a
        row inside its batch is matched against ``u.atoms.indices``
        (assumes rows of each batch follow the atom order of ``u`` -- TODO confirm).
        The frame is not modified.
    u : object exposing ``.atoms`` with ``names``, ``resnames``, ``resnums``
        and ``indices`` (the notebook passes an ``MDAnalysis.Universe``).
    ATOM_NAMES, ATOM_RESNAMES, ATOM_RESNUMBERS : sequence
        Per-curve atom name, residue name and residue number (or ``None``).
    xlim : tuple, optional
        Passed to ``plt.xlim`` when given.
    title : str
        Figure title.

    Raises
    ------
    AssertionError
        If the three filter lists differ in length.
    """
    import warnings

    # Ensure all lists have the same length
    assert len(ATOM_NAMES) == len(ATOM_RESNAMES) == len(ATOM_RESNUMBERS), "All filter lists must have the same length"

    # plt.get_cmap resamples 'tab10' exactly like the removed plt.cm.get_cmap did
    colors = plt.get_cmap('tab10', len(ATOM_NAMES))

    # Row position within each batch, kept as a local Series so the caller's
    # DataFrame does not silently gain an 'atom_index' column.
    atom_index = df.groupby('batch').cumcount()

    for i, (ATOM_NAME, ATOM_RESNAME, ATOM_RESNUMBER) in enumerate(zip(ATOM_NAMES, ATOM_RESNAMES, ATOM_RESNUMBERS)):
        # Narrow the atom selection one criterion at a time
        filtered_atoms = u.atoms
        if ATOM_NAME is not None:
            filtered_atoms = filtered_atoms[filtered_atoms.names == ATOM_NAME]
        if ATOM_RESNAME is not None:
            filtered_atoms = filtered_atoms[filtered_atoms.resnames == ATOM_RESNAME]
        if ATOM_RESNUMBER is not None:
            filtered_atoms = filtered_atoms[filtered_atoms.resnums == ATOM_RESNUMBER]

        # Keep only the predictions whose in-batch position is a selected atom index
        preds = df.loc[atom_index.isin(filtered_atoms.indices), 'pred']
        label = f'{ATOM_NAME}-{ATOM_RESNAME}-{ATOM_RESNUMBER}'

        # A KDE needs at least two distinct values; skip instead of crashing
        if preds.nunique() < 2:
            warnings.warn(f'Skipping {label}: fewer than two distinct pred values match this filter')
            continue

        preds.plot(kind='kde', alpha=0.8, color=colors(i), label=label)

    plt.xlabel('pred')
    plt.ylabel('Density')
    plt.title(title)
    plt.legend()

    # Set axis range if provided
    if xlim is not None:
        plt.xlim(xlim)

    plt.show()

def plot_joined_distributions(df1: pd.DataFrame, df2: pd.DataFrame, u, ATOM_NAMES, ATOM_RESNAMES, ATOM_RESNUMBERS, atom_types=None, xlim=None, ylim=None, offset=0, title='Distribution of pred for filtered atom indices'):
    """Scatter ``pred`` of ``df2`` (x) against ``pred`` of ``df1`` (y) with KDE contours.

    Two selection modes exist:

    * ``atom_types is None`` -- entry ``i`` of ``ATOM_NAMES`` is a pair
      ``(name_for_df1, name_for_df2)``; together with ``ATOM_RESNAMES[i]`` and
      ``ATOM_RESNUMBERS[i]`` it selects atoms in ``u``, whose indices are
      matched against the in-batch row position of each frame. ``None``
      disables a criterion.
    * ``atom_types`` given -- ``df1`` is restricted to rows with
      ``node_type == atom_types[0]`` and ``df2`` to ``atom_types[1]``; one
      series is drawn per distinct in-batch position, paired in order of
      appearance. ``u`` and the three filter lists are ignored in this mode.

    At most 10 series are drawn. Rows of the two frames are paired
    positionally after filtering and truncated to the shorter length.
    Both axes are inverted before showing the figure.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Need ``'batch'`` and ``'pred'``; additionally ``'node_type'`` and
        ``'atom_fullname'`` when ``atom_types`` is used. Neither frame is
        modified.
    u : object exposing ``.atoms`` with ``names``, ``resnames``, ``resnums``
        and ``indices`` (the notebook passes an ``MDAnalysis.Universe``).
    ATOM_NAMES, ATOM_RESNAMES, ATOM_RESNUMBERS : sequence or None
        Per-series filters, see above.
    atom_types : sequence of two values, optional
    xlim, ylim : tuple, optional
        Passed to ``plt.xlim`` / ``plt.ylim`` when given.
    offset : unused
        Accepted for backward compatibility only; the body never reads it.
    title : str
        Figure title.

    Raises
    ------
    AssertionError
        If the filter lists differ in length, or if ``df1`` offers fewer
        distinct positions than ``df2`` in ``atom_types`` mode.
    """
    import warnings
    from scipy.stats import gaussian_kde

    max_series = 10      # hard cap on drawn series (size of the 'tab10' palette)
    n_levels = 20        # number of contour levels per series
    min_kde_points = 3   # a 2-D gaussian_kde needs more points than dimensions

    # .assign returns new frames, so the callers' DataFrames stay untouched
    df1 = df1.assign(atom_index=df1.groupby('batch').cumcount())
    df2 = df2.assign(atom_index=df2.groupby('batch').cumcount())

    if atom_types is None:
        # Ensure all lists have the same length
        assert len(ATOM_NAMES) == len(ATOM_RESNAMES) == len(ATOM_RESNUMBERS), "All filter lists must have the same length"
        n_series = len(ATOM_NAMES)
        colors = plt.get_cmap('tab10', n_series)
    else:
        # Plain boolean masks: correct for any index, not only a default RangeIndex
        df1 = df1[df1['node_type'] == atom_types[0]]
        df2 = df2[df2['node_type'] == atom_types[1]]
        unique_atom_indices_2 = df2['atom_index'].unique()
        unique_atom_indices_1 = df1['atom_index'].unique()[:len(unique_atom_indices_2)]
        assert len(unique_atom_indices_1) == len(unique_atom_indices_2), "df1 has fewer distinct atom positions than df2 for the requested node types"
        n_series = len(unique_atom_indices_1)
        colors = plt.get_cmap('tab10', 10)

    skipped = []

    # Silence warnings only while plotting instead of for the whole kernel session
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        for i in range(min(n_series, max_series)):
            if atom_types is None:
                ATOM_NAME = ATOM_NAMES[i]
                ATOM_RESNAME = ATOM_RESNAMES[i]
                ATOM_RESNUMBER = ATOM_RESNUMBERS[i]
                label = f'{ATOM_NAME}-{ATOM_RESNAME}-{ATOM_RESNUMBER}'

                # With no name filter both selections start from every atom
                if ATOM_NAME is None:
                    name_pair = (None, None)
                else:
                    name_pair = (ATOM_NAME[0], ATOM_NAME[1])

                selected_indices = []
                for atom_name in name_pair:
                    atoms = u.atoms
                    if atom_name is not None:
                        atoms = atoms[atoms.names == atom_name]
                    if ATOM_RESNAME is not None:
                        atoms = atoms[atoms.resnames == ATOM_RESNAME]
                    if ATOM_RESNUMBER is not None:
                        atoms = atoms[atoms.resnums == ATOM_RESNUMBER]
                    selected_indices.append(atoms.indices)

                filtered_df1 = df1[df1['atom_index'].isin(selected_indices[0])]
                filtered_df2 = df2[df2['atom_index'].isin(selected_indices[1])]
            else:
                filtered_df1 = df1[df1['atom_index'] == unique_atom_indices_1[i]]
                filtered_df2 = df2[df2['atom_index'] == unique_atom_indices_2[i]]
                label = filtered_df1['atom_fullname'].iloc[0] + filtered_df2['atom_fullname'].iloc[0]

            # Pair rows positionally and truncate to the shorter frame
            filtered_df1 = filtered_df1.reset_index(drop=True)
            filtered_df2 = filtered_df2.reset_index(drop=True)
            upto = min(len(filtered_df1), len(filtered_df2))
            if upto == 0:
                skipped.append(f'{label} (no matching rows)')
                continue

            x = filtered_df2['pred'][:upto]
            y = filtered_df1['pred'][:upto]

            plt.scatter(x, y, alpha=0.2, marker='.', label=label, color=colors(i))

            if upto < min_kde_points:
                skipped.append(f'{label} (contours omitted, only {upto} point(s))')
                continue

            # 2-D density evaluated on a 100x100 grid spanning the data range
            kde = gaussian_kde(np.vstack([x, y]))
            xi, yi = np.mgrid[x.min():x.max():100j, y.min():y.max():100j]
            zi = kde(np.vstack([xi.flatten(), yi.flatten()])).reshape(xi.shape)

            # Top five levels are opaque; the top two are drawn in black
            levels = np.linspace(zi.min(), zi.max(), n_levels)
            for j, level in enumerate(levels):
                alpha = 1.0 if j >= len(levels) - 5 else 0.1
                color = 'black' if j >= len(levels) - 2 else colors(i)
                plt.contour(xi, yi, zi, levels=[level], colors=[color], alpha=alpha)

        if xlim is not None:
            plt.xlim(xlim)
        if ylim is not None:
            plt.ylim(ylim)

        plt.gca().invert_xaxis()
        plt.gca().invert_yaxis()

        plt.title(title)
        leg = plt.legend()
        # Matplotlib 3.7 renamed legendHandles to legend_handles; support both
        handles = getattr(leg, 'legend_handles', None)
        if handles is None:
            handles = leg.legendHandles
        for lh in handles:
            lh.set_alpha(1)

    if skipped:
        warnings.warn('plot_joined_distributions: ' + '; '.join(skipped))

    plt.show()

In [None]:
# Node types to compare: the first value filters the frame plotted on y,
# the second the frame plotted on x.
ATOM_TYPES = [56, 63]

# Prediction table and the structure it belongs to
df = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/carazolol.inactive.leu/LEU.N.H.scalar.mlp.logs/20250130-140623/out.csv'
)
u = mda.Universe(
    '/storage_common/angiod/MDSimulations/Chiara/beta2.carazolol.inactive.leu/6KR8_CAU_whole_Dani.gro'
)

# Both inputs come from the same CSV, so each gets its own copy.
# Axis limits are left unset here (the MET cells use xlim=(1.4,2.4), ylim=(16,20)).
plot_joined_distributions(
    df.copy(),
    df.copy(),
    u,
    None,
    None,
    None,
    atom_types=ATOM_TYPES,
    offset=28,
    title='Models: LEU.N+LEU.H System: carazolol',
)

In [None]:
# Node types to compare: the first value filters the frame plotted on y,
# the second the frame plotted on x.
ATOM_TYPES = [7, 12]

# Prediction table and the structure it belongs to
df = pd.read_csv(
    '/scratch/angiod/CSNet/test/beta2/BMRB.ensemble/MET.CE.HE.mlp.logs/20250219-121647/out.csv'
)
u = mda.Universe(
    '/storage_common/angiod/MDSimulations/Chiara/beta2.APO.inactive.met/run_protein.gro'
)

# Both inputs come from the same CSV, so each gets its own copy
plot_joined_distributions(
    df.copy(),
    df.copy(),
    u,
    None,
    None,
    None,
    atom_types=ATOM_TYPES,
    title='Models: MET.CE+MET.HE1 System: APO.inactive',
    xlim=(1.4,2.4),
    ylim=(16,20),
)

In [None]:
# One series per methionine residue; every series pairs the CE atom (y axis)
# with the HE1 atom (x axis) of that residue.
ATOM_RESNUMBERS = [8, 54, 187, 251]
ATOM_RESNAMES = ['MET'] * len(ATOM_RESNUMBERS)
ATOM_NAMES = [['CE', 'HE1'] for _ in ATOM_RESNUMBERS]

# Prediction table and the structure it belongs to
df = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/APO.inactive.met/MET.CE.HE.dih.logs/20250203-105944/out.csv'
)
u = mda.Universe(
    '/storage_common/angiod/MDSimulations/Chiara/beta2.APO.inactive.met/run_protein.gro'
)

# Both inputs come from the same CSV, so each gets its own copy
plot_joined_distributions(
    df.copy(),
    df.copy(),
    u,
    ATOM_NAMES,
    ATOM_RESNAMES,
    ATOM_RESNUMBERS,
    title='Models: MET.CE+MET.HE1 System: APO.inactive',
    xlim=(1.4,2.4),
    ylim=(16,20),
)

In [None]:
# One series per methionine residue; every series pairs the CE atom
# (looked up in df_C, y axis) with the HE1 atom (looked up in df_H, x axis).
ATOM_RESNUMBERS = [8, 54, 187, 251]
ATOM_RESNAMES = ['MET'] * len(ATOM_RESNUMBERS)
ATOM_NAMES = [['CE', 'HE1'] for _ in ATOM_RESNUMBERS]

# Two prediction tables from different runs, plus the shared structure
df_C = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/APO.inactive.met/MET.HE1.logs/20250123-154500/out.csv'
)
df_H = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/APO.inactive.met/MET.HE1.dspp.logs/20250123-172941/out.csv'
)
u = mda.Universe(
    '/storage_common/angiod/MDSimulations/Chiara/beta2.APO.inactive.met/run_protein.gro'
)

plot_joined_distributions(
    df_C,
    df_H,
    u,
    ATOM_NAMES,
    ATOM_RESNAMES,
    ATOM_RESNUMBERS,
    title='Models: MET.CE+MET.HE1 System: APO.inactive',
    xlim=(1.4,2.4),
    ylim=(16,20),
)

In [None]:
# One series per methionine residue; every series pairs the CE atom
# (looked up in df_C, y axis) with the HE1 atom (looked up in df_H, x axis).
ATOM_RESNUMBERS = [8, 54, 187, 251]
ATOM_RESNAMES = ['MET'] * len(ATOM_RESNUMBERS)
ATOM_NAMES = [['CE', 'HE1'] for _ in ATOM_RESNUMBERS]

# Two prediction tables from different runs, plus the shared structure
df_C = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/APO.inactive.met/MET.HE1.logs/20250123-154500/out.csv'
)
df_H = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/APO.inactive.met/MET.HE1.dspp.logs/20250123-174533/out.csv'
)
u = mda.Universe(
    '/storage_common/angiod/MDSimulations/Chiara/beta2.APO.inactive.met/run_protein.gro'
)

plot_joined_distributions(
    df_C,
    df_H,
    u,
    ATOM_NAMES,
    ATOM_RESNAMES,
    ATOM_RESNUMBERS,
    title='Models: MET.CE+MET.HE1 System: APO.inactive',
    xlim=(1.4,2.4),
    ylim=(16,20),
)

In [None]:
# One series per methionine residue; every series pairs the CE atom
# (looked up in df_C, y axis) with the HE1 atom (looked up in df_H, x axis).
ATOM_RESNUMBERS = [8, 54, 187, 251]
ATOM_RESNAMES = ['MET'] * len(ATOM_RESNUMBERS)
ATOM_NAMES = [['CE', 'HE1'] for _ in ATOM_RESNUMBERS]

# Two prediction tables from different runs, plus the shared structure
df_C = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/APO.inactive.met/MET.HE1.logs/20250123-154500/out.csv'
)
df_H = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/APO.inactive.met/MET.HE1.dspp.v3.logs/20250123-180618/out.csv'
)
u = mda.Universe(
    '/storage_common/angiod/MDSimulations/Chiara/beta2.APO.inactive.met/run_protein.gro'
)

plot_joined_distributions(
    df_C,
    df_H,
    u,
    ATOM_NAMES,
    ATOM_RESNAMES,
    ATOM_RESNUMBERS,
    title='Models: MET.CE+MET.HE1 System: APO.inactive',
    xlim=(1.4,2.4),
    ylim=(16,20),
)

In [None]:
# One series per methionine residue; every series pairs the CE atom (y axis)
# with the HE1 atom (x axis) of that residue.
ATOM_NAMES = [['CE', 'HE1'], ['CE', 'HE1'], ['CE', 'HE1'], ['CE', 'HE1']]
ATOM_RESNAMES = ['MET', 'MET', 'MET', 'MET']
ATOM_RESNUMBERS = [36, 82, 215, 279]

df = pd.read_csv('/storage_common/angiod/CSNet/test/beta2/BI-167107.active/MET.HE1.logs/20250123-152858/out.csv')
u = mda.Universe('/storage_common/angiod/MDSimulations/Chiara/beta2.BI-167107.active.met/protein_active_withoutNb.gro')

# Pass independent copies so the loaded frame is never shared between the two
# inputs. The title names the system that the CSV and structure paths point to
# (BI-167107.active); it previously said APO.inactive.
plot_joined_distributions(df.copy(), df.copy(), u, ATOM_NAMES, ATOM_RESNAMES, ATOM_RESNUMBERS, title='Models: MET.CE+MET.HE1 System: BI-167107.active', xlim=(1.4,2.4), ylim=(16,20))

In [None]:
# One series per methionine residue; every series pairs the CE atom
# (looked up in df_CE, y axis) with the HE1 atom (looked up in df_HE1, x axis).
ATOM_RESNUMBERS = [8, 54, 187, 251]
ATOM_RESNAMES = ['MET'] * len(ATOM_RESNUMBERS)
ATOM_NAMES = [['CE', 'HE1'] for _ in ATOM_RESNUMBERS]

# Separate prediction tables for the two models, plus the shared structure
df_CE = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/carazolo.inactive/MET.CE.logs/20250115-130936/out.csv'
)
df_HE1 = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/carazolo.inactive/MET.HE1.logs/20250115-172038/out.csv'
)
u = mda.Universe(
    '/storage_common/angiod/MDSimulations/Chiara/beta2.carazolo.inactive/protein_inactive_CAU_4GBR.gro'
)

plot_joined_distributions(
    df_CE,
    df_HE1,
    u,
    ATOM_NAMES,
    ATOM_RESNAMES,
    ATOM_RESNUMBERS,
    title='Models: MET.CE+MET.HE1 System: carazolo.inactive',
    xlim=(1.4,2.4),
    ylim=(16,20),
)

In [None]:
# One series per methionine residue; every series pairs the CE atom
# (looked up in df_CE, y axis) with the HE1 atom (looked up in df_HE1, x axis).
ATOM_RESNUMBERS = [36, 82, 215, 279]
ATOM_RESNAMES = ['MET'] * len(ATOM_RESNUMBERS)
ATOM_NAMES = [['CE', 'HE1'] for _ in ATOM_RESNUMBERS]

# Separate prediction tables for the two models, plus the shared structure
df_CE = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/BI-167107.active/MET.CE.logs/20250115-131807/out.csv'
)
df_HE1 = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/BI-167107.active/MET.HE1.logs/20250115-173053/out.csv'
)
u = mda.Universe(
    '/storage_common/angiod/MDSimulations/Chiara/beta2.BI-167107.active/protein_active_withoutNb.gro'
)

plot_joined_distributions(
    df_CE,
    df_HE1,
    u,
    ATOM_NAMES,
    ATOM_RESNAMES,
    ATOM_RESNUMBERS,
    title='Models: MET.CE+MET.HE1 System: BI-167107.active',
    xlim=(1.4,2.4),
    ylim=(16,20),
)

In [None]:
# One KDE curve per methionine residue, always for the HE1 atom
ATOM_RESNUMBERS = [8, 54, 187, 251]
ATOM_RESNAMES = ['MET'] * len(ATOM_RESNUMBERS)
ATOM_NAMES = ['HE1'] * len(ATOM_RESNUMBERS)

# Prediction table and the structure it belongs to
df_HE1 = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/APO.inactive.met/MET.HE1.dspp.nodetypes.logs/20250124-105437/out.csv'
)
u = mda.Universe(
    '/storage_common/angiod/MDSimulations/Chiara/beta2.APO.inactive.met/run_protein.gro'
)

plot_distributions(
    df_HE1,
    u,
    ATOM_NAMES,
    ATOM_RESNAMES,
    ATOM_RESNUMBERS,
    xlim=(1.4,2.4),
    title='Model: MET.HE1 System: APO.inactive',
)

In [None]:
# One KDE curve per methionine residue, always for the CE atom
ATOM_RESNUMBERS = [8, 54, 187, 251]
ATOM_RESNAMES = ['MET'] * len(ATOM_RESNUMBERS)
ATOM_NAMES = ['CE'] * len(ATOM_RESNUMBERS)

# Prediction table and the structure it belongs to
df_CE = pd.read_csv(
    '/scratch/angiod/CSNet/test/beta2/BMRB.ensemble/MET.CE.HE.mlp.logs/20250219-121647/out.csv'
)
u = mda.Universe(
    '/storage_common/angiod/MDSimulations/Chiara/beta2.APO.inactive.met/run_protein.gro'
)

# xlim=None keeps the automatic x range
plot_distributions(
    df_CE,
    u,
    ATOM_NAMES,
    ATOM_RESNAMES,
    ATOM_RESNUMBERS,
    xlim=None,
    title='Model: MET.CE System: APO.inactive',
)

In [None]:
# One KDE curve per methionine residue, always for the CE atom
ATOM_RESNUMBERS = [8, 54, 187, 251]
ATOM_RESNAMES = ['MET'] * len(ATOM_RESNUMBERS)
ATOM_NAMES = ['CE'] * len(ATOM_RESNUMBERS)

# Prediction table and the structure it belongs to
df_CE = pd.read_csv(
    '/storage_common/angiod/CSNet/test/beta2/APO.inactive.met/MET.CE.scalar.mlp.logs/20250129-125440/out.csv'
)
u = mda.Universe(
    '/storage_common/angiod/MDSimulations/Chiara/beta2.APO.inactive.met/run_protein.gro'
)

# xlim=None keeps the automatic x range
plot_distributions(
    df_CE,
    u,
    ATOM_NAMES,
    ATOM_RESNAMES,
    ATOM_RESNUMBERS,
    xlim=None,
    title='Model: MET.CE System: APO.inactive',
)