# Fingerprints?
In this notebook, we will find out whether a central/contact pair has a unique fingerprint.

The fingerprint will be: how many atoms are found at each distance from the closest atom of the central group. We will see whether or not strong interactions have more atoms that are closer to the central group (inside the vdW radius).

In [None]:
import sys

# Extend the module search path with the relative scripts directory.
# Presumably this is where the constants/helpers/classes packages imported
# in the next cell live -- TODO confirm.
sys.path.append('..//scripts//')

In [None]:
# %matplotlib notebook

import matplotlib.pyplot as plt

# allows for automatic reloading of imports and makes it unnecessary to restart the kernel
# whenever a function is changed
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np

from constants.paths import RADII_CSV

from helpers.geometry_helpers import make_coordinate_df
from classes.Settings import Settings, Radii

# Names of the central groups; they are used to build the data file paths in
# the loops below.
central_groups = ["ArCI", "REt", "RCOMe", "RNO2", "NO3", "RC6F5", "H2O", "RC6H5"]
# contact_groups and to_count are parallel lists: the loops below zip them
# together, so entry i of to_count is the value passed to
# settings.set_atom_to_count() for entry i of contact_groups.
# "XH" is listed twice so that it is processed once with "H" and once with "O".
contact_groups = ["ArCH", "C2CH2", "CCH3", "CF", "R2CO", "RC6H5", "RCN", "XH", "XH"]
to_count =       ["H",    "H",      "H",   "F", "O",     "centroid", "N", "H", "O"]

# Overall fingerprint
Computed with the average fragment.

In [None]:
# Overall fingerprint: for every central group and every (contact group, atom)
# pair, plot the normalised histogram of
#     distance - vdw_closest_atom - longest_vdw
# (stored in the 'moved' column) and save it as a PNG.
for central in central_groups:
    for atom, contact in zip(to_count, contact_groups):

        print(central, contact, atom)
        datafile = "..\\data\\" + central + "\\" + central + "_" + contact + "_vdw.5.cor"

        settings = Settings(WORKDIR="..\\..", coordinate_file=datafile)
        settings.set_atom_to_count(atom)

        df = pd.read_csv(settings.get_aligned_csv_filename(), header=0)
        avg_frag = pd.read_csv(settings.outputfile_prefix + "_avg_fragment.csv", header=0)

        # keep only the rows whose label is '-'
        df_central = df[df['label'] == '-']

        radii = Radii(RADII_CSV)
        coordinate_df = make_coordinate_df(df_central, settings, avg_frag, radii)

        # Without any rows the fractions below would divide by zero and there
        # is nothing to plot, so skip this combination instead of aborting
        # the whole batch.
        total = len(coordinate_df)
        if total == 0:
            print(f"No rows for {central}--{contact} ({atom}); skipping fingerprint")
            continue

        coordinate_df['moved'] = (coordinate_df['distance']
                                  - coordinate_df['vdw_closest_atom']
                                  - coordinate_df['longest_vdw'])

        print(coordinate_df['vdw_closest_atom'].mean())
        fig = plt.figure(figsize=(9, 5))
        # leave room under the axes for the three summary lines
        fig.subplots_adjust(bottom=0.25)
        plt.title(f"Fingerprint of {central} (closest atom)--{contact} ({atom})")

        test_neg = coordinate_df[coordinate_df["moved"] < 0]
        test_pos = coordinate_df[coordinate_df["moved"] >= 0]

        neg_pct = len(test_neg) / total * 100
        pos_pct = len(test_pos) / total * 100
        neg_mean = test_neg['moved'].mean()
        pos_mean = test_pos['moved'].mean()
        overall_mean = coordinate_df['moved'].mean()

        # "\\AA" keeps the literal backslash that mathtext needs; a bare "\A"
        # is an invalid escape sequence in a normal (non-raw) string.
        plt.figtext(0.15, 0.11, f"Negative fraction: {neg_pct:.2f}%, Mean: {neg_mean:.2f}$\\AA$")
        plt.figtext(0.15, 0.06, f"Positive fraction: {pos_pct:.2f}%, Mean: {pos_mean:.2f}$\\AA$")
        plt.figtext(0.15, 0.01, f"Overall mean: {overall_mean:.2f}$\\AA$")

        plt.xlabel("VDW overlap")
        plt.ylabel("Fraction")

        plt.grid(True)

        plt.xlim(-2, 3)

        plt.vlines(0, 0, 0.15, color="black", label="VDW radius closest atom central")

        longest_vdw_max = coordinate_df['longest_vdw'].max()
        plt.vlines(longest_vdw_max, 0, 0.15, color="lightgreen", label="VDW radii")
        plt.vlines(longest_vdw_max + 0.5, 0, 0.15, color="green", label="VDW radii + 0.5")
        print(longest_vdw_max)

        # normalise the counts so that the bar heights sum to 1
        heights, bins = np.histogram(coordinate_df.moved, bins='auto')
        heights = heights / heights.sum()

        # bins holds the bin edges: anchor every bar at its left edge and give
        # it the true bin width, so the bars line up with the vertical markers
        # (the default align='center' would shift them half a bin to the left).
        plt.bar(bins[:-1], heights, width=np.diff(bins), align='edge')

        plt.legend()

        plt.savefig(f'../../results/fingerprints/{central}_{contact}_{atom}_fingerprint.png')
        plt.close(fig)
        

In [None]:
def extra_distances(coordinate_df, labels, settings, avg_fragment):
    """Add distance columns measured against a subset of the average fragment.

    `labels` is a "-"-separated string of labels. Only the rows of
    `avg_fragment` whose `label` value is one of those labels are handed to
    `distances_vdw_central`, together with `coordinate_df` and the unsplit
    `labels` string; whatever that call returns is returned here.

    `settings` is accepted but not used by this function.
    """
    wanted_labels = labels.split("-")
    is_wanted = avg_fragment.label.isin(wanted_labels)
    selected_fragment = avg_fragment[is_wanted]

    return distances_vdw_central(coordinate_df, selected_fragment, labels)


def distances_vdw_central(coordinate_df, avg_fragment, labels):
    """Store, per row of `coordinate_df`, the nearest fragment point's data.

    The x/y/z columns of `coordinate_df` and the x/y/z/vdw_radius columns of
    `avg_fragment` are converted to numpy arrays and passed to `p_dist_calc`.
    Its two results are written into `coordinate_df` as the columns
    "distance" + labels and "vdw_closest_atom" + labels.

    The frame is modified in place and also returned.
    """
    n_rows = len(coordinate_df)

    # one row per fragment point, columns x, y, z
    fragment_xyz = np.transpose(
        np.array([avg_fragment.x, avg_fragment.y, avg_fragment.z])
    )
    fragment_vdw = np.array(avg_fragment.vdw_radius)

    contact_x = np.array(coordinate_df.x)
    contact_y = np.array(coordinate_df.y)
    contact_z = np.array(coordinate_df.z)

    # output buffers that p_dist_calc fills and hands back
    nearest_vdw, nearest_dist = p_dist_calc(
        np.zeros(n_rows), np.zeros(n_rows),
        contact_x, contact_y, contact_z,
        n_rows, fragment_xyz, fragment_vdw,
    )

    distance_column = "distance" + labels
    vdw_column = "vdw_closest_atom" + labels
    coordinate_df.loc[:, distance_column] = nearest_dist
    coordinate_df.loc[:, vdw_column] = nearest_vdw

    return coordinate_df

from numba import jit

@jit(nopython=True)
def p_dist_calc(closest_atoms_vdw, closest_distances, xcoord, ycoord, zcoord, length, points_avg_f, vdw_radii):
    """Find, for each of `length` points, the nearest row of `points_avg_f`.

    Point `row` is (xcoord[row], ycoord[row], zcoord[row]). The Euclidean
    distance to the nearest row of `points_avg_f` is written to
    closest_distances[row], and the entry of `vdw_radii` at that row's index
    is written to closest_atoms_vdw[row].

    Both output arrays are filled in place and returned as the tuple
    (closest_atoms_vdw, closest_distances).
    """
    for row in range(length):

        # coordinates of the point currently being handled
        here = np.array([xcoord[row], ycoord[row], zcoord[row]])

        # start from a very large distance so any real distance replaces it
        best_dist = 1000000000
        best_vdw = None

        # scan every fragment point and keep the one with the shortest distance
        for k in range(len(points_avg_f)):
            separation = np.sqrt(np.sum((points_avg_f[k] - here)**2, axis=0))

            if not (separation < best_dist):
                continue

            # new nearest point: remember its distance and its vdw radius
            best_dist = separation
            best_vdw = vdw_radii[k]

        closest_distances[row] = best_dist
        closest_atoms_vdw[row] = best_vdw

    return closest_atoms_vdw, closest_distances

In [None]:
# Per-label fingerprints: for every (contact group, atom) pair, and for every
# (vdw, labelset) pair, plot the normalised histogram of
#     distance<labelset> - vdw_closest_atom<labelset> - radii.get_vdw_radius(vdw)
# and save it as a PNG.
#
# NOTE(review): this cell does not set `central` itself; it uses whatever
# value an earlier cell left behind (after the loop over central_groups that
# is the last entry of that list). `vdws` and `labels` are not defined in the
# visible part of the notebook either and must be defined before running this
# cell -- confirm the intended values.
for atom, contact in zip(to_count, contact_groups):

    print(central, contact, atom)
    datafile = "..\\data\\" + central + "\\" + central + "_" + contact + "_vdw.5.cor"

    settings = Settings(WORKDIR="..\\..", coordinate_file=datafile)
    settings.set_atom_to_count(atom)

    df = pd.read_csv(settings.get_aligned_csv_filename(), header=0)
    avg_frag = pd.read_csv(settings.outputfile_prefix + "_avg_fragment.csv", header=0)

    # keep only the rows whose label is '-'
    df_central = df[df['label'] == '-']

    radii = Radii(RADII_CSV)
    coordinate_df = make_coordinate_df(df_central, settings, avg_frag, radii)

    # Without any rows the fractions below would divide by zero and there is
    # nothing to plot, so skip this combination instead of aborting the batch.
    total = len(coordinate_df)
    if total == 0:
        print(f"No rows for {central}--{contact} ({atom}); skipping fingerprints")
        continue

    # for each atom(s) in the labels, calculate a fingerprint
    for vdw, labelset in zip(vdws, labels):
        print(labelset)
        coordinate_df = extra_distances(coordinate_df, labelset, settings, avg_frag)

        vdw_radius = radii.get_vdw_radius(vdw)
        coordinate_df['moved'] = (coordinate_df['distance' + labelset]
                                  - coordinate_df['vdw_closest_atom' + labelset]
                                  - vdw_radius)

        fig = plt.figure(figsize=(9, 5))
        # leave room under the axes for the three summary lines
        fig.subplots_adjust(bottom=0.25)
        plt.title(f"Fingerprint of {central} ({vdw})--{contact} ({atom})")

        test_neg = coordinate_df[coordinate_df["moved"] < 0]
        test_pos = coordinate_df[coordinate_df["moved"] >= 0]

        neg_pct = len(test_neg) / total * 100
        pos_pct = len(test_pos) / total * 100
        neg_mean = test_neg['moved'].mean()
        pos_mean = test_pos['moved'].mean()
        overall_mean = coordinate_df['moved'].mean()

        # "\\AA" keeps the literal backslash that mathtext needs; a bare "\A"
        # is an invalid escape sequence in a normal (non-raw) string.
        plt.figtext(0.15, 0.11, f"Negative fraction: {neg_pct:.2f}%, Mean: {neg_mean:.2f}$\\AA$")
        plt.figtext(0.15, 0.06, f"Positive fraction: {pos_pct:.2f}%, Mean: {pos_mean:.2f}$\\AA$")
        plt.figtext(0.15, 0.01, f"Overall mean: {overall_mean:.2f}$\\AA$")

        plt.xlabel("VDW overlap ($\\AA$)")
        plt.ylabel("Fraction")

        plt.grid(True)

        plt.xlim(-2, 3)

        plt.vlines(0, 0, 0.15, color="black", label="VDW radius atom central")

        plt.vlines(vdw_radius, 0, 0.15, color="lightgreen", label="VDW radii")
        plt.vlines(vdw_radius + 0.5, 0, 0.15, color="green", label="VDW radii + 0.5")

        # normalise the counts so that the bar heights sum to 1
        heights, bins = np.histogram(coordinate_df.moved, bins='auto')
        heights = heights / heights.sum()

        # bins holds the bin edges: anchor every bar at its left edge and give
        # it the true bin width, so the bars line up with the vertical markers
        # (the default align='center' would shift them half a bin to the left).
        plt.bar(bins[:-1], heights, width=np.diff(bins), align='edge')

        plt.legend()

        plt.savefig(f'../../results/fingerprints/{central}_{contact}_{atom}_fingerprint_{labelset}.png')
        plt.close(fig)