# Calculate all directionalities
In this notebook you can calculate the directionalities of all groups whose data is already in the data folder and whose densities have already been calculated. All the volumes must also have been calculated beforehand. The notebook produces a table containing the directionalities of all groups, sorted and colored by how directional they are.

You can pick the resolution to use, as long as the densities for that resolution have already been calculated. You can also pick a threshold (set via `cluster_frac`); this requires no pre-calculation.

$\textit{NB: You can only do this if all the files of the contact pairs have the "standard" filename.}$

In [None]:
%matplotlib notebook
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import sys

# so we can import scripts from the scripts folder although it is not a child directory
sys.path.append('..//scripts//')

from classes.Settings import Settings
from constants.paths import WORKDIR

# Define the central groups, the contact groups and the reference point of each contact group

In [None]:
# central groups that form the rows of the directionality table
central_groups = [
    "ArCI", "REt", "RCOMe", "RNO2", "NO3", "RC6F5", "RC6H5", "H2O",
]

# every contact group together with the reference point used for it; written as pairs so the
# group and its reference point cannot drift out of step
contact_pairs = [
    ("CF", "F"),
    ("RCN", "N"),
    ("R2CO", "O"),
    ("XH", "H"),
    ("XH", "O"),
    ("CCH3", "H"),
    ("C2CH2", "H"),
    ("RC6H5", "centroid"),
    ("ArCH", "H"),
]
contact_groups = [group for group, _ in contact_pairs]
contact_rps = [reference_point for _, reference_point in contact_pairs]

# resolution of the precalculated density bins that should be loaded
resolution = 0.3

# bins holding at least this fraction of the fullest bin's share of the data count as the cluster
cluster_frac = 0.1

In [None]:
# precalculated result tables; all three live in the same results folder
results_dir = '../../results/'

volumes_total = pd.read_csv(results_dir + 'volumes_total.csv')
volumes_free = pd.read_csv(results_dir + 'volumes_free.csv')
amounts = pd.read_csv(results_dir + 'amounts_structures.csv')

def calc_all(volumes, min_datapoints=2000):
    """ Calculates the directionality of every central/contact pair defined in the settings cell.

        The pairs and settings are read from the notebook-level variables `central_groups`,
        `contact_groups`, `contact_rps`, `resolution` and `cluster_frac`. The volume table is an
        argument so the directionalities can be calculated with either the total or the free
        volume and the two results can be compared.

        Parameters
        ----------
        volumes : pandas.DataFrame
            Table with the columns `central`, `contact`, `contact_rp` and `volume`. It must hold
            exactly one row for every pair that is evaluated.
        min_datapoints : int or float, optional
            A pair is only evaluated when the sum of its density column is strictly larger than
            this value (default 2000); pairs at or below it are left empty.

        Returns
        -------
        pandas.DataFrame
            Float table with one row per central group and one "<contact>-<reference point>"
            column per contact pair. Pairs that were skipped hold NaN.

        Raises
        ------
        ValueError
            If `volumes` does not contain exactly one row for an evaluated pair.
    """

    # empty (all-NaN) table for all contact pairs; float dtype so it can be averaged, sorted and
    # plotted without first converting object values
    pair_columns = [contact + "-" + contact_rp for contact, contact_rp in zip(contact_groups, contact_rps)]
    dir_df = pd.DataFrame(index=central_groups, columns=pair_columns, dtype=float)

    # loop over all pairs
    for central in central_groups:
        for contact_rp, contact in zip(contact_rps, contact_groups):

            # get the data file and make a settings object
            datafile = "..\\data\\" + central + "\\" + central + "_" + contact + "_vdw.5.cor"

            settings = Settings(WORKDIR, datafile)
            settings.set_contact_reference_point(contact_rp)
            settings.set_resolution(resolution)

            # read in the density df; the column named after the reference point is used as the
            # amount of data per bin
            density_df = pd.read_hdf(settings.get_density_df_filename(), settings.get_density_df_key())
            counts = density_df[contact_rp]
            n_datapoints = counts.sum()

            # progress output: printed for every pair, also the ones that are skipped below
            print(n_datapoints)

            # skip pairs without enough datapoints for a reliable directionality
            if not n_datapoints > min_datapoints:
                continue

            # fraction of all data that falls in each bin
            datafrac_normalized = counts / n_datapoints

            # calculate threshold with given cluster fraction, relative to the fullest bin
            threshold = datafrac_normalized.max() * cluster_frac

            # take the bins that are at least as full as the threshold and calc their volume and datafrac
            in_cluster = datafrac_normalized[datafrac_normalized >= threshold]
            datafrac = in_cluster.sum()
            Vcluster = len(in_cluster) * resolution**3

            # take the available volume from the volume df; exactly one row has to match
            matching_volumes = volumes.loc[(volumes.central == central)
                                           & (volumes.contact == contact)
                                           & (volumes.contact_rp == contact_rp), 'volume']
            if len(matching_volumes) != 1:
                raise ValueError("expected exactly one volume for " + central + " / " + contact + "-" + contact_rp
                                 + ", found " + str(len(matching_volumes)))
            Vavailable = matching_volumes.item()

            # NOTE(review): only half of the available volume is used here; the reason is not
            # visible in this notebook - confirm before changing
            directionality = datafrac / Vcluster * (Vavailable/2)

            # save the directionality in the table so we can plot it later
            dir_df.loc[central, contact + "-" + contact_rp] = directionality

            print(n_datapoints, datafrac, Vcluster, Vavailable, directionality, end='\n\n')

    return dir_df

# same calculation twice, once per volume definition, so the two tables can be compared
directionalities_free_vol = calc_all(volumes=volumes_free)
directionalities_total_vol = calc_all(volumes=volumes_total)

In [None]:
def sort_df(df):
    """ Orders a table by the mean value of its columns and of its rows.

        Columns are arranged by ascending column mean and rows by descending row mean, so the
        highest values tend to gather in the upper right corner and the lowest values in the
        lower left corner. A reordered copy is returned.
    """

    # columns first: smallest mean on the left, largest mean on the right
    column_order = df.mean().sort_values(ascending=True).index
    by_columns = df.reindex(columns=column_order)

    # then the rows, averaged over the reordered columns: largest mean on top
    row_order = by_columns.mean(axis=1).sort_values(ascending=False).index

    return by_columns.reindex(index=row_order)

# replace both tables by their sorted versions (sorting an already sorted table changes nothing)
directionalities_free_vol = sort_df(df=directionalities_free_vol)
directionalities_total_vol = sort_df(df=directionalities_total_vol)

# Saving the directionality tables
If you want to make another table, you also need to change the titles and plot filenames in the cell below.

In [None]:
def make_directionality_table(df, title, pltname=""):
    """ Take the sorted df and make it into a nice table, with a colorscheme for high and low values.

        Parameters
        ----------
        df : pandas.DataFrame
            Directionality table; rows are labelled as central groups and columns as contact
            groups. Values must be convertible to float, empty cells are NaN.
        title : str
            Title shown above the table.
        pltname : str, optional
            File to save the table to, in SVG format. Nothing is saved when it is empty
            (the default) or None.
    """

    fig, ax = plt.subplots(figsize=(8,3))
    ax.set_title(title)

    # mask the NaN values so they are not drawn; the axes background shows through instead
    mask = df.isnull()
    sns.heatmap(df.astype('float'), cmap=sns.diverging_palette(145, 300, s=60, as_cmap=True),
                annot=True,
                mask=mask,
                fmt='.3f',
                linewidths=0.1,
                ax=ax)

    # color nans light grey (the masked cells show the axes background)
    ax.set_facecolor('lightgrey')

    ax.set_xlabel("Contact group")
    plt.setp(ax.get_xticklabels(), rotation=30)

    ax.set_ylabel("Central group")
    plt.setp(ax.get_yticklabels(), rotation=0)

    # leave room for the rotated x tick labels
    fig.subplots_adjust(bottom=0.25)

    # save the plot if a plotname is given; this has to happen before plt.show(), because some
    # backends release the figure when it is shown and an empty image would be saved afterwards
    if pltname:
        fig.savefig(pltname, format='svg')

    plt.show()

# free volume: set the title and file name here, then make and save the table
title_free = "Directionalities free volume"
pltname_free = '../../results/plots/directionalities_10_03_rcome_ret_kmeans_res05_free_volume.svg'
make_directionality_table(directionalities_free_vol, title_free, pltname_free)

# total volume: set the title and file name here, then make and save the table
title_total = "Directionalities total volume"
pltname_total = '../../results/plots/directionalities_10_03_rcome_ret_kmeans_res05_total_volume.svg'
make_directionality_table(directionalities_total_vol, title_total, pltname_total)

# This is the end of this notebook.
You can change the settings and re-run it if you want to plot/save other directionality tables.