In [None]:
import sys
import csv
import os

sys.path.append('..//scripts//')

from helpers.density_helpers import find_available_volume, prepare_df
from classes.Settings import Settings, Radii

from constants.paths import WORKDIR, RADII_CSV

# Central groups that the loops in this notebook iterate over.
central_groups = [
    "H2O", "ArCI", "REt", "RNO2",
    "RCOMe", "NO3", "RC6F5", "RC6H5",
]

# Every contact group is paired with the atom (or centroid) that is counted for
# it. "XH" is listed twice: once counting "H" and once counting "O".
_contact_and_counted = [
    ("ArCH", "H"),
    ("C2CH2", "H"),
    ("CCH3", "H"),
    ("CF", "F"),
    ("R2CO", "O"),
    ("RC6H5", "centroid"),
    ("RCN", "N"),
    ("XH", "H"),
    ("XH", "O"),
]
contact_groups = [contact for contact, _counted in _contact_and_counted]
to_count = [counted for _contact, counted in _contact_and_counted]

In [None]:
%matplotlib notebook
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt

import numpy as np

In [None]:
def calc_directionality(to_count_contact, contact_group, central_group, cluster_frac, resolution, volumes):
    """Compute the directionality of one central/contact pair from its stored density table.

    The density table is read from the HDF file and key reported by ``Settings``.
    The values in column ``to_count_contact`` are normalised to fractions of
    their total. Every row whose fraction is at least ``cluster_frac`` times
    the largest fraction counts as part of the cluster. The result is::

        (summed fraction in cluster) / (cluster volume) * (available volume)

    with ``cluster volume = rows_in_cluster * resolution**3``.

    Parameters
    ----------
    to_count_contact : str
        Name of the density column to use; also handed to
        ``Settings.set_atom_to_count``.
    contact_group, central_group : str
        Group labels used to build the ``.cor`` data file name and to select
        the row in ``volumes``.
    cluster_frac : float
        Fraction of the maximum normalised density used as cluster threshold.
    resolution : float
        Handed to ``Settings.set_resolution``. Each density row is treated as a
        cube with this edge length (presumably the grid bin size - TODO confirm
        against ``Settings``).
    volumes : pandas.DataFrame
        Must provide the columns ``central``, ``contact``, ``to_count`` and
        ``volume``.

    Returns
    -------
    float
        The directionality value.

    Raises
    ------
    ValueError
        If ``volumes`` does not contain exactly one row for the requested
        central/contact/to_count combination.
    """
    datafile = "..\\data\\" + central_group + "\\" + central_group + "_" + contact_group + "_vdw.5.cor"

    # Raw string: same value as before, without relying on the invalid "\." escape.
    settings = Settings(r'..\..', datafile)
    settings.set_atom_to_count(to_count_contact)
    settings.set_resolution(resolution)

    density_df = pd.read_hdf(settings.get_density_df_filename(), settings.get_density_df_key())

    density_df['datafrac_normalized'] = density_df[to_count_contact] / density_df[to_count_contact].sum()

    threshold = density_df.datafrac_normalized.max() * cluster_frac
    in_cluster = density_df[density_df.datafrac_normalized >= threshold]

    is_requested_row = (
        (volumes.central == central_group)
        & (volumes.contact == contact_group)
        & (volumes.to_count == to_count_contact)
    )
    matching_volumes = volumes.loc[is_requested_row, 'volume']
    if len(matching_volumes) != 1:
        # Same exception type as the bare .item() call, but naming the offending combination.
        raise ValueError(
            "expected exactly one available-volume row for "
            f"central={central_group!r}, contact={contact_group!r}, to_count={to_count_contact!r}; "
            f"found {len(matching_volumes)}"
        )
    Vavailable = matching_volumes.item()

    datafrac = in_cluster.datafrac_normalized.sum()
    Vcluster = len(in_cluster) * resolution**3

    directionality = datafrac / Vcluster * Vavailable

    return directionality


In [None]:
def points_in_cluster(to_count_contact, contact_group, central_group, cluster_frac, resolution):
    """Return how much of the data lies inside the density cluster.

    The density table is read from the HDF file and key reported by ``Settings``.
    The values in column ``to_count_contact`` are normalised to fractions of
    their total. Every row whose fraction is at least ``cluster_frac`` times
    the largest fraction counts as part of the cluster.

    Parameters
    ----------
    to_count_contact : str
        Name of the density column to use; also handed to
        ``Settings.set_atom_to_count``.
    contact_group, central_group : str
        Group labels used to build the ``.cor`` data file name.
    cluster_frac : float
        Fraction of the maximum normalised density used as cluster threshold.
    resolution : float
        Handed to ``Settings.set_resolution``.

    Returns
    -------
    tuple
        ``(fraction, count)``: the summed normalised fraction and the summed
        raw ``to_count_contact`` values of the rows inside the cluster.
    """
    datafile = "..\\data\\" + central_group + "\\" + central_group + "_" + contact_group + "_vdw.5.cor"

    # Raw string: same value as before, without relying on the invalid "\." escape.
    settings = Settings(r'..\..', datafile)
    settings.set_atom_to_count(to_count_contact)
    settings.set_resolution(resolution)

    density_df = pd.read_hdf(settings.get_density_df_filename(), settings.get_density_df_key())

    density_df['datafrac_normalized'] = density_df[to_count_contact] / density_df[to_count_contact].sum()

    threshold = density_df.datafrac_normalized.max() * cluster_frac
    in_cluster = density_df[density_df.datafrac_normalized >= threshold]

    return in_cluster.datafrac_normalized.sum(), in_cluster[to_count_contact].sum()


In [None]:
# Table handed to calc_directionality as `volumes`; that function reads its
# `central`, `contact`, `to_count` and `volume` columns.
volumes_total = pd.read_csv(filepath_or_buffer='../../results/volumes_total.csv')

# Directionality ~ Resolution

In [None]:
# Make sure the directory the dependency plots are saved into exists.
# makedirs(exist_ok=True) replaces the check-then-create pair (no race between
# the check and the mkdir) and also creates missing parent directories.
os.makedirs('../../results/dependencies/', exist_ok=True)

In [None]:
# Sweep the resolution for a few fixed cluster thresholds and store the
# directionality of every central/contact combination as CSV.
cluster_frac = [0.10, 0.25, 0.40]
resolutions = np.arange(0.1, 1.55, 0.05)

# The results file is overwritten on every run. It is opened once instead of
# once per row; each row is flushed immediately so partial results are still on
# disk if the sweep is interrupted.
with open('../../results/directionality_dependency_resolution.csv', 'w', newline="") as resultsfile:
    csvwriter = csv.writer(resultsfile)
    csvwriter.writerow(["central", "contact", "to_count", "frac", "res", "directionality"])

    for frac in cluster_frac:
        for central_group in central_groups:
            for to_count_contact, contact_group in zip(to_count, contact_groups):
                for resolution in resolutions:
                    # np.arange with a float step can carry floating-point noise;
                    # round once so the value passed to calc_directionality and
                    # the value written to the CSV are the same.
                    rounded_resolution = round(resolution, 2)
                    directionality = calc_directionality(
                        to_count_contact, contact_group, central_group,
                        frac, rounded_resolution, volumes_total,
                    )

                    csvwriter.writerow([
                        central_group, contact_group, to_count_contact,
                        round(frac, 2), rounded_resolution, directionality,
                    ])
                    resultsfile.flush()

In [None]:
# Read the resolution sweep back from disk (first line is the header row)
# and show the table.
df = pd.read_csv(
    filepath_or_buffer='../../results/directionality_dependency_resolution.csv',
    header=0,
)
display(df)

In [None]:
# One figure per (cluster fraction, central group): directionality against
# resolution, one line per contact group / counted atom.
# NOTE(review): expects `df` to hold the resolution sweep
# (directionality_dependency_resolution.csv) and `cluster_frac` to be the list
# of fractions used for that sweep.
for frac in cluster_frac:
    for central in central_groups:
        fig, ax = plt.subplots(figsize=(8, 8))
        ax.grid(True)
        ax.set_title(f"Dependency of directionality on resolution {central}")

        for to_count_contact, contact in zip(to_count, contact_groups):
            partdf = df[(df.frac == frac) & (df.central == central) & (df.contact == contact) & (df.to_count == to_count_contact)]

            # The sweep writes the resolution into the column "res"; there is no
            # "resolution" column in the CSV.
            ax.plot(partdf.res, partdf.directionality)
            ax.scatter(partdf.res, partdf.directionality, label=contact + "-" + to_count_contact)

        ax.set_ylabel("Directionality")
        ax.set_xlabel("Resolution")
        ax.set_xlim(0.09, 0.81)
        ax.legend()

        # Save before showing: on backends that release the figure in show(),
        # saving afterwards writes an empty image.
        fig.savefig(f"../../results/dependencies/directionality_resolution_{central}_frac_{frac:.2f}_maxres08.png")
        plt.show()

# Directionality ~ Threshold

In [None]:
# Sweep the cluster threshold fraction for a few fixed resolutions and store the
# directionality of every central/contact combination as CSV.
resolutions = [0.2, 0.25, 0.3]
cluster_fracs = np.arange(0.1, 1, 0.05)

# The results file is overwritten on every run. It is opened once instead of
# once per row; each row is flushed immediately so partial results are still on
# disk if the sweep is interrupted.
with open('../../results/directionality_dependency_threshold.csv', 'w', newline="") as resultsfile:
    csvwriter = csv.writer(resultsfile)
    csvwriter.writerow(["central", "contact", "to_count", "res", "frac", "directionality"])

    for resolution in resolutions:
        rounded_resolution = round(resolution, 2)
        for central_group in central_groups:
            for to_count_contact, contact_group in zip(to_count, contact_groups):
                # The loop variable is deliberately not called `cluster_frac`:
                # that name is the list of fractions used by the resolution
                # sweep and its plotting cell, and must not be overwritten here.
                for threshold_frac in cluster_fracs:
                    directionality = calc_directionality(
                        to_count_contact, contact_group, central_group,
                        threshold_frac, rounded_resolution, volumes_total,
                    )

                    csvwriter.writerow([
                        central_group, contact_group, to_count_contact,
                        rounded_resolution, round(threshold_frac, 2), directionality,
                    ])
                    resultsfile.flush()

In [None]:
# Read the threshold sweep back from disk (first line is the header row)
# and show the table.
df = pd.read_csv(
    filepath_or_buffer='../../results/directionality_dependency_threshold.csv',
    header=0,
)
display(df)

In [None]:
# One figure per (resolution, central group): directionality against the
# threshold fraction, one line per contact group / counted atom.
# NOTE(review): expects `df` to hold the threshold sweep
# (directionality_dependency_threshold.csv).
for resolution in resolutions:
    for central in central_groups:
        fig, ax = plt.subplots(figsize=(8, 8))
        ax.grid(True)
        ax.set_title(f"Dependency of directionality on threshold {central}")

        for to_count_contact, contact in zip(to_count, contact_groups):
            partdf = df[(df.res == resolution) & (df.central == central) & (df.contact == contact) & (df.to_count == to_count_contact)]

            ax.plot(partdf.frac, partdf.directionality)
            ax.scatter(partdf.frac, partdf.directionality, label=contact + "-" + to_count_contact)

        ax.set_ylabel("Directionality")
        ax.set_xlabel("Fraction used to determine threshold")
        ax.legend()

        # Save before showing: on backends that release the figure in show(),
        # saving afterwards writes an empty image.
        fig.savefig(f"../../results/dependencies/directionality_threshold_{central}_res_{resolution}.png")
        plt.show()

# Datapoints ~ Clusterfrac

In [None]:
# Sweep the cluster threshold fraction for a few fixed resolutions and store,
# for every central/contact combination, the two values returned by
# points_in_cluster.
resolutions = [0.2, 0.25, 0.3]
cluster_fracs = np.arange(0.1, 1, 0.05)

# The results file is overwritten on every run. It is opened once instead of
# once per row; each row is flushed immediately so partial results are still on
# disk if the sweep is interrupted.
with open('../../results/datapoints_dependency_threshold.csv', 'w', newline="") as resultsfile:
    csvwriter = csv.writer(resultsfile)
    csvwriter.writerow(["central", "contact", "to_count", "res", "frac", "n", "n_frac"])

    for resolution in resolutions:
        rounded_resolution = round(resolution, 2)
        for central_group in central_groups:
            for to_count_contact, contact_group in zip(to_count, contact_groups):
                # The loop variable is deliberately not called `cluster_frac`:
                # that name is the list of fractions used by the resolution
                # sweep and its plotting cell, and must not be overwritten here.
                for threshold_frac in cluster_fracs:
                    # points_in_cluster returns the normalised fraction first,
                    # then the summed raw values.
                    n_frac, n = points_in_cluster(
                        to_count_contact, contact_group, central_group,
                        threshold_frac, rounded_resolution,
                    )

                    csvwriter.writerow([
                        central_group, contact_group, to_count_contact,
                        rounded_resolution, round(threshold_frac, 2), n, n_frac,
                    ])
                    resultsfile.flush()

In [None]:
# Read the datapoints sweep back from disk (first line is the header row)
# and show the table.
df = pd.read_csv(
    filepath_or_buffer='../../results/datapoints_dependency_threshold.csv',
    header=0,
)
display(df)

In [None]:
# One figure per (resolution, central group): fraction of the data inside the
# cluster against the threshold fraction, one line per contact group / counted
# atom.
# NOTE(review): expects `df` to hold the datapoints sweep
# (datapoints_dependency_threshold.csv).
for resolution in resolutions:
    for central in central_groups:
        fig, ax = plt.subplots(figsize=(8, 8))
        ax.grid(True)
        ax.set_title(f"Dependency of fraction datapoints on threshold {central}")

        for to_count_contact, contact in zip(to_count, contact_groups):
            partdf = df[(df.res == resolution) & (df.central == central) & (df.contact == contact) & (df.to_count == to_count_contact)]

            ax.plot(partdf.frac, partdf.n_frac)
            ax.scatter(partdf.frac, partdf.n_frac, label=contact + "-" + to_count_contact)

        ax.set_ylabel("Fraction of datapoints used for directionality calculation")
        ax.set_xlabel("Fraction used to determine threshold")
        ax.legend()

        # Save before showing: on backends that release the figure in show(),
        # saving afterwards writes an empty image.
        fig.savefig(f"../../results/dependencies/datapoints_threshold_{central}_res{resolution}.png")
        plt.show()