# Time all steps
In this notebook, you can run all calculations and time how long the steps take.

In [None]:
# to show matplotlib plots in-line
%matplotlib notebook

# allows for automatic reloading of imports and makes it unnecessary to restart the kernel
# whenever a function is changed
%load_ext autoreload
%autoreload 2

import os
import time
import numpy as np
import csv
import pandas as pd

from tqdm import tqdm

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import sys

# to be able to import scripts from scripts folder even though it's not a child repository
sys.path.append('..//scripts//')

from constants.paths import WORKDIR

from classes.Settings import Settings, AlignmentSettings
from classes.Radii import Radii

from helpers.alignment_helpers import (calc_rmse, kabsch_align, perform_rotations,
                                       perform_translation, read_raw_data)

from align_kabsch import align_all_fragments, split_file_if_too_big
from calc_avg_fragment import calc_avg_frag
from helpers.geometry_helpers import make_coordinate_df, average_fragment
from helpers.density_helpers import prepare_df, make_density_df

In [None]:
# Central groups for which fragments are processed.
central_groups = [
    "REt", "RCOMe", "ArCI", "RNO2",
    "NO3", "RC6F5", "RC6H5", "H2O",
]

# Every contact group together with the reference point used for it.
# "XH" appears twice: once with reference point "H" and once with "O".
contact_pairs = [
    ("CF", "F"),
    ("RCN", "N"),
    ("R2CO", "O"),
    ("XH", "H"),
    ("XH", "O"),
    ("CCH3", "H"),
    ("C2CH2", "H"),
    ("RC6H5", "centroid"),
    ("ArCH", "H"),
]

# Parallel lists (same order, same length) as consumed by the cells below.
contact_groups = [group for group, _rp in contact_pairs]
contact_rps = [rp for _group, rp in contact_pairs]

# Bin resolutions for the density calculation: 0.1 up to (not including) 1 in steps of 0.05.
resolutions = np.arange(0.1, 1, 0.05)
# resolutions = [0.2, 0.5]

# Count the structures

In [None]:
# Set to True to (re)count the fragments in every .cor file and rewrite
# ../../results/amounts_structures.csv.
count = False
counts = []

central_groups_tc = ["REt", "RCOMe", "ArCI", "RNO2", "NO3", "RC6F5", "RC6H5", "H2O"]
contact_groups_tc = ["CF", "RCN", "R2CO", "XH", "CCH3", "C2CH2", "RC6H5", "ArCH"]

if count:
    with open('../../results/amounts_structures.csv', 'w', newline='') as resultsfile:
        writer = csv.writer(resultsfile)
        writer.writerow(['central', 'contact', 'amount_structures', 'amount_cif'])

        for central_group in central_groups_tc:
            for contact_group in contact_groups_tc:
                datafile = f"..\\data\\{central_group}\\{central_group}_{contact_group}_vdw.5.cor"

                # Every line containing "**" contributes one entry: the text in front of the
                # first "**" (presumably the structure identifier - verify against the .cor format).
                with open(datafile, 'r') as cor_file:
                    ids = [cor_line.split("**")[0] for cor_line in cor_file if "**" in cor_line]

                n_entries = len(ids)
                n_unique = len(set(ids))

                print(central_group, contact_group, n_entries, n_unique)
                counts.append(n_entries)

                writer.writerow([central_group, contact_group, n_entries, n_unique])

# Align, AVG fragment

In [None]:
# run_everything_again: redo preparation, alignment, average fragment and coordinate df
#                       for every central/contact pair (slow).
# write_times:          store the measured durations in `comptimes_file`; when False,
#                       the file is left untouched.
run_everything_again = False
write_times = False

# CSV that collects one timing row per (central group, contact group, reference point).
# NOTE(review): the analysis cell reads '../../results/pre_density_comptimes.csv' -
# confirm whether this file is renamed/copied before the analysis is run.
comptimes_file = '../../results/coordinate_comptimes.csv'

if run_everything_again:
    if write_times:
        # start from a fresh file that only contains the header
        with open(comptimes_file, 'w', newline='') as resultsfile:
            writer = csv.writer(resultsfile)
            writer.writerow(['central', 'contact', 'contact_rp', 'prep_data', 'alignment', 'avg_frag', 'coordinate_df'])

    for central_group in central_groups:
        for contact_rp, contact_group in zip(contact_rps, contact_groups):

            print(central_group, contact_group, contact_rp)

            datafile = "..\\data\\" + central_group + "\\" + central_group + "_" + contact_group + "_vdw.5.cor"

            # makedirs also creates missing parent folders and does not fail when the
            # folder already exists
            os.makedirs(f"..\\..\\results\\pairs\\{central_group}\\", exist_ok=True)

            # preparation: settings + splitting of the coordinate file if needed
            t0_prep = time.time()
            settings = AlignmentSettings("..\\..", datafile)
            settings.set_contact_reference_point(contact_rp)

            split_file_if_too_big(settings.coordinate_file, settings.no_atoms)
            settings.update_coordinate_filename()
            prep_time = time.time() - t0_prep

            # alignment
            t0_alignment = time.time()
            aligned_fragments_df = align_all_fragments(settings, again=True)
            alignment_time = time.time() - t0_alignment

            # loading the radii is deliberately kept outside of the timed sections
            radii = Radii(settings.get_radii_csv_name())

            # average fragment
            t0_avg_frag = time.time()
            avg_frag = calc_avg_frag(aligned_fragments_df, settings, radii)
            avg_frag.to_csv(settings.get_avg_frag_filename(), index=False)
            avg_frag_time = time.time() - t0_avg_frag

            # coordinate df
            t0_coordinate = time.time()
            coordinate_df = make_coordinate_df(aligned_fragments_df, settings, avg_frag, radii, again=True)
            coordinate_time = time.time() - t0_coordinate

            # only append a row when timings are requested; otherwise rows would end up
            # in a file without (or after an outdated) header
            if write_times:
                with open(comptimes_file, 'a', newline='') as resultsfile:
                    writer = csv.writer(resultsfile)
                    writer.writerow([central_group, contact_group, contact_rp, prep_time, alignment_time, avg_frag_time, coordinate_time])

            print('\n')

# Calc Densities

In [None]:
# rerun:      recalculate the density for every pair and every resolution (slow).
# write_time: store the measured durations in `density_comptimes_file`; when False,
#             an existing timing file is left untouched instead of being truncated.
rerun = False
write_time = False

density_comptimes_file = '../../results/density_comptimes.csv'

if rerun:
    if write_time:
        # start from a fresh file that only contains the header
        with open(density_comptimes_file, 'w', newline='') as resultsfile:
            writer = csv.writer(resultsfile)
            writer.writerow(['central', 'contact', 'contact_rp', 'resolution', 'density_time'])

    for central_group in central_groups:
        for contact_rp, contact_group in zip(contact_rps, contact_groups):
            datafile = ".\\data\\" + central_group + "\\" + central_group + "_" + contact_group + "_vdw.5.cor"

            settings = Settings(WORKDIR, datafile)
            settings.set_contact_reference_point(contact_rp)

            # only the coordinate df is needed for the density; the aligned fragments and
            # the average fragment are not used here, so they are no longer loaded
            coordinate_df = pd.read_hdf(settings.get_coordinate_df_filename(), settings.get_coordinate_df_key())

            for resolution in resolutions:
                # np.arange accumulates floating point noise, so work with the rounded value
                rounded_resolution = round(resolution, 2)

                print("\nCalculating density for central group: ", central_group, " contact group: ", contact_group, "(", contact_rp, ")",
                      "resolution: ", str(rounded_resolution))

                # resolution of the bins, in Angstrom
                settings.set_resolution(rounded_resolution)

                t0_density = time.time()
                make_density_df(settings, coordinate_df, again=True)
                density_time = time.time() - t0_density

                print(f"Duration density: {density_time}", end="\n\n")

                if write_time:
                    # append per measurement so finished timings survive an interrupted run
                    with open(density_comptimes_file, 'a', newline='') as resultsfile:
                        writer = csv.writer(resultsfile)
                        writer.writerow([central_group, contact_group, contact_rp, rounded_resolution, density_time])

# Analyzing data

In [None]:
# Columns identifying a pair, with and without the contact reference point.
pair_rp_keys = ["central", "contact", "contact_rp"]
pair_keys = ["central", "contact"]

# NOTE(review): the timing cell above writes 'coordinate_comptimes.csv', while the
# preparation timings are read from 'pre_density_comptimes.csv' here - confirm which
# file is the intended input.
df_pre = pd.read_csv('../../results/pre_density_comptimes.csv')
df = pd.read_csv('../../results/density_comptimes.csv')
df_count = pd.read_csv('../../results/amounts_structures.csv').drop_duplicates()

# Density timings at resolution 0.3 only, extended with the preparation timings ...
density_at_resolution = df[df.resolution == 0.3]
df = pd.merge(density_at_resolution, df_pre, how='left', left_on=pair_rp_keys, right_on=pair_rp_keys)
# ... and with the structure counts; the right join keeps every counted pair.
df = pd.merge(df, df_count, how='right', left_on=pair_keys, right_on=pair_keys)

# Total time per pair; the preparation time ('prep_data') is not part of the sum.
df['total'] = df['density_time'] + df['alignment'] + df['avg_frag'] + df['coordinate_df']
df = df.sort_values('total', ascending=False)

# Hard-coded fragment counts for the two RC6H5 pairs
# (presumably because these data sets were split/capped - TODO confirm).
for contact_name, amount in (("RC6H5", 250000), ("ArCH", 272727)):
    df.loc[(df.central == "RC6H5") & (df.contact == contact_name), 'amount_structures'] = amount

display(df)


## merge with density comp times

In [None]:
# One grouped bar chart per central group: for every contact group the time spent on
# superimposition, the central group model and the coordinate df, side by side.
bar_width = 0.25  # the width of the bars

# (column in df_pre, legend label), in left-to-right order within a group of bars
prep_stages = [
    ('alignment', 'Superimposition'),
    ('avg_frag', 'Computing central model'),
    ('coordinate_df', 'Calculating distances contact coordinates'),
]

for central_group in central_groups:
    group_times = df_pre[df_pre.central == central_group]
    positions = np.arange(len(group_times))

    # make that plot
    fig, ax = plt.subplots(figsize=(8, 4))
    for stage_no, (column, label) in enumerate(prep_stages):
        # every next stage is shifted one bar width to the right
        ax.bar(positions + stage_no * bar_width, group_times[column], bar_width, label=label)

    # tick in the middle of each group of three bars
    ax.set_xticks(positions + bar_width)
    ax.set_xticklabels(group_times['contact'])

    ax.set_title('Prep comp times ' + central_group)
    ax.legend()

    fig.savefig("../../results/plots/comptimes_" + central_group + ".png")
    plt.show()

In [None]:
# Stacked bar chart of the computational time per pair (rows of `df`, in their current
# order), with the number of fragments per pair drawn as a red line on a second y-axis.
bar_width = 0.5  # the width of the bars

fig, ax = plt.subplots(figsize=(12, 5))

positions = np.arange(len(df))
xtick_labels = (df["central"] + "-" + df["contact"]).tolist()

# (column in df, bar colour), stacked from bottom to top
stages = [
    ('alignment', "tab:blue"),
    ('avg_frag', "tab:orange"),
    ('coordinate_df', "tab:green"),
    ('density_time', "tab:purple"),
]

# one vectorised bar call per stage; `bottom` accumulates the height of the stages below
stage_bars = []
bottom = np.zeros(len(df))
for column, colour in stages:
    heights = df[column].to_numpy()
    stage_bars.append(ax.bar(positions, heights, bar_width, bottom=bottom, color=colour))
    bottom = bottom + heights

ax.set_xticks(positions)
ax.set_xticklabels(xtick_labels, rotation=90)

ax.set_title('Computational times')
fig.subplots_adjust(bottom=0.3)  # room for the rotated pair labels

ax.set_xlabel("Pair")
ax.set_ylabel("Computational time (s)")

ax2 = ax.twinx()
ax2.set_ylabel("Amount")

line = ax2.plot(range(len(df)), df["amount_structures"], color="red", label="no unique fragments")

# raw string: "\A" is not a valid Python escape sequence, mathtext needs the literal backslash
ax2.legend(
    [bars[0] for bars in stage_bars] + [line[0]],
    ('Superimposition', 'Central group model', 'Calculating distances contact coordinates',
     r'Calculating density at resolution 0.3$\AA$', 'Number of fragments'),
)

fig.savefig("../../results/plots/comptimes_total.png")

plt.show()

In [None]:
# Same stacked bar chart as the total computational times, but without the
# superimposition (alignment) time.
bar_width = 0.5  # the width of the bars
bar_positions = []
pair_labels = []

fig, ax = plt.subplots(figsize=(12, 5))

for position, (_, row) in enumerate(df.iterrows()):
    avg_frag_time = row['avg_frag']
    coordinate_time = row['coordinate_df']
    density_time = row['density_time']

    bar_positions.append(position)

    # stack the three stages on top of each other
    rects2 = ax.bar(position, avg_frag_time, bar_width, color="tab:orange")
    rects3 = ax.bar(position, coordinate_time, bar_width, bottom=avg_frag_time, color="tab:green")
    rects4 = ax.bar(position, density_time, bar_width,
                    bottom=avg_frag_time + coordinate_time, color="tab:purple")

    pair_labels.append(row["central"] + "-" + row['contact'])

ax.set_xticks(bar_positions)
ax.set_xticklabels(pair_labels, rotation=90)

ax.set_title('Computational times')
fig.subplots_adjust(bottom=0.3)

ax.set_xlabel("Pair")
ax.set_ylabel("Computational time (s)")

# second y-axis for the number of fragments per pair
ax2 = ax.twinx()
ax2.set_ylabel("Amount")

line = ax2.plot(range(len(df)), df["amount_structures"], color="red", label="no unique fragments")

ax2.legend((rects2[0], rects3[0], rects4[0], line[0]),
           ('avg_fragment', 'coordinate_df', 'density res 0.3', 'No. fragments'))

fig.savefig("../../results/plots/prep_times_total.png")

plt.show()