# Time all steps
In this notebook, you can run all calculations and time how long the steps take.

In [None]:
import sys

sys.path.append('..//scripts//')

In [None]:
%matplotlib notebook

# Allows for automatic reloading of imports, which makes it unnecessary to restart the kernel
# whenever a function is changed.
%load_ext autoreload
%autoreload 2

import os
import time
import numpy as np
import csv
import pandas as pd

from tqdm import tqdm

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from constants.paths import WORKDIR, CENTRAL_GROUPS_CSV, RADII_CSV, METHYL_CSV

from classes.Settings import Settings, AlignmentSettings
from classes.Radii import Radii

from helpers.alignment_helpers import (calc_rmse, kabsch_align, perform_rotations,
                                       perform_translation, read_raw_data)
from align_kabsch import align_all_fragments, split_file_if_too_big
from calc_avg_fragment import calc_kabsch_rmse, calc_avg_rmse, reset_labels_with_kmeans
from helpers.geometry_helpers import make_coordinate_df, average_fragment, add_model_methyl
from helpers.density_helpers import prepare_df, make_density_df

# Central groups that are processed, in run order.
# NOTE(review): "ArCI" and "REt" each appear twice, so every loop over this list
# handles those groups twice - confirm whether the repeats are intentional.
central_groups = [
    "REt", "ArCI", "RCOMe", "RNO2", "ArCI",
    "REt", "NO3", "RC6F5", "H2O", "RC6H5",
]

# Each contact group together with the atom (or "centroid") passed to
# set_atom_to_count for it; the cells below iterate the two lists in lockstep via zip.
_contact_pairs = [
    ("CF", "F"), ("RCN", "N"), ("R2CO", "O"), ("XH", "H"), ("XH", "O"),
    ("CCH3", "H"), ("C2CH2", "H"), ("RC6H5", "centroid"), ("ArCH", "H"),
]
contact_groups = [group for group, _ in _contact_pairs]
to_count = [atom for _, atom in _contact_pairs]

# Bin resolutions from np.arange(0.1, 1.55, 0.05), reversed so the largest value comes first.
resolutions = np.arange(0.1, 1.55, 0.05)[::-1]

# Count the structures

In [None]:
# Count, for every central/contact pair, how many entries the .cor file holds and
# how many distinct identifiers they come from. Set `count = True` to (re)run.
count = False
counts = []

if count:
    results_path = '../../results/amounts_structures.csv'

    # The file is opened in append mode, so only emit the header when the file is
    # new or empty; a repeated header row would end up in the data when the CSV is
    # read back.
    needs_header = not os.path.exists(results_path) or os.path.getsize(results_path) == 0

    with open(results_path, 'a', newline='') as resultsfile:
        writer = csv.writer(resultsfile)
        if needs_header:
            writer.writerow(['central', 'contact', 'amount_cif', 'amount_structures'])

        for central_group in central_groups:

            # zip is kept so the iteration stops at the shorter of the two lists,
            # exactly like the other cells; the atom to count is not needed here.
            for _atom, contact_group in zip(to_count, contact_groups):
                datafile = "..\\data\\" + central_group + "\\" + central_group + "_" + contact_group + "_vdw.5.cor"

                # Every line containing "**" contributes the text before the first "**".
                with open(datafile, 'r') as cor_file:
                    ids = [line.split("**")[0] for line in cor_file if "**" in line]

                print(central_group, contact_group, len(ids), len(set(ids)))
                counts.append(len(ids))

                # amount_cif = all matching lines, amount_structures = distinct identifiers
                writer.writerow([central_group, contact_group, len(ids), len(set(ids))])

# Align, AVG fragment, Coordinate df

In [None]:
# Align every central/contact pair, build its average fragment and store it.
# With `write_times = True` the time needed to build the coordinate dataframe is
# also measured and appended to results/redo_coordinate_df.csv.
run_everything_again = True
write_times = False

if run_everything_again:
    # The timing CSV has to stay open for the whole run: rows are written inside the
    # loops below, so the handle cannot live in a short `with` block that closes first.
    resultsfile = open('results/redo_coordinate_df.csv', 'a', newline='') if write_times else None

    try:
        writer = None
        if resultsfile is not None:
            writer = csv.writer(resultsfile)
            writer.writerow(['central', 'contact', 'to_count', 'coordinate_df'])

        for central_group in central_groups:
            for to_count_contact, contact_group in zip(to_count, contact_groups):

                print(central_group, contact_group, to_count_contact)

                datafile = "..\\data\\" + central_group + "\\" + central_group + "_" + contact_group + "_vdw.5.cor"

                # makedirs also creates missing parents and tolerates an existing directory
                os.makedirs(f"..\\..\\results\\pairs\\{central_group}", exist_ok=True)

                t0_alignment = time.time()

                # the label file sits next to the data file, with a .csv extension
                labelfile = datafile.rsplit('.', 1)[0] + '.csv'

                settings = AlignmentSettings("..\\..", datafile, labelfile)
                settings.set_atom_to_count(to_count_contact)
                settings.set_central_group_csv(CENTRAL_GROUPS_CSV)
                settings.prepare_alignment()

                split_file_if_too_big(settings.coordinate_file, settings.no_atoms)
                settings.update_coordinate_filename()

                # TODO: align only if not aligned yet
                align_all_fragments(settings)

                alignment_time = time.time() - t0_alignment

                t0_avg_frag = time.time()

                aligned_fragments_df = pd.read_csv(settings.get_aligned_csv_filename())

                radii = Radii(RADII_CSV)
                avg_frag = average_fragment(aligned_fragments_df, settings, radii)

                if settings.central_group_name == "RCOMe" or settings.central_group_name == "REt":
                    print(METHYL_CSV)
                    # NOTE(review): the result is bound to `fragment`, but `avg_frag` is what
                    # gets saved below. Unless add_model_methyl modifies `avg_frag` in place,
                    # the model methyl never reaches the stored file - confirm.
                    fragment = add_model_methyl(CSV=METHYL_CSV, fragment=avg_frag,
                                                settings=settings, radii=radii)

                avg_frag.to_csv(settings.get_avg_frag_filename(), index=False)
                avg_fragment_time = time.time() - t0_avg_frag

                if write_times:
                    # Time the coordinate dataframe step on the rows labelled "-",
                    # the same selection the density cell feeds to make_coordinate_df.
                    t0_coordinate_df = time.time()
                    contact_rows = aligned_fragments_df[aligned_fragments_df.label == "-"]
                    coordinate_df = make_coordinate_df(contact_rows, settings, avg_frag, radii)
                    coordinate_df_time = time.time() - t0_coordinate_df

                    writer.writerow([central_group, contact_group, to_count_contact, coordinate_df_time])
    finally:
        if resultsfile is not None:
            resultsfile.close()

# Calc Densities

In [None]:
# Compute the density dataframe for every pair at every resolution and time each run.
# With `write_time = True` the durations are appended to density_comp_time.csv.
rerun = True
write_time = False

if rerun:
    with open('../../results/density_comp_time.csv', 'a', newline='') as resultsfile:
        if write_time:
            writer = csv.writer(resultsfile)
            writer.writerow(['central', 'contact', 'to_count', 'resolution', 'density_time'])

        for central_group in central_groups:
            for to_count_contact, contact_group in zip(to_count, contact_groups):
                datafile = f".\\data\\{central_group}\\{central_group}_{contact_group}_vdw.5.cor"

                for resolution in resolutions:
                    # resolution of the bins, in Angstrom; rounded once and reused below
                    rounded_resolution = round(resolution, 2)

                    print("\nCalculating density for central group: ", central_group, " contact group: ", contact_group,
                          "resolution: ", str(rounded_resolution))

                    # the timed region covers settings creation, both CSV reads and both dataframe steps
                    t0 = time.time()

                    settings = Settings(WORKDIR, datafile)
                    settings.set_atom_to_count(to_count_contact)
                    settings.set_resolution(rounded_resolution)

                    df = pd.read_csv(settings.get_aligned_csv_filename())
                    avg_frag = pd.read_csv(settings.get_avg_frag_filename())

                    radii = Radii(RADII_CSV)

                    # keep only the rows labelled '-' (per the original note: the contact-group atoms)
                    contact_rows = df[df['label'] == '-']
                    coordinate_df = make_coordinate_df(contact_rows, settings, avg_frag, radii)

                    make_density_df(settings, coordinate_df)

                    elapsed = time.time() - t0
                    print("Duration: %.2f s." % elapsed)

                    if write_time:
                        writer.writerow([central_group, contact_group, to_count_contact, rounded_resolution, elapsed])

# Analyzing data

In [None]:
# Load the preparation timings and attach the structure counts per (central, contact) pair.
pair_keys = ['central', 'contact']

df = pd.read_csv('results/pre_density_comp_time.csv')
df_count = pd.read_csv('results/amounts_structures.csv')
df = df.merge(df_count, how='left', on=pair_keys)

# The re-measured coordinate_df timings get their own column name so they do not
# clash with the `coordinate_df` column used by the plots below.
real_coordinate_time = pd.read_csv('results/redo_coordinate_df.csv')
real_coordinate_time.columns = ['central', 'contact', 'to_count', 'coordinate_df_real']

df = df.merge(real_coordinate_time, how='left', on=pair_keys + ['to_count'])

## Merge with density computation times

In [None]:
display(df)

# Spread the density timings of each (central, contact, to_count) group over columns,
# one column per measurement in the order the rows appear.
# NOTE(review): `df` comes from pre_density_comp_time.csv in the cell above; this
# presumes that frame carries a `density_time` column - confirm.
grouped = df.groupby(['central', 'contact', 'to_count'])

id_df = (
    grouped['density_time']
    .apply(lambda times: pd.Series(times.values))
    .unstack()
    .reset_index()
)

# Exactly 15 timing columns are expected, named res15 ... res01.
# NOTE(review): the `resolutions` array in the config cell is built with a 0.05 step,
# which does not obviously yield 15 values - confirm the timing file matches.
id_df.columns = ['central', 'contact', 'to_count'] + [f"res{tenths:02d}" for tenths in range(15, 0, -1)]
display(id_df)

df_pre = pd.read_csv('results/pre_density_comp_time.csv')

df_total = df_pre.merge(id_df, how='left', on=['contact', 'central', 'to_count'])
display(df_total)

In [None]:
# One grouped bar chart per central group: the three preparation steps side by side
# for each of its contact groups.
for central_group in central_groups:
    print(central_group)
    group_rows = df[df.central == central_group]

    bar_width = 0.25  # the width of the bars
    positions = np.arange(len(group_rows))

    # make that plot
    fig, ax = plt.subplots()
    step_series = [
        ('alignment', 'Alignment'),
        ('avg_fragment', 'avg_fragment'),
        ('coordinate_df', 'coordinate_df'),
    ]
    for offset, (column, label) in enumerate(step_series):
        # each step is shifted one bar width to the right of the previous one
        ax.bar(positions + offset * bar_width, group_rows[column], bar_width, label=label)

    # ticks sit under the middle bar of every group
    ax.set_xticks(positions + bar_width)
    ax.set_xticklabels(group_rows['contact'])

    ax.set_title('Prep comp times ' + central_group)
    ax.legend()

    fig.savefig("results/figures/Prep_times_" + central_group + ".svg", format="svg")
    plt.show()

In [None]:
# Total preparation time per pair; slowest pairs first.
df["total"] = df["alignment"] + df["avg_fragment"] + df["coordinate_df"]
df = df.sort_values("total", ascending=False)

# `key` is passed by keyword: pandas has deprecated positional arguments to to_hdf
# other than the path.
df.to_hdf('ready_pre_density_comp.hdf', key='key')

In [None]:
# Stacked bar per pair (alignment, average fragment, coordinate dataframe) with the
# number of structures drawn as a line on a secondary axis.
bar_width = 0.5  # the width of the bars
segments = [
    ("alignment", "tab:blue"),
    ("avg_fragment", "tab:orange"),
    ("coordinate_df", "tab:green"),
]

fig, ax = plt.subplots(figsize=(9,5))

positions = []
xtick_labels = []
handles = {}  # last bar container drawn per segment, used for the legend

for position, (_, row) in enumerate(df.iterrows()):
    # make that plot: every segment starts where the previous one ended
    bottom = 0
    for column, color in segments:
        handles[column] = ax.bar(position, row[column], bar_width, bottom=bottom, color=color)
        bottom += row[column]

    positions.append(position)
    xtick_labels.append(row["central"] + "-" + row['contact'])

plt.xticks(positions, xtick_labels, rotation=90)

ax.set_title('Prep comp times')
fig.subplots_adjust(bottom=0.3)

ax.set_xlabel("Pair")
ax.set_ylabel("Computational time (s)")

ax2 = ax.twinx()
ax2.set_ylabel("Amount")

line = ax2.plot(range(len(df)), df["amount_structures"], color="red", label="no unique fragments")

plt.legend((handles["alignment"][0], handles["avg_fragment"][0], handles["coordinate_df"][0], line[0]),
           ('alignment', 'avg_fragment', 'coordinate_df', 'No. fragments'))

fig.savefig("results/figures/Prep_times_total.svg", format="svg")

plt.show()

In [None]:
# Join the jit coordinate_df timings onto the existing preparation timings.
jit_df = pd.read_csv('results/coordinate_df_jit.csv')

combined = (
    jit_df
    .merge(df, how='left', on=['contact', 'central', 'to_count'])
    .assign(total_jit=lambda frame: frame["alignment"] + frame["avg_fragment"] + frame["coordinate_df_jit"])
    # ordered by the non-jit "total" column that already exists on `df`
    .sort_values("total", ascending=False)
    # time difference between the regular and the jit coordinate_df step
    .assign(diff=lambda frame: frame["coordinate_df"] - frame["coordinate_df_jit"])
)

In [None]:
# Stacked bar per pair using the jit coordinate_df timing, with the number of
# structures drawn as a line on a secondary axis.
bar_width = 0.5  # the width of the bars
segments = [
    ("alignment", "tab:blue"),
    ("avg_fragment", "tab:orange"),
    ("coordinate_df_jit", "tab:green"),
]

fig, ax = plt.subplots(figsize=(9,5))

positions = []
xtick_labels = []
handles = {}  # last bar container drawn per segment, used for the legend

for position, (_, row) in enumerate(combined.iterrows()):
    # make that plot: every segment starts where the previous one ended
    bottom = 0
    for column, color in segments:
        handles[column] = ax.bar(position, row[column], bar_width, bottom=bottom, color=color)
        bottom += row[column]

    positions.append(position)
    xtick_labels.append(row["central"] + "-" + row['contact'])

plt.xticks(positions, xtick_labels, rotation=90)

ax.set_title('Prep comp times')
fig.subplots_adjust(bottom=0.3)

ax.set_xlabel("Pair")
ax.set_ylabel("Computational time (s)")

ax2 = ax.twinx()
ax2.set_ylabel("Amount")

# The line is taken from `combined`, the frame the bars were drawn from, so that it
# always has one point per bar in the same row order.
line = ax2.plot(range(len(combined)), combined["amount_structures"], color="red", label="no unique fragments")

plt.legend((handles["alignment"][0], handles["avg_fragment"][0], handles["coordinate_df_jit"][0], line[0]),
           ('alignment', 'avg_fragment', 'coordinate_df_jit', 'No. fragments'))

# NOTE(review): this is the same file name the non-jit plot is saved under, so this
# figure replaces it on disk - confirm that is intended or pick a separate name.
fig.savefig("results/figures/Prep_times_total.svg", format="svg")

plt.show()

In [None]:
# Stacked bar per pair: the three preparation steps plus the density timings in
# columns res01 ... res05, with the number of structures on a secondary axis.
bar_width = 0.5  # the width of the bars

# (column, bar colour, legend label), listed bottom to top
segments = [
    ('alignment', "tab:blue", 'alignment'),
    ('avg_fragment', "tab:orange", 'avg_fragment'),
    ('coordinate_df', "tab:green", 'coordinate_df'),
    ('res01', 'tab:red', 'density res 0.1'),
    ('res02', 'tab:purple', 'density res 0.2'),
    ('res03', 'tab:brown', 'density res 0.3'),
    ('res04', 'tab:pink', 'density res 0.4'),
    ('res05', 'tab:gray', 'density res 0.5'),
]

fig, ax = plt.subplots(figsize=(9,15))

positions = []
xtick_labels = []
handles = {}  # last bar container drawn per segment, used for the legend

for position, (_, row) in enumerate(df_total.iterrows()):
    # make that plot: every segment starts where the previous one ended
    bottom = 0
    for column, color, _label in segments:
        handles[column] = ax.bar(position, row[column], bar_width, bottom=bottom, color=color)
        bottom += row[column]

    positions.append(position)
    xtick_labels.append(row["central"] + "-" + row['contact'])

plt.xticks(positions, xtick_labels, rotation=90)

ax.set_title('Computational times')
fig.subplots_adjust(bottom=0.3)

ax.set_xlabel("Pair")
ax.set_ylabel("Computational time (s)")

ax2 = ax.twinx()
ax2.set_ylabel("Amount")

line = ax2.plot(range(len(df_total)), df_total["amount_structures"], color="red", label="no unique fragments")

legend_handles = tuple(handles[column][0] for column, _color, _label in segments) + (line[0],)
legend_labels = tuple(label for _column, _color, label in segments) + ('No. fragments',)
plt.legend(legend_handles, legend_labels)

fig.savefig("results/figures/comp_times_total.svg", format="svg")

plt.show()

In [None]:
# Sum the density timings by column name rather than by position: a positional
# slice of "the last 15 columns" would pick up `total_density` and `total` as soon
# as this cell is run a second time, silently corrupting both totals.
density_columns = [f"res{tenths:02d}" for tenths in range(1, 16)]
df_total['total_density'] = df_total[density_columns].sum(axis=1)

# overall time per pair = all density runs + the three preparation steps
df_total['total'] = df_total['total_density'] + df_total['alignment'] + df_total['avg_fragment'] + df_total['coordinate_df']
df_total = df_total.sort_values("total", ascending=False)

In [None]:
# Same stacked chart as the previous plot cell, drawn from the current df_total and
# saved as both SVG and PNG.
bar_width = 0.5  # the width of the bars

# (column, bar colour, legend label), listed bottom to top
segments = [
    ('alignment', "tab:blue", 'alignment'),
    ('avg_fragment', "tab:orange", 'avg_fragment'),
    ('coordinate_df', "tab:green", 'coordinate_df'),
    ('res01', 'tab:red', 'density res 0.1'),
    ('res02', 'tab:purple', 'density res 0.2'),
    ('res03', 'tab:brown', 'density res 0.3'),
    ('res04', 'tab:pink', 'density res 0.4'),
    ('res05', 'tab:gray', 'density res 0.5'),
]

fig, ax = plt.subplots(figsize=(9,12))

positions = []
xtick_labels = []
handles = {}  # last bar container drawn per segment, used for the legend

for position, (_, row) in enumerate(df_total.iterrows()):
    # make that plot: every segment starts where the previous one ended
    bottom = 0
    for column, color, _label in segments:
        handles[column] = ax.bar(position, row[column], bar_width, bottom=bottom, color=color)
        bottom += row[column]

    positions.append(position)
    xtick_labels.append(row["central"] + "-" + row['contact'])

plt.xticks(positions, xtick_labels, rotation=90)

ax.set_title('Computational times')
fig.subplots_adjust(bottom=0.3)

ax.set_xlabel("Pair")
ax.set_ylabel("Computational time (s)")

ax2 = ax.twinx()
ax2.set_ylabel("Amount")

line = ax2.plot(range(len(df_total)), df_total["amount_structures"], color="gold", label="no unique fragments")

legend_handles = tuple(handles[column][0] for column, _color, _label in segments) + (line[0],)
legend_labels = tuple(label for _column, _color, label in segments) + ('No. fragments',)
plt.legend(legend_handles, legend_labels)

# the SVG goes to the same path the previous plot cell wrote to
fig.savefig("results/figures/comp_times_total.svg", format="svg", bbox_inches='tight')
fig.savefig("results/figures/comp_times_total.png")

plt.show()