In [1]:
%matplotlib notebook
import os
import time
import numpy as np
import csv
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from classes.Settings import Settings
from helpers.geometry_helpers import make_coordinate_df
from calc_density_4 import count_points_per_square

from helpers.geometry_helpers import make_coordinate_df
from helpers.density_helpers import prepare_df

central_groups = ["RCOMe", "RNO2", "ArCI", "NO3", "RC6F5", "H2O", "RC6H5"]
contact_groups = ["CF", "RCN", "R2CO", "XH", "XH", "CCH3", "C2CH2", "RC6H5", "ArCH"]  #
to_count =       ["F",   "N",    "O",   "H", "O",  "H",     "H", "centroid", "H"] #, 

resolutions = np.arange(0.1, 1.6, 0.1)

resolutions = [0.5]
print(resolutions)
resolutions = np.flip(resolutions)
print(resolutions)

[0.5]
[0.5]


# Data Gathering

In [2]:
rerun = True

if rerun:
    with open('results/density_comp_time.csv', 'a', newline='') as resultsfile:
        writer = csv.writer(resultsfile)
        writer.writerow(['central', 'contact', 'to_count', 'resolution', 'density_time'])

        for central_group in central_groups:
            for to_count_contact, contact_group in zip(to_count, contact_groups):
                datafile = ".\\data\\" + central_group + "\\" + central_group + "_" + contact_group + "_vdw.5.cor"
                result1 = ".\\results\\" + central_group + "\\" + central_group + "_" + contact_group + "_vdw.5\\"\
                          + central_group + "_" + contact_group + "_kabsch_aligned.csv"

                for resolution in resolutions:
                    print("\nCalculating density for central group: ", central_group, " contact group: ", contact_group,
                          "resolution: ", str(round(resolution, 2)))

                    t0 = time.time()

                    settings = Settings(datafile)
                    settings.set_atom_to_count(to_count_contact)

                    # resolution of the bins, in Angstrom
                    settings.set_resolution(round(resolution, 2))
                    
                    df = pd.read_csv(settings.get_kabsch_aligned_csv_filename())
                    avg_frag = pd.read_csv(settings.get_avg_frag_filename())

                    # grab only the atoms that are in the contact groups
                    df_central = df[df['label'] == '-']
                    coordinate_df = make_coordinate_df(df_central, settings, avg_frag)

                    empty_density_df = prepare_df(df=coordinate_df, settings=settings)

                    density_df = count_points_per_square(df=empty_density_df, contact_points_df=coordinate_df, settings=settings)

                    # save so we can use the data but only change the plot - saves time :)
                    density_df.to_hdf(settings.get_density_df_filename(), settings.get_density_df_key())               

                    t1 = time.time() - t0
                    print("Duration: %.2f s." % t1)                    
                    writer.writerow([central_group, contact_group, to_count_contact, round(resolution, 2), t1])


Calculating density for central group:  RCOMe  contact group:  CF resolution:  0.5
Counting points per bin: 
0 / 7355
5000 / 7355
Duration: 1.27 s.

Calculating density for central group:  RCOMe  contact group:  RCN resolution:  0.5
Counting points per bin: 
0 / 1874
Duration: 0.14 s.

Calculating density for central group:  RCOMe  contact group:  R2CO resolution:  0.5
Counting points per bin: 
0 / 63130
5000 / 63130
10000 / 63130
15000 / 63130
20000 / 63130
25000 / 63130
30000 / 63130
35000 / 63130
40000 / 63130
45000 / 63130
50000 / 63130
55000 / 63130
60000 / 63130
Duration: 3.20 s.

Calculating density for central group:  RCOMe  contact group:  XH resolution:  0.5
Counting points per bin: 
0 / 41430
5000 / 41430
10000 / 41430
15000 / 41430
20000 / 41430
25000 / 41430
30000 / 41430
35000 / 41430
40000 / 41430
Duration: 1.77 s.

Calculating density for central group:  RCOMe  contact group:  XH resolution:  0.5
Counting points per bin: 
0 / 24131
5000 / 24131
10000 / 24131
15000 / 24

Counting points per bin: 
0 / 45296
5000 / 45296
10000 / 45296
15000 / 45296
20000 / 45296
25000 / 45296
30000 / 45296
35000 / 45296
40000 / 45296
45000 / 45296
Duration: 2.47 s.

Calculating density for central group:  RC6F5  contact group:  C2CH2 resolution:  0.5
Counting points per bin: 
0 / 16364
5000 / 16364
10000 / 16364
15000 / 16364
Duration: 1.09 s.

Calculating density for central group:  RC6F5  contact group:  RC6H5 resolution:  0.5
Counting points per bin: 
0 / 26500
5000 / 26500
10000 / 26500
15000 / 26500
20000 / 26500
25000 / 26500
Duration: 2.72 s.

Calculating density for central group:  RC6F5  contact group:  ArCH resolution:  0.5
Counting points per bin: 
0 / 43092
5000 / 43092
10000 / 43092
15000 / 43092
20000 / 43092
25000 / 43092
30000 / 43092
35000 / 43092
40000 / 43092
Duration: 3.21 s.

Calculating density for central group:  H2O  contact group:  CF resolution:  0.5
Counting points per bin: 
0 / 3775
Duration: 0.25 s.

Calculating density for central group:  H2

70000 / 77196
75000 / 77196
Duration: 5.48 s.

Calculating density for central group:  RC6H5  contact group:  CCH3 resolution:  0.5
Counting points per bin: 
0 / 445710
5000 / 445710
10000 / 445710
15000 / 445710
20000 / 445710
25000 / 445710
30000 / 445710
35000 / 445710
40000 / 445710
45000 / 445710
50000 / 445710
55000 / 445710
60000 / 445710
65000 / 445710
70000 / 445710
75000 / 445710
80000 / 445710
85000 / 445710
90000 / 445710
95000 / 445710
100000 / 445710
105000 / 445710
110000 / 445710
115000 / 445710
120000 / 445710
125000 / 445710
130000 / 445710
135000 / 445710
140000 / 445710
145000 / 445710
150000 / 445710
155000 / 445710
160000 / 445710
165000 / 445710
170000 / 445710
175000 / 445710
180000 / 445710
185000 / 445710
190000 / 445710
195000 / 445710
200000 / 445710
205000 / 445710
210000 / 445710
215000 / 445710
220000 / 445710
225000 / 445710
230000 / 445710
235000 / 445710
240000 / 445710
245000 / 445710
250000 / 445710
255000 / 445710
260000 / 445710
265000 / 445710
270

FileNotFoundError: [Errno 2] No such file or directory: '.\\results\\RC6H5\\RC6H5_RC6H5_vdw.5\\RC6H5_RC6H5_kabsch_aligned.csv'

# Analyzing data

In [None]:
df = pd.read_csv('results/density_comp_time.csv')

for central in central_groups:
    plt.figure(figsize=(6,4))
    for contact, to_count_contact in zip(contact_groups, to_count):
        dfje = df[(df.central == central) & (df.contact == contact)]
        print(len(dfje))
        plt.plot(dfje.resolution, dfje.density_time, label=central + '-' + contact)
        plt.scatter(dfje.resolution, dfje.density_time)
    
    plt.legend()
    plt.xlabel("Resolution")
    plt.ylabel("Time (s)")
    plt.title("Density computational time " + central)
    plt.savefig("results/figures/comp_time_" + central + ".svg", format='svg')

# Merge results with pre comp results

In [None]:
display(df)
grouped = df.groupby(['central', 'contact', 'to_count'])

In [None]:
id_df = grouped['density_time'].apply(lambda x: pd.Series(x.values)).unstack()
id_df = id_df.reset_index()

In [None]:
id_df.columns = ['central', 'contact', 'to_count', 'res15', 'res14', 'res13', 'res12', 'res11',\
                                                   'res10', 'res09', 'res08', 'res07', 'res06',\
                                                   'res05', 'res04', 'res03', 'res02', 'res01']
display(id_df)

In [None]:
df_pre = pd.read_csv('results/pre_density_comp_time.csv')

df_total = pd.merge(df_pre, id_df, how='left', left_on=['contact','central', 'to_count'], right_on = ['contact','central', 'to_count'])
display(df_total)

In [None]:
i = 0
r1 = []
xtick_labels = []

fig, ax = plt.subplots(figsize=(9,15))
    
for _, row in df_total.iterrows():
    bars1 = row['alignment']
    bars2 = row['avg_fragment']
    bars3 = row['coordinate_df']
    bars4 = row['res01']
    bars5 = row['res02']
    bars6 = row['res03']
    bars7 = row['res04']
    bars8 = row['res05']
        
    bar_width = 0.5  # the width of the bars
    r1.append(i)

    bottom = 0
    # make that plot
    rects1 = plt.bar(i, bars1, bar_width, bottom=0, color="tab:blue")
    
    bottom += bars1
    rects2 = plt.bar(i, bars2, bar_width, bottom=bottom, color="tab:orange")
    
    bottom += bars2
    rects3 = plt.bar(i, bars3, bar_width, bottom=bottom, color="tab:green")
    
    bottom += bars3
    rects01 = plt.bar(i, bars4, bar_width, bottom=bottom, color='tab:red')
    
    bottom += bars4
    rects02 = plt.bar(i, bars5, bar_width, bottom=bottom, color='tab:purple')
    
    bottom += bars5
    rects03 = plt.bar(i, bars6, bar_width, bottom=bottom, color='tab:brown')
    
    bottom += bars6
    rects04 = plt.bar(i, bars7, bar_width, bottom=bottom, color='tab:pink')
    
    bottom += bars7
    rects05 = plt.bar(i, bars8, bar_width, bottom=bottom, color='tab:gray')
    
    xtick_labels.append(row["central"] + "-" + row['contact'])
    i+=1


    
plt.xticks(r1, xtick_labels, rotation=90)

plt.title('Computational times')
plt.subplots_adjust(bottom=0.3)

ax.set_xlabel("Pair")
ax.set_ylabel("Computational time (s)")

# ax.set_ylim(0,100)

ax2 = ax.twinx()
ax2.set_ylabel("Amount")
# ax2.set_ylim(0, 600000)

line = ax2.plot(range(len(df_total)), df_total["amount_structures"], color="red", label="no unique fragments")

plt.legend((rects1[0], rects2[0], rects3[0], rects01[0], rects02[0], rects03[0], rects04[0], rects05[0], line[0]),
           ('alignment', 'avg_fragment', 'coordinate_df', 'density res 0.1', 'density res 0.2',\
            'density res 0.3', 'density res 0.4', 'density res 0.5', 'No. fragments'))

plt.savefig("results/figures/comp_times_total.svg", format="svg")

plt.show()

In [None]:
df_total.describe()

In [None]:
# df_total = df_total.drop(columns=['Unnamed: 0', 'total'])
print(df_total.columns)

In [None]:
# df_total = df_total[['central', 'contact', 'to_count', 'alignment', 'coordinate_df',
#        'avg_fragment', 'total', 'res15', 'res14', 'res13', 'res12', 'res11', 'res10', 'res09', 'res08',
#        'res07', 'res06', 'res05', 'res04', 'res03', 'res02', 'res01']]

# sort again
df_total['total_density'] = df_total.iloc[:, -15:].sum(axis=1)
df_total['total'] = df_total['total_density'] + df_total['alignment'] + df_total['avg_fragment'] + df_total['coordinate_df']
df_total = df_total.sort_values("total", ascending=False)

In [None]:
i = 0
r1 = []
xtick_labels = []

fig, ax = plt.subplots(figsize=(9,12))
    
for _, row in df_total.iterrows():
    bars1 = row['alignment']
    bars2 = row['avg_fragment']
    bars3 = row['coordinate_df']
    bars4 = row['res01']
    bars5 = row['res02']
    bars6 = row['res03']
    bars7 = row['res04']
    bars8 = row['res05']
        
    bar_width = 0.5  # the width of the bars
    r1.append(i)

    bottom = 0
    # make that plot
    rects1 = plt.bar(i, bars1, bar_width, bottom=0, color="tab:blue")
    
    bottom += bars1
    rects2 = plt.bar(i, bars2, bar_width, bottom=bottom, color="tab:orange")
    
    bottom += bars2
    rects3 = plt.bar(i, bars3, bar_width, bottom=bottom, color="tab:green")
    
    bottom += bars3
    rects01 = plt.bar(i, bars4, bar_width, bottom=bottom, color='tab:red')
    
    bottom += bars4
    rects02 = plt.bar(i, bars5, bar_width, bottom=bottom, color='tab:purple')
    
    bottom += bars5
    rects03 = plt.bar(i, bars6, bar_width, bottom=bottom, color='tab:brown')
    
    bottom += bars6
    rects04 = plt.bar(i, bars7, bar_width, bottom=bottom, color='tab:pink')
    
    bottom += bars7
    rects05 = plt.bar(i, bars8, bar_width, bottom=bottom, color='tab:gray')
    
    xtick_labels.append(row["central"] + "-" + row['contact'])
    i+=1


    
plt.xticks(r1, xtick_labels, rotation=90)

plt.title('Computational times')
plt.subplots_adjust(bottom=0.3)

ax.set_xlabel("Pair")
ax.set_ylabel("Computational time (s)")

# ax.set_ylim(0,100)

ax2 = ax.twinx()
ax2.set_ylabel("Amount")
# ax2.set_ylim(0, 600000)

line = ax2.plot(range(len(df_total)), df_total["amount_structures"], color="gold", label="no unique fragments")

plt.legend((rects1[0], rects2[0], rects3[0], rects01[0], rects02[0], rects03[0], rects04[0], rects05[0], line[0]),
           ('alignment', 'avg_fragment', 'coordinate_df', 'density res 0.1', 'density res 0.2',\
            'density res 0.3', 'density res 0.4', 'density res 0.5', 'No. fragments'))

plt.savefig("results/figures/comp_times_total.svg", format="svg", bbox_inches='tight')
plt.savefig("results/figures/comp_times_total.png")

plt.show()

In [None]:
df_total.describe()

In [None]:
df_test = df_total.copy()
df_test['total'] = df_test['total'] - df_test['res01']
df_test = df_test.drop(columns=['res01'])
df_test.describe()