In [None]:
%matplotlib notebook

import copy

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from mpl_toolkits.mplot3d import Axes3D

from classes.Settings import Settings
from helpers.plot_functions import plot_density, plot_fragment_colored, plot_vdw_spheres
from helpers.density_helpers import calculate_no_bins, add_boundaries_per_bin
from calc_avg_fragment_2 import read_results_alignment, make_avg_fragment_if_not_exists

from helpers.density_helpers import prepare_df, find_available_volume
from helpers.geometry_helpers import (make_coordinate_df,
                                      get_vdw_distance_contact)

from sklearn.cluster import KMeans

from tqdm import tqdm
import time
import math
import copy

In [None]:
# Bin widths used in the resolution sweep: np.arange from 0.1 up to (not including) 1.1
# in steps of 0.1. The values are numpy floats and carry the usual float noise.
resolutions1 = np.arange(0.1, 1.1, 0.1)
resolutions = [*resolutions1]

## Other stuff

In [None]:
def make_density_plot(avg_fragment, density_df, settings):
    """Draw the fragment and the density on one 3D axes, then save and show the figure.

    Parameters
    ----------
    avg_fragment
        Forwarded to ``plot_fragment_colored``.
    density_df
        Forwarded to ``plot_density`` as ``df``.
    settings
        Supplies the output filename (``get_density_plotname()``) and the
        ``resolution`` shown in the title; also forwarded to ``plot_density``.
    """
    output_path = settings.get_density_plotname()

    figure = plt.figure()
    axes: Axes3D = figure.add_subplot(111, projection='3d')

    # fragment first, density second; both helpers hand the axes back
    axes = plot_fragment_colored(axes, avg_fragment)
    mappable, axes = plot_density(ax=axes, df=density_df, settings=settings)

    axes.set_title(f"4D density plot\n Resolution: {settings.resolution}")
    figure.colorbar(mappable)

    plt.savefig(output_path)
    plt.show()

In [None]:
# Aligned-results file this notebook works on; "H" is the atom that gets counted.
aligned_csv = ".\\results\\NO3\\NO3_XH_vdw.5\\NO3_XH_vdw.5_aligned.csv"

settings = Settings(aligned_csv)
settings.set_atom_to_count("H")
settings.set_resolution(0.5)

df = read_results_alignment(settings.get_aligned_csv_filename())
avg_fragment = make_avg_fragment_if_not_exists(settings, df)

# The density frame is read from the HDF file named by the settings, not recomputed here.
density_df = pd.read_hdf(settings.get_density_df_filename(),
                         settings.get_density_df_key())

make_density_plot(avg_fragment, density_df, settings)

In [None]:
def calc_clusters(density_df, amount_of_clusters):
    """Run k-means on the centres of the bins flagged in ``to_cluster``.

    The resulting labels are written into the ``cluster`` column of
    ``density_df`` (in place) for the flagged rows only.

    Parameters
    ----------
    density_df
        Frame with ``x_center``, ``y_center``, ``z_center`` and a boolean
        ``to_cluster`` column.
    amount_of_clusters
        Passed to ``KMeans`` as ``n_clusters``.

    Returns
    -------
    tuple
        ``(density_df, cluster_centers)`` where ``cluster_centers`` is
        ``KMeans.cluster_centers_``.
    """
    selected = density_df['to_cluster']
    bins = density_df[selected]

    # one row per bin, columns x / y / z
    centres = np.column_stack([bins.x_center, bins.y_center, bins.z_center])

    # fixed random_state keeps the labelling reproducible between runs
    model = KMeans(n_clusters=amount_of_clusters, random_state=1).fit(centres)

    density_df.loc[selected, "cluster"] = model.labels_

    return density_df, model.cluster_centers_

In [None]:
def make_cluster_plot(avg_fragment, df, column, settings, centroids):
    """Show the clustered density bins and their centroids around the fragment.

    Parameters
    ----------
    avg_fragment
        Forwarded to ``plot_fragment_colored``.
    df
        Density frame with ``x_center``, ``y_center``, ``z_center``, the label
        column named by ``column`` and ``<to_count_contact>_normalized``.
        A ``cluster_color`` column is written to it in place.
    column
        Name of the column holding the cluster label (NaN = not clustered).
    settings
        Supplies ``to_count_contact`` and the ``resolution`` shown in the title.
    centroids
        Sequence of (x, y, z); entry ``i`` is drawn in the colour of label ``i``
        and labelled "Cluster: i+1".
    """
    # The first three colours are the original palette. The extra entries and the
    # modulo lookup keep runs with more than three clusters from raising IndexError.
    colors = ["red", "green", "blue", "orange", "purple", "cyan", "magenta", "brown"]

    def color_of(label):
        return colors[int(label) % len(colors)]

    df["cluster_color"] = "grey"

    labelled = df[column].notna()
    df.loc[labelled, "cluster_color"] = [color_of(label) for label in df.loc[labelled, column]]

    # empty bins are not drawn at all
    df = df[df[settings.to_count_contact + "_normalized"] > 0]

    fig = plt.figure()
    ax: Axes3D = fig.add_subplot(111, projection='3d')

    ax = plot_fragment_colored(ax, avg_fragment)

    greys = df[df.cluster_color == "grey"]
    rest = df[df.cluster_color != "grey"]

    for i, centroid in enumerate(centroids):
        ax.scatter(centroid[0], centroid[1], centroid[2], s=100,
                   label="Cluster: " + str(i + 1), color=color_of(i))

    # unclustered bins: tiny, semi-transparent markers
    ax.scatter(list(greys.x_center), list(greys.y_center), list(greys.z_center), s=0.1,
               color=list(greys.cluster_color), alpha=0.75)

    ax.scatter(list(rest.x_center), list(rest.y_center), list(rest.z_center),
               color=list(rest.cluster_color))

    ax.set_title("Clusters from density plot\n Resolution: " + str(settings.resolution))

    ax.set_xlim(-6, 6)
    ax.set_ylim(-6, 6)

    ax.set_xlabel("X coordinate")
    ax.set_ylabel("Y coordinate")
    ax.set_zlabel("Z coordinate")

    ax.legend(fontsize='x-small')

    plt.show()

In [None]:
def find_bins_to_cluster(settings, fraction=0.25):
    """Load the density frame and flag the bins that are full enough to cluster.

    Columns added to the loaded frame: ``x_center`` / ``y_center`` / ``z_center``
    (bin start plus half a resolution), ``<to_count_contact>_normalized``
    (count divided by the total count), ``cluster_color`` and ``cluster``
    (both NaN) and the boolean ``to_cluster``.

    Parameters
    ----------
    settings
        Supplies the HDF filename and key, ``resolution`` and ``to_count_contact``.
    fraction
        A bin is flagged when its count is strictly greater than
        ``fraction`` times the count of the fullest bin.

    Returns
    -------
    The loaded frame with the extra columns.
    """
    filename = settings.get_density_df_filename()
    key = settings.get_density_df_key()

    density_df = pd.read_hdf(filename, key)
    print(filename, key)

    # bin centre = bin start + half a bin width, per axis
    half_bin = 0.5 * settings.resolution
    for axis in ("x", "y", "z"):
        density_df[axis + "_center"] = density_df[axis + "start"] + half_bin

    count_column = settings.to_count_contact
    counts = density_df[count_column]

    # normalise so that the column sums to 1
    density_df[count_column + "_normalized"] = counts / counts.sum()

    # cleared so that colours from an earlier plot do not linger
    density_df["cluster_color"] = np.nan

    # threshold relative to the fullest bin
    max_bin = counts.max()
    threshold = max_bin * fraction
    print("Threshold k-means:", threshold, "max_bin:", max_bin, "with fraction:", fraction)

    density_df["cluster"] = np.nan
    density_df["to_cluster"] = counts > threshold

    return density_df

In [None]:
amount_of_clusters = 3

# One record per (resolution, cluster). The frame is built once after the sweep
# instead of assigning rows into a pre-allocated object-dtype frame.
cluster_columns = ["resolution", "cluster_id", "centroid_x", "centroid_y", "centroid_z",
                   "cluster_volume", "datafraction"]
cluster_records = []

for resolution in resolutions:
    # np.arange steps carry float noise (e.g. 0.30000000000000004); round once and
    # use the same value for the settings and for the stored record.
    rounded_resolution = round(float(resolution), 2)
    print("RESOLUTION: ", rounded_resolution)
    settings.set_resolution(rounded_resolution)

    # work only with bins that hold more than 0.3 * the count of the fullest bin
    density_df = find_bins_to_cluster(settings=settings, fraction=0.3)

    # calc for each bin in what cluster it belongs
    density_df, centroids = calc_clusters(density_df, amount_of_clusters)

    make_cluster_plot(avg_fragment, density_df, 'cluster', settings, centroids)

    normalized_column = settings.to_count_contact + "_normalized"

    for i, centroid in enumerate(centroids):
        # density_df is used directly; the global `df` (aligned results) stays untouched
        members = density_df[density_df.cluster == i]

        volume = len(members) * (settings.resolution**3)
        data_fraction = members[normalized_column].sum()

        # specific for NO3 - XH: k-means labels are arbitrary, so the cluster id is
        # derived from where the centroid lies
        cluster_idx = 3

        if centroid[0] > 1.5:
            cluster_idx = 1
        elif centroid[1] < -0.5:
            cluster_idx = 2

        cluster_records.append({
            "resolution": rounded_resolution,
            "cluster_id": cluster_idx,
            "centroid_x": centroid[0],
            "centroid_y": centroid[1],
            "centroid_z": centroid[2],
            "cluster_volume": volume,
            "datafraction": data_fraction,
        })

# set up cluster df holding all the clusters and their information
cluster_df = pd.DataFrame(cluster_records, columns=cluster_columns)

In [None]:
# Show the table with one row per (resolution, cluster) filled by the resolution sweep.
display(cluster_df)

In [None]:
clusters = [[], [], []]

# Palette indexed by cluster_id - 1; the line plots further down reuse it.
colors = ["green", "tab:green", "fuchsia"]

fig, ax = plt.subplots(figsize=(5, 5))
ax.set_title("Centroid of the clusters for all resolutions")

# one marker per stored centroid, x against y
for cx, cy, cluster_id in zip(cluster_df.centroid_x, cluster_df.centroid_y, cluster_df.cluster_id):
    ax.scatter(cx, cy, color=colors[int(cluster_id - 1)])

# atoms of the average fragment: "O" in red, every other symbol in grey
for _, atom in avg_fragment.iterrows():
    face_color = "red" if atom.atom_symbol == "O" else "grey"
    ax.scatter(atom.atom_x, atom.atom_y, edgecolor="black", color=face_color)

ax.set_xlim(-2, 5)
ax.set_ylim(-3, 4)
plt.show()

In [None]:
# Cast every column to float, then add the data fraction per unit of cluster volume.
cluster_df = cluster_df.astype("float").assign(
    fraction_over_volume=lambda frame: frame["datafraction"] / frame["cluster_volume"]
)

In [None]:
plt.figure(figsize=(6,4))
plt.title("Cluster volume")

for i in range(amount_of_clusters):
    rows = cluster_df[cluster_df["cluster_id"] == i + 1]

    # colors[i], not colors[i - 1]: the centroid plot colours cluster_id with
    # colors[cluster_id - 1], and cluster_id is i + 1 here
    plt.plot(rows.resolution,
             rows.cluster_volume,
             label="Cluster " + str(i + 1),
             color=colors[i])

plt.xlabel("Resolution")
# raw string: "\A" is not a valid escape sequence in a normal string literal
plt.ylabel(r"Volume ($\AA^{3}$)")
plt.legend()
plt.show()

In [None]:
plt.figure(figsize=(6,4))
plt.title("Data fraction per cluster")

for i in range(amount_of_clusters):
    rows = cluster_df[cluster_df["cluster_id"] == i + 1]

    # colors[i], not colors[i - 1]: the centroid plot colours cluster_id with
    # colors[cluster_id - 1], and cluster_id is i + 1 here
    plt.plot(rows.resolution,
             rows.datafraction,
             label="Cluster " + str(i + 1),
             color=colors[i])

plt.legend()
plt.xlabel("Resolution")
plt.ylabel("Fraction")
plt.show()

In [None]:
plt.figure(figsize=(5,4))
plt.title("Fraction over volume")

for i in range(amount_of_clusters):
    rows = cluster_df[cluster_df["cluster_id"] == i + 1]

    # colors[i], not colors[i - 1]: the centroid plot colours cluster_id with
    # colors[cluster_id - 1], and cluster_id is i + 1 here
    plt.plot(rows.resolution,
             rows.fraction_over_volume,
             label="Cluster " + str(i + 1),
             color=colors[i])

plt.legend()
plt.xlabel("Resolution")
# raw string: "\A" is not a valid escape sequence in a normal string literal
plt.ylabel(r"Fraction over volume ($\AA^{-3}$)")
plt.show()

# General: make function for Directionality Calculation

In [None]:
def recluster(df, settings, recluster_frac):
    """Re-assign bins to clusters using a per-cluster threshold and neighbour growth.

    For every label in ``df.cluster`` the threshold is ``recluster_frac`` times
    the count of the fullest bin of that cluster. Bins of the cluster above the
    threshold seed the new cluster; it then grows into directly neighbouring
    bins (including diagonals) that are above the same threshold and not yet
    assigned, until no bin is added. Clusters are processed in the order
    ``unique()`` returns them, so an earlier cluster can claim a bin first.

    Parameters
    ----------
    df
        Density frame with ``cluster``, ``x_center``, ``y_center``,
        ``z_center`` and the count column named by ``settings.to_count_contact``.
        A ``new_cluster`` column is written to it in place.
    settings
        Supplies ``to_count_contact`` and ``resolution``.
    recluster_frac
        Fraction of the cluster's fullest bin a bin has to exceed.

    Returns
    -------
    ``df`` with the ``new_cluster`` column (NaN where no cluster was assigned).
    """
    amount = settings.to_count_contact
    centre_columns = ["x_center", "y_center", "z_center"]

    # Neighbouring bin centres lie exactly one resolution apart. A strict "< resolution"
    # test therefore never reliably matches them (only float rounding decides), so half
    # a bin of slack is added: one bin away is inside, two bins away is outside.
    reach = 1.5 * settings.resolution

    df["new_cluster"] = np.nan

    for cluster_id in df.cluster.dropna().unique():
        max_bin = df.loc[df.cluster == cluster_id, amount].max()

        # add old bins that are full enough when using the local maximum
        fraction = max_bin * recluster_frac
        print("Cluster id: " + str(cluster_id), " max bin: " + str(max_bin) + " fraction for this bin: ", fraction)

        full_enough = df[amount] > fraction
        df.loc[(df.cluster == cluster_id) & full_enough, "new_cluster"] = cluster_id

        # Only bins added in the previous pass can have unvisited neighbours, so each
        # pass looks around that frontier instead of rescanning the whole cluster.
        frontier = df.loc[df.new_cluster == cluster_id, centre_columns]

        while len(frontier) > 0:
            added = np.zeros(len(df), dtype=bool)

            for x, y, z in frontier.itertuples(index=False, name=None):
                neighbours = (
                    ((df.x_center - x).abs() < reach)
                    & ((df.y_center - y).abs() < reach)
                    & ((df.z_center - z).abs() < reach)
                    & full_enough
                    & df.new_cluster.isna()
                ).to_numpy()

                if neighbours.any():
                    df.loc[neighbours, "new_cluster"] = cluster_id
                    added |= neighbours

            frontier = df.loc[added, centre_columns]

    return df

In [None]:
def calc_directionality(central, contact, to_count_contact, resolution, k, kmeans_frac, recluster_frac):
    """Cluster the density of one central/contact pair and find the available volume.

    Parameters
    ----------
    central, contact
        Group names; used to build the path of the aligned-results CSV
        (``.\\results\\<central>\\<central>_<contact>_vdw.5\\..._aligned.csv``).
    to_count_contact
        Atom to count, passed to ``Settings.set_atom_to_count``.
    resolution
        Bin width; rounded to two decimals before it is set on the settings.
    k
        Number of k-means clusters.
    kmeans_frac
        Threshold fraction for ``find_bins_to_cluster``.
    recluster_frac
        Threshold fraction for ``recluster``.

    Returns
    -------
    tuple
        ``(density_df, available_volume)``; ``density_df`` carries the
        ``cluster`` and ``new_cluster`` columns, without ``to_cluster``.
    """
    # Same layout as the previously hard-coded NO3 / XH path, now built from the arguments.
    # NOTE(review): the "vdw.5" part presumably mirrors the 0.5 tolerance below - confirm
    # before using other tolerances.
    run_name = central + "_" + contact + "_vdw.5"
    settings = Settings(".\\results\\" + central + "\\" + run_name + "\\" + run_name + "_aligned.csv")
    settings.set_atom_to_count(to_count_contact)
    settings.set_resolution(round(resolution, 2))

    # work only with bins that are > kmeans_frac * the fullest bin
    density_df = find_bins_to_cluster(settings=settings, fraction=kmeans_frac)

    # calc for each bin in what cluster it belongs
    density_df, _ = calc_clusters(density_df, k)

    # drop() returns a new frame; without the assignment the call had no effect
    density_df = density_df.drop(columns=["to_cluster"])

    # find the volume of the central group
    tolerance = 0.5

    df = read_results_alignment(settings.get_aligned_csv_filename())
    avg_fragment = make_avg_fragment_if_not_exists(settings, df)

    coordinate_df = make_coordinate_df(df, settings, avg_fragment)

    contact_radius = get_vdw_distance_contact(coordinate_df, settings)

    available_volume = find_available_volume(avg_fragment=avg_fragment, extra=(tolerance + contact_radius))

    print("\nRecluster")
    density_df = recluster(density_df, settings, recluster_frac)

    return density_df, available_volume

In [None]:
# Arguments of the NO3 / XH directionality run, kept together so they are easy to tweak.
directionality_args = {
    "central": "NO3",
    "contact": "XH",
    "to_count_contact": "H",
    "resolution": 0.2,
    "k": 3,
    "kmeans_frac": 0.25,
    "recluster_frac": 0.25,
}

density_df, V_available = calc_directionality(**directionality_args)

display(density_df)
print("Available volume:", V_available)

In [None]:
# Distinct cluster labels; rows whose label is NaN are left out.
unique_clusters = density_df["cluster"].dropna().unique()

In [None]:
print('Cluster ID, Volume, DataFraction Datafraction/Volume, Datafraction/Volume * available volume')

# The same table is printed twice for comparison: first for the k-means labels,
# then for the reclustered labels.
for heading, label_column in ((None, "cluster"), ("\nReclustered", "new_cluster")):
    if heading is not None:
        print(heading)

    for cluster_id in unique_clusters:
        members = density_df[density_df[label_column] == cluster_id]

        # number of bins times the volume of one 0.2-wide bin
        volume = len(members) * (0.2**3)
        data_fraction = members[settings.to_count_contact + "_normalized"].sum()
        fraction_per_volume = data_fraction / volume

        print(cluster_id + 1, round(volume, 4), round(data_fraction, 4),
              round(fraction_per_volume, 4), round(fraction_per_volume * V_available, 4))

In [None]:
# calc_directionality worked on a 0.2 grid with its own Settings object and did not
# return any centroids. The global `settings` and `centroids` still hold the state of
# the last iteration of the resolution sweep, so both are brought in line with
# `density_df` before plotting.
settings.set_resolution(0.2)

for label_column in ("cluster", "new_cluster"):
    labelled_bins = density_df[density_df[label_column].notna()]

    # Mean bin centre per label. groupby sorts by label, so row i belongs to label i,
    # which is the order make_cluster_plot expects.
    centroids = (labelled_bins
                 .groupby(label_column)[["x_center", "y_center", "z_center"]]
                 .mean()
                 .to_numpy())

    make_cluster_plot(avg_fragment, density_df, label_column, settings, centroids)

In [None]:
def show_directionality(avg_fragment, df, column, settings, available_volume):
    """Plot the clustered bins with the directionality of each cluster in the legend.

    Directionality of a cluster is computed here as
    ``(summed normalized count / cluster volume) * available_volume``, where the
    cluster volume is the number of its drawn bins times ``resolution**3``.

    Parameters
    ----------
    avg_fragment
        Forwarded to ``plot_fragment_colored``.
    df
        Density frame with ``x_center``, ``y_center``, ``z_center``, the label
        column named by ``column`` and ``<to_count_contact>_normalized``.
        A ``cluster_color`` column is written to it in place.
    column
        Name of the column holding the cluster label (NaN = not clustered).
    settings
        Supplies ``to_count_contact``, ``resolution`` and the group names
        used in the title.
    available_volume
        Volume the directionality is scaled with.
    """
    # The first three colours are the original palette. The extra entries and the
    # modulo lookup keep runs with more than three clusters from raising IndexError.
    colors = ["red", "green", "blue", "orange", "purple", "cyan", "magenta", "brown"]

    df["cluster_color"] = "grey"

    labelled = df[column].notna()
    df.loc[labelled, "cluster_color"] = [colors[int(label) % len(colors)]
                                         for label in df.loc[labelled, column]]

    # empty bins are not drawn and do not count towards the cluster volume
    df = df[df[settings.to_count_contact + "_normalized"] > 0]

    fig = plt.figure(figsize=(8,8))
    ax: Axes3D = fig.add_subplot(111, projection='3d')

    rest = df[df.cluster_color != "grey"]

    bin_volume = settings.resolution**3

    for cluster_id in df[column].dropna().unique():
        points = df[df[column] == cluster_id]
        firstpoint = points.iloc[0]

        volume = len(points) * bin_volume
        fraction = points[settings.to_count_contact + "_normalized"].sum()
        directionality = fraction / volume * available_volume

        # A single point per cluster is drawn only to get a legend entry. Clusters are
        # numbered from 1 as whole numbers, like in the other plots and tables.
        ax.scatter(firstpoint.x_center, firstpoint.y_center, firstpoint.z_center,
                   label="Cluster: " + str(int(cluster_id) + 1) + " Directionality:" + str(round(directionality, 2)),
                   color=firstpoint.cluster_color)

    ax.scatter(list(rest.x_center), list(rest.y_center), list(rest.z_center),
               color=list(rest.cluster_color))

    # plot the average fragment
    ax = plot_fragment_colored(ax, avg_fragment)

    ax.set_title("Clusters " + settings.central_group_name + "-" + settings.contact_group_name + ", resolution: " + str(settings.resolution))

    ax.set_xlim(-6, 6)
    ax.set_ylim(-6, 6)

    ax.set_xlabel("X coordinate")
    ax.set_ylabel("Y coordinate")

    ax.legend(fontsize='x-small')

    # fixed camera angles (elevation, azimuth)
    elev = 91
    azim = 89
    ax.view_init(elev, azim)

    plt.show()

In [None]:
# The global settings must carry the 0.2 resolution: show_directionality uses it
# for the bin volume and for the title.
settings.set_resolution(0.2)

# first the k-means labels, then the reclustered ones
for label_column in ("cluster", "new_cluster"):
    show_directionality(avg_fragment, density_df, label_column, settings, V_available)

# Azimuth, elevation test

In [None]:
# --- Create data -------------------------------------------------------------
# x and y share one array (points on the line x = y); z is seeded normal noise.
n = 200
np.random.seed(11)
line = np.linspace(-10, 10, n)
x, y = line, line
z = 2 + 3 * np.random.randn(n)

# --- Plot data ---------------------------------------------------------------
fig, ax = plt.subplots(subplot_kw={"projection": "3d"})
ax.scatter(x, y, z)
ax.set(xlabel='x', ylabel='y', zlabel='z')

# Show the figure; it can be rotated with the mouse.
plt.show()

# Print the current viewing angles of the axes. They can be passed on to
# ax.view_init(elev, azim) to reproduce the view.
print('ax.azim {}'.format(ax.azim))
print('ax.elev {}'.format(ax.elev))