In [2]:
%matplotlib qt
import numpy as np
import pandas as pd
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
def compute_distance(A, B):
    return np.linalg.norm(A - B)

npy_data = np.load("cropped_porphyrine.npy", allow_pickle=True)

atom_indices = {
    "Fe": 768,
    "N1": 769, "N2": 770, "N3": 771, "N4": 772,
    "S": 773, "H": 774,
    "C1": 775, "C2": 776, "C3": 777, "C4": 779, "C5": 780,
    "C6": 781, "C7": 784, "C8": 785, "C9": 787, "C10": 788,
    "C11": 789, "C12": 792, "C13": 793, "C14": 795, "C15": 796,
    "C16": 797, "C17": 800, "C18": 801, "C19": 803, "C20": 804,
    "O1": 807, "O2": 808
}

angle_defs = {
    "O2-O1-Fe": ("O2", "O1", "Fe"),
    "O1-Fe-N1": ("O1", "Fe", "N1"),
    "O1-Fe-N2": ("O1", "Fe", "N2"),
    "O1-Fe-N3": ("O1", "Fe", "N3"),
    "O1-Fe-N4": ("O1", "Fe", "N4"),
    "O1-Fe-S":  ("O1", "Fe", "S"),
    "Fe-S-H":   ("Fe", "S", "H"),
    "O1-Fe-C1": ("O1", "Fe", "C1"),
}

new_atom_indices = dict(zip(
                            list(atom_indices.keys()),
                            np.array(list(atom_indices.values())).astype(int) - 768
))
distance_defs = {}

for angle_name, (a1, a2, a3) in angle_defs.items():
    for pair in [(a1, a2), (a2, a3)]:
        name = f"{pair[0]}-{pair[1]}"
        reverse_name = f"{pair[1]}-{pair[0]}"
        if name not in distance_defs and reverse_name not in distance_defs:
            distance_defs[name] = pair

all_distances = {name: [] for name in distance_defs}

for frame in npy_data:
    coords = {atom: frame[idx][1:] for atom, idx in new_atom_indices.items()}
    
    for dist_name, (a, b) in distance_defs.items():
        dist = compute_distance(coords[a], coords[b])
        all_distances[dist_name].append(dist)

dist_df = pd.DataFrame(all_distances)
dist_df["frame"] = range(len(dist_df))
long_df = dist_df.melt(id_vars="frame", var_name="distance_type", value_name="distance")

In [4]:
df_scsd = pd.read_csv("scsd_50_porphyrine.csv")

symm_names = ["B1u", "B2u"]
comb_names = ["O2-O1", "O1-Fe", "Fe-S"]

dfs_list = []
for item in symm_names:
    df_bu = df_scsd.query(f"Symm == '{item}'")
    sub_dfs_list = []
    for comb in long_df["distance_type"].unique():
        sub_df = long_df.query(f'distance_type == "{comb}"')
        sub_df["Sum (SCSD, Å)"] = df_bu["Sum (SCSD, Å)"].to_list()
        corr = sub_df["distance"].corr(sub_df["Sum (SCSD, Å)"])
        sub_dfs_list.append(sub_df)

    sub_dfs = pd.concat(sub_dfs_list)
    sub_dfs["Symm"] = item
    dfs_list.append(sub_dfs)
    
df_final = pd.concat(dfs_list)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df["Sum (SCSD, Å)"] = df_bu["Sum (SCSD, Å)"].to_list()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df["Sum (SCSD, Å)"] = df_bu["Sum (SCSD, Å)"].to_list()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df["Sum (SCSD, Å)"] = df_bu["Sum (SCSD, Å)"].to_list()
A value is trying to be set 

In [6]:
df_final.to_csv("/Users/samirabaghbanbari/samira/porphyrin_results/distance.csv")

In [17]:
g = sns.FacetGrid(data=df_final, row="Symm", col="distance_type",
                  col_order=comb_names,
                  sharey=True,
                  sharex=True
                  )
g.map_dataframe(
                sns.regplot,
                x="Sum (SCSD, Å)",
                y="distance"
)
g.savefig("/Users/samirabaghbanbari/samira/porphyrin_results/bond_distance_vs_scsd.pdf")