### Comparison of CSMs (continuous symmetry measures) in the AFM and FM subsets for bond angles within 90° ± x

For the definition of the CSM, see the ChemEnv publication by Waroquiers et al., DOI: 10.1107/S2052520620007994.

Please note that non-integer occurrences of csms are possible because we do not include ligand multiplicities in the analysis and not all bond angles of an edge may lie inside the given bond angle range of the analysis.

#### Summary of results
- FM connections often have lower average and median csms than AFM connections around 90° bond angles
- there are also significantly more FM than AFM connections with csm ≈ 0 (counted in the analysis below as csm < 0.02, i.e. "perfect octahedra")


In [1]:
import json
import math
from monty.json import MontyDecoder
import numpy as np
import os
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from pymatgen.core import Element

from utils_kga.statistical_analysis.get_spin_and_bond_angle_statistics import get_bond_angle_interval_statistics, get_bond_angle_occurrences
from utils_kga.general import pretty_plot

In [2]:
def weighted_lower_median(pairs):
    """
    Return the weighted lower median of (value, weight) pairs.

    The weighted lower median is the smallest value whose cumulative weight
    (accumulated over the pairs sorted by value) reaches at least half of the
    total weight. For unit weights this is the ordinary lower median.

    pairs -- iterable of two-element entries (value, weight). The input is
             not modified.
    Returns the median value, or None if pairs is empty.
    """
    # Sort a copy so the caller's list keeps its original order.
    ordered = sorted(pairs, key=lambda p: p[0])
    thr = sum(p[1] for p in ordered) / 2
    w_sums = 0
    for a, w in ordered:
        w_sums += w
        # ">=" regardless of the number of pairs: the parity of len(pairs)
        # says nothing about where half of the total *weight* is reached.
        if w_sums >= thr:
            return a
    return None

def weighted_avg_and_std(values, weights):
    """
    Compute the weighted mean and the weighted standard deviation.

    Based on Eric O. Lebigot's answer on stackoverflow:
    https://stackoverflow.com/questions/2413522/weighted-standard-deviation-in-numpy

    values, weights -- NumPy ndarrays with the same shape.
    Returns a tuple (weighted mean, weighted standard deviation).
    """
    mean = np.average(values, weights=weights)
    # Weighted mean of the squared deviations from the weighted mean
    squared_deviations = (values - mean) ** 2
    spread = math.sqrt(np.average(squared_deviations, weights=weights))
    return mean, spread

In [3]:
# Load the edge tables: the JSON maps each key to a dict that is turned into
# one DataFrame per key (the keys are later used as row labels of `df`).
# JSON is read as UTF-8 explicitly so decoding does not depend on the platform locale.
with open("data/dfs_of_magnetic_edge_information.json", encoding="utf-8") as f:
    dict_all_stats = json.load(f)
all_stats = {key: pd.DataFrame.from_dict(edge_dict) for key, edge_dict in dict_all_stats.items()}

# For metadata filtering (decoded via MontyDecoder; accessed below with `df.at[...]`)
with open("../../data_retrieval_and_preprocessing_MAGNDATA/data/df_grouped_and_chosen_commensurate_MAGNDATA.json", encoding="utf-8") as f:
    df = json.load(f, cls=MontyDecoder)

# Directory that receives all figures written by this notebook
plot_dir = "plots/TM_octahedra_analysis_csms"
os.makedirs(plot_dir, exist_ok=True)

In [4]:
# Derive helper columns on every edge table for easier filtering later on:
# rounded spin angle, transition-metal flags for both sites, ligand element set
for ang_df in all_stats.values():
    ang_df["spin_angle"] = ang_df["spin_angle"].apply(lambda angle: round(angle, 0))
    for element_col, flag_col in (("site_element", "site_is_tm"), ("site_to_element", "site_to_is_tm")):
        ang_df[flag_col] = ang_df[element_col].apply(lambda symbol: Element(symbol).is_transition_metal)
    ang_df["ligand_el_set"] = ang_df["ligand_elements"].apply(lambda elements: set(elements))

    # Remove every column whose name contains "clude_ligand" (in place, so the
    # frames stored in all_stats are the ones that change)
    ligand_flag_columns = [name for name in ang_df.columns if "clude_ligand" in name]
    ang_df.drop(columns=ligand_flag_columns, inplace=True)

In [5]:
# Human-readable labels of the analysed data subsets. They are used in plot
# titles and output file names, and the substring "oxygen" in a label switches
# on the oxygen-only ligand filter in the analysis cells below.
description = [
    "all edges with TM octahedra at both nodes",
    "all oxygen edges with TM octahedra at both nodes",
]

# Plotly's qualitative default palette; indexed by position in the plots below
colors = px.colors.qualitative.Plotly

In [6]:
# CSM histograms of FM vs. AFM edges for several bond angle intervals around 90°.
# `normalize_string` is only used as a label in the figure titles of this cell.
normalize_string = "absolute occurrences"
# NOTE(review): the last interval [75, 100] is not symmetric around 90° unlike
# the other three — confirm this is intended (vs. [75, 105]).
for bond_angle_range90 in [[89, 91], [85, 95], [80, 100], [75, 100]]:
    # Single-iteration loop: only the setting without ligand multiplicity is analysed
    for ligand_multiplicity_bool, ligand_multiplicity_string in zip([False], ["no ligand multiplicity included"]):
        for data_string in description:
            print("------------- bond angle interval (deg): ", bond_angle_range90, "-----------------")
            print(data_string)
            # Collected results per magnetic ordering; the entries are later read
            # as rows of the columns ["site_csm", "occurrence"]
            csms = {"FM": [], "AFM": []}
            # Number of structures that contributed at least one entry
            n_compounds = {"FM": 0, "AFM": 0}
            for md_id, ang_df in all_stats.items():
                # Keep edges whose two sites both have the "O:6" environment and are transition metals
                subdf = ang_df.loc[(ang_df["site_ce"]=="O:6") 
                       & (ang_df["site_to_ce"]=="O:6")
                       & (ang_df["site_is_tm"]) 
                       & (ang_df["site_to_is_tm"])
                ]
                if "oxygen" in data_string:
                    # Keep only edges whose ligand element set is exactly {"O"}
                    subdf = subdf.loc[subdf["ligand_el_set"]=={"O"}]
                    
                n_lattice_points = df.at[md_id, "n_lattice_points"]
                
                # FM: spin angle <= 10°, AFM: spin angle >= 170° (spin angles were rounded beforehand)
                for mag_string, spin_angle_condition in zip(["FM", "AFM"], [subdf["spin_angle"]<=10.0, subdf["spin_angle"]>=170.0]):
                    magnet_df = subdf.loc[spin_angle_condition]
                    occus = get_bond_angle_interval_statistics(df=magnet_df, include_ligand_multiplicity=ligand_multiplicity_bool,
                                                               analyze_column="site_csm", n_lattice_points=n_lattice_points,
                                                               bond_angle_interval=bond_angle_range90)
                    # Only structures with a non-empty result are counted
                    if occus:
                        csms[mag_string].extend(occus)
                        n_compounds[mag_string] += 1
                    
            one_d_fig = go.Figure(layout=go.Layout(xaxis=go.layout.XAxis(title="csm"),
                                                       yaxis=go.layout.YAxis(title="Occurrence"),
                                                       title=f"{data_string}, {ligand_multiplicity_string}, {normalize_string}, bond angles={bond_angle_range90}, n_compounds={n_compounds}"))
            for mag_idx, mag_string in enumerate(["FM", "AFM"]):
                mag_df = pd.DataFrame(columns=["site_csm", "occurrence"], data=csms[mag_string])
                # "Perfect" octahedra: csm below 0.02
                perfect_octs = mag_df.loc[mag_df["site_csm"]<0.02]["occurrence"].values.sum()
                all_octs = mag_df["occurrence"].values.sum()
                # NOTE(review): if no entries were collected, all_octs is zero and the
                # ratio / weighted average below are presumably undefined — confirm this cannot occur
                print(f"{mag_string}: perfect octahedra percentage: {perfect_octs/all_octs} ({perfect_octs} out of {all_octs} with n_structures: ", n_compounds[mag_string])
                # Occurrence-weighted average and weighted lower median of the csm values
                av = np.average(mag_df["site_csm"], weights=mag_df["occurrence"])
                median = weighted_lower_median(csms[mag_string])
                print(f"{mag_string} csm av: {av}")
                print(f"{mag_string} csm lower median: {median}")
                # Histogram of csm values where each bin sums the occurrences
                one_d_fig.add_trace(go.Histogram(
                    histfunc="sum",
                    x=mag_df["site_csm"].values,
                    y=mag_df["occurrence"].values,
                    name=mag_string,
                    marker_color= colors[mag_idx]
                ))
                # Vertical marker lines (fixed height 0..100) at the average and the median;
                # the colour index is offset by 2 relative to the histogram colour
                for stat, stat_string in zip([av, median], ["av", "median"]):
                    one_d_fig.add_trace(go.Scatter(
                        x=[stat, stat],
                        y=[0, 100],
                        mode="lines",
                        marker_color=colors[mag_idx+2],
                        name=mag_string + f" {stat_string} ({round(stat, 2)})"
                    ))
            one_d_fig = pretty_plot(one_d_fig)
            one_d_fig.update_layout(title=dict(font=dict(size=10)))
            # One HTML file per data subset and bond angle interval
            one_d_fig.write_html(os.path.join(plot_dir, f"csms_{data_string}_{bond_angle_range90[0]}-{bond_angle_range90[1]}-deg_bond_angles.html"))

------------- bond angle interval (deg):  [89, 91] -----------------
all edges with TM octahedra at both nodes
FM: perfect octahedra percentage: 0.6460674157303371 (230.0 out of 356.0 with n_structures:  28
FM csm av: 0.19696629213483147
FM csm lower median: 0.01
AFM: perfect octahedra percentage: 0.4285714285714286 (130.0 out of 303.3333333333333 with n_structures:  30
AFM csm av: 0.3625164835164835
AFM csm lower median: 0.33
------------- bond angle interval (deg):  [89, 91] -----------------
all oxygen edges with TM octahedra at both nodes
FM: perfect octahedra percentage: 0.46302250803858525 (96.0 out of 207.33333333333331 with n_structures:  16
FM csm av: 0.29308681672025727
FM csm lower median: 0.2
AFM: perfect octahedra percentage: 0.23076923076923078 (48.0 out of 208.0 with n_structures:  20
AFM csm av: 0.42600961538461546
AFM csm lower median: 0.34
------------- bond angle interval (deg):  [85, 95] -----------------
all edges with TM octahedra at both nodes
FM: perfect octahed

In [8]:
# What are the occurrence-weighted mean and standard deviation of the bond angle
# distribution close to 90° (75°..105°) for FM and AFM interactions?
# Both outer loops run exactly once (single-element lists).
for normalize_bool, normalize_string in zip([False], ["absolute occurrences"]):
    for ligand_multiplicity_bool, ligand_multiplicity_string in zip([False], ["no ligand multiplicity included"]):
        # Only the first data subset (all edges) is analysed here
        for data_string in description[:1]:
            all_spin_occus = []
            for md_id, ang_df in all_stats.items():
                # Keep edges whose two sites both have the "O:6" environment and are transition metals
                subdf = ang_df.loc[(ang_df["site_ce"]=="O:6")
                       & (ang_df["site_to_ce"]=="O:6")
                       & (ang_df["site_is_tm"])
                       & (ang_df["site_to_is_tm"])
                ]
                if "oxygen" in data_string:
                    subdf = subdf.loc[subdf["ligand_el_set"]=={"O"}]
                if not subdf.empty:
                    n_lattice_points = df.at[md_id, "n_lattice_points"]
                    occus = get_bond_angle_occurrences(df=subdf,
                                                                     include_ligand_multiplicity=ligand_multiplicity_bool,
                                                                     normalize=normalize_bool,
                                                                     n_lattice_points=n_lattice_points,
                                                                     spin_angle_round=0,
                                                                     bond_angle_round=7)
                    all_spin_occus.extend(occus)
            print("------")
            print(data_string, normalize_string, ligand_multiplicity_string)
            # The collected entries are read as rows of (spin_angle, bond_angle, occurrence)
            all_spin_occus_df = pd.DataFrame(columns=["spin_angle", "bond_angle", "occurrence"], data=all_spin_occus)

            # Restrict to bond angles within 75°..105°, then split into
            # approx. FM (spin angle <= 10°) and approx. AFM (spin angle >= 170°)
            all_spin_occus_df = all_spin_occus_df.loc[(all_spin_occus_df["bond_angle"] <= 105) & (all_spin_occus_df["bond_angle"] >= 75)]
            fm_occus = all_spin_occus_df.loc[all_spin_occus_df["spin_angle"] <= 10]
            afm_occus = all_spin_occus_df.loc[all_spin_occus_df["spin_angle"] >= 170]

            # Zoomed-in bond angle plot
            for magdf, magstring in zip([afm_occus, fm_occus], ["AFM", "FM"]):
                one_d_fig = go.Figure(layout=go.Layout(xaxis=go.layout.XAxis(title="Bond angle (°)"),
                                                           yaxis=go.layout.YAxis(title="Occurrence"),
                                                           title=f"{data_string}, {ligand_multiplicity_string}, {normalize_string}, approx. {magstring}"))
                # Histogram of bond angles where each bin sums the occurrences
                one_d_fig.add_trace(go.Histogram(
                    histfunc="sum",
                    x=magdf["bond_angle"].values,
                    y=magdf["occurrence"].values,
                    xbins=dict(
                      start=75.5,
                      end=104.5,
                      size=1.0), # ensure binning with centers at integer bond angle values
                    autobinx=False,
                    showlegend=False,
                    marker_color="#025268"
                ))
                # Vertical reference line at 90°, drawn up to the fixed y-axis maximum (295)
                one_d_fig.add_trace(go.Scatter(
                    x=[90, 90],
                    y=[0, 295],
                    mode="lines",
                    marker_color="#B61F1C",
                    showlegend=False
                ))
                one_d_fig = pretty_plot(one_d_fig)
                one_d_fig.update_layout(title=dict(font=dict(size=10)))

                # Same fixed y range and figure size for the AFM and FM plots
                one_d_fig.update_layout(yaxis_range=[0, 295])
                one_d_fig.update_yaxes(zeroline=False)
                one_d_fig.update_layout(autosize=False, width=500, height=500)
                one_d_fig.show()
                one_d_fig.write_image(os.path.join(plot_dir, f"bond_angles_close_to_90_deg_{data_string}_{normalize_string}_{ligand_multiplicity_string}_{magstring}_10_deg_tol.pdf"))

            # Prints (weighted mean, weighted std) of the bond angles: first AFM, then FM
            for o in [afm_occus, fm_occus]:
                print(weighted_avg_and_std(o["bond_angle"].values, o["occurrence"].values))

------
all edges with TM octahedra at both nodes absolute occurrences no ligand multiplicity included


(93.4299759351284, 6.2423391702135875)
(93.141345930696, 5.745154483183653)
