## Goal: Analyze the correlations between the factors we have created and the anisotropy matrix generated from the polarized spectra 

In [5]:
import os
import pickle

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from pathlib import Path

# Standardizing Plots
plt.rcParams['axes.labelsize'] = 20
plt.rcParams['axes.titlesize'] = 25
plt.rcParams['xtick.direction'] = 'in'
plt.rcParams['ytick.direction'] = 'in'
plt.rcParams['xtick.labelsize'] = 15
plt.rcParams['ytick.labelsize'] = 15
plt.rcParams['lines.linewidth'] = 2
plt.rcParams['legend.fontsize'] = 15
plt.rcParams['legend.loc'] = 'upper left'
plt.rcParams['legend.fancybox'] = True
plt.rcParams['figure.figsize'] = (10, 6)

plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = 'Times New Roman'
plt.rcParams['mathtext.fontset'] = 'custom'
plt.rcParams['mathtext.rm'] = 'Times New Roman'

### Data Exploration

In [42]:
factor_dict_dir_path = Path('..\\Data\\factor_dictionary\\')
os.listdir(factor_dict_dir_path)

['CoS2_factor_dictionary.txt',
 'Cr203_factor_dictionary.txt',
 'CrPbO4_factor_dictionary.txt',
 'Fe2O3_factor_dictionary.txt',
 'LiMnP_factor_dictionary.txt',
 'MnO_factor_dictionary.txt',
 'NiO_factor_dictionary.txt',
 'Sr2CuO3_factor_dictionary.txt',
 'TmNiC2_factor_dictionary.txt',
 'V2O5_factor_dictionary.txt',
 'Y2TiO5_factor_dictionary.txt',
 'ZnS_factor_dictionary.txt']

In [43]:
# CoS2 Factor Dictionaries

with open(factor_dict_dir_path / 'CoS2_factor_dictionary.txt', 'rb') as file:
    CoS2 = pickle.load(file)
print(CoS2.keys())

for key, val in CoS2.items():
    print(key, val)

dict_keys(['Material', 'Steinhart Vector', 'Steinhart Parameter Sum', 'band_gap', 'oxidation_state', 'possible_species', 'density', 'quadrupole moment', 'quadrupole moment normalized'])
Material CoS2
Steinhart Vector ([0.026923139384568175, 3.562684099915903e-18, 0.001815416798598674, 6.6787601789609585e-18, 0.020362629920496835, 1.2736755761031287e-17, 0.010238640661916671, 1.2816402922153213e-17, 0.018779294740504375, 2.1917955476639138e-17, 0.012299585387824725], 'CoS2')
Steinhart Parameter Sum 0.09041870689390952
band_gap 0.0
oxidation_state 4
possible_species ['Co4+', 'S2-']
density 4.849095401441955
quadrupole moment [[ 3.55271368e-15  4.36054258e+00  4.36054258e+00]
 [ 4.36054258e+00 -1.06581410e-14  4.36054258e+00]
 [ 4.36054258e+00  4.36054258e+00  7.10542736e-15]]
quadrupole moment normalized [[ 2.22044605e-16  2.72533911e-01  2.72533911e-01]
 [ 2.72533911e-01 -6.66133815e-16  2.72533911e-01]
 [ 2.72533911e-01  2.72533911e-01  4.44089210e-16]]


In [44]:
anisotropy_matrix_path = Path("..\\Data\\Wed1\\anisotropy_data.csv")
anisotropy_matrix = pd.read_csv(anisotropy_matrix_path)
anisotropy_matrix.set_index('parent_dir', inplace=True)
anisotropy_matrix.index = [name.split('_')[0] for name in anisotropy_matrix.index]
anisotropy_matrix.index.name = 'Material'
print(anisotropy_matrix.head())

          m00       m01       m02       m10  m11       m12       m20  \
Material                                                               
Y2TiO5    0.0  0.432346  0.428979  0.432346  0.0  0.239362  0.428979   
V2O5      0.0  0.123250  0.436309  0.123250  0.0  0.511185  0.436309   
Cr2O3     0.0  0.000008  0.112632  0.000008  0.0  0.112626  0.112632   
CrPbO4    0.0  0.127756  0.066659  0.127756  0.0  0.097652  0.066659   
LiMnP     0.0  0.000001  0.208893  0.000001  0.0  0.208893  0.208893   

               m21  m22  
Material                 
Y2TiO5    0.239362  0.0  
V2O5      0.511185  0.0  
Cr2O3     0.112626  0.0  
CrPbO4    0.097652  0.0  
LiMnP     0.208893  0.0  


#### Material Dictionary Formatting

In [45]:
data_list = []

for file_path in factor_dict_dir_path.glob('*.txt'):
    with open(file_path, 'rb') as file:
        material_dict = pickle.load(file)

        # Extract individual values, keeping only numerical stuff
        material = material_dict['Material']
        steinhart_vector = material_dict['Steinhart Vector'][0]  # Just the vector, ignoring the material name
        steinhart_param_sum = material_dict['Steinhart Parameter Sum']
        band_gap = material_dict['band_gap']
        oxidation_state = material_dict['oxidation_state']
        possible_species = len(material_dict['possible_species'])
        density = material_dict['density']
        quadrupole_moment = material_dict['quadrupole moment'].flatten()  # Flatten matrix to vector
        quadrupole_moment_norm = material_dict['quadrupole moment normalized'].flatten()

        # Combine all the features into a single row (list)
        data_row = [
            material,
            *steinhart_vector,  # Unpack vector values
            steinhart_param_sum,
            band_gap,
            oxidation_state,
            possible_species,
            density,
            *quadrupole_moment,  # Unpack matrix values
            *quadrupole_moment_norm  # Unpack normalized matrix values
        ]
        
        data_list.append(data_row)

# Define the column names
columns = ['Material'] + [f"ST {i}" for i in range(11)] # Steinhart vector values from 0 to 10
columns += ['Steinhart Parameter Sum', 'band_gap', 'oxidation_state', 'possible_species', 'density']
columns += [f'QM {i}' for i in range(9)] # Flattened quadrupole matrix
columns += [f'QM Norm {i}' for i in range(9)] # Flattened quadrupole matrix (normalized)

print(columns)

factor_df = pd.DataFrame(data_list, columns=columns)
factor_df.set_index('Material', inplace=True)

print(factor_df)

['Material', 'ST 0', 'ST 1', 'ST 2', 'ST 3', 'ST 4', 'ST 5', 'ST 6', 'ST 7', 'ST 8', 'ST 9', 'ST 10', 'Steinhart Parameter Sum', 'band_gap', 'oxidation_state', 'possible_species', 'density', 'QM 0', 'QM 1', 'QM 2', 'QM 3', 'QM 4', 'QM 5', 'QM 6', 'QM 7', 'QM 8', 'QM Norm 0', 'QM Norm 1', 'QM Norm 2', 'QM Norm 3', 'QM Norm 4', 'QM Norm 5', 'QM Norm 6', 'QM Norm 7', 'QM Norm 8']
              ST 0          ST 1          ST 2          ST 3      ST 4  \
Material                                                                 
CoS2      0.026923  3.562684e-18  1.815417e-03  6.678760e-18  0.020363   
Cr2O3     0.062571  6.051352e-03  7.456023e-04  1.018819e-02  0.043804   
CrPbO4    0.075711  1.022337e-03  3.375536e-03  5.624809e-02  0.038469   
Fe2O3     0.063556  2.752807e-02  1.389350e-02  4.646688e-02  0.043597   
LiMnP     0.029022  6.932334e-18  1.795132e-03  2.151267e-02  0.014701   
MnO       0.056633  9.503577e-04  5.734463e-04  2.389813e-03  0.043057   
NiO       0.059718  2.523857

### Analyze correlations using Ordinary Least Squares (OLS)

#### Factor contributions

I'm going to start with only the un-normalized quadrupole moments

#### Anisotropy matrix formatting

I'm going to simplify the anisotropy matrix down to its base value by just summing all the values to get only a single 'anisotropy parameter' that I will be trying to predict. Later I'll look at the 3 off diagonal values independently.

In [53]:
import statsmodels.api as sm

# Define the independent variables (X) and dependent variable (y)
X = factor_df[[f'QM {i}' for i in range(9)]] # Only use un-normalized quadrupole moment
y = anisotropy_matrix.sum(axis=1)
y = y.reindex(X.index)

print(X.columns)

# Add a constant to the independent variables (intercept term)
X = sm.add_constant(X)

model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

Index(['QM 0', 'QM 1', 'QM 2', 'QM 3', 'QM 4', 'QM 5', 'QM 6', 'QM 7', 'QM 8'], dtype='object')
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.938
Model:                            OLS   Adj. R-squared:                  0.886
Method:                 Least Squares   F-statistic:                     18.05
Date:                Sun, 20 Oct 2024   Prob (F-statistic):            0.00148
Time:                        17:43:54   Log-Likelihood:                -1.8210
No. Observations:                  12   AIC:                             15.64
Df Residuals:                       6   BIC:                             18.55
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------

