# DRNets phase diagrams

In [1]:
import os, subprocess
import numpy as np
import xarray as xr
import glob as glob
import matplotlib as mpl
from pathlib import Path

# Use the regular text font (rather than the default math font) for mathtext
mpl.rcParams['mathtext.default'] = 'regular'

# IPython magic (not plain Python): selects the interactive "widget" matplotlib backend
%matplotlib widget

# Function for assembling Xarray dataset
1) Get elemental composition (e.g., CAl, CLi, CFe) from instance file
    - These will be coordinates
2) Get phases (e.g., BName1, BName2, ...), phase weights, and mixed diffraction patterns from solution file
    - These should be data variables
    - np.tile used to account for difference in array shapes

In [2]:
def get_repo_root() -> Path:
    """Locate the project root directory.

    Resolution order:
      1. The Git top-level directory, as printed by
         ``git rev-parse --show-toplevel`` run in the current working directory.
      2. The ``PROJECT_ROOT`` environment variable, if set and non-empty
         (``~`` is expanded and the path is resolved).
      3. The current working directory.

    Returns:
        Path: the first location found in the order above.
    """
    # 1) If running inside a Git clone, ask Git
    try:
        root = subprocess.check_output(
            ["git", "rev-parse", "--show-toplevel"],
            text=True,
            # Discard Git's "fatal: not a git repository" message; failing
            # here is an expected case that is handled by the fallbacks below.
            stderr=subprocess.DEVNULL,
        ).strip()
    except (subprocess.SubprocessError, OSError):
        # Git exited non-zero (not a repository) or could not be started
        # (e.g. not installed): best-effort lookup, try the next option.
        root = ""
    if root:
        return Path(root)
    # 2) Optional: allow an override via env var
    env_root = os.getenv("PROJECT_ROOT")
    if env_root:
        return Path(env_root).expanduser().resolve()
    # 3) Fallback: current working directory
    return Path.cwd()

In [3]:
def _read_key_value_lines(file_path):
    """Index the ``KEY=value`` lines of a DRNets text file by key.

    Returns a dict mapping each key (the exact text before the first ``=``)
    to the list of values found for that key, in file order. A value is the
    text between the first and second ``=`` of the line and is not stripped.
    Lines without ``=`` are skipped.
    """
    fields = {}
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.split('=')
            if len(parts) < 2:
                continue
            fields.setdefault(parts[0], []).append(parts[1])
    return fields


def _require_field(fields, key, file_name):
    """Return the first value stored for *key*; raise ValueError if it is absent."""
    try:
        return fields[key][0]
    except KeyError:
        raise ValueError(f"Required line '{key}=' not found in {file_name}") from None


def _parse_floats(text):
    """Convert a comma-separated string of numbers into a list of floats."""
    return [float(item) for item in text.strip().split(',')]


# Creates a phase diagram dataset from drnets files
def create_drnets_ds(path, inst_file, solu_file):
    """Build an xarray Dataset from a DRNets instance file and solution file.

    Both files are read as text made of ``KEY=value`` lines.

    Lines read from the instance file:
      - ``N=``            number of points
      - ``Composition=``  comma-separated keywords naming the composition lines
      - ``Elements=``     comma-separated elements, in the same order as the
                          composition keywords (each element must be a substring
                          of its keyword)
      - ``<keyword>=``    comma-separated composition values, one line per keyword
      - ``Q=``            comma-separated Q values
      - ``I1=`` ... ``IN=``  comma-separated intensities, one line per point

    Lines read from the solution file:
      - ``K=``            number of phases
      - ``BName1=`` ... ``BNameK=``  phase names
      - ``C1=`` ... ``CN=``          comma-separated phase weights, one line per point

    Parameters:
        path: directory containing both files.
        inst_file: instance file name, relative to ``path``.
        solu_file: solution file name, relative to ``path``.

    Returns:
        xr.Dataset with data variable ``iq`` of dims
        ``(index, tuple_index, q_points)`` holding a (Q, I) pair per point, and
        coordinates ``elements``, ``element_weights``, ``q_points``,
        ``phase_names`` and ``phase_weights``. Element and phase names are tiled
        with ``np.tile`` to one row per point. File names, element/phase lists
        and counts are stored in ``ds.attrs``.

    Raises:
        ValueError: if a required line is missing, a numeric value cannot be
            parsed, or the parsed arrays have inconsistent shapes.
    """
    #
    # Instance file reading, data organization
    #

    # Read and index the instance file once; fields are then looked up by
    # exact key (so e.g. keyword "CC" can never pick up a "CCu=" line).
    inst_fields = _read_key_value_lines(os.path.join(path, inst_file))

    # Parse metadata
    n_points = int(_require_field(inst_fields, "N", inst_file).strip())
    composition_keywords = _require_field(inst_fields, "Composition", inst_file).strip().split(',')
    elements = _require_field(inst_fields, "Elements", inst_file).strip().split(',')

    # Error checking composition and element lines
    if len(composition_keywords) != len(elements):
        raise ValueError(f'Length of composition keywords (currently {len(composition_keywords)}) and length of elements (currently {len(elements)}) must be equal')

    for element, keyword in zip(elements, composition_keywords):
        if element not in keyword:
            raise ValueError(f'{element} not in {keyword} - elements and composition_keywords may be out of sync')

    # Getting elemental compositions: one row per element, transposed below to
    # one row per point
    compositions_dict = {
        element: _parse_floats(_require_field(inst_fields, keyword, inst_file))
        for element, keyword in zip(elements, composition_keywords)
    }
    compositions = np.array(list(compositions_dict.values())).T

    # Parse Q values
    q_values = np.array(_parse_floats(_require_field(inst_fields, "Q", inst_file)))

    # Extract intensity data in point order (I1, I2, ..., IN); every line found
    # for a key is kept so duplicated or missing points fail the count check
    intensities = [
        np.array(_parse_floats(value))
        for num in range(1, n_points + 1)
        for value in inst_fields.get(f"I{num}", [])
    ]

    # Error checking on the intensity rows before stacking, so that a ragged
    # input produces these messages instead of a NumPy construction error
    if len(intensities) != n_points:
        raise ValueError("xrd_array.shape[0] (length of xrd_array) must be equal to n_points")

    if any(len(row) != len(q_values) for row in intensities):
        raise ValueError('xrd_array.shape[2] (length of xrd_array elements) must be equal to the length of q_values')

    # Shape (n_points, 2, len(q_values)): a (Q, I) pair for every point
    xrd_array = np.array([(q_values, row) for row in intensities])

    # Create 2D array of elements to match compositions shape / make indexing consistent
    element_coords = np.tile(elements, (n_points, 1))
    if element_coords.shape != compositions.shape:
        raise ValueError(f'Shape of element_coords ({element_coords.shape}) and compositions ({compositions.shape}) must be equivalent')

    #
    # Solution file reading, data organization
    #

    # Read and index the solution file
    solu_fields = _read_key_value_lines(os.path.join(path, solu_file))

    # Parse metadata
    n_phases = int(_require_field(solu_fields, "K", solu_file).strip())

    # Extract phase names in phase order (BName1, ..., BNameK)
    phase_names_list = [
        value.replace('\n', '')
        for num in range(1, n_phases + 1)
        for value in solu_fields.get(f"BName{num}", [])
    ]
    phase_names = np.array(phase_names_list)

    # Extract phase concentrations (weights) - uses n_points from inst file
    phase_concs_list = [
        np.array(_parse_floats(value))
        for num in range(1, n_points + 1)
        for value in solu_fields.get(f"C{num}", [])
    ]
    phase_concentrations = np.array(phase_concs_list)

    # Create 2D array of phase names for consistency in array shapes
    phase_names_coords = np.tile(phase_names, (n_points, 1))
    if phase_names_coords.shape != phase_concentrations.shape:
        raise ValueError(f'Shape of phase_names_coords ({phase_names_coords.shape}) and phase_concentrations ({phase_concentrations.shape}) must be equivalent')

    #
    # Assembling dataset
    #
    ds = xr.Dataset(
        data_vars={
            "iq": (["index","tuple_index", 'q_points'], xrd_array),
        },
        coords={
            "elements": (["index","element"], element_coords),
            "element_weights": (["index","composition"], compositions),
            "q_points": q_values,
            "phase_names": (["index", "name"], phase_names_coords),
            "phase_weights": (["index", "weights"], phase_concentrations)
        }
    )

    # Adding attributes
    ds.attrs['Elements'] = elements
    ds.attrs['Phases'] = phase_names_list
    ds.attrs['Number of elements'] = len(elements)
    ds.attrs['Number of phases'] = n_phases
    ds.attrs['Number of points'] = n_points
    ds.attrs['Instance file'] = inst_file
    ds.attrs['Solution file'] = solu_file

    return ds

## Al-Li-Fe phase diagram dataset

In [4]:
# Al-Li-Fe dataset: build it from the DRNets instance/solution files and
# save it as NetCDF under Data/phasediagram_datasets
root_dir = get_repo_root()
phasediagram_dir = root_dir / "Data" / "phasediagram_datasets"
data_dir = root_dir / "Data" / "DRNets" / "AlLiFe"
inst_file, solu_file = 'Al-Li-Fe_inst_Q650.txt', 'Al-Li-Fe_sol_Q650.txt'

ds_AlLiFe = create_drnets_ds(data_dir, inst_file, solu_file)
ds_AlLiFe.to_netcdf(phasediagram_dir / 'Al-Li-Fe_dataset.nc')

In [5]:
# Al-Li-Fe dataset: bare expression so the notebook displays the Dataset as the cell output
ds_AlLiFe

## Bi-Cu-V dataset

In [7]:
# Bi-Cu-V dataset: build it from the DRNets instance/solution files and
# save it as NetCDF under Data/phasediagram_datasets
root_dir = get_repo_root()
phasediagram_dir = root_dir / "Data" / "phasediagram_datasets"
data_dir = root_dir / "Data" / "DRNets" / "BiCuV"
inst_file, solu_file = 'Bi-Cu-V307_inst_Q300.txt', 'Bi-Cu-V307_sol_Q300.txt'

ds_BiCuV = create_drnets_ds(data_dir, inst_file, solu_file)
ds_BiCuV.to_netcdf(phasediagram_dir / 'Bi-Cu-V_dataset.nc')

In [8]:
# Bi-Cu-V dataset: bare expression so the notebook displays the Dataset as the cell output
ds_BiCuV

## Li-Sr-Al dataset

In [9]:
# Li-Sr-Al dataset: build it from the DRNets instance/solution files and
# save it as NetCDF under Data/phasediagram_datasets
root_dir = get_repo_root()
phasediagram_dir = root_dir / "Data" / "phasediagram_datasets"
data_dir = root_dir / "Data" / "DRNets" / "LiSrAl"
inst_file, solu_file = 'Li-Sr-Al_inst_Q4501.txt', 'Li-Sr-Al_sol_Q4501.txt'

ds_LiSrAl = create_drnets_ds(data_dir, inst_file, solu_file)
ds_LiSrAl.to_netcdf(phasediagram_dir / 'Li-Sr-Al_dataset.nc')

In [10]:
# Li-Sr-Al dataset: bare expression so the notebook displays the Dataset as the cell output
ds_LiSrAl