In [1]:
import pysipfenn
from pysipfenn import Calculator     # The only thing needed for calculations
from pprint import pprint            # pretty printing
from collections import defaultdict  # convenience in the example
import numpy as np
import os
import glob
import pandas as pd
import shutil
import matplotlib.pyplot as plt
from pymatgen.entries.computed_entries import ComputedEntry
from pymatgen.analysis.phase_diagram import PhaseDiagram, PDPlotter
from pymatgen.core.composition import Composition

  from .autonotebook import tqdm as notebook_tqdm


## Download and load the models

In [2]:
# First run only: the models have to be downloaded before they can be loaded from
# the local cache. The call below needs `c` to exist, so it must be executed AFTER
# the `c = Calculator()` line, not before it.
# NOTE(review): the models may need to be (re)loaded after downloading — confirm
# against the pySIPFENN documentation.
#c.downloadModels() # download models from the server
c = Calculator() # load models from the local cache

✔ SIPFENN_Krajewski2020 Standard Materials Model
✔ SIPFENN_Krajewski2020 Novel Materials Model
✔ SIPFENN_Krajewski2020 Light Model
✔ SIPFENN_Krajewski2022 KS2022 Novel Materials Model
Loading models:


100%|██████████| 4/4 [00:24<00:00,  6.11s/it]

*********  PySIPFENN Successfully Initialized  **********





## Define the POSCAR directory name

In [3]:
# Directory names of the POSCAR sets. Each name is reused below as
#   * the input directory passed to pySIPFENN,
#   * the prefix of the '<name>_pySIPFENN.csv' results file, and
#   * a substring matched against POSCAR file names when copying hull structures.

# Endmembers directory name
endmembers = "Al3Ni2_endmembers"

# dilute directory name
dilute = "Al3Ni2_dilute"

#SQS directory name
# NOTE(review): unlike the two names above this one reads "Al3Ni_" rather than
# "Al3Ni2_" — possibly a typo. It is not used by any cell visible here; confirm
# the real directory name before using it.
SQS = "Al3Ni_SQS"

## Run pySIPFENN and save the results to CSV

In [4]:
# pySIPFENN predictions for every structure in the endmembers directory.
# 'KS2022' is the descriptor in use; "Ward2017" is another descriptor that may be tried.
c.runFromDirectory(
    directory=endmembers,
    descriptor='KS2022',
    mode='serial',
)
# Store the predictions in a CSV named after the directory
c.writeResultsToCSV(f"{endmembers}_pySIPFENN.csv")

Importing structures...


100%|██████████| 27/27 [00:00<00:00, 112.00it/s]



Models that will be run: ['SIPFENN_Krajewski2022_NN30']
Calculating descriptors...


100%|██████████| 27/27 [00:04<00:00,  5.65it/s]


Done!
Making predictions...
Obtained predictions from:  SIPFENN_Krajewski2022_NN30
Done!


In [5]:
# pySIPFENN predictions for every structure in the dilute directory,
# using the same 'KS2022' descriptor and serial mode as the endmembers run.
c.runFromDirectory(
    directory=dilute,
    descriptor='KS2022',
    mode='serial',
)
# Store the predictions in a CSV named after the directory
c.writeResultsToCSV(f"{dilute}_pySIPFENN.csv")

Importing structures...


100%|██████████| 162/162 [00:00<00:00, 2290.81it/s]



Models that will be run: ['SIPFENN_Krajewski2022_NN30']
Calculating descriptors...


100%|██████████| 162/162 [00:33<00:00,  4.78it/s]


Done!
Making predictions...
Obtained predictions from:  SIPFENN_Krajewski2022_NN30
Done!


## Define functions to compute concentrations and process POSCAR files

In [6]:
# Function to parse POSCAR and extract atomic counts
def extract_atom_counts(poscar_path):
    """Read the per-species atom counts from a POSCAR file.

    The species symbols are read from the 6th line of the file and the
    matching counts from the 7th line. A species that is listed more than
    once on the species line (e.g. ``Al Ni Al``) has its counts summed, so
    no atoms are dropped.

    Args:
        poscar_path: Path to the POSCAR file.

    Returns:
        dict: species symbol -> total atom count (int), in order of first
        appearance on the species line.

    Raises:
        IndexError: If the file has fewer than 7 lines.
        ValueError: If a token on the counts line is not an integer (this is
            also what happens for a POSCAR without a species line, where the
            7th line is not numeric), or if the number of species symbols
            differs from the number of counts.
    """
    with open(poscar_path, 'r') as file:
        lines = file.readlines()

    atom_types = lines[5].split()  # species symbols
    atom_counts = [int(token) for token in lines[6].split()]  # their respective counts

    # zip() would silently truncate to the shorter list and yield a wrong composition
    if len(atom_types) != len(atom_counts):
        raise ValueError(
            f"{poscar_path}: {len(atom_types)} species symbols but "
            f"{len(atom_counts)} counts"
        )

    # Accumulate instead of dict(zip(...)) so repeated species are summed, not overwritten
    totals = {}
    for species, count in zip(atom_types, atom_counts):
        totals[species] = totals.get(species, 0) + count
    return totals

# Function to calculate concentrations of all elements
def calculate_concentrations(atom_counts):
    """Convert per-element atom counts into atomic percentages.

    Args:
        atom_counts: dict mapping element symbol -> number of atoms.

    Returns:
        dict mapping each element symbol to its share of the total atom
        count, expressed in percent. An empty input yields an empty dict.

    Raises:
        ZeroDivisionError: If the input is non-empty but its counts sum to zero.
    """
    total_atoms = sum(atom_counts.values())
    concentrations = {}
    for element in atom_counts:
        fraction = atom_counts[element] / total_atoms
        concentrations[element] = fraction * 100
    return concentrations

# Function to process all POSCAR files in given directories and save results to CSV
def process_poscar_files_to_csv(directories, output_csv, elements=('Al', 'Fe', 'Ni')):
    """Tabulate element concentrations of all ``*.POSCAR`` files and save them as CSV.

    Every directory is scanned for files ending in ``.POSCAR``. For each file
    the atom counts are read and converted to percentages; one row per file
    is collected. Files are visited in sorted path order so the row order is
    reproducible between runs (``glob`` itself returns paths in arbitrary order).

    Args:
        directories: Iterable of directory paths to scan. Missing directories
            and directories without ``.POSCAR`` files are reported and skipped.
        output_csv: Path of the CSV file to write (without the index column).
        elements: Element symbols to report, one ``'<El> Concentration (%)'``
            column each. An element absent from a structure is reported as 0.
            Defaults to Al, Fe and Ni.

    Returns:
        pandas.DataFrame with a ``'Name'`` column (file base name) followed by
        one concentration column per requested element.
    """
    data = []

    for directory in directories:
        # isdir (rather than exists) so a plain file is not mistaken for a directory
        if not os.path.isdir(directory):
            print(f"Directory not found: {directory}")
            continue

        # Sorted for a deterministic row order
        poscar_files = sorted(glob.glob(os.path.join(directory, '*.POSCAR')))
        if not poscar_files:
            print(f"No .POSCAR files found in directory: {directory}")
            continue

        for poscar_path in poscar_files:
            # Best effort: a file that cannot be parsed is reported and left out
            # of the table instead of aborting the whole scan.
            try:
                atom_counts = extract_atom_counts(poscar_path)
                concentrations = calculate_concentrations(atom_counts)
            except Exception as e:
                print(f"Error processing file {poscar_path}: {e}")
                continue

            filename = os.path.basename(poscar_path)
            data.append([filename] + [concentrations.get(element, 0) for element in elements])

    columns = ['Name'] + [f'{element} Concentration (%)' for element in elements]
    df = pd.DataFrame(data, columns=columns)

    df.to_csv(output_csv, index=False)
    return df


## Cleaned format of CSV

In [7]:
# Tabulate the composition of every POSCAR in the listed directories
directories = [endmembers, dilute]  # Add all your directories here
output_csv = 'poscar_concentrations_updated.csv'
df_result = process_poscar_files_to_csv(directories, output_csv)

# Name of the prediction column in the pySIPFENN result CSVs
# (the model reported in the run output: SIPFENN_Krajewski2022_NN30)
model_column = 'SIPFENN_Krajewski2022_NN30'

# Load the three CSV files
poscar_df = pd.read_csv(output_csv)
endmembers_df = pd.read_csv(endmembers + '_pySIPFENN.csv')
dilute_df = pd.read_csv(dilute + '_pySIPFENN.csv')

# Strip any whitespace from column headers to avoid mismatches
for frame in (poscar_df, endmembers_df, dilute_df):
    frame.columns = frame.columns.str.strip()

# Left-merge the endmember predictions onto the composition table by file name ...
merged_df = poscar_df.merge(endmembers_df[['Name', model_column]], on='Name', how='left')

# ... then the dilute predictions. Both sides now carry `model_column`, so the
# suffixes tell the two sources apart (an SQS set could be added the same way).
merged_df = merged_df.merge(
    dilute_df[['Name', model_column]],
    on='Name',
    how='left',
    suffixes=('_endmembers', '_dilute'),
)

# Single 'Formation_H' column: the endmember value where present, otherwise the dilute one
merged_df['Formation_H'] = merged_df[model_column + '_endmembers'].combine_first(
    merged_df[model_column + '_dilute']
)

# A structure whose name matched neither result file has no energy; report it
# here rather than letting a NaN travel silently into the phase-diagram cells.
missing_names = merged_df.loc[merged_df['Formation_H'].isna(), 'Name']
if not missing_names.empty:
    print(f"Warning: no pySIPFENN prediction found for {len(missing_names)} structure(s): "
          f"{missing_names.tolist()}")

# Save the final merged DataFrame
merged_df.to_csv('poscar_concentrations_updated_with_Formation_H.csv', index=False)



## Plotting

In [8]:
# Reload the merged table written by the previous section
merged_df = pd.read_csv('poscar_concentrations_updated_with_Formation_H.csv')

# Columns this cell relies on; adjust the names here if the CSV layout differs
name_column = merged_df['Name']
H_formation_column = 'Formation_H'
element_columns = [col for col in merged_df.columns if 'Concentration (%)' in col]

# Build one ComputedEntry per structure for the phase diagram
entries = []
for _, row in merged_df.iterrows():
    comp_dict = {}
    for column in element_columns:
        if pd.notna(row[column]):
            # 'Al Concentration (%)' -> 'Al'; percent -> fraction
            comp_dict[column.split(' ')[0]] = row[column] / 100

    composition = Composition.from_dict(comp_dict)
    formation_energy = row[H_formation_column]

    # NOTE(review): the Formation_H value is passed to ComputedEntry as-is together
    # with a composition given in fractions — confirm this matches the units
    # pySIPFENN reports.
    entries.append(ComputedEntry(composition, formation_energy))

# Phase diagram over all entries, plotted including unstable points
PD = PhaseDiagram(entries)
plotter = PDPlotter(PD, show_unstable=True)

# Display the plot
plotter.get_plot().show(dpi=300)


## Get the POSCARs on the convex hull

In [9]:
# Merged table of compositions and predicted formation energies
file_path = 'poscar_concentrations_updated_with_Formation_H.csv'  # Replace with your file path
data = pd.read_csv(file_path)

# Columns of interest
composition_columns = ['Al Concentration (%)', 'Fe Concentration (%)', 'Ni Concentration (%)']
formation_energy_column = 'Formation_H'

# One ComputedEntry per row, in row order (the position is used below to recover the name)
entries = []
for idx, row in data.iterrows():
    composition_dict = {
        'Al': row['Al Concentration (%)'],
        'Fe': row['Fe Concentration (%)'],
        'Ni': row['Ni Concentration (%)'],
    }

    # Rescale to fractions and leave out elements that are not present
    total = sum(composition_dict.values())
    fractions = {}
    for el, amount in composition_dict.items():
        if amount > 0:
            fractions[el] = amount / total
    composition = Composition(fractions)

    formation_energy = row[formation_energy_column]
    entries.append(ComputedEntry(composition, formation_energy))

# Phase diagram and the entries it reports as stable (on the convex hull)
phase_diagram = PhaseDiagram(entries)
hull_entries = phase_diagram.stable_entries

# Report every input entry found among the stable ones, with its file name
hull_data = []
for idx, entry in enumerate(entries):
    if entry not in hull_entries:
        continue
    hull_data.append({
        # read_csv gives a default 0..n-1 index, so the label equals the position
        'Name': data.at[idx, 'Name'],
        'Composition': str(entry.composition),
        'Formation Energy': entry.energy_per_atom,
    })

# Table of hull points, lowest formation energy first
hull_df = pd.DataFrame(hull_data)
sorted_data = hull_df.sort_values(by='Formation Energy', ascending=True)
print(sorted_data)
sorted_data.to_csv('sorted_data_by_Formation_H.csv', index=False)


                           Name  Composition  Formation Energy
17     149_Al3Ni2_dilute.POSCAR  Al0.6 Ni0.4         -0.645543
16     110_Al3Ni2_dilute.POSCAR  Al0.6 Ni0.4         -0.645543
3   25_Al3Ni2_endmembers.POSCAR  Al0.4 Ni0.6         -0.465003
0    2_Al3Ni2_endmembers.POSCAR  Al0.6 Fe0.4         -0.328355
6       63_Al3Ni2_dilute.POSCAR  Al0.6 Fe0.4         -0.328355
10     119_Al3Ni2_dilute.POSCAR  Al0.6 Fe0.4         -0.328355
11      23_Al3Ni2_dilute.POSCAR  Al0.8 Fe0.2         -0.272587
18       3_Al3Ni2_dilute.POSCAR  Al0.8 Fe0.2         -0.272587
15     122_Al3Ni2_dilute.POSCAR  Al0.2 Ni0.8         -0.242845
5      159_Al3Ni2_dilute.POSCAR  Al0.2 Ni0.8         -0.242845
13      61_Al3Ni2_dilute.POSCAR  Al0.2 Fe0.8          0.014667
19      81_Al3Ni2_dilute.POSCAR  Al0.2 Fe0.8          0.014667
12      59_Al3Ni2_dilute.POSCAR          Al1          0.037190
2    1_Al3Ni2_endmembers.POSCAR          Al1          0.037190
8      113_Al3Ni2_dilute.POSCAR          Al1          0

## Save the POSCARs on the convex hull in the directory

In [10]:
# Load the list of structures found on the convex hull
file_path = 'sorted_data_by_Formation_H.csv'
df = pd.read_csv(file_path)

# Define source directories
source_folder_endmembers = endmembers
source_folder_dilute = dilute
# Define the destination directory
destination_folder = 'POSCAR_on_the_convex_hull'

# Ensure the destination folder exists, if not, create it
os.makedirs(destination_folder, exist_ok=True)

# Folders searched for each file, in order of preference. Looking the file up on
# disk avoids guessing its folder from a substring of the file name.
source_folders = [source_folder_endmembers, source_folder_dilute]

for file_name in df['Name']:
    # First folder that actually contains the file, or None if there is none
    source_path = None
    for folder in source_folders:
        candidate = os.path.join(folder, file_name)
        if os.path.isfile(candidate):
            source_path = candidate
            break

    if source_path is None:
        # Report instead of skipping silently so missing structures are noticed
        print(f"Warning: Source file {file_name} was not found in any of {source_folders}.")
        continue

    shutil.copyfile(source_path, os.path.join(destination_folder, file_name))

print("POSCARs saved in the directory.")


POSCARs saved in the directory.
