In [2]:
import math

import matplotlib.pyplot as plt
import pandas as pd
from pymatgen.core import Composition, Element

In [3]:
# Read the formulas CSV into `df`; later cells read its 'Formula' and '2D_probability' columns
df = pd.read_csv(filepath_or_buffer='../../data/d4_screening_2D_materials/top_10.csv')

In [4]:
# Function to get elements from a formula
def get_elements(formula):
    """Return the string form of each entry in ``Composition(formula).elements``.

    Parameters
    ----------
    formula : str
        Chemical formula, parsed by pymatgen's ``Composition``.

    Returns
    -------
    list of str
        One string per element of the parsed composition, in the order
        ``Composition.elements`` yields them.
    """
    composition = Composition(formula)
    return list(map(str, composition.elements))

# Function to classify elements as metal or non-metal
def classify_elements(elements):
    """Label each element symbol as ``'<symbol> - Metal'``, ``'<symbol> - Non-Metal'``
    or ``'<symbol> - Unknown'``.

    The lookup tables are rebuilt from pymatgen's ``Element`` enumeration on every
    call. Hydrogen is deliberately left out of the non-metal table, so unless
    pymatgen flags it as a metal it is labelled ``Unknown``, as is any symbol that
    appears in neither table.

    Parameters
    ----------
    elements : iterable of str
        Element symbols to classify.

    Returns
    -------
    list of str
        One label per input symbol, in input order.
    """
    metal_symbols = []
    non_metal_symbols = []
    for candidate in Element:
        if candidate.is_metal:
            metal_symbols.append(candidate.symbol)
        elif candidate.symbol != "H":
            non_metal_symbols.append(candidate.symbol)

    def label(el):
        # Metals take precedence, then the (hydrogen-free) non-metal table.
        if el in metal_symbols:
            return f'{el} - Metal'
        if el in non_metal_symbols:
            return f'{el} - Non-Metal'
        return f'{el} - Unknown'

    return [label(el) for el in elements]

# Function to calculate electronegativity of elements in a formula
def calculate_electronegativity(elements):
    """Look up the electronegativity of every element symbol, preserving order.

    Parameters
    ----------
    elements : iterable of str
        Element symbols accepted by pymatgen's ``Element`` constructor.

    Returns
    -------
    list
        The ``X`` attribute (electronegativity) of ``Element(symbol)`` for each
        input symbol; an empty input yields an empty list.
    """
    return [Element(symbol).X for symbol in elements]

In [5]:
# Derive the per-formula feature columns.
# Each entry is (new column, source column, transform); order matters because
# later columns are computed from earlier ones.
for new_column, source_column, transform in (
    ('Elements', 'Formula', get_elements),
    ('Metal_NonMetal', 'Elements', classify_elements),
    ('Electronegativity', 'Elements', calculate_electronegativity),
    ('Composition', 'Formula', lambda formula: Composition(formula)),
    ('Num_Elements', 'Composition', lambda comp: len(comp.elements)),  # Number of elements
    ('Atomic_Composition', 'Composition', lambda comp: comp.get_el_amt_dict()),  # Atomic composition
):
    df[new_column] = df[source_column].apply(transform)

In [6]:
# Preview up to the first 20 rows, now including the derived feature columns
df.head(n=20)

Unnamed: 0,Formula,2D_probability,Elements,Metal_NonMetal,Electronegativity,Composition,Num_Elements,Atomic_Composition
0,XeBiRe3,0.932754,"[Xe, Bi, Re]","[Xe - Non-Metal, Bi - Metal, Re - Metal]","[2.6, 2.02, 1.9]","(Xe, Bi, Re)",3,"{'Xe': 1.0, 'Bi': 1.0, 'Re': 3.0}"
1,FeNiWCl2Ag2,0.92768,"[Fe, Ni, W, Cl, Ag]","[Fe - Metal, Ni - Metal, W - Metal, Cl - Non-M...","[1.83, 1.91, 2.36, 3.16, 1.93]","(Fe, Ni, W, Cl, Ag)",5,"{'Fe': 1.0, 'Ni': 1.0, 'W': 1.0, 'Cl': 2.0, 'A..."
2,RhBr3Re6,0.925749,"[Rh, Br, Re]","[Rh - Metal, Br - Non-Metal, Re - Metal]","[2.28, 2.96, 1.9]","(Rh, Br, Re)",3,"{'Rh': 1.0, 'Br': 3.0, 'Re': 6.0}"
3,Cl2Os5,0.924434,"[Cl, Os]","[Cl - Non-Metal, Os - Metal]","[3.16, 2.2]","(Cl, Os)",2,"{'Cl': 2.0, 'Os': 5.0}"
4,IrI2Ta5,0.919987,"[Ir, I, Ta]","[Ir - Metal, I - Non-Metal, Ta - Metal]","[2.2, 2.66, 1.5]","(Ir, I, Ta)",3,"{'Ir': 1.0, 'I': 2.0, 'Ta': 5.0}"
5,CrI,0.915075,"[Cr, I]","[Cr - Metal, I - Non-Metal]","[1.66, 2.66]","(Cr, I)",2,"{'Cr': 1.0, 'I': 1.0}"
6,I2Mn5,0.914463,"[I, Mn]","[I - Non-Metal, Mn - Metal]","[2.66, 1.55]","(I, Mn)",2,"{'I': 2.0, 'Mn': 5.0}"
7,I3Tc5,0.914414,"[I, Tc]","[I - Non-Metal, Tc - Metal]","[2.66, 1.9]","(I, Tc)",2,"{'I': 3.0, 'Tc': 5.0}"
8,CdTaCl3Mo4,0.913819,"[Cd, Ta, Cl, Mo]","[Cd - Metal, Ta - Metal, Cl - Non-Metal, Mo - ...","[1.69, 1.5, 3.16, 2.16]","(Cd, Ta, Cl, Mo)",4,"{'Cd': 1.0, 'Ta': 1.0, 'Cl': 3.0, 'Mo': 4.0}"
9,FKrNb,0.912111,"[F, Kr, Nb]","[F - Non-Metal, Kr - Non-Metal, Nb - Metal]","[3.98, 3.0, 1.6]","(F, Kr, Nb)",3,"{'F': 1.0, 'Kr': 1.0, 'Nb': 1.0}"


In [7]:
# Function to predict metal_nonmetal for a formula

# Symbol lookup tables built from pymatgen's Element enumeration.
# `metals` backs the single-element branch of classify_metal_nonmetal below.
metals = [
    element.symbol
    for element in Element
    if element.is_metal
]
# Hydrogen is intentionally kept out of the non-metal table.
non_metals = [
    element.symbol
    for element in Element
    if element.symbol != "H" and not element.is_metal
]

def classify_metal_nonmetal(electroneg_values, elements, threshold=2.1):
    """Heuristically flag a formula as metallic (1) or non-metallic (0).

    A single-element formula is looked up directly in the module-level
    ``metals`` list. Any other formula is judged by the *unweighted* mean of its
    known electronegativities: stoichiometric amounts are not taken into
    account.

    Parameters
    ----------
    electroneg_values : sequence of float
        One electronegativity per element. Entries that are ``None`` or NaN
        (pymatgen's ``Element.X`` yields NaN for elements without a Pauling
        value) are skipped, so one missing value no longer forces the whole
        formula to be labelled non-metallic.
    elements : sequence of str
        Element symbols; only ``elements[0]`` is read, and only when exactly one
        electronegativity value was supplied.
    threshold : float, default 2.1
        Inclusive upper bound on the mean electronegativity for a metal.

    Returns
    -------
    int
        1 for metal, 0 for non-metal. Also 0 when no usable electronegativity
        is available (empty input or every value missing), since a metallic
        character cannot be established in that case.
    """
    if len(electroneg_values) == 1:
        # Single-element formula: trust the element's own metal flag.
        return 1 if elements[0] in metals else 0

    known_values = [
        value
        for value in electroneg_values
        if value is not None and not math.isnan(value)
    ]
    if not known_values:
        # Nothing to average; avoids a ZeroDivisionError on empty input.
        return 0

    mean_electronegativity = sum(known_values) / len(known_values)
    # Metal when the mean is at or below the threshold, otherwise non-metal.
    return 1 if mean_electronegativity <= threshold else 0

In [8]:
# Translation from the classifier's 0/1 result to the FALSE/TRUE labels stored in the frame
mapping = {0: 'FALSE', 1: 'TRUE'}

# Classify each row from its electronegativities and element symbols, then relabel
df['Is_Metal'] = df.apply(
    lambda row: classify_metal_nonmetal(row['Electronegativity'], row['Elements']),
    axis=1,
).map(mapping)

In [9]:
# Preview up to the first 20 rows, now including the Is_Metal label
df.head(n=20)

Unnamed: 0,Formula,2D_probability,Elements,Metal_NonMetal,Electronegativity,Composition,Num_Elements,Atomic_Composition,Is_Metal
0,XeBiRe3,0.932754,"[Xe, Bi, Re]","[Xe - Non-Metal, Bi - Metal, Re - Metal]","[2.6, 2.02, 1.9]","(Xe, Bi, Re)",3,"{'Xe': 1.0, 'Bi': 1.0, 'Re': 3.0}",False
1,FeNiWCl2Ag2,0.92768,"[Fe, Ni, W, Cl, Ag]","[Fe - Metal, Ni - Metal, W - Metal, Cl - Non-M...","[1.83, 1.91, 2.36, 3.16, 1.93]","(Fe, Ni, W, Cl, Ag)",5,"{'Fe': 1.0, 'Ni': 1.0, 'W': 1.0, 'Cl': 2.0, 'A...",False
2,RhBr3Re6,0.925749,"[Rh, Br, Re]","[Rh - Metal, Br - Non-Metal, Re - Metal]","[2.28, 2.96, 1.9]","(Rh, Br, Re)",3,"{'Rh': 1.0, 'Br': 3.0, 'Re': 6.0}",False
3,Cl2Os5,0.924434,"[Cl, Os]","[Cl - Non-Metal, Os - Metal]","[3.16, 2.2]","(Cl, Os)",2,"{'Cl': 2.0, 'Os': 5.0}",False
4,IrI2Ta5,0.919987,"[Ir, I, Ta]","[Ir - Metal, I - Non-Metal, Ta - Metal]","[2.2, 2.66, 1.5]","(Ir, I, Ta)",3,"{'Ir': 1.0, 'I': 2.0, 'Ta': 5.0}",False
5,CrI,0.915075,"[Cr, I]","[Cr - Metal, I - Non-Metal]","[1.66, 2.66]","(Cr, I)",2,"{'Cr': 1.0, 'I': 1.0}",False
6,I2Mn5,0.914463,"[I, Mn]","[I - Non-Metal, Mn - Metal]","[2.66, 1.55]","(I, Mn)",2,"{'I': 2.0, 'Mn': 5.0}",False
7,I3Tc5,0.914414,"[I, Tc]","[I - Non-Metal, Tc - Metal]","[2.66, 1.9]","(I, Tc)",2,"{'I': 3.0, 'Tc': 5.0}",False
8,CdTaCl3Mo4,0.913819,"[Cd, Ta, Cl, Mo]","[Cd - Metal, Ta - Metal, Cl - Non-Metal, Mo - ...","[1.69, 1.5, 3.16, 2.16]","(Cd, Ta, Cl, Mo)",4,"{'Cd': 1.0, 'Ta': 1.0, 'Cl': 3.0, 'Mo': 4.0}",False
9,FKrNb,0.912111,"[F, Kr, Nb]","[F - Non-Metal, Kr - Non-Metal, Nb - Metal]","[3.98, 3.0, 1.6]","(F, Kr, Nb)",3,"{'F': 1.0, 'Kr': 1.0, 'Nb': 1.0}",False


In [10]:
# Remove the screening probability and the Composition objects before export
df.drop(columns=['2D_probability', 'Composition'], inplace=True)

In [11]:
# Write the computed properties to CSV, leaving out the DataFrame index
df.to_csv(
    path_or_buf='../../data/d5_properties_calculation/properties_top10.csv',
    index=False,
)