In [2]:
import pandas as pd
from mendeleev import element
import numpy as np
import pickle
import seaborn as sb
import matplotlib.pyplot as plt
import pymatgen as mg
from pymatgen.ext.matproj import MPRester

In [2]:
def get_energy_data(df):
    """Get the dataframe as an input and get the energy data present in the 'data' 
    column of the dataframe to retrieve the most relaxed energy for that material.
    The result is the existing dataframe with data column replaced by energy column
    
    input: dataframe containing the energy data
    
    output: input dataframe with added columns from the 'data' column i.e. energy, LOOP+, etc.
    """
    
    pivot = pd.DataFrame(df.data.values.tolist())
    df = pd.concat([df, pivot], axis=1, sort=False)
    
    for i, en in enumerate(df.energy):
        if isinstance(en, list):
            df.energy[i] =  (en[-1])
    return df

In [1]:
mprest = MPRester('PzoyfeSvTTxngtLA')

NameError: name 'MPRester' is not defined

In [4]:
def get_sum_individual_energies(df):
    """Get sum of individual energies and individual elements in the molecule as added columns in the dataframe
    
    input : dataframe
    
    output: input dataframe with sum of individual energies for each crystal structure
    and a column for array of elements in the crystal structure
    """
    
    df['sum_individual_energy'] = np.nan
    df['elements'] = np.nan
    
    #Loop through the array of individual elements in the formula and get the sum of the energy
    for j, formula in enumerate(df.workflow):
        sum_individual_energies = 0
        
        arr_of_elements = mg.Composition(formula)
        num_of_elements = arr_of_elements.get_el_amt_dict()
        arr_of_elements = pd.array(arr_of_elements)
        df['elements'][j] = arr_of_elements
        
        for elem in arr_of_elements:
            en = mprest.get_data(elem)
            en = [x['energy_per_atom'] for x in en]
            if en:
                en = np.min(en)*num_of_elements[str(elem)]
            else:
                en = -0.142
                
            sum_individual_energies = sum_individual_energies + en
        
        df['sum_individual_energy'][j] = sum_individual_energies
    
    return df

In [5]:
def get_formation_energy(df):
    """Get formation energy for the materials in the dataframe as an added column
    
    input : dataframe
    
    output: input dataframe with fromation energy column for each crystal structure
    """
    
    df['formation_energy'] = np.nan
    
    for i in range(len(df)):
        if df.energy[i] is not None:
            df['formation_energy'][i] = df.energy[i] - df.sum_individual_energy[i]
    return df

In [6]:
def get_elements(df):
    """Get elements from the formula as added columns A and X"""
    
    df[['A','A_two','X']] = pd.DataFrame(df.elements.tolist(), index= df.index)
    df[['A','A_two','X']] = df[['A','A_two','X']].astype(str)
    
    for i in range(len(df)):
            
        if df.X[i] == 'None':
            df.X[i], df.A_two[i] = df.A_two[i], df.A[i]    
            
        if element(str(df.A[i])).atomic_number in [9.0, 17.0, 35.0, 53.0]:
            df.A[i], df.X[i] =  df.X[i], df.A[i]
            
        if element(str(df.A_two[i])).atomic_number in [9.0, 17.0, 35.0, 53.0]:
            df.A_two[i], df.X[i] =  df.X[i], df.A_two[i]
            
        if element(str(df.A[i])).atomic_number > element(str(df.A_two[i])).atomic_number:
            df.A[i], df.A_two[i] =  df.A_two[i], df.A[i]
            
    return df

In [7]:
def get_atomic_number(df_formation):
    """Get Atomic Number of Elements in the molecule"""
    
    df_formation['Atomic_number_A'] = np.nan
    df_formation['Atomic_number_A_two'] = np.nan
    df_formation['Atomic_number_X'] = np.nan
    
    for i in range(len(df_formation)):
        df_formation.Atomic_number_A[i] = element(str(df_formation.A[i])).atomic_number
        df_formation.Atomic_number_A_two[i] = element(str(df_formation.A_two[i])).atomic_number
        df_formation.Atomic_number_X[i] = element(str(df_formation.X[i])).atomic_number
        
    return df_formation

In [3]:
def get_magnetic_moment(df):
    """ parse magnetization value from magnetization raw data """
    
#     df['magmom_sites_tot'] = np.nan
    df['magmom_unitcell_x'] = np.nan
    df['magmom_unitcell_y'] = np.nan
    df['magmom_unitcell_z'] = np.nan
    
    for i in range(len(df)):
        magmom_sites_tot = np.nan
        magmom_unitcell = np.nan
        print(type(df['data'][i]))
        if ('magnetization' in df['data'][i]):
            raw_magmom = df['data'][i]['magnetization']

            if df.name[i] == 'initial' or df.name[i] == 'spin' or df.name[i] == 'afm':
                magmom = raw_magmom[-1]
                magmom_x = magmom[2]

                if isinstance(magmom_x[0], list):
                    magmom_x_sites_tot = [x[-1] for x in magmom_x]
                else:
                    magmom_x_sites_tot = [x[-1] for x in magmom]
                    
                magmom_x_unitcell = np.sum(magmom_x_sites_tot)
                magmom_sites_tot = magmom_x_sites_tot
                
                df['magmom_unitcell_x'][i] = abs(magmom_x_unitcell)
#                 df['magmom_sites_tot'][i] = magmom_sites_tot

                
            elif df.name[i] == 'spin_so' or df.name[i] == 'afm_so':
                magmom = raw_magmom[-3:]
                magmom_x = magmom[0][2]
                magmom_y = magmom[1][2]
                magmom_z = magmom[2][2]

                if isinstance(magmom_x[0], list):
                    magmom_x_sites_tot = [x[-1] for x in magmom_x]
                    magmom_x_unitcell = np.sum(magmom_x_sites_tot)
                else:
                    magmom_x_sites_tot = magmom_x
                    magmom_x_unitcell = magmom_x

                if isinstance(magmom_y[0], list):
                    magmom_y_sites_tot = [x[-1] for x in magmom_y]
                    magmom_y_unitcell = np.sum(magmom_y_sites_tot)
                else:
                    magmom_y_sites_tot = magmom_y
                    magmom_y_unitcell = magmom_y

                if isinstance(magmom_z[0], list):
                    magmom_z_sites_tot = [x[-1] for x in magmom_z]
                    magmom_z_unitcell = np.sum(magmom_z_sites_tot)
                else:
                    magmom_z_sites_tot = magmom_z
                    magmom_z_unitcell = magmom_z

                magmom_sites_tot = np.asarray([magmom_x_sites_tot, magmom_y_sites_tot, magmom_z_sites_tot], object)
                
                df['magmom_unitcell_x'][i] = abs(magmom_x_unitcell)
                df['magmom_unitcell_y'][i] = abs(magmom_y_unitcell)
                df['magmom_unitcell_z'][i] = abs(magmom_z_unitcell)
#                 df['magmom_sites_tot'][i] = magmom_sites_tot

    return df         

In [2]:
with open("C:\\Users\\admin\\AppData\\Local\\Programs\\Python\\Python37\\Scripts\\AAX_data_dump.pkl", "rb") as f:
    data = pickle.load(f)
    
df  = pd.DataFrame(data)

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\admin\\AppData\\Local\\Programs\\Python\\Python37\\Scripts\\AAX_data_dump.pkl'

In [21]:
df

Unnamed: 0,_state,job_id,workflow,name,description,lock,tick,parents,input_files,stage_in_url,...,NBANDS,energy,stress,retry_count,lattice_vecs,total charge,magnetization,position_force,volume of cell,is_relaxed
0,<django.db.models.base.ModelState object at 0x...,1a77946d-5661-4557-8079-83313ffa5525,Cr2Cu2Br12,initial,,,2020-08-11 17:26:17.271554+00:00,[],*,,...,96.0,-53.4342,"[[[XX, YY, ZZ, XY, YZ, ZX], [[-1.26924, -0.663...",0,"[[[13.024019134, 0.013330794, -0.01239713, 0.0...","[[[s, p, d, tot], [[0.261, 6.251, 4.011, 10.52...","[[x, [s, p, d, tot], [[0.025, 0.026, 2.79, 2.8...","[[[6.50943, 0.00857, 1.76772, 0.006711, -0.007...","[2156.26, 2156.26, 2156.26, 2156.26, 2156.26, ...",
1,<django.db.models.base.ModelState object at 0x...,637093ac-f54f-497b-bf69-093ed46373a2,Cr4F12,initial,,,2020-08-11 17:26:17.288801+00:00,[],*,,...,96.0,-96.3027,"[[[XX, YY, ZZ, XY, YZ, ZX], [[-101.80114, -84....",0,"[[[12.861339827, -0.200118766, 0.133360105, 0....","[[[s, p, d, tot], [[0.291, 6.336, 4.181, 10.80...","[[x, [s, p, d, tot], [[0.079, 0.016, 2.756, 2....","[[[6.55534, -0.06896, 1.84909, -6.466301, -3.7...","[2156.26, 2156.26, 2156.26, 2156.26, 2156.26, ...",
2,<django.db.models.base.ModelState object at 0x...,440e75cc-ec88-45aa-83cb-4e97e153efb1,Cr2Cu2Br12,spin,,,2020-08-11 17:26:17.300197+00:00,"[""1a77946d-5661-4557-8079-83313ffa5525""]",POTCAR,,...,96.0,-53.4253,"[[[XX, YY, ZZ, XY, YZ, ZX], [[9.30684, 9.29735...",0,"[[[12.945711061, -0.000221826, 0.000309889, 0....","[[[s, p, d, tot], [[0.258, 6.246, 3.759, 10.26...","[[x, [s, p, d, tot], [[0.025, 0.025, 2.565, 2....","[[[3.2363, 5.60205, 1.79044, -0.000549, -0.005...","[2156.26, 2156.26, 2156.26, 2156.26, 2156.26, ...",
3,<django.db.models.base.ModelState object at 0x...,c77ed12c-8368-4379-a09f-a6dcb40120f3,Cr2Pd2I12,afm_so,,,2020-08-11 17:26:17.303418+00:00,"[""5ebb0ce4-7890-4b33-a9f9-f7140101ad24""]",POTCAR,,...,160.0,-53.9543,"[[[XX, YY, ZZ, XY, YZ, ZX], [[-0.14551, -0.145...",0,"[[[14.361950181, 0.007987263, -0.267944803, 0....","[[[s, p, d, tot], [[0.223, 6.203, 2.958, 9.385...","[[x, [s, p, d, tot], [[0.0, 0.0, 0.001, 0.001]...","[[[3.56588, 5.97521, 1.44355, 0.000252, 0.0001...","[2156.26, 2156.26, 2156.26]",
4,<django.db.models.base.ModelState object at 0x...,c1f6f446-1f7e-49ae-88ae-636c0bb6099f,Cr2Ir2Cl12,afm_so,,,2020-09-23 16:55:57.829013+00:00,"[""351f0715-aeba-4e48-bf32-46881e6af404""]",POTCAR,,...,160.0,-72.5361,"[[[XX, YY, ZZ, XY, YZ, ZX], [[-0.40962, -0.409...",2,"[[[12.149662357, 1.4653e-05, 0.000155501, 0.08...","[[[s, p, d, tot], [[0.223, 6.222, 3.414, 9.859...","[[x, [s, p, d, tot], [[0.0, 0.0, 0.0, 0.0], [0...","[[[3.03744, 5.26093, 2.03158, 4e-06, 5e-06, -7...","[2156.26, 2156.26, 2156.26, 2156.26]",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1555,<django.db.models.base.ModelState object at 0x...,0535f246-f890-4389-a7c6-8fa965c6119f,Zr2Ti2Cl12,spin,Zr2 Ti2 Cl12\n,,2020-08-25 16:08:55.724704+00:00,"[""d615f892-6e3d-4d6e-9a82-5721a5ed43d0""]",POTCAR,,...,,,,0,,,,,,
1556,<django.db.models.base.ModelState object at 0x...,914a3069-222d-4e2e-a5fe-0e3dca959389,Y2Ru2F12,afm,Y2 Ru2 F12\n,,2020-08-25 16:08:38.021629+00:00,"[""839a88f6-95b2-49a5-b746-b54d05ee6f6d""]",POTCAR,,...,80.0,,,0,"[[13.643535556, 0.014190121, -0.00395171, 0.07...",,,,2156.26,False
1557,<django.db.models.base.ModelState object at 0x...,d5836862-fcf4-40f7-9bc0-ecb60cf69b8c,Ti2Au2Cl12,spin,Ti2 Au2 Cl12\n,,2020-08-25 16:08:38.021629+00:00,"[""f376d733-b040-4954-a02d-474566dbd2b2""]",POTCAR,,...,96.0,-70.2739,"[[[XX, YY, ZZ, XY, YZ, ZX], [[-0.38007, -0.277...",1,"[[[12.375084179, 0.002149205, 0.145906552, 0.0...","[[[s, p, d, tot], [[0.29, 0.325, 7.041, 7.656]...","[[x, [s, p, d, tot], [[0.01, 0.008, 2.519, 2.5...","[[[6.1891, 0.03491, 2.0319, -0.110382, -0.0838...","[2156.26, 2156.26, 2156.26, 2156.26, 2156.26, ...",
1558,<django.db.models.base.ModelState object at 0x...,1b141ffc-0f57-481f-9fb4-0dd53d9923d8,V2Co2I12,afm_so,V2 Co2 I12\n,,2020-08-25 16:08:38.021629+00:00,"[""a518ffb1-6c66-41aa-91ab-3378cad8e4db""]",POTCAR,,...,,,,4,,,,,,


In [20]:
df = get_energy_data(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()


In [11]:
# df = get_sum_individual_energies(df)

In [12]:
# df = get_formation_energy(df)

In [13]:
# df = get_elements(df)

In [14]:
# df = get_atomic_number(df)

In [15]:
# df = get_magnetic_moment(df)

# df.to_csv('AAX.csv')