In [58]:
import numpy as np
import pandas as pd
import os
import Gen_atom
import re
import copy
from Gen_atom import atomic_dict, lattice_dict
import json
import csv
PROPERTY_NUMBER = 25

In [59]:
def decompose_formula(formula):
    """Split a fully-numbered formula into element symbols and counts.

    Every element has to be followed by an explicit integer count, as in
    'Pb1Se1' or 'Ag4O2'. Letters and digits are extracted independently:
    each maximal run of letters becomes one symbol and each maximal run of
    digits becomes one count. A formula that omits a count (e.g. 'PbSe') is
    therefore NOT split into its elements by this function.

    Args:
        formula: a string representing the formula of the material.
    Returns:
        A list of element symbols (strings).
        A list of element counts (ints), in order of appearance.
    """
    symbols = re.findall(r'[A-Za-z]+', formula)
    counts = list(map(int, re.findall(r'(\d+)', formula)))
    return symbols, counts




def get_atom_related_properties(formula):
    """Get the compositional weighted (CW) properties.

    For every element in the formula the per-element property vector is
    fetched from 'Gen_atom', multiplied by the number of atoms of that
    element, summed over all elements and finally divided by the total
    number of atoms in the formula.

    The per-element vector holds PROPERTY_NUMBER (25) features. Elsewhere in
    this file the result is labelled with the following column names, so the
    vector is presumably ordered like this (confirm against Gen_atom):
    atomic number, Mendeleev number, period, group, atomic mass,
    atomic density, valence electrons, absolute radii, covalent radii,
    van der Waals radii, electron affinity, electronegativity,
    first ionization energy, boiling point, melting point, molar volume,
    thermal conductivity, the orbital exponent of Slater-type orbitals,
    polarizability, global hardness, electrophilicity indices,
    atomization enthalpy, fusion enthalpy, vaporization enthalpy,
    binding energy.

    Args:
        formula: a string representing the formula of the material; counts
            of one may be omitted (it is parsed by 'decompose_formula_II').
    Returns:
        An array with shape '(25,)' of the CW properties.
    """
    element, element_number = decompose_formula_II(formula)
    total = np.zeros((PROPERTY_NUMBER,))
    n_atoms = np.sum(element_number)
    for ele, count in zip(element, element_number):
        atom = Gen_atom.atom(ele)
        try:
            total = total + count * np.array(atom.get_property())
        except AttributeError:
            # Best effort: the element is skipped but still counted in
            # n_atoms, so the average is taken over all atoms regardless.
            # NOTE(review): confirm that this bias is intended.
            print("No such property!")
    return total / n_atoms

def decompose_formula_II(formula):
    """Split a human-readable formula into element symbols and counts.

    Unlike 'decompose_formula', a count of one may be left out, so both
    'PbSe' and 'Pb1Se1' are accepted. The string is cut in front of every
    upper-case character (other than the very first character); inside each
    piece the text before the first digit is the symbol and the remainder is
    parsed with float(), so fractional counts such as 'Cu1.5Se' work too.

    Args:
        formula: a string representing the formula of the material.
    Returns:
        A list of element symbols (strings).
        A list of element counts (floats; 1.0 when the count is omitted).
    Raises:
        ValueError: if the text following a symbol is not a valid number.
    """
    if len(formula) == 0:
        return [], []

    # Every upper-case character after position 0 opens a new element token.
    starts = [0] + [pos for pos in range(1, len(formula))
                    if formula[pos].isupper()]
    stops = starts[1:] + [len(formula)]

    symbols = []
    counts = []
    for start, stop in zip(starts, stops):
        token = formula[start:stop]
        # The leading character always belongs to the symbol, so the search
        # for the first digit begins at offset 1.
        split_at = next(
            (pos for pos in range(1, len(token)) if token[pos].isdigit()),
            len(token),
        )
        symbols.append(token[:split_at])
        if split_at < len(token):
            counts.append(float(token[split_at:]))
        else:
            counts.append(1.0)
    return symbols, counts


def get_qf_descriptors(formula, position_frac, a, b, c, index):
    """Compute one qf descriptor (crystal structure fingerprint).

    The descriptor is the sum over all atom pairs of
    adjacency * (1 / distance**2) * |Q_i - Q_j|, where Q is the elemental
    property selected by 'index'.

    Args:
        formula: a string representing the chemical formula of the material.
        position_frac: an array with shape '(N, 3)'.
        a: the length of lattice in a axis.
        b: the length of lattice in b axis.
        c: the length of lattice in c axis.
        index: a number between [0, 25).

    Returns:
        A number.
    """
    distances, adjacency = get_dis_adj_matrix(position_frac, a, b, c)
    # Only the absolute-difference matrix is needed; the pairwise-mean matrix
    # returned alongside it is discarded.
    property_gap, _ = get_atom_matrix(formula, index)

    # The diagonal of the distance matrix is zero; put ones there so that the
    # reciprocal below stays finite (the adjacency of an atom with itself is
    # zero, so those entries do not contribute anyway).
    n_atoms = distances.shape[0]
    finite_distances = distances + np.eye(n_atoms)

    # Element-wise products throughout.
    pair_weights = adjacency * (1 / finite_distances ** 2)
    return np.sum(pair_weights * property_gap)

def get_atom_matrix(formula, index):
    """Get the atomic matrices with shape '(N, N)'.

    Each atom of the unit cell is assigned the elemental property Q selected
    by 'index'. Two pairwise matrices are built from these values:
    Qij = |Qi - Qj| (zero when i = j or when atoms i and j are the same
    element) and the pairwise mean (Qi + Qj) / 2.

    Args:
        formula: a string representing the chemical formula of the material
            with an explicit count after every element, like "Ag4O2".
        index: a number. The 'index' is the index among the 25 features, where
         the value must be a number between [0, 25).
    Returns:
        An array with shape '(N, N)' holding |Qi - Qj|.
        An array with shape '(N, N)' holding (Qi + Qj) / 2.
    Raises:
        ValueError: if the formula does not provide exactly one count per
            element (e.g. "SiC" or "Pb1Se").
    """
    element, element_number = decompose_formula(formula)
    # decompose_formula extracts letters and digits independently, so a
    # missing count would otherwise silently drop atoms from the matrix.
    if not element or len(element) != len(element_number):
        raise ValueError(
            "formula %r must give every element an explicit count, "
            "e.g. 'Ag4O2'" % (formula,)
        )

    # One constant-valued segment per element, as long as its atom count.
    segments = [
        np.ones((int(number),)) * Gen_atom.atom(ele).get_property()[index]
        for ele, number in zip(element, element_number)
    ]
    vec = np.concatenate(segments, axis=0)

    # N : number of atoms in unit cell
    n_atoms = vec.shape[0]
    # Every row of atom_matrix is 'vec', so atom_matrix.T[i, j] is Q_i and
    # atom_matrix[i, j] is Q_j.
    atom_matrix = np.tile(vec, (n_atoms, 1))
    atom_matrix_1 = np.abs(atom_matrix.T - atom_matrix)
    atom_matrix_2 = (atom_matrix.T + atom_matrix) / 2
    return atom_matrix_1, atom_matrix_2

def get_dis_adj_matrix(position_frac, a, b, c):
    """Get the distance and adjacency matrix.

    For every pair of atoms the distance is the smallest non-zero distance
    between any of their periodic images in a 2*2*2 supercell. Each atom i
    (except the last) is then linked to its closest atom among those with a
    larger index, and both matrices are symmetrised.

    Args:
        position_frac: an array with shape '(N, 3)'.
        a: the length of lattice in a axis.
        b: the length of lattice in b axis.
        c: the length of lattice in c axis.
    Returns:
        An array with shape '(N, N)' (distance matrix, zero diagonal).
        An array with shape '(N, N)' (adjacency matrix).
    """
    n_atoms = position_frac.shape[0]

    # Repetition counts (n_a, n_b, n_c) of the eight images that make up the
    # 2*2*2 supercell, accounting for periodicity along the three axes.
    image_shifts = (
        (1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2),
        (2, 2, 1), (2, 1, 2), (1, 2, 2), (2, 2, 2),
    )
    images = [expand_cell(position_frac, a, b, c, *shift)
              for shift in image_shifts]

    dis_matrix = np.zeros((n_atoms, n_atoms))
    adj_matrix = np.zeros((n_atoms, n_atoms))
    # The last atom has no partner with a larger index, so it gets no row.
    for i in range(n_atoms - 1):
        images_i = np.vstack([image[i] for image in images])
        for j in range(i + 1, n_atoms):
            images_j = np.vstack([image[j] for image in images])
            dis_matrix[i, j] = find_min_dis(images_i, images_j)
        # NOTE(review): at this point only the columns j > i of row i are
        # filled, so the nearest neighbour is searched among later atoms only.
        nearest = find_min_nonzero(dis_matrix[i])
        adj_matrix[i, nearest] = 1

    # Only the upper triangles were filled; mirror them to the lower ones.
    return dis_matrix.T + dis_matrix, adj_matrix.T + adj_matrix


def find_min_dis(array_x, array_y):
    """Find the minimum non-zero distance between two sets of points.

    The sets are typically the periodic images of two atoms; every point of
    'array_x' is compared with every point of 'array_y' and exact zero
    distances (coinciding points) are ignored.

    Args:
        array_x: an array with shape '(8, 3)' (any '(m, 3)' is accepted).
        array_y: an array with shape '(8, 3)' (any '(n, 3)' is accepted).
    Returns:
        The value of the minimum non-zero distance between the two sets.
    Raises:
        ValueError: if every pairwise distance is zero.
    """
    points_x = np.asarray(array_x, dtype=float)
    points_y = np.asarray(array_y, dtype=float)
    # Broadcast (m, 1, 3) against (1, n, 3) to get all pairwise offsets.
    offsets = points_x[:, np.newaxis, :] - points_y[np.newaxis, :, :]
    distances = np.sqrt(np.sum(offsets ** 2, axis=2))
    nonzero = distances[distances != 0]
    if nonzero.size == 0:
        raise ValueError(
            "all pairwise distances are zero; no non-zero minimum exists"
        )
    return nonzero.min()


def expand_cell(position_frac, a, b, c, n_a, n_b, n_c):
    """Map fractional coordinates to Cartesian coordinates in a supercell image.

    Each fractional coordinate is shifted by '(n - 1)' whole cells along its
    axis and then multiplied by the lattice length of that axis. With
    n_a = n_b = n_c = 1 this is the original unit cell.

    Args:
        position_frac: an array with shape '(N, 3)'. It is not modified.
        a: the length of lattice in a axis.
        b: the length of lattice in b axis.
        c: the length of lattice in c axis.
        n_a: the number of repetitions along a axis.
        n_b: the number of repetitions along b axis.
        n_c: the number of repetitions along c axis
    Returns:
        A float array with shape '(N ,3)'.
    """
    # Work on a float copy: copying an integer-typed input as-is would keep
    # the integer dtype and truncate the scaled coordinates on assignment.
    cartesian = np.array(position_frac, dtype=float)
    axes = ((n_a, a), (n_b, b), (n_c, c))
    for axis, (repetitions, length) in enumerate(axes):
        cartesian[:, axis] = (cartesian[:, axis] + (repetitions - 1)) * length
    return cartesian

def find_min_nonzero(array):
    """Return the index of the smallest non-zero entry of a 1-D array.

    Args:
        array: a one-dimensional numpy array.
    Returns:
        The index (int) of the first occurrence of the smallest value among
        the non-zero entries.
    Raises:
        ValueError: if the array has no non-zero entry.
    """
    candidates = array[np.nonzero(array)]
    smallest = np.min(candidates)
    # Look the value up in the full array so the index refers to 'array',
    # not to the filtered 'candidates'.
    return list(array).index(smallest)

In [60]:
# Load the table stored in the file 'aflow_data' (parsed as CSV). The
# 'compound' column is used below as the formula fed to
# get_atom_related_properties.
data = pd.read_csv('aflow_data')



In [61]:
# Column labels for the 25 compositional-weighted features; used below as the
# columns of the compound_properties / exp_compound_properties frames.
var = ['atomic number', 'Mendeleev number', 'period', 'group',
           'atomic mass', 'atomic density', 'valence electrons',
           'absolute radii', 'covalent radii', 'van der Waals radii', 'electron affinity',
           'electronegativity',
           'first ionization energy', 'boiling point', 'melting point', 'molar volume',
           'thermal conductivity', 'the orbital exponent of Slater-type orbitals',
           'polarizability', 'global hardness', 'electrophilicity indices',
           'atomization enthalpy', 'fusion enthalpy',
           'vaporization enthalpy', 'binding energy']
# The string literal below is disabled code kept as a bare expression: it is
# only echoed as the cell result and does not add any column to `data`.
"""
for i in var:
    data[i] = 0
"""

'\nfor i in var:\n    data[i] = 0\n'

In [62]:
data


Unnamed: 0,compound,auid,aurl,spacegroup_relax,Pearson_symbol_relax,agl_thermal_conductivity_300K,volume_atom,volume_cell,spacegroup_orig,geometry
0,Bi4,aflow:fadf792d7b321127,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/BCT/Bi1_I...,140,tI8,0.009881,40.00310,160.0120,140,"[6.525239, 6.525239, 6.525239, 95.91631, 95.91..."
1,Hg33Rb3,aflow:3a84e674e05ac4e6,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Hg11R...,221,cP36,0.011507,30.58650,1101.1100,221,"[10.32628, 10.32628, 10.32628, 90, 90, 90]"
2,Hg33K3,aflow:ac7610d35123f5c5,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Hg11K...,221,cP36,0.011813,30.18190,1086.5500,221,"[10.28056, 10.28056, 10.28056, 90, 90, 90]"
3,Cs6Hg40,aflow:978182b72d30a019,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Cs3Hg...,223,cP46,0.013735,33.18990,1526.7300,223,"[11.51475, 11.51475, 11.51475, 90, 90, 90]"
4,Ag4O2,aflow:f024f2b3bfd420b0,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Ag2O1...,224,cP6,0.017453,19.36390,116.1840,224,"[4.87957, 4.87957, 4.87957, 90, 90, 90]"
...,...,...,...,...,...,...,...,...,...,...
5659,C4,aflow:b2688e84030188b8,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/BCT/C1_IC...,139,tI8,206.237000,6.02134,24.0853,139,"[3.341838, 3.341838, 3.341838, 98.1149, 98.114..."
5660,Al1Co2Ti1,aflow:7fd9c09131def966,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/FCC/Al1Co...,225,cF16,213.507000,13.87990,55.5196,225,"[4.282071, 4.282071, 4.282071, 60, 60, 60]"
5661,C4,aflow:440c4eee274b61b6,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/HEX/C1_IC...,194,hP4,272.257000,5.71410,22.8564,194,"[2.512409, 2.512406, 4.181162, 90, 90, 120]"
5662,B1N1,aflow:fd5539a4f79db51c,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/FCC/B1N1_...,216,cF8,281.785000,5.95797,11.9159,216,"[2.563782, 2.563782, 2.563782, 60, 60, 60]"


In [63]:


# Empty frame with one column per feature label in `var`; the rows are filled
# in by the next cell.
compound_properties = pd.DataFrame(columns=var)
compound_properties

Unnamed: 0,atomic number,Mendeleev number,period,group,atomic mass,atomic density,valence electrons,absolute radii,covalent radii,van der Waals radii,...,molar volume,thermal conductivity,the orbital exponent of Slater-type orbitals,polarizability,global hardness,electrophilicity indices,atomization enthalpy,fusion enthalpy,vaporization enthalpy,binding energy


In [64]:
# Compute one feature vector per compound and build the frame in a single
# step; appending with `.loc[len(df)]` inside a loop grows the frame one row
# at a time and gets slower with every insertion.
compound_properties = pd.DataFrame(
    [get_atom_related_properties(formula) for formula in data['compound']],
    columns=var,
)


There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizationEnthalpy data!
There is no bindingEnergy data!
There is no atomizat

In [65]:

compound_properties

Unnamed: 0,atomic number,Mendeleev number,period,group,atomic mass,atomic density,valence electrons,absolute radii,covalent radii,van der Waals radii,...,molar volume,thermal conductivity,the orbital exponent of Slater-type orbitals,polarizability,global hardness,electrophilicity indices,atomization enthalpy,fusion enthalpy,vaporization enthalpy,binding energy
0,83.000000,87.000000,6.000000,15.000000,208.980000,9780.000000,3.000000,1.571200,1.480000,2.070000,...,21.310000,8.000000,1.500000,42.676700,3.397200,2.671040,207.000000,10.900000,160.000000,90526.000000
1,76.416667,68.583333,5.916667,11.083333,190.996483,13135.166667,1.916667,1.218842,1.393333,2.131667,...,17.562500,12.441667,4.072658,160.589500,2.274700,5.495105,65.416667,2.281667,60.266667,77443.500000
2,74.916667,68.666667,5.833333,11.083333,187.132358,13078.833333,1.916667,1.209525,1.379167,2.108333,...,16.744167,15.941667,4.076375,135.758583,2.318467,5.504012,66.083333,2.293333,60.675000,76477.533333
3,76.739130,65.391304,6.000000,10.565217,191.761522,12584.217391,1.869565,1.327552,1.466087,2.230000,...,21.505217,11.913043,3.888235,243.457391,2.194387,5.290321,65.565217,2.263913,59.956522,76956.304348
4,34.000000,81.000000,4.000000,12.666667,77.245133,7491.666667,0.000000,1.250233,1.186667,1.906667,...,12.633333,286.675527,1.458333,48.154333,7.054867,3.578853,273.000000,7.607333,171.136667,17190.366667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5659,6.000000,95.000000,2.000000,14.000000,12.010700,2267.000000,4.000000,0.942200,0.760000,1.700000,...,5.290000,140.000000,1.625000,1.243200,11.040700,4.768960,717.000000,117.000000,715.000000,284.200000
5660,22.250000,64.750000,3.750000,8.750000,48.178725,6251.750000,2.500000,1.607325,1.332500,1.997500,...,8.495000,114.250000,1.031575,38.354800,3.834800,2.394932,412.250000,15.450000,367.000000,5485.750000
5661,6.000000,95.000000,2.000000,14.000000,12.010700,2267.000000,4.000000,0.942200,0.760000,1.700000,...,5.290000,140.000000,1.625000,1.243200,11.040700,4.768960,717.000000,117.000000,715.000000,284.200000
5662,6.000000,93.000000,2.000000,14.000000,12.408850,1743.000000,0.000000,1.012400,0.775000,1.735000,...,8.965000,13.512915,1.625000,1.573650,11.041400,4.768800,518.000000,25.180000,254.895000,298.950000


In [66]:
# Put the computed features side by side with the original table (axis=1
# joins on the row index, so both frames are expected to share 0..n-1).
# NOTE: the spelling 'compund' is kept because a later cell refers to this
# name when writing the result to disk.
data_compund_properties = pd.concat([compound_properties, data],axis=1)
data_compund_properties

Unnamed: 0,atomic number,Mendeleev number,period,group,atomic mass,atomic density,valence electrons,absolute radii,covalent radii,van der Waals radii,...,compound,auid,aurl,spacegroup_relax,Pearson_symbol_relax,agl_thermal_conductivity_300K,volume_atom,volume_cell,spacegroup_orig,geometry
0,83.000000,87.000000,6.000000,15.000000,208.980000,9780.000000,3.000000,1.571200,1.480000,2.070000,...,Bi4,aflow:fadf792d7b321127,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/BCT/Bi1_I...,140,tI8,0.009881,40.00310,160.0120,140,"[6.525239, 6.525239, 6.525239, 95.91631, 95.91..."
1,76.416667,68.583333,5.916667,11.083333,190.996483,13135.166667,1.916667,1.218842,1.393333,2.131667,...,Hg33Rb3,aflow:3a84e674e05ac4e6,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Hg11R...,221,cP36,0.011507,30.58650,1101.1100,221,"[10.32628, 10.32628, 10.32628, 90, 90, 90]"
2,74.916667,68.666667,5.833333,11.083333,187.132358,13078.833333,1.916667,1.209525,1.379167,2.108333,...,Hg33K3,aflow:ac7610d35123f5c5,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Hg11K...,221,cP36,0.011813,30.18190,1086.5500,221,"[10.28056, 10.28056, 10.28056, 90, 90, 90]"
3,76.739130,65.391304,6.000000,10.565217,191.761522,12584.217391,1.869565,1.327552,1.466087,2.230000,...,Cs6Hg40,aflow:978182b72d30a019,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Cs3Hg...,223,cP46,0.013735,33.18990,1526.7300,223,"[11.51475, 11.51475, 11.51475, 90, 90, 90]"
4,34.000000,81.000000,4.000000,12.666667,77.245133,7491.666667,0.000000,1.250233,1.186667,1.906667,...,Ag4O2,aflow:f024f2b3bfd420b0,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Ag2O1...,224,cP6,0.017453,19.36390,116.1840,224,"[4.87957, 4.87957, 4.87957, 90, 90, 90]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5659,6.000000,95.000000,2.000000,14.000000,12.010700,2267.000000,4.000000,0.942200,0.760000,1.700000,...,C4,aflow:b2688e84030188b8,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/BCT/C1_IC...,139,tI8,206.237000,6.02134,24.0853,139,"[3.341838, 3.341838, 3.341838, 98.1149, 98.114..."
5660,22.250000,64.750000,3.750000,8.750000,48.178725,6251.750000,2.500000,1.607325,1.332500,1.997500,...,Al1Co2Ti1,aflow:7fd9c09131def966,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/FCC/Al1Co...,225,cF16,213.507000,13.87990,55.5196,225,"[4.282071, 4.282071, 4.282071, 60, 60, 60]"
5661,6.000000,95.000000,2.000000,14.000000,12.010700,2267.000000,4.000000,0.942200,0.760000,1.700000,...,C4,aflow:440c4eee274b61b6,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/HEX/C1_IC...,194,hP4,272.257000,5.71410,22.8564,194,"[2.512409, 2.512406, 4.181162, 90, 90, 120]"
5662,6.000000,93.000000,2.000000,14.000000,12.408850,1743.000000,0.000000,1.012400,0.775000,1.735000,...,B1N1,aflow:fd5539a4f79db51c,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/FCC/B1N1_...,216,cF8,281.785000,5.95797,11.9159,216,"[2.563782, 2.563782, 2.563782, 60, 60, 60]"


In [67]:
get_atom_related_properties('Hg33Rb3')

array([ 7.64166667e+01,  6.85833333e+01,  5.91666667e+00,  1.10833333e+01,
        1.90996483e+02,  1.31351667e+04,  1.91666667e+00,  1.21884167e+00,
        1.39333333e+00,  2.13166667e+00, -4.36173667e-01,  2.03296417e-01,
        9.91580250e+00,  6.57473333e+02,  2.40831667e+02,  1.75625000e+01,
        1.24416667e+01,  4.07265833e+00,  1.60589500e+02,  2.27470000e+00,
        5.49510500e+00,  6.54166667e+01,  2.28166667e+00,  6.02666667e+01,
        7.74435000e+04])

In [68]:
get_atom_related_properties("Bi4")

array([8.30000e+01, 8.70000e+01, 6.00000e+00, 1.50000e+01, 2.08980e+02,
       9.78000e+03, 3.00000e+00, 1.57120e+00, 1.48000e+00, 2.07000e+00,
       9.42362e-01, 2.25650e-01, 7.28560e+00, 1.83700e+03, 5.44400e+02,
       2.13100e+01, 8.00000e+00, 1.50000e+00, 4.26767e+01, 3.39720e+00,
       2.67104e+00, 2.07000e+02, 1.09000e+01, 1.60000e+02, 9.05260e+04])

In [69]:
# Negative feature positions; not referenced by the other visible cells.
index = [-1, -3, -4, -5, -6, -7]

# Fractional coordinates of four sample sites, one row per site.
position_frac = [
    [0.0, 0.0, 0.0],
    [0.25, 0.25, 0.25],
    [0.75, 0.75, 0.75],
    [0.5, 0.5, 0.5],
]

# Array form with shape (4, 3), as required by the descriptor functions.
pf_np = np.array(position_frac)
pf_np.shape

(4, 3)

In [70]:
# NOTE(review): this call fails as written (see the IndexError recorded below).
# `pf_np[0]` is a single row of shape (3,), while get_qf_descriptors forwards
# position_frac to get_dis_adj_matrix / expand_cell, which index it as a 2-D
# '(N, 3)' array. Passing the full `pf_np` would satisfy that shape.
# NOTE(review): get_atom_matrix parses the formula with decompose_formula,
# which needs an explicit count after every element (e.g. 'Ag4O2'); 'SiC' has
# none. Confirm the intended formula and that its total atom count matches the
# number of rows in position_frac.
get_qf_descriptors('SiC',position_frac=
               pf_np[0],a = 6.525239, b = 6.525239,c =  6.525239,index=1)

IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

In [71]:
# Write the combined table to the file 'data_compund_properties' (no file
# extension). With pandas' default settings the row index is written as the
# first, unnamed column.
data_compund_properties.to_csv("data_compund_properties")


In [72]:
# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which can execute code; only use it on files from a trusted source.
# NOTE(review): the recorded run could not read 'data_series_841' this way;
# confirm how the file was written (it may need a different loader).
ds_ = np.load("data_series_841", allow_pickle=True)
ds_

UnpicklingError: Failed to interpret file 'data_series_841' as a pickle

In [74]:
# Load the table in 'Ex_data_5.csv'; its 'compound' column holds formulas
# without explicit counts of one (e.g. 'Cu2Se'), which is the form handled by
# get_atom_related_properties.
exp_data = pd.read_csv('Ex_data_5.csv')

In [75]:
exp_data

Unnamed: 0,compound,thermal_conductivity_exp,spacegroup_relax,volume_atom,volume_cell
0,Ag9AlSe6,0.29,216,21.338638,1365.6728
1,CaGa6Te10,0.53,155,31.160575,3178.3786
2,Cu2CoTi3S8,1.4,227,17.255459,966.3057
3,Cu2Se,0.54,225,15.384067,184.6088
4,Cu2Te,0.7,216,18.27135,219.2562
5,Cu7PS6,0.24,198,16.141973,903.9505
6,Cu8GeSe6,0.24,216,17.796265,1067.7759
7,Cu8SiSe6,0.26,31,17.88648,536.5944
8,CuCo2S4,1.5,227,15.204146,851.4322
9,Ge,60.0,227,22.626162,181.0093


In [76]:

# Empty frame with one column per feature label in `var`; the rows are filled
# in by the next cell.
exp_compound_properties = pd.DataFrame(columns=var)
exp_compound_properties


Unnamed: 0,atomic number,Mendeleev number,period,group,atomic mass,atomic density,valence electrons,absolute radii,covalent radii,van der Waals radii,...,molar volume,thermal conductivity,the orbital exponent of Slater-type orbitals,polarizability,global hardness,electrophilicity indices,atomization enthalpy,fusion enthalpy,vaporization enthalpy,binding energy


In [77]:
# Compute one feature vector per compound and build the frame in a single
# step; appending with `.loc[len(df)]` inside a loop grows the frame one row
# at a time and gets slower with every insertion.
exp_compound_properties = pd.DataFrame(
    [get_atom_related_properties(formula) for formula in exp_data['compound']],
    columns=var,
)


In [78]:
exp_compound_properties

Unnamed: 0,atomic number,Mendeleev number,period,group,atomic mass,atomic density,valence electrons,absolute radii,covalent radii,van der Waals radii,...,molar volume,thermal conductivity,the orbital exponent of Slater-type orbitals,polarizability,global hardness,electrophilicity indices,atomization enthalpy,fusion enthalpy,vaporization enthalpy,binding energy
0,40.0,79.8125,4.5,13.0,91.972094,7876.5,0.0,1.387875,1.34125,2.00875,...,12.559375,256.7575,1.367944,43.626262,4.328313,2.695031,265.8125,9.05,171.5,19195.8125
1,42.705882,83.647059,4.588235,14.117647,102.024471,5845.529412,0.0,1.525476,1.345882,2.007647,...,17.741176,23.764706,1.544335,20.942818,4.551918,2.853182,224.117647,12.769412,127.705882,22610.617647
2,19.928571,79.5,3.428571,12.214286,41.867586,3995.785714,-0.071429,1.272243,1.221429,1.917857,...,12.646429,69.117143,1.458007,23.310093,6.128921,3.142716,339.071429,8.024286,166.314286,4310.071429
3,30.666667,79.0,4.0,12.666667,68.684,7553.0,0.666667,1.3546,1.28,1.966667,...,10.213333,266.84,1.382867,21.597833,4.697733,2.74758,301.0,10.533333,208.666667,10205.333333
4,36.666667,78.666667,4.333333,12.666667,84.897333,8026.666667,0.666667,1.389633,1.34,2.02,...,11.56,267.666667,1.3359,24.748133,4.144767,2.63502,291.0,14.566667,216.0,16590.666667
5,22.428571,82.714286,3.5,13.428571,47.727557,5430.214286,-0.071429,1.259293,1.186429,1.9,...,11.426429,200.104714,1.460421,16.189114,5.972557,3.103537,311.071429,7.337143,155.085714,5702.178571
6,31.2,81.2,4.0,13.2,70.317867,7039.8,0.533333,1.331913,1.264,1.967333,...,11.268667,217.541333,1.458547,18.935987,4.954813,2.841806,296.2,11.266667,192.666667,10592.2
7,30.0,81.266667,3.933333,13.2,67.347567,6840.266667,0.533333,1.327853,1.258,1.966667,...,11.164,223.541333,1.448967,18.5903,5.026713,2.856671,301.466667,12.493333,194.333333,9974.6
8,21.0,82.285714,3.428571,13.285714,44.238914,4937.142857,-0.285714,1.222357,1.148571,1.885714,...,11.795714,85.831429,1.501443,16.299586,6.2765,3.196804,329.428571,7.488571,155.6,4897.857143
9,32.0,84.0,4.0,14.0,72.64,5323.0,4.0,1.4363,1.2,2.11,...,13.63,60.0,1.527,11.9864,5.1874,2.92701,377.0,31.8,334.0,11103.0


In [79]:
# Put the computed features side by side with the loaded table (axis=1 joins
# on the row index). NOTE: this rebinds exp_compound_properties, so running
# the cell a second time would append the exp_data columns again.
exp_compound_properties = pd.concat([exp_compound_properties, exp_data],axis=1)
exp_compound_properties

Unnamed: 0,atomic number,Mendeleev number,period,group,atomic mass,atomic density,valence electrons,absolute radii,covalent radii,van der Waals radii,...,electrophilicity indices,atomization enthalpy,fusion enthalpy,vaporization enthalpy,binding energy,compound,thermal_conductivity_exp,spacegroup_relax,volume_atom,volume_cell
0,40.0,79.8125,4.5,13.0,91.972094,7876.5,0.0,1.387875,1.34125,2.00875,...,2.695031,265.8125,9.05,171.5,19195.8125,Ag9AlSe6,0.29,216,21.338638,1365.6728
1,42.705882,83.647059,4.588235,14.117647,102.024471,5845.529412,0.0,1.525476,1.345882,2.007647,...,2.853182,224.117647,12.769412,127.705882,22610.617647,CaGa6Te10,0.53,155,31.160575,3178.3786
2,19.928571,79.5,3.428571,12.214286,41.867586,3995.785714,-0.071429,1.272243,1.221429,1.917857,...,3.142716,339.071429,8.024286,166.314286,4310.071429,Cu2CoTi3S8,1.4,227,17.255459,966.3057
3,30.666667,79.0,4.0,12.666667,68.684,7553.0,0.666667,1.3546,1.28,1.966667,...,2.74758,301.0,10.533333,208.666667,10205.333333,Cu2Se,0.54,225,15.384067,184.6088
4,36.666667,78.666667,4.333333,12.666667,84.897333,8026.666667,0.666667,1.389633,1.34,2.02,...,2.63502,291.0,14.566667,216.0,16590.666667,Cu2Te,0.7,216,18.27135,219.2562
5,22.428571,82.714286,3.5,13.428571,47.727557,5430.214286,-0.071429,1.259293,1.186429,1.9,...,3.103537,311.071429,7.337143,155.085714,5702.178571,Cu7PS6,0.24,198,16.141973,903.9505
6,31.2,81.2,4.0,13.2,70.317867,7039.8,0.533333,1.331913,1.264,1.967333,...,2.841806,296.2,11.266667,192.666667,10592.2,Cu8GeSe6,0.24,216,17.796265,1067.7759
7,30.0,81.266667,3.933333,13.2,67.347567,6840.266667,0.533333,1.327853,1.258,1.966667,...,2.856671,301.466667,12.493333,194.333333,9974.6,Cu8SiSe6,0.26,31,17.88648,536.5944
8,21.0,82.285714,3.428571,13.285714,44.238914,4937.142857,-0.285714,1.222357,1.148571,1.885714,...,3.196804,329.428571,7.488571,155.6,4897.857143,CuCo2S4,1.5,227,15.204146,851.4322
9,32.0,84.0,4.0,14.0,72.64,5323.0,4.0,1.4363,1.2,2.11,...,2.92701,377.0,31.8,334.0,11103.0,Ge,60.0,227,22.626162,181.0093


In [80]:
# Write the combined table to the file 'exp_data_compund_properties_5' (no
# file extension). With pandas' default settings the row index is written as
# the first, unnamed column.
exp_compound_properties.to_csv("exp_data_compund_properties_5")

In [55]:
exp_compound_properties

Unnamed: 0,atomic number,Mendeleev number,period,group,atomic mass,atomic density,valence electrons,absolute radii,covalent radii,van der Waals radii,...,electrophilicity indices,atomization enthalpy,fusion enthalpy,vaporization enthalpy,binding energy,compound,thermal_conductivity_exp,volume_atom,volume_cell,spacegroup_relax
0,40.0,79.8125,4.5,13.0,91.972094,7876.5,0.0,1.387875,1.34125,2.00875,...,2.695031,265.8125,9.05,171.5,19195.8125,Ag9AlSe6,0.29,21.338638,1365.6728,216
1,42.705882,83.647059,4.588235,14.117647,102.024471,5845.529412,0.0,1.525476,1.345882,2.007647,...,2.853182,224.117647,12.769412,127.705882,22610.617647,CaGa6Te10,0.53,31.160575,3178.3786,155
2,19.928571,79.5,3.428571,12.214286,41.867586,3995.785714,-0.071429,1.272243,1.221429,1.917857,...,3.142716,339.071429,8.024286,166.314286,4310.071429,Cu2CoTi3S8,1.4,17.255459,966.3057,227
3,30.666667,79.0,4.0,12.666667,68.684,7553.0,0.666667,1.3546,1.28,1.966667,...,2.74758,301.0,10.533333,208.666667,10205.333333,Cu2Se,0.54,15.384067,184.6088,225
4,36.666667,78.666667,4.333333,12.666667,84.897333,8026.666667,0.666667,1.389633,1.34,2.02,...,2.63502,291.0,14.566667,216.0,16590.666667,Cu2Te,0.7,18.27135,219.2562,216
5,22.428571,82.714286,3.5,13.428571,47.727557,5430.214286,-0.071429,1.259293,1.186429,1.9,...,3.103537,311.071429,7.337143,155.085714,5702.178571,Cu7PS6,0.24,16.141973,903.9505,198
6,31.2,81.2,4.0,13.2,70.317867,7039.8,0.533333,1.331913,1.264,1.967333,...,2.841806,296.2,11.266667,192.666667,10592.2,Cu8GeSe6,0.24,17.796265,1067.7759,216
7,30.0,81.266667,3.933333,13.2,67.347567,6840.266667,0.533333,1.327853,1.258,1.966667,...,2.856671,301.466667,12.493333,194.333333,9974.6,Cu8SiSe6,0.26,17.88648,536.5944,31
8,21.0,82.285714,3.428571,13.285714,44.238914,4937.142857,-0.285714,1.222357,1.148571,1.885714,...,3.196804,329.428571,7.488571,155.6,4897.857143,CuCo2S4,1.5,15.204146,851.4322,227
9,32.0,84.0,4.0,14.0,72.64,5323.0,4.0,1.4363,1.2,2.11,...,2.92701,377.0,31.8,334.0,11103.0,Ge,60.0,22.626162,181.0093,227


In [56]:
exp_compound_properties.columns

Index(['atomic number', 'Mendeleev number', 'period', 'group', 'atomic mass',
       'atomic density', 'valence electrons', 'absolute radii',
       'covalent radii', 'van der Waals radii', 'electron affinity',
       'electronegativity', 'first ionization energy', 'boiling point',
       'melting point', 'molar volume', 'thermal conductivity',
       'the orbital exponent of Slater-type orbitals', 'polarizability',
       'global hardness', 'electrophilicity indices', 'atomization enthalpy',
       'fusion enthalpy', 'vaporization enthalpy', 'binding energy',
       'compound', 'thermal_conductivity_exp ', 'volume_atom', 'volume_cell',
       'spacegroup_relax'],
      dtype='object')