In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell runs, so changes to local .py files are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#os.chdir('../')

In [3]:
# Figure styling: Computer Modern math text, serif body text with
# Times New Roman preferred, 13 pt base font size and 300 dpi figures.
plt.rcParams['mathtext.fontset'] = 'cm'
plt.rcParams["font.family"] = "serif"
# Put Times New Roman at the front of the serif fallback list. Existing
# entries of the same name are filtered out first so the cell is idempotent:
# re-running it no longer prepends one more copy on every execution.
plt.rcParams["font.serif"] = ["Times New Roman"] + [
    font for font in plt.rcParams["font.serif"] if font != "Times New Roman"
]
plt.rcParams['font.size'] = 13
plt.rcParams['figure.dpi'] = 300

# The Data

In [4]:
# Read the spreadsheet and discard its 'S/N' column in one chained step,
# then preview the first rows.
data = pd.read_excel('../data/data.xlsx').drop(columns=['S/N'])
data.head()

Unnamed: 0,cDen,Pot,Sn %,pH,C2H4,CO,H2,EtoH,FORM
0,150,3.5,100,14.05,0,23,12,0,61
1,150,3.3,80,14.05,0,23,7,0,66
2,150,3.2,50,14.05,0,34,5,3,52
3,150,3.1,10,14.05,1,42,5,2,42
4,150,3.0,5,14.05,4,48,5,10,19


In [5]:
# The first four columns are the model inputs; every column after them is a
# target. Both are stored as plain Python lists of column names.
features_col, target_col = (
    cols.tolist() for cols in (data.columns[:4], data.columns[4:])
)
# Optional: keep only the first and third targets.
#target_col = [target_col[0], target_col[2]]
print('Features: ', features_col)
print('Target: ', target_col)

Features:  ['cDen', 'Pot', 'Sn %', 'pH']
Target:  ['C2H4', 'CO', 'H2', 'EtoH', 'FORM']


In [6]:
# Scale every target column down by a factor of 100.
# Not idempotent: re-running this cell divides the targets by 100 again.
data[target_col] = data[target_col].div(100)
data.head(n=2)

Unnamed: 0,cDen,Pot,Sn %,pH,C2H4,CO,H2,EtoH,FORM
0,150,3.5,100,14.05,0.0,0.23,0.12,0.0,0.61
1,150,3.3,80,14.05,0.0,0.23,0.07,0.0,0.66


In [7]:
# Scale the third feature column (features_col[2]) down by a factor of 100.
# Not idempotent: re-running this cell divides the column by 100 again.
data[features_col[2]] = data[features_col[2]].div(100)
data.head(n=2)

Unnamed: 0,cDen,Pot,Sn %,pH,C2H4,CO,H2,EtoH,FORM
0,150,3.5,1.0,14.05,0.0,0.23,0.12,0.0,0.61
1,150,3.3,0.8,14.05,0.0,0.23,0.07,0.0,0.66


In [8]:
# Complementary Cu column: 1 minus the 'Sn %' value of each row.
data['Cu %'] = data['Sn %'].rsub(1)
data.head(n=2)

Unnamed: 0,cDen,Pot,Sn %,pH,C2H4,CO,H2,EtoH,FORM,Cu %
0,150,3.5,1.0,14.05,0.0,0.23,0.12,0.0,0.61,0.0
1,150,3.3,0.8,14.05,0.0,0.23,0.07,0.0,0.66,0.2


In [9]:
# NOTE(review): repeats the two-row preview already shown by the previous cell.
data.head(2)

Unnamed: 0,cDen,Pot,Sn %,pH,C2H4,CO,H2,EtoH,FORM,Cu %
0,150,3.5,1.0,14.05,0.0,0.23,0.12,0.0,0.61,0.0
1,150,3.3,0.8,14.05,0.0,0.23,0.07,0.0,0.66,0.2


In [10]:
# Build a pymatgen Composition (not a Structure) for a Cu-Sn alloy. The Sn fraction is the third feature column, features_col[2].
import pymatgen.core as pmg

def create_structure(Sn_percent):
    """Build the pymatgen ``Composition`` of a Cu-Sn alloy.

    Despite the function name, the returned object is a ``Composition``,
    not a ``Structure``.

    Parameters
    ----------
    Sn_percent : float
        Atomic fraction of Sn in the closed interval [0, 1]. The Cu amount
        is taken as ``1 - Sn_percent``.

    Returns
    -------
    pymatgen.core.Composition
        Composition with amounts Cu = ``1 - Sn_percent`` and
        Sn = ``Sn_percent``.

    Raises
    ------
    ValueError
        If ``Sn_percent`` lies outside [0, 1]; NaN is rejected as well.
    """
    # Phrased as "not (0 <= x <= 1)" so NaN, for which every comparison is
    # False, is rejected together with out-of-range values.
    if not 0 <= Sn_percent <= 1:
        raise ValueError(
            f'Sn percent must be a fraction between 0 and 1, got {Sn_percent!r}'
        )
    # Hand the amounts over as a mapping rather than formatting the floats
    # into a formula string that pymatgen would have to parse back.
    return pmg.Composition({'Cu': 1 - Sn_percent, 'Sn': Sn_percent})

In [15]:
# Example composition built with an Sn fraction of 0.03.
comp = create_structure(0.03)

In [16]:
# Atomic fraction of Cu in the example composition.
comp.get_atomic_fraction('Cu')

0.97

In [17]:
# Print the example composition's alphabetical formula and chemical system.
print("formula", comp.alphabetical_formula)
print("chemical system", comp.chemical_system)

formula Cu0.97 Sn0.03
chemical system Cu-Sn


In [18]:
# Average electronegativity reported by pymatgen for the example composition.
comp.average_electroneg

1.9018

In [None]:
# Average electronegativity of the example composition. The cell previously
# stopped at a truncated attribute name, which would raise AttributeError
# and abort a Restart & Run All.
comp.average_electroneg

In [43]:
# For each row, build the composition from its 'Sn %' value and store that
# composition's `weight` in a new column.
data['weight'] = data['Sn %'].apply(
    lambda sn_fraction: create_structure(sn_fraction).weight
)
data.head()

Unnamed: 0,cDen,Pot,Sn %,pH,C2H4,CO,H2,EtoH,FORM,Cu %,weight
0,150,3.5,1.0,14.05,0.0,0.23,0.12,0.0,0.61,0.0,118.71
1,150,3.3,0.8,14.05,0.0,0.23,0.07,0.0,0.66,0.2,107.6772
2,150,3.2,0.5,14.05,0.0,0.34,0.05,0.03,0.52,0.5,91.128
3,150,3.1,0.1,14.05,0.01,0.42,0.05,0.02,0.42,0.9,69.0624
4,150,3.0,0.05,14.05,0.04,0.48,0.05,0.1,0.19,0.95,66.3042


In [44]:
# Summary statistics, transposed so each column of `data` becomes one row.
data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
cDen,35.0,269.171429,119.205824,141.0,150.0,250.0,350.0,450.0
Pot,35.0,3.86,0.500118,2.8,3.55,4.0,4.15,4.7
Sn %,35.0,0.354286,0.388203,0.0,0.03,0.1,0.8,1.0
pH,35.0,12.844,2.447214,8.02,14.05,14.05,14.05,14.05
C2H4,35.0,0.079714,0.103483,0.0,0.0,0.04,0.095,0.37
CO,35.0,0.219429,0.12312,0.05,0.125,0.19,0.275,0.5
H2,35.0,0.137429,0.093442,0.05,0.07,0.1,0.155,0.37
EtoH,35.0,0.106,0.132114,0.0,0.0,0.06,0.14,0.48
FORM,35.0,0.353714,0.217067,0.07,0.14,0.38,0.54,0.7
Cu %,35.0,0.645714,0.388203,0.0,0.2,0.9,0.97,1.0
