### Three set of metrics are demonstrated for all data
1. 80-20 validation, with validation error demonstrated as peformance metric
2. five-fold cross validation, with average CV error demonstrated as performance metric

In [1]:
import pandas as pd
from pandas import DataFrame
import os
from sklearn.metrics import accuracy_score, f1_score, recall_score
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import LinearSVC,SVC

prefix = os.getcwd()

In [2]:
train_2D_df = pd.read_csv('train_2D.csv')
test_2D_df = pd.read_csv('test_2D.csv')

### Split the data according to ehull (the ehull value is calculated as Ehull_DFT - S_ideal*T) and redo the validation

In [3]:
print(train_2D_df.head())

       CompName      Ehull  (cbrt(NNaLst)+(AnionChgStdLst)^2)  \
0    MgZr(SO4)3 -60.260567                                0.0   
1    MgTi(SO4)3 -27.986572                                0.0   
2    MgSn(SO4)3 -30.640966                                0.0   
3  Mg4Nb2(SO4)9 -42.783127                                0.0   
4    ZrZn(SO4)3 -40.419098                                0.0   

   ((EWaldSumLst)^2*(XWithNaLst*RDiffStdLst))  
0                                    0.000000  
1                                  125.960243  
2                                   46.109451  
3                                   79.651306  
4                                   24.253452  


In [4]:
Ehull_train = train_2D_df['Ehull'].to_numpy()
Y_total = np.zeros_like(Ehull_train)
Y_total[np.where(Ehull_train<=0)] = 1
Y_total[np.where(Ehull_train>0)] = 0
X_total = train_2D_df[['(cbrt(NNaLst)+(AnionChgStdLst)^2)','((EWaldSumLst)^2*(XWithNaLst*RDiffStdLst))']].to_numpy()

Ehull_test = test_2D_df['Ehull'].to_numpy()
Y_valid = np.zeros_like(Ehull_test)
Y_valid[np.where(Ehull_test<=0)] = 1
Y_valid[np.where(Ehull_test>0)] = 0
X_valid = test_2D_df[['(cbrt(NNaLst)+(AnionChgStdLst)^2)','((EWaldSumLst)^2*(XWithNaLst*RDiffStdLst))']].to_numpy()
print(np.unique(Y_total),np.unique(Y_valid))

[0. 1.] [0. 1.]


## Performance metric on 20% of the validation data (out of the training sample)

In [5]:
# The SVC validation error from 80-20 splitting

clf = SVC(kernel='linear',class_weight='balanced',probability=True)
clf.fit(X_total,Y_total)
Y_valid_predict = clf.predict(X_valid)
test_score = accuracy_score(Y_valid,Y_valid_predict)
test_recall = recall_score(Y_valid,Y_valid_predict,average='weighted')
test_F1 = f1_score(Y_valid,Y_valid_predict,average='weighted')
print(f'Accuracy = {test_score}, recall = {test_recall}, F1 score = {test_F1}')

Accuracy = 0.8236808236808236, recall = 0.8236808236808236, F1 score = 0.839478375313282


# Performance metric on five fold cross validation

In [6]:
skf = StratifiedKFold(n_splits=5,random_state=0)



In [7]:
X_all_data = np.concatenate((X_total,X_valid))
Y_all_data = np.concatenate((Y_total,Y_valid))

In [8]:
print(Y_all_data.shape,X_all_data.shape)

(3881,) (3881, 2)


In [9]:
# The real train set = train set from skf + the validation data set
accuracy_lst, recall_lst, F1_lst = [], [], []

for train_index, test_index in skf.split(X_total,Y_total):
    X_train = np.concatenate((X_all_data[train_index],X_all_data))
    X_test = X_total[test_index]
    Y_train = np.concatenate((Y_all_data[train_index],Y_all_data))
    Y_test = Y_total[test_index]
    
    clf = SVC(kernel='linear',class_weight='balanced',probability=True)

    clf.fit(X_train, Y_train)
    Y_cv_predict = clf.predict(X_test)
    
    test_score = accuracy_score(Y_test,Y_cv_predict)
    test_recall = recall_score(Y_test,Y_cv_predict,average='weighted')
    test_F1 = f1_score(Y_test,Y_cv_predict,average='weighted')
    
    accuracy_lst.append(test_score)
    recall_lst.append(test_recall)
    F1_lst.append(test_F1)
    
    print(f'Accuracy = {test_score}, recall = {test_recall}, F1 score = {test_F1}')
    
print(f'Average accuracy = {np.average(accuracy_lst)}, recall = {np.average(recall_lst)}, F1 score = {np.average(F1_lst)}')

Accuracy = 0.6586151368760065, recall = 0.6586151368760065, F1 score = 0.7018645685891012
Accuracy = 0.7681159420289855, recall = 0.7681159420289855, F1 score = 0.7968101979983231
Accuracy = 0.8341384863123994, recall = 0.8341384863123994, F1 score = 0.850510113330676
Accuracy = 0.9162640901771336, recall = 0.9162640901771336, F1 score = 0.9206093171585407
Accuracy = 0.917741935483871, recall = 0.917741935483871, F1 score = 0.9089623252575623
Average accuracy = 0.8189751181756793, recall = 0.8189751181756793, F1 score = 0.8357513044668406
