In [14]:
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_absolute_error, r2_score, accuracy_score, precision_score
from sklearn.model_selection import train_test_split
from joblib import dump, load
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.metrics import make_scorer, f1_score
from sklearn.exceptions import DataConversionWarning
import warnings
warnings.filterwarnings("ignore", category=UserWarning, message="X does not have valid feature names", module="sklearn")
warnings.filterwarnings("ignore", category=DataConversionWarning, module="sklearn")
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [15]:
df = pd.read_csv('_data/aflow_test_set.csv')

X = df.drop(['Egap', 'is_metal', 'gap_type'], axis=1)
y1 = df['is_metal']
y2 = df['Egap']
y3 = df['gap_type']

y1 = LabelEncoder().fit_transform(y1)
y3 = LabelEncoder().fit_transform(y3)

In [16]:
metal_nonmetal_classifier = load('models/metal_nonmetal_classifier.joblib')
band_gap_predictor = load('models/rfr_regressor.joblib')
gap_type_classifier = load('models/gaptype_classifier.joblib')

In [17]:
def predict_properties(material):

    # Predict whether it's a metal or non metal
    is_metal = metal_nonmetal_classifier.predict(material.values.reshape(1, -1))[0]

    if is_metal:
        # If it's a metal, predict that its band gap is zero and gap type is non existent
        band_gap = 0
        gap_type = 2
    else:
        # If it's a non-metal, predict its band gap and gap type
        band_gap = band_gap_predictor.predict(material.values.reshape(1, -1))[0]
        gap_type = gap_type_classifier.predict(material.values.reshape(1, -1))[0]

    # Return the predicted properties as a tuple
    return is_metal, band_gap, gap_type

In [18]:
# Predict the properties for all the materials in the test set
predicted_properties = np.array([predict_properties(material) for index, material in X.iterrows()])
predicted_properties = predicted_properties.astype(np.float)

In [19]:
is_metal_x = predicted_properties[:, 0].astype(int)
band_gap = predicted_properties[:, 1].astype(float)
gap_type = predicted_properties[:, 2].astype(int)

accuracy_is_metal = accuracy_score(y1, is_metal_x)
f1_score_is_metal = f1_score(y1, is_metal_x)
mae = mean_absolute_error(y2, band_gap)
r2_score = r2_score(y2, band_gap)
accuracy_gap_type= accuracy_score(y3, gap_type)
f1_score_gap_type = f1_score(y3, gap_type, average='weighted')


# Print the evaluation metrics
print('Accuracy for metal-nonmetal classifier: {:.4f}' .format(accuracy_is_metal))
print('F1 Score for metal-nonmetal classifier: {:.4f}' .format(f1_score_is_metal))

print('MAE for bandgap prediction:{:.4f}' .format(mae))
print('R2 Score for bandgap prediction:{:.4f}' .format(r2_score))

print('Accuracy for gap type classifier: {:.4f}' .format(accuracy_gap_type))
print('F1 Score for gap type classifier: {:.4f}' .format(f1_score_gap_type))

Accuracy for metal-nonmetal classifier: 0.9508
F1 Score for metal-nonmetal classifier: 0.9521
MAE for bandgap prediction:0.2496
R2 Score for bandgap prediction:0.8906
Accuracy for gap type classifier: 0.8665
F1 Score for gap type classifier: 0.8659


In [20]:
w_metal = 0.3
w_bandgap = 0.4
w_type = 0.3

def eval_metric(y1_pred, y2_pred, y3_pred, y1_test, y2_test, y3_test):
    
    # f1 score and precision for metal-nonmetal classification
    f1_metal = f1_score(y1_pred, y1_test, pos_label=1)
    precision_metal = precision_score(y1_pred, y1_test, pos_label=1)
    
    # Penalize metrics for metal-nonmetal classification
    penalty = w_metal * (1 - precision_metal)
    
    # MAE for band gap prediction
    mae_bandgap = mean_absolute_error(y2_pred, y2_test)
    
    # MAE normalized by the maximum band gap value
    max_bandgap = np.max(y2_test)
    norm_mae_bandgap = mae_bandgap / max_bandgap
    
    # f1 score for gap type classification
    f1_type = f1_score(y3_pred, y3_test, average='weighted')
    
    # overall metric
    score = 1 - (penalty + w_bandgap * norm_mae_bandgap + w_type * (1 - f1_type))
    
    return score

score = eval_metric(is_metal_x, band_gap, gap_type, y1, y2, y3)

print('The Evaluation Metric for the model: {:.4f}' .format(score))

The Evaluation Metric for the model: 0.9299
