In [13]:
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_absolute_error, r2_score, accuracy_score, precision_score
from sklearn.model_selection import train_test_split
from joblib import dump, load
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.exceptions import DataConversionWarning
from sklearn.metrics import make_scorer, f1_score
import warnings
warnings.filterwarnings("ignore", category=UserWarning, message="X does not have valid feature names", module="sklearn")
warnings.filterwarnings("ignore", category=DataConversionWarning, module="sklearn")

In [14]:
df = pd.read_csv('_data/aflow_test_set.csv')

X = df.drop(['Egap', 'is_metal', 'gap_type' ], axis=1)
y1 = df['is_metal']
y2 = df['Egap']
y3 = df['gap_type']

y1 = LabelEncoder().fit_transform(y1)
y3 = LabelEncoder().fit_transform(y3)

In [15]:
metal_nonmetal_classifier = load('models/metal_nonmetal_classifier.joblib')
cluster_classifier = load('models/cluster_classification.joblib')

classifiers = []
regression_models = []
for i in range(5):
    classifiers.append(load('models/classifier_{}.joblib'.format(i)))
    regression_models.append([load('models/regressor_{}_1.joblib'.format(i)),
                              load('models/regressor_{}_2.joblib'.format(i))])

In [16]:
def predict_bandgap(row):
    feature_vector = row.values.reshape(1, -1)
    # predict's metal or non-metal
    metal_or_nonmetal = metal_nonmetal_classifier.predict(feature_vector)

    if metal_or_nonmetal == 1:
        # metal
        return metal_or_nonmetal, 0, 2
    else:
        # non-metal
        # predict's cluster assignment
        cluster_assignment = cluster_classifier.predict(feature_vector)[0].astype(int)

        bandgap_prediction = regression_models[cluster_assignment][1].predict(feature_vector).item()
        gaptype_prediction = classifiers[cluster_assignment].predict(feature_vector)[0]

        return metal_or_nonmetal, bandgap_prediction, gaptype_prediction

In [17]:
metal_nonmetal_predictions = []
gap_type_predictions = []
band_gap_predictions = []

for index, row in X.iterrows():
    metal_or_nonmetal, band_gap_prediction, gap_type_prediction = predict_bandgap(row)
    metal_nonmetal_prediction = 0 if metal_or_nonmetal == 0 else 1

    metal_nonmetal_predictions.append(metal_nonmetal_prediction)
    gap_type_predictions.append(gap_type_prediction)
    band_gap_predictions.append(band_gap_prediction)

In [18]:
accuracy_is_metal = accuracy_score(y1, metal_nonmetal_predictions)
f1_score_is_metal = f1_score(y1, metal_nonmetal_predictions)
mae = mean_absolute_error(y2, band_gap_predictions)
r2_score = r2_score(y2, band_gap_predictions)
accuracy_gap_type= accuracy_score(y3, gap_type_predictions)
f1_score_gap_type = f1_score(y3, gap_type_predictions, average='weighted')


# printing the evalution metrics
print('Accuracy for metal-nonmetal classifier: {:.4f}' .format(accuracy_is_metal))
print('F1 Score for metal-nonmetal classifier: {:.4f}' .format(f1_score_is_metal))

print('MAE for bandgap prediction:{:.4f}' .format(mae))
print('R2 Score for bandgap prediction:{:.4f}' .format(r2_score))

print('Accuracy for gap type classifier: {:.4f}' .format(accuracy_gap_type))
print('F1 Score for gap type classifier: {:.4f}' .format(f1_score_gap_type))

Accuracy for metal-nonmetal classifier: 0.9508
F1 Score for metal-nonmetal classifier: 0.9521
MAE for bandgap prediction:0.2321
R2 Score for bandgap prediction:0.8930
Accuracy for gap type classifier: 0.8770
F1 Score for gap type classifier: 0.8769


The final evaluation metric can be calculated as follows:

$$ Evaluation Metric = 0.4 \cdot F1_{metal} \cdot (1 -  metal_{precision}) + 0.3 \cdot (1 - MAE_{bandgap} / max_{bandgap}) + 0.3 \cdot  F1_{gaptype} $$


where:

- `F1_{metal}` is the F1 score for the binary classification task of distinguishing between metal and non-metal materials
- `metal_{precision}` is the precision of the binary classification task for metals
- `MAE_{bandgap}` is the mean absolute error (MAE) for the regression task of predicting the band gap value for all materials in the dataset
- `max_{bandgap}` is the maximum band gap value observed in the dataset
- `F1_{gaptype}` is the F1 score for the multiclass classification task of predicting the band gap type for all materials in the dataset

The weights for each component of the metric are as follows:

- `0.4` for the metal classification task
- `0.3` for the band gap regression task
- `0.3` for the band gap type classification task

The evaluation metric ranges from 0 to 1, where a score of 1 indicates perfect performance and a score of 0 indicates the worst possible performance.


In [19]:
w_metal = 0.3
w_bandgap = 0.4
w_type = 0.3

def eval_metric(y1_pred, y2_pred, y3_pred, y1_test, y2_test, y3_test):
    
    # f1 score and precision for metal-nonmetal classification
    f1_metal = f1_score(y1_pred, y1_test, pos_label=1)
    precision_metal = precision_score(y1_pred, y1_test, pos_label=1)
    
    # Penalize metrics for metal-nonmetal classification
    penalty = w_metal * (1 - precision_metal)
    
    # MAE for band gap prediction
    mae_bandgap = mean_absolute_error(y2_pred, y2_test)
    
    # MAE normalized by the maximum band gap value
    max_bandgap = np.max(y2_test)
    norm_mae_bandgap = mae_bandgap / max_bandgap
    
    # f1 score for gap type classification
    f1_type = f1_score(y3_pred, y3_test, average='weighted')
    
    # overall metric
    score = 1 - (penalty + w_bandgap * norm_mae_bandgap + w_type * (1 - f1_type))
    
    return score

score = eval_metric(metal_nonmetal_predictions, band_gap_predictions, gap_type_predictions, y1, y2, y3)

print('The Evaluation Metric for the model: {:.4f}' .format(score))

The Evaluation Metric for the model: 0.9336
