In [1]:
import numpy as np
import os

from Scripts.data_preparation import DataExtractor
from nn_training import set_random_seed
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler


# Hyper-parameter bundle kept at module level.
# NOTE(review): the keys look like neural-network training settings and are
# not referenced by the SVR cells visible in this notebook — confirm whether
# anything else still reads `run_config`.
run_config = dict(
    hidden_sizes=[100, 50, 50],
    lr=3e-5,
    weight_decay=0,
    batch_size=128,
)

# Seed via the project helper imported from nn_training (presumably it seeds
# the RNGs the project relies on — confirm in that module). The KFold
# splitters further down pass random_state=57 explicitly as well.
set_random_seed(57)


In [2]:
# Root folder of the raw data; it is expected to contain the sub-folders
# 'Ti', 'Zr' and 'Ternary' that are walked below.
data_dir = '../Data/'

extractor = DataExtractor()

# os.listdir() returns names in arbitrary, filesystem-dependent order. The
# cross-validation cells split the rows by position, so the files are read in
# sorted order to make the resulting table (and therefore the folds)
# reproducible across machines. (Assumes the extractor stores records in the
# order they are read — confirm in DataExtractor.)
for base_element in ('Ti', 'Zr'):
    files_dir = os.path.join(data_dir, base_element)
    for file in sorted(os.listdir(files_dir)):
        # Third positional argument is presumably `num_elements` (the keyword
        # used for the ternary files below): 2 for '.dat', 1 for '.unalloyed'.
        if file.endswith('.dat'):
            extractor.read_file(files_dir, file, 2, base_element)
        elif file.endswith('.unalloyed'):
            extractor.read_file(files_dir, file, 1, base_element)

# Ternary data: only '.dat' files are read, all registered with base element 'Ti'.
files_dir = os.path.join(data_dir, 'Ternary')
for file in sorted(os.listdir(files_dir)):
    if file.endswith('.dat'):
        extractor.read_file(files_dir, file, num_elements=3, base_element='Ti')

In [3]:
# Table held by the extractor (presumably filled by the read_file calls above
# — confirm in DataExtractor). Later cells apply a function over its rows and
# read the columns 'a' and 'e' from it.
data = extractor.dataframe

In [4]:
# Feature matrix: extractor.extract_properties is applied to every row and the
# expanded result is converted to a NumPy array.
features = (
    data
    .apply(extractor.extract_properties, axis=1, result_type='expand')
    .to_numpy()
)
# Regression target for the first experiment: column 'a'.
target = data['a'].to_numpy()

In [5]:
# 5-fold cross-validation of an RBF-kernel SVR on target 'a'.
# One score per fold is collected for both the training and the held-out part.
cv_mse_train = []
cv_mse_test = []
cv_r2_train = []
cv_r2_test = []

for train_index, test_index in KFold(n_splits=5, shuffle=True, random_state=57).split(features):
    features_train, features_test = features[train_index], features[test_index]
    target_train, target_test = target[train_index], target[test_index]

    # Fit the scaler on the training fold only, then apply it to both parts,
    # so no statistics of the held-out fold reach the model.
    scaler = StandardScaler().fit(features_train)

    scaled_features_train = scaler.transform(features_train)
    scaled_features_test = scaler.transform(features_test)

    # NOTE(review): SVR runs with library defaults and an unscaled target;
    # check that the default epsilon/C suit the spread of 'a'.
    svr = SVR(kernel='rbf').fit(scaled_features_train, target_train)
    res_train = svr.predict(scaled_features_train)
    res_test = svr.predict(scaled_features_test)
    cv_mse_train.append(mean_squared_error(target_train, res_train))
    cv_mse_test.append(mean_squared_error(target_test, res_test))
    cv_r2_train.append(r2_score(target_train, res_train))
    cv_r2_test.append(r2_score(target_test, res_test))

# Report mean ± standard deviation over the folds (np.std default, ddof=0).
# MSE is printed in scientific notation: with three fixed decimals the values
# collapsed to "0.003±0.000", hiding the spread and any train/test difference.
print(f"A MSE Train: {np.mean(cv_mse_train):.3e}±{np.std(cv_mse_train):.3e}\n"
      f"A MSE Test:  {np.mean(cv_mse_test):.3e}±{np.std(cv_mse_test):.3e}")
print(f"A R^2 Train: {np.mean(cv_r2_train):.3f}±{np.std(cv_r2_train):.3f}\n"
      f"A R^2 Test:  {np.mean(cv_r2_test):.3f}±{np.std(cv_r2_test):.3f}")

A MSE Train: 0.003±0.000
A MSE Test:  0.003±0.000
A R^2 Train: 0.832±0.005
A R^2 Test:  0.825±0.019


In [7]:
# Same 5-fold SVR evaluation as for 'a', now with column 'e' as the target.
# The feature matrix is rebuilt from `data` so this cell only needs `data`
# and `extractor` to exist.
features = (
    data
    .apply(extractor.extract_properties, axis=1, result_type='expand')
    .to_numpy()
)
target = data['e'].to_numpy()

# Per-fold scores.
cv_mse_train, cv_mse_test = [], []
cv_r2_train, cv_r2_test = [], []

kfold = KFold(n_splits=5, shuffle=True, random_state=57)
for train_index, test_index in kfold.split(features):
    features_train, target_train = features[train_index], target[train_index]
    features_test, target_test = features[test_index], target[test_index]

    # Scaling statistics come from the training fold only and are reused
    # unchanged for the held-out fold.
    scaler = StandardScaler()
    scaled_features_train = scaler.fit_transform(features_train)
    scaled_features_test = scaler.transform(features_test)

    # NOTE(review): default SVR hyper-parameters with an unscaled target; the
    # recorded output for this cell shows low R^2 — worth confirming the
    # defaults are intended here.
    svr = SVR(kernel='rbf')
    svr.fit(scaled_features_train, target_train)
    res_train = svr.predict(scaled_features_train)
    res_test = svr.predict(scaled_features_test)

    cv_mse_train.append(mean_squared_error(target_train, res_train))
    cv_r2_train.append(r2_score(target_train, res_train))
    cv_mse_test.append(mean_squared_error(target_test, res_test))
    cv_r2_test.append(r2_score(target_test, res_test))

# Mean ± standard deviation over the five folds.
print(f"E MSE Train: {np.mean(cv_mse_train):.3f}±{np.std(cv_mse_train):.3f}\n"
      f"E MSE Test:  {np.mean(cv_mse_test):.3f}±{np.std(cv_mse_test):.3f}")
print(f"E R^2 Train: {np.mean(cv_r2_train):.3f}±{np.std(cv_r2_train):.3f}\n"
      f"E R^2 Test:  {np.mean(cv_r2_test):.3f}±{np.std(cv_r2_test):.3f}")

E MSE Train: 832.522±48.554
E MSE Test:  859.527±230.423
E R^2 Train: 0.280±0.017
E R^2 Test:  0.265±0.077
