In [None]:
import torch.nn as nn
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import joblib  
import os

torch.manual_seed(1)
np.random.seed(1)
random.seed(1)


def weights_init(m):
    """Re-initialise the affine parameters of batch-norm style modules in place.

    A module is treated as batch-norm when its class name contains the
    substring ``'BatchNorm'``; any other module is left untouched.

    Args:
        m: Module-like object. For matching modules, ``m.weight.data`` is
            redrawn from N(1.0, 0.02) and ``m.bias.data`` is set to zero.
    """
    layer_type = m.__class__.__name__
    if 'BatchNorm' not in layer_type:
        return
    m.weight.data.normal_(1.0, 0.02)
    m.bias.data.fill_(0)

class FeatureDataset(Dataset):
    """Map-style dataset that serves rows of a feature container as float tensors.

    ``x`` must expose ``shape[0]`` (number of samples) and integer indexing
    that returns one sample accepted by ``torch.FloatTensor``.
    """

    def __init__(self, x):
        self.x = x

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, idx):
        return torch.FloatTensor(self.x[idx])

    def getBatch(self, idxs=()):
        """Gather several samples into one tensor.

        Args:
            idxs: Iterable of sample indices. ``None`` is passed through.

        Returns:
            ``None`` when ``idxs`` is ``None``; an empty ``FloatTensor`` when
            ``idxs`` is empty; otherwise the samples stacked along a new
            leading dimension.
        """
        # Identity check: ``== None`` is evaluated element-wise on arrays and
        # cannot be used as a truth value.
        if idxs is None:
            return None
        samples = [self[i] for i in idxs]
        if not samples:
            # Preserve the historical result for an empty request.
            return torch.FloatTensor([])
        # torch.FloatTensor(list_of_tensors) cannot combine multi-element
        # tensors; stacking is the supported way to build the batch.
        return torch.stack(samples)

def normalizing_data(data, seed=1, scaler_save_path='Results/scaler.pkl'):
    """Min-max scale the composition columns, persist the scaler and split the data.

    Args:
        data: DataFrame containing the columns ``'Nd-content(mol%)'``,
            ``'PT-content(%)'`` and ``'d33 (pC/N)'``.
        seed: ``random_state`` forwarded to ``train_test_split``.
        scaler_save_path: Where the fitted ``MinMaxScaler`` is dumped with
            joblib. Defaults to the path the inference cell loads it from, so
            a fresh Restart & Run All finds the file.

    Returns:
        ``(x, y, train_features, test_features, train_labels, test_labels)``.
        ``x`` and ``y`` are float tensors (moved to CUDA when available) and
        the remaining four values are their 80/20 split.
    """
    composition = data[['Nd-content(mol%)', 'PT-content(%)']]

    # NOTE(review): the scaler is fitted on the full dataset before the split,
    # so the test rows influence the scaling range — confirm this is intended.
    min_max_scaler = MinMaxScaler()
    normalized_composition = min_max_scaler.fit_transform(composition)

    # Create the target directory first; joblib.dump does not create it.
    save_dir = os.path.dirname(scaler_save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    joblib.dump(min_max_scaler, scaler_save_path)
    print(f'Scaler saved at {scaler_save_path}')

    normalized_composition_df = pd.DataFrame(normalized_composition, columns=composition.columns)

    print(normalized_composition_df)

    y = data[['d33 (pC/N)']]
    print(y)

    x = torch.FloatTensor(normalized_composition_df.values)
    y = torch.FloatTensor(y.values)

    if torch.cuda.is_available():
        x = x.cuda()
        y = y.cuda()

    train_features, test_features, train_labels, test_labels = train_test_split(
        x, y, test_size=0.2, random_state=seed
    )

    return x, y, train_features, test_features, train_labels, test_labels


In [None]:
import os
import time
from bayes_opt import BayesianOptimization
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.svm import SVR
import numpy as np
import matplotlib.pyplot as plt
import datetime
import pandas as pd
import random
import torch

def set_random_seed(seed):
    """Seed torch (CPU and CUDA), NumPy and ``random``, and pin cuDNN flags.

    Args:
        seed: Integer seed handed to every generator.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Fix every RNG before any data handling, then start the wall-clock timer.
set_random_seed(1)
starttime = datetime.datetime.now()

t = time.localtime()
model_name = 'd33_inference_SVR'
file_name = '{}.xlsx'.format(model_name)  # workbook that receives the optimisation table

# Load the raw table and obtain the scaled tensors plus their train/test split.
data = pd.read_excel('data-1.xlsx')
x_all, y_all, train_features, test_features, train_labels, test_labels = normalizing_data(data, seed=1)

# scikit-learn works on NumPy arrays: bring each split tensor to the CPU and convert it.
train_features, test_features, train_labels, test_labels = (
    tensor.cpu().data.numpy()
    for tensor in (train_features, test_features, train_labels, test_labels)
)

# Flatten the label arrays to 1-D.
train_labels = train_labels.reshape(-1)
test_labels = test_labels.reshape(-1)

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error as a fraction (not multiplied by 100).

    Args:
        y_true: Array-like of reference values.
        y_pred: Array-like of predictions, broadcastable against ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / max(|y_true|, eps))`` where ``eps`` is the
        float64 machine epsilon.
    """
    # Accept plain lists/tuples as well as arrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Floor the denominator so a zero target does not yield nan/inf. A plain
    # Python float keeps the input dtype under NumPy's promotion rules.
    eps = float(np.finfo(np.float64).eps)
    return np.mean(np.abs(y_true - y_pred) / np.maximum(np.abs(y_true), eps))

def train_model(C, epsilon, gamma):
    """Objective for the Bayesian optimiser: score one RBF-SVR configuration.

    Reads the module-level ``train_features``, ``train_labels``,
    ``test_features`` and ``test_labels`` arrays.

    Args:
        C: SVR regularisation parameter.
        epsilon: Width of the SVR epsilon-insensitive tube.
        gamma: RBF kernel coefficient.

    Returns:
        The negated mean 5-fold cross-validated MAPE on the training split
        (higher is better, as the optimiser maximises its target).
    """
    model = SVR(C=C, epsilon=epsilon, gamma=gamma, kernel="rbf")

    # The scorer already returns negated MAPE, one value per fold.
    cv_scores = cross_val_score(
        model, train_features, train_labels, cv=5,
        scoring='neg_mean_absolute_percentage_error',
    )
    cv_score = -cv_scores.mean()
    print("cv_scores (each fold):", cv_scores)
    print("cv_score (mean):", cv_score)

    # Refit on the whole training split purely to report diagnostics.
    model.fit(train_features, train_labels)
    train_mape = mean_absolute_percentage_error(train_labels, model.predict(train_features))
    test_mape = mean_absolute_percentage_error(test_labels, model.predict(test_features))
    print("train_mapre:", train_mape)
    print("test_mapre:", test_mape)

    # Optimise on the cross-validation score, not on the test split: selecting
    # hyperparameters by test error leaks the hold-out set into model selection
    # and makes the reported test MAPE optimistically biased.
    return -cv_score
    
    
# Search box handed to the optimiser: (lower, upper) per SVR hyperparameter.
bounds = dict(C=(1, 100), epsilon=(1, 5), gamma=(0.01, 1))

optimizer = BayesianOptimization(f=train_model, pbounds=bounds, random_state=1)

# 100 random initial probes followed by 150 guided iterations.
optimizer.maximize(init_points=100, n_iter=150)


def _result_row(entry):
    """Turn one optimiser record (``target`` + ``params``) into a one-row frame."""
    params = entry['params']
    return pd.DataFrame({'target': [entry['target']],
                         'C': [params['C']],
                         'epsilon': [params['epsilon']],
                         'gamma': [params['gamma']]})


# One row per evaluated point, in evaluation order.
result_list = [_result_row(res) for res in optimizer.res]
table = pd.concat(result_list, ignore_index=True)

# The best point is appended once more as the final row of the table.
best_result = _result_row(optimizer.max)
table = pd.concat([table, best_result], ignore_index=True)

table.to_excel(file_name)
endtime = datetime.datetime.now()
print('Running time: {}'.format(endtime - starttime))
print(table)

In [None]:
import os
import time
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error, r2_score
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
import pickle 

def set_random_seed(seed):
    """Make the run repeatable by seeding ``random``, NumPy and torch.

    Also switches cuDNN to deterministic mode with benchmarking disabled.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # torch: CPU generator, current CUDA device, then all CUDA devices.
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def ensure_directory_exists(directory):
    """Create ``directory`` (and any missing parents) if it is not already there.

    Args:
        directory: Path of the directory to create.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    # exist_ok=True replaces the separate exists() check, which could race with
    # another process and silently passed when a regular file was in the way.
    os.makedirs(directory, exist_ok=True)

# Rebuild exactly the same scaled train/test split that the optimisation cell used.
data = pd.read_excel('data-1.xlsx')
x_all, y_all, train_features, test_features, train_labels, test_labels = normalizing_data(data, seed=1)
train_features, test_features = train_features.cpu().data.numpy(), test_features.cpu().data.numpy()
train_labels, test_labels = train_labels.cpu().data.numpy(), test_labels.cpu().data.numpy()
train_labels, test_labels = train_labels.reshape(-1), test_labels.reshape(-1)

# No additional StandardScaler here: the hyperparameters in
# 'd33_inference_SVR.xlsx' were tuned on MinMax-scaled features only, and
# SVRModel() at inference time applies only the saved MinMax scaler. Training
# on re-standardised features would pair the tuned C/epsilon/gamma with a
# different feature scale and make the pickled models receive inputs at
# prediction time that are scaled differently from their training data.

results_df = pd.DataFrame(columns=['Iteration', 'target', 'R2_Score_test', 'R2_Score_train', 'Train Loss', 'Test Loss', 'Figure_Path_Test', 'Figure_Path_Train', 'Figure_Path_All', 'Loss_Path'])
set_random_seed(1)

# Output locations for the pickled models, spreadsheets and figures.
results_dir = 'Results/STU_SVR_BO(100+150)_1'
figures_dir = os.path.join(results_dir, 'Figures')
ensure_directory_exists(results_dir)
ensure_directory_exists(figures_dir)

# Read the optimisation table once; it does not change between iterations.
target = pd.read_excel('d33_inference_SVR.xlsx')

summary_columns = ['Iteration', 'target', 'R2_Score_test', 'R2_Score_train', 'Train Loss', 'Test Loss',
                   'Figure_Path_Test', 'Figure_Path_Train', 'Figure_Path_All', 'Loss_Path']
# Collect one dict per configuration and build the summary frame once at the
# end, instead of growing a DataFrame with pd.concat on every iteration.
summary_records = []

# Iterate over every row actually present in the table rather than a
# hard-coded count, so a table of a different length neither raises a
# KeyError nor gets silently truncated.
for mm in range(len(target)):
    set_random_seed(1)
    tg = target.at[mm, 'target']
    C = target.at[mm, 'C']
    epsilon = target.at[mm, 'epsilon']
    gamma = target.at[mm, 'gamma']

    params = {
        'C': C,
        'epsilon': epsilon,
        'gamma': gamma,
        'kernel': 'rbf'
    }

    model = SVR(**params)
    model.fit(train_features, train_labels)

    # The file is a pickle despite the '.pt' suffix; the name is kept because
    # the inference cell loads models from exactly this path.
    model_filename = os.path.join(results_dir, f'{mm}-seed_1.pt')
    with open(model_filename, 'wb') as f:
        pickle.dump(model, f)
    print(f"Model saved as: {model_filename}")

    predict_train = model.predict(train_features)
    train_mape = mean_absolute_percentage_error(train_labels, predict_train)
    predict_test = model.predict(test_features)
    test_mape = mean_absolute_percentage_error(test_labels, predict_test)

    # Single-point "history" figure holding the train/test MAPE of this fit.
    plt.figure()
    plt.plot([train_mape], 'bo-', label='Train MAPE')
    plt.plot([test_mape], 'ro-', label='Test MAPE')
    plt.title('MAPE during Training')
    plt.xlabel('Iterations')
    plt.ylabel('MAPE')
    plt.text(0, test_mape, f'Target={tg:.4f}', fontdict={'size': 12, 'color': 'red'})
    plt.legend()
    plt.savefig(f'{figures_dir}/{mm}_SVR_training_history.png', format='png', dpi=300)
    plt.close()

    df_losses = pd.DataFrame({
        'Epoch': [1],
        'Train Loss': [train_mape],
        'Test Loss': [test_mape]
    })
    excel_path = os.path.join(results_dir, f'{mm}_SVR_loss_data.xlsx')
    df_losses.to_excel(excel_path, index=False)

    fig_name_test = f'{figures_dir}/{mm}_SVR_experiment_vs_pred_test.png'
    fig_name_train = f'{figures_dir}/{mm}_SVR_experiment_vs_pred_train.png'
    fig_name_all = f'{figures_dir}/{mm}_SVR_experiment_vs_pred_all.png'

    # Predicted-vs-actual scatter for the test split.
    plt.figure()
    sns.regplot(x=predict_test, y=test_labels, color='red')
    current_r2_test = r2_score(test_labels, predict_test)
    plt.text(min(predict_test), max(test_labels), f'R²={current_r2_test:.4f}', color='red')
    plt.title('Test Prediction vs Actual')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig(fig_name_test, format='png', dpi=300)
    plt.close()

    # Predicted-vs-actual scatter for the training split.
    plt.figure()
    sns.regplot(x=predict_train, y=train_labels, color='blue')
    current_r2_train = r2_score(train_labels, predict_train)
    plt.text(min(predict_train), max(train_labels), f'R²={current_r2_train:.4f}', color='blue')
    plt.title('Train Prediction vs Actual')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig(fig_name_train, format='png', dpi=300)
    plt.close()

    # Combined view; the concatenated arrays are reused for the exported table below.
    predict_all = np.concatenate([predict_train, predict_test])
    labels_all = np.concatenate([train_labels, test_labels])

    plt.figure()
    sns.regplot(x=predict_train, y=train_labels, color='blue', label='Train')
    sns.regplot(x=predict_test, y=test_labels, color='red', label='Test')
    plt.legend()
    plt.text(min(predict_all), max(labels_all), f'R²={r2_score(labels_all, predict_all):.4f}', color='green')
    plt.title('All Prediction vs Actual')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig(fig_name_all, format='png', dpi=300)
    plt.close()

    df_prediction_comparison_train = pd.DataFrame({
        'Predicted Train': predict_train.squeeze(),
        'Actual Train': train_labels.squeeze()
    })
    df_prediction_comparison_test = pd.DataFrame({
        'Predicted Test': predict_test.squeeze(),
        'Actual Test': test_labels.squeeze()
    })
    df_prediction_comparison_all = pd.DataFrame({
        'Predicted': predict_all,
        'Actual': labels_all,
        'Dataset': ['Train'] * len(train_labels) + ['Test'] * len(test_labels)
    })

    df_prediction_comparison_train.to_excel(os.path.join(results_dir, f'{mm}_SVR_predictions_train.xlsx'), index=False)
    df_prediction_comparison_test.to_excel(os.path.join(results_dir, f'{mm}_SVR_predictions_test.xlsx'), index=False)
    df_prediction_comparison_all.to_excel(os.path.join(results_dir, f'{mm}_SVR_predictions_all.xlsx'), index=False)

    summary_records.append({
        'Iteration': mm,
        'target': tg,
        'R2_Score_test': current_r2_test,
        'R2_Score_train': current_r2_train,
        'Train Loss': train_mape,
        'Test Loss': test_mape,
        'Figure_Path_Test': fig_name_test,
        'Figure_Path_Train': fig_name_train,
        'Figure_Path_All': fig_name_all,
        'Loss_Path': excel_path
    })

# Passing the column list keeps the header stable even if no rows were produced.
results_df = pd.DataFrame(summary_records, columns=summary_columns)

results_summary_filename = os.path.join(results_dir, 'results_summary_SVR.csv')
results_df.to_csv(results_summary_filename, index=False)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR  
import joblib  
import pickle  
import os

# Candidate compositions to score; a prediction column is attached to this frame later in the cell.
Comp = pd.read_csv(filepath_or_buffer='prediction.csv')

# Previously fitted feature scaler, expected to exist on disk at this path.
scaler = joblib.load(filename='Results/scaler.pkl')

def normalizing_data(df_all, scaler, seed=1):
    """Scale the composition columns with a pre-fitted scaler and split 80/20.

    Args:
        df_all: DataFrame holding ``'Nd-content(mol%)'``, ``'PT-content(%)'``
            and ``'d33(pC/N)'``.
            NOTE(review): the training cell reads the label column as
            ``'d33 (pC/N)'`` (with a space) — confirm which spelling the
            input file uses.
        scaler: Object whose ``transform`` is applied to the two composition
            columns; it is not re-fitted here.
        seed: ``random_state`` forwarded to ``train_test_split``.

    Returns:
        ``(train_features, test_features, train_labels, test_labels)`` as
        DataFrames.
    """
    feature_columns = ['Nd-content(mol%)', 'PT-content(%)']
    label_column = 'd33(pC/N)'

    # Scale the features first, then pick up the label column.
    scaled_features = scaler.transform(df_all[feature_columns])
    labels = df_all[[label_column]]

    x_all = pd.DataFrame(scaled_features, columns=feature_columns)
    y_all = pd.DataFrame(labels.values, columns=[label_column])

    split = train_test_split(x_all, y_all, test_size=0.2, random_state=seed)
    return tuple(split)

def SVRModel(n, seed, X_Comp, scaler):
    """Load one pickled SVR model and predict for the given compositions.

    Args:
        n: Index used in the model file name.
        seed: Seed used in the model file name.
        X_Comp: DataFrame containing ``'Nd-content(mol%)'`` and
            ``'PT-content(%)'``.
        scaler: Object whose ``transform`` is applied to those two columns
            before prediction.
            NOTE(review): the loaded model should have been trained on
            features scaled in the same way — confirm.

    Returns:
        The array returned by the model's ``predict``.
    """
    model_file = f'Results/STU_SVR_BO(100+150)_1/{n}-seed_{seed}.pt'

    # pickle.load can execute arbitrary code; only load files you created yourself.
    with open(model_file, 'rb') as handle:
        svr_model = pickle.load(handle)
    print(f"Loading SVR model from file: {model_file}")

    feature_columns = ['Nd-content(mol%)', 'PT-content(%)']
    scaled_inputs = scaler.transform(X_Comp[feature_columns])
    return svr_model.predict(scaled_inputs)

# Model selector: table row index and seed that make up the pickled model's file name.
# Row 250 is presumably the best configuration, which the optimisation cell
# appends as the last row after 100 + 150 evaluations — verify against the table.
i, j = 250, 1

# Inputs for inference: the compositions and the persisted scaler.
X_Comp = pd.read_csv('prediction.csv')
scaler = joblib.load('Results/scaler.pkl')

# Predict and attach the result as a new column of the output frame.
prediction_SVR = SVRModel(n=i, seed=j, X_Comp=X_Comp, scaler=scaler)
Comp['pred_Z_SVR'] = prediction_SVR

# Make sure the output folder is there before writing the workbook.
if not os.path.exists('Results'):
    os.makedirs('Results')

Comp.to_excel('Results/SVR_comp_prediction.xlsx', index=False)


Loading SVR model from file: Results/STU_SVR_BO(100+150)_1/250-seed_1.pt
