In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split, KFold
import warnings
from numpy import savetxt
from numpy import loadtxt
warnings.filterwarnings("ignore")
import pickle

In [None]:
from sklearn.metrics import r2_score
import optuna
from sklearn.ensemble import BaggingRegressor,AdaBoostRegressor,GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
import xgboost as xgb
from sklearn.svm import SVR
from catboost import CatBoostRegressor, Pool
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
import math
import matplotlib.pyplot as plt
import shap

In [None]:
# Read the comma-separated feature matrix and the matching target values from disk.
train = np.loadtxt('Data_Set/efficiencyTrain.csv', delimiter=',')
label = np.loadtxt('Data_Set/efficiencylabel.csv', delimiter=',')

In [None]:
# Sanity check: display the first entry of the loaded feature array.
train[0]

In [None]:
# Sanity check: display the first entry of the loaded target array.
label[0]

In [None]:
# Configure CUDA device selection through environment variables (see issue #152).
# NOTE(review): device id "6" is specific to the original machine — confirm before reuse.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "6",
})

In [None]:
# Hold out 15% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    train,
    label,
    test_size=0.15,
    random_state=42,
)

In [None]:
def CatBoostRegressorTraining():
    """Fit a CatBoostRegressor with fixed hyperparameters on the training split.

    Reads the module-level ``X_train`` and ``y_train``.

    Returns:
        The fitted ``CatBoostRegressor`` instance.
    """
    # Hyperparameters are hard-coded; presumably the result of an earlier
    # tuning run — TODO confirm their origin.
    catboost_model = CatBoostRegressor(
        iterations=8540,
        learning_rate=0.09176035707878118,
        depth=15,
        random_seed=352,
        metric_period=245,
        od_wait=144,
    )
    catboost_model.fit(X_train, y_train)
    return catboost_model

In [None]:
def AdaBoostRegressorTraining():
    """Fit an AdaBoostRegressor with fixed hyperparameters on the training split.

    Reads the module-level ``X_train`` and ``y_train``.

    Returns:
        The fitted ``AdaBoostRegressor`` instance.
    """
    adaboost_model = AdaBoostRegressor(
        n_estimators=3,
        learning_rate=0.06037559584802324,
        random_state=74,
    )
    adaboost_model.fit(X_train, y_train)
    return adaboost_model

In [None]:
def KNeighborsRegressorTraining():
    """Fit a KNeighborsRegressor with fixed hyperparameters on the training split.

    Reads the module-level ``X_train`` and ``y_train``.

    Returns:
        The fitted ``KNeighborsRegressor`` instance.
    """
    knn_model = KNeighborsRegressor(
        n_neighbors=16,
        leaf_size=1,
        p=1,
        n_jobs=4,
    )
    knn_model.fit(X_train, y_train)
    return knn_model

In [None]:
def RandomForestRegressorTraining(random_state=42):
    """Fit a RandomForestRegressor with fixed hyperparameters on the training split.

    Reads the module-level ``X_train`` and ``y_train``.

    Args:
        random_state: Seed forwarded to ``RandomForestRegressor`` so that
            repeated runs build the same forest and report the same metrics.
            Pass ``None`` to get the previous unseeded behaviour.

    Returns:
        The fitted ``RandomForestRegressor`` instance.
    """
    params = {'criterion': 'squared_error', 'n_estimators': 89, 'min_samples_leaf': 0.0477528566476102,
              'min_samples_split': 0.3493938886646444, 'min_weight_fraction_leaf': 0.07364614448551399,
              'max_depth': 11, 'n_jobs': 2, 'ccp_alpha': 0.6192017360171039,
              # Without a seed every run yields a different forest, so the
              # saved model and metrics could not be reproduced.
              'random_state': random_state}

    forest_model = RandomForestRegressor(**params)
    forest_model.fit(X_train, y_train)

    return forest_model

In [None]:
def SVRTraining():
    """Fit an SVR with fixed hyperparameters on the training split.

    Reads the module-level ``X_train`` and ``y_train``.

    Returns:
        The fitted ``SVR`` instance.
    """
    # NOTE(review): no kernel is given, so SVR's default kernel is used;
    # `degree` and `coef0` may have no effect with it, and `max_iter=5`
    # stops the solver after very few iterations — confirm these are intended.
    svr_model = SVR(
        coef0=9.775132889897574,
        tol=0.006493993396391401,
        epsilon=0.10002960976094372,
        C=9.885891357520826,
        degree=4,
        max_iter=5,
        cache_size=190,
    )
    svr_model.fit(X_train, y_train)
    return svr_model

In [None]:
def GradientBoostingRegressorTraining(random_state=42):
    """Fit a GradientBoostingRegressor with fixed hyperparameters on the training split.

    Reads the module-level ``X_train`` and ``y_train``.

    Args:
        random_state: Seed forwarded to ``GradientBoostingRegressor`` so that
            repeated runs produce the same ensemble. Pass ``None`` to get the
            previous unseeded behaviour.

    Returns:
        The fitted ``GradientBoostingRegressor`` instance.
    """
    # NOTE(review): `alpha` is presumably only consulted for the huber/quantile
    # losses, so it may be inert with 'absolute_error' — confirm.
    params = {'learning_rate': 0.06708983968051184, 'alpha': 0.8570822543281797, 'loss': 'absolute_error',
              'criterion': 'friedman_mse', 'n_estimators': 489, 'min_samples_leaf': 0.20044756670461691,
              'min_samples_split': 0.2119159916298289, 'min_weight_fraction_leaf': 0.2184570930183031, 'max_depth': 42,
              'min_impurity_decrease': 0.08752324756334523,
              # Seed the estimator so saved models and metrics are reproducible.
              'random_state': random_state}

    gbr_model = GradientBoostingRegressor(**params)
    gbr_model.fit(X_train, y_train)

    return gbr_model

In [None]:
def showResults(model, modelName):
    """Score ``model`` on the held-out split, then save the model and its metrics.

    Predictions are made on the module-level ``X_test`` and compared with
    ``y_test``. The fitted model is pickled to ``Saved_Models/<modelName>.sav``,
    the metrics dictionary is written as text to ``Results/<modelName>.txt``
    and also printed.

    Args:
        model: Fitted estimator exposing ``predict``.
        modelName: Base name (without extension) for the two output files.
    """
    # Predict once and reuse the result for every metric.
    y_ped = model.predict(X_test)
    MSE = mean_squared_error(y_test, y_ped)
    RMSE = math.sqrt(MSE)
    resultsDict = {'R2: ': r2_score(y_test, y_ped), 'MSE: ': MSE,
                   'MAE: ': mean_absolute_error(y_test, y_ped), 'RMSE: ': RMSE}

    name = "Results/" + modelName + ".txt"
    filename = "Saved_Models/" + modelName + ".sav"

    # Create the output folders on demand so a fresh checkout does not fail
    # with FileNotFoundError.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    os.makedirs(os.path.dirname(name), exist_ok=True)

    # Save the model to disk; the context manager guarantees the handle is
    # flushed and closed even if pickling raises.
    with open(filename, 'wb') as model_file:
        pickle.dump(model, model_file)

    # Write the metrics next to the saved model.
    with open(name, "w") as results_file:
        results_file.write(str(resultsDict))

    print(resultsDict)

In [None]:
def drawPlots(model,modelName):
    """Plot predicted against experimental test-set efficiency and save the figure.

    Predictions are made on the module-level ``X_test``; the experimental
    values are ``y_test``. The figure is written to
    ``Visualization/<modelName>Efficiency.png`` and then shown.

    Args:
        model: Fitted estimator exposing ``predict``.
        modelName: Used in the plot title and in the output file name.
    """
    y_ped = model.predict(X_test)
    # Experimental values go on x and predictions on y so the points agree
    # with the axis labels below (the two were previously swapped).
    plt.scatter(y_test,y_ped,s=10,marker ="s",c=y_test,cmap=plt.get_cmap('plasma'))
    # Identity line: a perfect model would put every point on it.
    plt.plot(y_test,y_test)
    # NOTE(review): the trailing "\n b" looks like a figure-panel label — confirm it is intended.
    plt.xlabel('Experimental Efficiency \n b')
    plt.ylabel('Predicted Efficiency')
    plt.title(modelName + ' Efficiency')
    plt.grid(False)
    name = "Visualization/" + modelName + "Efficiency.png"
    # Create the output folder on demand so saving cannot fail on a fresh checkout.
    os.makedirs(os.path.dirname(name), exist_ok=True)
    plt.savefig(name, dpi=400,transparent=True,bbox_inches = "tight")
    plt.show()

In [None]:
def drawShapAnanlysis(model,modelName,features):
    """Draw a SHAP beeswarm plot for ``model`` over the training split.

    Args:
        model: Fitted estimator handed to ``shap.Explainer``.
        modelName: Currently unused; kept so existing calls keep working.
        features: Feature names passed to the explainer as ``feature_names``.

    SHAP values are computed on the module-level ``X_train``. This function
    contains no call that saves the figure.
    """
    # Clear the current matplotlib figure before drawing.
    plt.clf()
    shap_explanation = shap.Explainer(model, feature_names=features)(X_train)
    shap.plots.beeswarm(shap_explanation)


In [None]:
# 

In [None]:
# Feature names handed to the SHAP explainer as `feature_names`.
features = [
    'Cs',
    'FA',
    'MA',
    'Pb',
    'Sn',
    'Br',
    'I',
    'Thickness',
]

In [None]:
# Fit CatBoost, save the model and its test-set metrics, then plot its predictions.
model = CatBoostRegressorTraining()
showResults(model,"CatBoostRegressorEfficiency")
drawPlots(model,"CatBoostRegressor")
# SHAP analysis is currently disabled:
# drawShapAnanlysis(model,"CatBoostRegressor",features)

In [None]:
# drawShapAnanlysis(model,"CatBoostRegressor",features)

In [None]:
# Fit AdaBoost, save the model and its test-set metrics, then plot its predictions.
model = AdaBoostRegressorTraining()
showResults(model,"AdaBoostRegressorEfficiency")
drawPlots(model,"AdaBoostRegressor")

In [None]:
# Fit k-nearest neighbours, save the model and its test-set metrics, then plot its predictions.
model = KNeighborsRegressorTraining()
showResults(model,"KNeighborsRegressorEfficiency")
drawPlots(model,"KNeighborsRegressor")

In [None]:
# Fit the random forest, save the model and its test-set metrics, then plot its predictions.
model = RandomForestRegressorTraining()
showResults(model,"RandomForestRegressorEfficiency")
drawPlots(model,"RandomForestRegressor")

In [None]:
# Fit the SVR, save the model and its test-set metrics, then plot its predictions.
model = SVRTraining()
showResults(model,"SVREfficiency")
drawPlots(model,"SVR")

In [None]:
# Fit gradient boosting, save the model and its test-set metrics, then plot its predictions.
model = GradientBoostingRegressorTraining()
showResults(model,"GradientBoostingRegressorEfficiency")
drawPlots(model,"GradientBoostingRegressor")

In [None]:
# load the model from disk
# filename = "Saved_Models/" + "CatBoostRegressor" + ".sav"
# loaded_model = pickle.load(open(filename, 'rb'))