In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split, KFold
import warnings
from numpy import savetxt
from numpy import loadtxt
warnings.filterwarnings("ignore")
import pickle

In [None]:
from sklearn.metrics import r2_score
import optuna
from sklearn.ensemble import BaggingRegressor,AdaBoostRegressor,GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
import xgboost as xgb
from sklearn.svm import SVR
from catboost import CatBoostRegressor, Pool
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
import math
import matplotlib.pyplot as plt
import shap

In [None]:
# Read the comma-separated feature file into `train` and the matching
# target file into `label`; both are used as globals by later cells.
train = np.loadtxt(
    'Data_Set/efficiencyTrain.csv',
    delimiter=',',
)
label = np.loadtxt(
    'Data_Set/efficiencylabel.csv',
    delimiter=',',
)

In [None]:
# Pin GPU selection through environment variables: enumerate devices in
# PCI bus order (see issue #152) and expose only the device with index 2.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "2",
})

In [None]:
def showResults(model, modelName,X_test,y_test):
    """Score a fitted regressor on a test set, persist it, and log the metrics.

    Computes R2, MSE, MAE and RMSE of ``model.predict(X_test)`` against
    ``y_test``, pickles ``model`` to ``Saved_Models/<modelName>.sav``, writes
    ``str(metrics_dict)`` to ``Results/<modelName>.txt`` and prints the dict.

    Args:
        model: Object exposing ``predict(X_test)``; it must be picklable.
        modelName: Base file name (without extension) for both output files.
        X_test: Features handed to ``model.predict``.
        y_test: Ground-truth targets the predictions are compared against.

    Returns:
        None. Results are reported through the two files and stdout.
    """
    # Predict once and reuse the result for every metric.
    y_pred = model.predict(X_test)
    MSE = mean_squared_error(y_test, y_pred)
    resultsDict = {
        'R2: ': r2_score(y_test, y_pred),
        'MSE: ': MSE,
        'MAE: ': mean_absolute_error(y_test, y_pred),
        'RMSE: ': math.sqrt(MSE),
    }

    model_path = "Saved_Models/" + modelName + ".sav"
    results_path = "Results/" + modelName + ".txt"

    # Create the output folders on demand so a fresh checkout does not fail.
    for path in (model_path, results_path):
        os.makedirs(os.path.dirname(path), exist_ok=True)

    # Context managers guarantee both handles are closed, even on error.
    with open(model_path, 'wb') as model_file:
        pickle.dump(model, model_file)

    with open(results_path, "w") as results_file:
        results_file.write(str(resultsDict))

    print(resultsDict)

In [None]:
def drawPlots(model,modelName,X_test,y_test):
    """Plot predicted against experimental values and save the figure.

    Draws on the current pyplot figure: a scatter of ``y_test`` (x axis)
    versus ``model.predict(X_test)`` (y axis), coloured by ``y_test``, plus
    the identity line. The figure is saved to
    ``Visualization/<modelName>.png`` and then shown.

    Args:
        model: Object exposing ``predict(X_test)``.
        modelName: Used in the plot title and as the PNG base name.
        X_test: Features handed to ``model.predict``.
        y_test: Ground-truth ("experimental") target values.
    """
    y_pred = model.predict(X_test)

    out_path = "Visualization/" + modelName + ".png"
    # Make sure the target folder exists before savefig tries to write to it.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # Experimental values go on x and predictions on y so the data agrees
    # with the axis labels below (the two were previously swapped).
    plt.scatter(y_test, y_pred, s=10, marker="s", c=y_test,
                cmap=plt.get_cmap('plasma'))
    # Identity line: points on it are perfect predictions.
    plt.plot(y_test, y_test)
    # NOTE(review): the trailing "\n a" is kept as-is; presumably a panel
    # label for a multi-figure layout -- confirm it is intentional.
    plt.xlabel('Experimental Bandgap \n a')
    plt.ylabel('Predicted Bandgap')
    plt.title(modelName + ' Bandgap')
    plt.grid(False)
    plt.savefig(out_path, dpi=400, transparent=True, bbox_inches="tight")
    plt.show()

In [None]:
def trainingWithKfolds(n_splits=6, figure_path='5FoldsR2Efficiency.png'):
    """Cross-validate a CatBoost regressor and plot per-fold predictions.

    Splits the module-level ``train`` / ``label`` arrays with an unshuffled
    ``KFold``, fits a fresh ``CatBoostRegressor`` per fold, prints the mean
    R2 over the folds, and draws one scatter series per fold (experimental
    on x, predicted on y) with a dashed identity line spanning the value
    range of all folds. The figure is saved to ``figure_path`` and shown.

    Args:
        n_splits: Number of folds; defaults to the previously hard-coded 6.
        figure_path: Where the PNG is written. The default keeps the original
            file name (note it says "5Folds" although 6 folds are used).
    """
    params= {'iterations': 16937, 'learning_rate': 0.0802044556274633, 'depth': 7, 'random_seed': 397,
             'metric_period': 407, 'od_wait': 148}
    k_fold = KFold(n_splits=n_splits)
    scores = []
    true_val = []
    pred_val = []
    plt.figure()

    for fold, (tr, tst) in enumerate(k_fold.split(train, label), start=1):
        model_regressor = CatBoostRegressor(**params)

        X_train, X_test, y_train, y_test = train[tr], train[tst], label[tr], label[tst]
        # NOTE(review): the held-out fold doubles as eval_set with
        # use_best_model=True, so it also influences which iteration is kept;
        # the R2 reported below is therefore likely optimistic -- confirm
        # whether a separate validation split is wanted.
        model_regressor.fit(
            X_train, y_train,
            eval_set=(X_test, y_test),
            use_best_model=True,
            plot=False,
        )

        # Predict once per fold and reuse for both the score and the plot.
        y_pred = model_regressor.predict(X_test)
        true_val.append(y_test)
        pred_val.append(y_pred)
        score = r2_score(y_test, y_pred)
        scores.append(score)

        # Experimental on x, predicted on y, matching the axis labels below.
        # No cmap: matplotlib ignores it when no `c` array is given.
        plt.scatter(y_test, y_pred, lw=2, alpha=0.5,
                    label='Fold %d (R2 = %0.2f)' % (fold, score))

    print("R2 Average",np.average(scores))

    # Span the identity line over every fold's values, not just the last
    # fold's min(prediction) / max(target).
    all_values = np.concatenate([np.ravel(v) for v in true_val + pred_val])
    lower, upper = all_values.min(), all_values.max()
    plt.plot([lower, upper], [lower, upper],
             linestyle='--', lw=2, color='black')

    # NOTE(review): labels say "Bandgap" while the data files and the output
    # name refer to "efficiency" -- confirm which quantity is plotted.
    plt.xlabel('Experimental Bandgap')
    plt.ylabel('Predicted Bandgap')
    plt.title('CatBoostRegressor %d-Fold Validation' % n_splits)
    plt.legend()
    plt.grid(False)
    plt.savefig(figure_path, dpi=400,transparent=True)
    plt.show()

In [None]:
# Run the CatBoost K-fold cross-validation: prints the average R2 over the
# folds and saves/shows the per-fold scatter plot.
trainingWithKfolds()