# In [None]:  (Jupyter cell marker left over from the notebook export)
import numpy as np
from sklearn.preprocessing import scale
from sklearn.preprocessing import MinMaxScaler
import os
import warnings
warnings.filterwarnings('ignore')


# Module-level accumulators. The driver loop further down passes them to the
# sa_* routines, which append to them in place, and rebinds them to fresh
# lists after each series of hourly files.
importance_list, first_ord = [], []
score_list, pvalue_list = [], []
mse_list, mae_list = [], []
ypred, ytest = [], []

import json

# Static settings are read from a JSON file in the current working directory.
with open('configAMPSIT.json') as config_file:
    config = json.load(config_file)

# Unpack the settings used by this file; a missing key raises KeyError.
(totalhours, variables, regions, verticalmax,
 totalsim, parameter_names, output_path, tun_iter) = (
    config[key] for key in (
        'totalhours', 'variables', 'regions', 'verticalmax',
        'totalsim', 'parameter_names', 'output_pathname', 'tun_iter',
    )
)
    
#############################################################################################         

from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats import spearmanr

def sa_randomforest(X_train, X_test, y_train, y_test, importance_list,score_list,pvalue_list,mse_list,mae_list,ytest,ypred,f,tun):
    """Fit (or load) a random-forest regressor and record importances and test metrics.

    Parameters
    ----------
    X_train, X_test, y_train, y_test
        Train/test split, handed unchanged to the scikit-learn estimator.
    importance_list, score_list, pvalue_list, mse_list, mae_list, ytest, ypred : list
        Caller-owned accumulators. One entry is appended to each per call,
        as long as the list holds fewer than ``totalhours`` entries.
    f : str
        Data file name; ``f[:-4]`` (name without its last four characters)
        tags the tuning report and the saved model.
    tun : int
        1 -> Bayesian hyper-parameter search, refit with the best parameters
        and save the model; 2 -> load the previously saved model;
        anything else -> fit with fixed hyper-parameters.

    Returns
    -------
    None. Results are delivered through the accumulator lists.
    """
    from joblib import dump, load
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import mean_absolute_error, mean_squared_error

    tag = f[:-4]
    model_file = output_path + 'rf_model_' + tag + '.joblib'

    if tun == 1:
        from skopt import BayesSearchCV
        from sklearn.model_selection import cross_val_score

        params = {
            'n_estimators': (10, 30),
            'max_depth': (2, 6),
            'min_samples_split': (2, 10),
            'min_samples_leaf': (1, 8),
            'max_features': [None],
            'bootstrap': [True],
        }

        opt = BayesSearchCV(RandomForestRegressor(), params, n_iter=tun_iter, cv=5, n_jobs=-1)
        opt.fit(X_train, y_train)

        # Nested cross-validation: the whole search is re-run inside each fold.
        score = cross_val_score(opt, X_train, y_train, cv=5).mean()

        # 'w' truncates any report left by a previous run.
        with open(f'{output_path}tuning_results_rf_{tag}.txt', 'w') as file:
            file.write("Best parameters: {}\n".format(opt.best_params_))
            file.write("Cross-validation score: {}\n".format(score))

        # Use the parameters selected by the search. opt.get_params() only
        # exposes the *untuned* base estimator's settings under 'estimator__*',
        # so rebuilding from it silently produced a default forest.
        rf = RandomForestRegressor(**opt.best_params_)
        rf.fit(X_train, y_train)
        dump(rf, model_file)
    elif tun == 2:
        rf = load(model_file)
    else:
        rf = RandomForestRegressor(n_estimators=20, max_depth=5, max_features='log2',
                                   min_samples_leaf=1, min_samples_split=2)
        rf.fit(X_train, y_train)

    y_pred = rf.predict(X_test)
    importances = rf.feature_importances_

    # Once a list already holds `totalhours` entries nothing more is recorded.
    # (The former `else: x = []` branches only rebound local names and never
    # touched the caller's lists, so they are omitted.)
    if len(importance_list) < totalhours:
        importance_list.append(importances)

    # Score is the Spearman rank correlation between held-out targets and predictions.
    score, pvalue = spearmanr(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    if len(score_list) < totalhours:
        score_list.append(score)
        pvalue_list.append(pvalue)
        mse_list.append(mse)
        mae_list.append(mae)

    if len(ytest) < totalhours:
        ytest.append(y_test)
        ypred.append(y_pred)


def sa_xgboost(X_train, X_test, y_train, y_test, importance_list,score_list,pvalue_list,mse_list,mae_list,ytest,ypred,f,tun):
    """Fit (or load) an XGBoost random-forest regressor and record importances and test metrics.

    Parameters
    ----------
    X_train, X_test, y_train, y_test
        Train/test split, handed unchanged to ``XGBRFRegressor``.
    importance_list, score_list, pvalue_list, mse_list, mae_list, ytest, ypred : list
        Caller-owned accumulators. One entry is appended to each per call,
        as long as the list holds fewer than ``totalhours`` entries.
    f : str
        Data file name; ``f[:-4]`` tags the tuning report and the saved model.
    tun : int
        1 -> Bayesian hyper-parameter search, refit with the best parameters
        and save the model; 2 -> load the previously saved model;
        anything else -> fit with fixed hyper-parameters.

    Returns
    -------
    None. Results are delivered through the accumulator lists.
    """
    from joblib import dump, load
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    from xgboost import XGBRFRegressor

    tag = f[:-4]
    model_file = output_path + 'xgb_model_' + tag + '.joblib'

    if tun == 1:
        from skopt import BayesSearchCV
        from sklearn.model_selection import cross_val_score

        params = {
            'n_estimators': (10, 30),
            'max_depth': (2, 6),
        }

        opt = BayesSearchCV(XGBRFRegressor(), params, n_iter=tun_iter, cv=5, n_jobs=-1)
        opt.fit(X_train, y_train)

        # Nested cross-validation: the whole search is re-run inside each fold.
        score = cross_val_score(opt, X_train, y_train, cv=5).mean()

        # 'w' truncates any report left by a previous run.
        with open(f'{output_path}tuning_results_xgb_{tag}.txt', 'w') as file:
            file.write("Best parameters: {}\n".format(opt.best_params_))
            file.write("Cross-validation score: {}\n".format(score))

        # Build the final model from the selected hyper-parameters only.
        # opt.get_params() describes the search object itself ('estimator',
        # 'cv', 'search_spaces', ...) and is not a valid XGBoost configuration.
        gb = XGBRFRegressor(**opt.best_params_)
        gb.fit(X_train, y_train)
        dump(gb, model_file)
    elif tun == 2:
        gb = load(model_file)
    else:
        # Only XGBoost parameters are passed here; the scikit-learn-only names
        # (max_features, min_samples_leaf, min_samples_split) that used to be
        # listed are not XGBoost parameters and have been removed.
        gb = XGBRFRegressor(n_estimators=20, max_depth=5)
        gb.fit(X_train, y_train)

    y_pred = gb.predict(X_test)
    importances = gb.feature_importances_

    # Once a list already holds `totalhours` entries nothing more is recorded.
    # (The former `else: x = []` branches only rebound local names and never
    # touched the caller's lists, so they are omitted.)
    if len(importance_list) < totalhours:
        importance_list.append(importances)

    # Score is the Spearman rank correlation between held-out targets and predictions.
    score, pvalue = spearmanr(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    if len(score_list) < totalhours:
        score_list.append(score)
        pvalue_list.append(pvalue)
        mse_list.append(mse)
        mae_list.append(mae)

    if len(ytest) < totalhours:
        ytest.append(y_test)
        ypred.append(y_pred)


def sa_cart(X_train, X_test, y_train, y_test, importance_list,score_list,pvalue_list,mse_list,mae_list,ytest,ypred,f,tun):
    """Fit (or load) a single decision-tree regressor and record importances and test metrics.

    Parameters
    ----------
    X_train, X_test, y_train, y_test
        Train/test split, handed unchanged to ``DecisionTreeRegressor``.
    importance_list, score_list, pvalue_list, mse_list, mae_list, ytest, ypred : list
        Caller-owned accumulators. One entry is appended to each per call,
        as long as the list holds fewer than ``totalhours`` entries.
    f : str
        Data file name; ``f[:-4]`` tags the tuning report and the saved model.
    tun : int
        1 -> Bayesian hyper-parameter search, refit with the best parameters
        and save the model; 2 -> load the previously saved model;
        anything else -> fit with fixed hyper-parameters.

    Returns
    -------
    None. Results are delivered through the accumulator lists.
    """
    from joblib import dump, load
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    from sklearn.tree import DecisionTreeRegressor

    tag = f[:-4]
    model_file = output_path + 'cart_model_' + tag + '.joblib'

    if tun == 1:
        from skopt import BayesSearchCV
        from sklearn.model_selection import cross_val_score

        params = {
            'max_depth': (2, 6),
            'min_samples_split': (2, 10),
            'min_samples_leaf': (1, 5)
        }

        opt = BayesSearchCV(DecisionTreeRegressor(), params, n_iter=tun_iter, cv=5, n_jobs=-1)
        opt.fit(X_train, y_train)

        # Nested cross-validation: the whole search is re-run inside each fold.
        score = cross_val_score(opt, X_train, y_train, cv=5).mean()

        # 'w' truncates any report left by a previous run.
        with open(f'{output_path}tuning_results_cart_{tag}.txt', 'w') as file:
            file.write("Best parameters: {}\n".format(opt.best_params_))
            file.write("Cross-validation score: {}\n".format(score))

        # Use the parameters selected by the search. Filtering opt.get_params()
        # for un-prefixed names matched nothing (its keys are 'estimator__*'),
        # so the "tuned" tree used to be built with all defaults.
        dt = DecisionTreeRegressor(**opt.best_params_)
        dt.fit(X_train, y_train)
        dump(dt, model_file)
    elif tun == 2:
        dt = load(model_file)
    else:
        dt = DecisionTreeRegressor(max_depth=5, min_samples_leaf=1, min_samples_split=2)
        dt.fit(X_train, y_train)

    y_pred = dt.predict(X_test)
    importances = dt.feature_importances_

    # Once a list already holds `totalhours` entries nothing more is recorded.
    # (The former `else: x = []` branches only rebound local names and never
    # touched the caller's lists, so they are omitted.)
    if len(importance_list) < totalhours:
        importance_list.append(importances)

    # Score is the Spearman rank correlation between held-out targets and predictions.
    score, pvalue = spearmanr(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    if len(score_list) < totalhours:
        score_list.append(score)
        pvalue_list.append(pvalue)
        mse_list.append(mse)
        mae_list.append(mae)

    if len(ytest) < totalhours:
        ytest.append(y_test)
        ypred.append(y_pred)



def sa_lassoregression(X_train, X_test, y_train, y_test, importance_list,score_list,pvalue_list,mse_list,mae_list,ytest,ypred,f,tun):
    """Fit (or load) a LassoCV model and record normalised |coefficients| and test metrics.

    ``tun`` selects the mode: 1 runs a Bayesian search over LassoCV settings,
    writes a tuning report and saves the fitted search object; 2 loads that
    saved search object; any other value fits ``LassoCV(cv=5)`` directly.

    The absolute coefficients, divided by their sum, are appended to
    ``importance_list``; Spearman correlation, its p-value, MSE and MAE on the
    test split go to ``score_list``, ``pvalue_list``, ``mse_list`` and
    ``mae_list``; the test targets and predictions go to ``ytest`` / ``ypred``.
    Each list is only appended to while it holds at most ``totalhours - 1``
    entries. ``f[:-4]`` tags the files written or read. Returns None.
    """
    import numpy as np
    from sklearn.linear_model import LassoCV
    from joblib import dump, load

    stem = f[:-4]
    model_path = output_path+'lasso_model_'+stem+'.joblib'

    if tun == 2:
        # Re-use the search object stored by an earlier tun == 1 run.
        loaded_model = load(model_path)
        y_pred = loaded_model.predict(X_test)
        importances = np.abs(loaded_model.best_estimator_.coef_)

    elif tun == 1:
        from skopt import BayesSearchCV
        from sklearn.model_selection import cross_val_score

        search_space = {
            'eps': (1e-8, 1e-1, 'log-uniform'),
            'n_alphas': (50, 300),
            'tol': (1e-8, 1e-3, 'log-uniform'),
            'cv': [5, 7],
        }
        opt = BayesSearchCV(LassoCV(), search_space, n_iter=tun_iter, cv=5, n_jobs=-1)
        opt.fit(X_train, y_train)

        score = cross_val_score(opt, X_train, y_train, cv=5).mean()

        # Start the report from scratch: drop a stale file, then write.
        report_path = f'{output_path}tuning_results_lasso_{stem}.txt'
        if os.path.exists(report_path):
            os.remove(report_path)
        with open(report_path, 'a') as file:
            file.write("Best parameters: {}\n".format(opt.best_params_))
            file.write("Cross-validation score: {}\n".format(score))

        tuned = opt.best_estimator_
        y_pred = tuned.predict(X_test)
        importances = np.abs(tuned.coef_)

        dump(opt, model_path)

    else:
        lasso_cv = LassoCV(cv=5)
        lasso_cv.fit(X_train, y_train)
        y_pred = lasso_cv.predict(X_test)
        importances = np.abs(lasso_cv.coef_)

    # A list that already holds `totalhours` entries is left untouched.
    last_slot = totalhours - 1

    if len(importance_list) <= last_slot:
        importance_list.append(importances / np.sum(importances, axis=0))

    rho, p_value = spearmanr(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    if len(score_list) <= last_slot:
        for bucket, value in zip((score_list, pvalue_list, mse_list, mae_list),
                                 (rho, p_value, mse, mae)):
            bucket.append(value)

    if len(ytest) <= last_slot:
        ytest.append(y_test)
        ypred.append(y_pred)

def _svr_feature_weights(model, X, y):
    """Return one weight per feature for a fitted SVR (or a fitted search wrapping one).

    A linear kernel exposes signed coefficients through ``coef_``. Any other
    kernel has no ``coef_``, so the mean permutation importance on (X, y) is
    used instead.
    """
    estimator = getattr(model, 'best_estimator_', model)
    if estimator.kernel == 'linear':
        return estimator.coef_[0]

    from sklearn.inspection import permutation_importance
    # Fixed seed so repeated runs give the same weights.
    result = permutation_importance(estimator, X, y, n_repeats=10, random_state=0)
    return result.importances_mean


def sa_svm(X_train, X_test, y_train, y_test, problem, N, importance_list,score_list,pvalue_list,mse_list,mae_list,ytest,ypred,f,tun):
    """Fit (or load) a support-vector regressor and record feature weights and test metrics.

    Parameters
    ----------
    X_train, X_test, y_train, y_test
        Train/test split, handed unchanged to the scikit-learn estimator.
    problem, N
        Accepted for signature compatibility with the Sobol-based routines;
        not used here.
    importance_list, score_list, pvalue_list, mse_list, mae_list, ytest, ypred : list
        Caller-owned accumulators. One entry is appended to each per call,
        as long as the list holds fewer than ``totalhours`` entries.
    f : str
        Data file name; ``f[:-4]`` tags the tuning report and the saved model.
    tun : int
        1 -> Bayesian search over a polynomial-kernel SVR, saved to disk;
        2 -> load the previously saved search object;
        anything else -> linear-kernel SVR with fixed settings.

    Returns
    -------
    None. Results are delivered through the accumulator lists.
    """
    from sklearn import svm
    from joblib import dump, load

    tag = f[:-4]
    model_file = output_path + 'svm_model_' + tag + '.joblib'

    if tun == 1:
        from skopt import BayesSearchCV
        from skopt.space import Real, Categorical, Integer
        from sklearn.model_selection import cross_val_score

        params = {
            'C': Real(1e-4, 1e+1, prior='log-uniform'),
            'kernel': Categorical(['poly']),
            'gamma': Real(1e-3, 1e+1, prior='log-uniform'),
            'epsilon': Real(1e-4, 1e-1, prior='log-uniform'),
            'degree': Integer(2, 6),
            'coef0': Real(0, 10),
        }

        svr = BayesSearchCV(svm.SVR(), params, n_iter=tun_iter, cv=5, n_jobs=-1)
        svr.fit(X_train, y_train)

        # Nested cross-validation: the whole search is re-run inside each fold.
        score = cross_val_score(svr, X_train, y_train, cv=5).mean()

        # 'w' truncates any report left by a previous run.
        with open(f'{output_path}tuning_results_svm_{tag}.txt', 'w') as file:
            file.write("Best parameters: {}\n".format(svr.best_params_))
            file.write("Cross-validation score: {}\n".format(score))

        # Persist the search before anything else can fail, so an expensive
        # tuning run is never lost.
        dump(svr, model_file)
    elif tun == 2:
        svr = load(model_file)
    else:
        svr = svm.SVR(kernel='linear', C=1, epsilon=0.1)
        svr.fit(X_train, y_train)

    y_pred = svr.predict(X_test)

    # `coef_` only exists for a linear kernel; the tuned model is polynomial,
    # so reading it directly raised AttributeError. The helper falls back to
    # permutation importance on the test split in that case.
    coef = _svr_feature_weights(svr, X_test, y_test)

    # Once a list already holds `totalhours` entries nothing more is recorded.
    # (The former `else: x = []` branches only rebound local names and never
    # touched the caller's lists, so they are omitted.)
    if len(importance_list) < totalhours:
        # Weights keep their sign; they are scaled by the sum of magnitudes.
        importance_list.append(coef / np.sum(np.abs(coef)))

    # Score is the Spearman rank correlation between held-out targets and predictions.
    score, pvalue = spearmanr(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    if len(score_list) < totalhours:
        score_list.append(score)
        pvalue_list.append(pvalue)
        mse_list.append(mse)
        mae_list.append(mae)

    if len(ytest) < totalhours:
        ytest.append(y_test)
        ypred.append(y_pred)


def sa_baesyanreg(X_train, X_test, y_train, y_test,problem,N, importance_list,score_list,pvalue_list,mse_list,mae_list,ytest,ypred,first_ord,f,tun):
    """Fit (or load) a Bayesian ridge surrogate and run a Sobol analysis on it.

    Parameters
    ----------
    X_train, X_test, y_train, y_test
        Train/test split, handed unchanged to the scikit-learn estimator.
    problem : dict
        SALib problem definition, passed to the Sobol sampler and analyser.
    N : int
        Base sample count passed to the Sobol sampler.
    importance_list, first_ord : list
        Receive the total-order indices (divided by their sum) and the
        first-order indices, while ``importance_list`` holds fewer than
        ``totalhours`` entries.
    score_list, pvalue_list, mse_list, mae_list, ytest, ypred : list
        Receive Spearman correlation, its p-value, MSE, MAE, the test targets
        and the predictions, under the same length limit.
    f : str
        Data file name; ``f[:-4]`` tags the tuning report and the saved model.
    tun : int
        1 -> Bayesian hyper-parameter search, saved to disk;
        2 -> load the previously saved search object;
        anything else -> ``BayesianRidge()`` with default settings.

    Side effects: writes the second-order index matrix to
    ``interactions_matrix_<k>.txt`` where k is the current length of
    ``importance_list``. Returns None.
    """
    from joblib import dump, load
    from sklearn.linear_model import BayesianRidge
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    # Both SALib modules are called `sobol`; alias them so neither shadows the other.
    from SALib.analyze import sobol as sobol_analyzer
    from SALib.sample import sobol as sobol_sampler

    tag = f[:-4]
    model_file = output_path + 'br_model_' + tag + '.joblib'

    if tun == 1:
        from skopt import BayesSearchCV
        from sklearn.model_selection import cross_val_score

        # NOTE(review): newer scikit-learn releases renamed BayesianRidge's
        # `n_iter` to `max_iter` -- confirm against the installed version.
        params = {
            'n_iter': (100, 500),
            'tol': (1e-9, 1e-3, 'log-uniform'),
            'alpha_1': (1e-10, 1e-4, 'log-uniform'),
            'alpha_2': (1e-10, 1e-4, 'log-uniform'),
            'lambda_1': (1e-10, 1e-4, 'log-uniform'),
            'lambda_2': (1e-10, 1e-4, 'log-uniform'),
            'fit_intercept': [True, False],
        }

        br = BayesSearchCV(BayesianRidge(), params, n_iter=tun_iter, cv=5, n_jobs=-1)
        br.fit(X_train, y_train)

        # Nested cross-validation: the whole search is re-run inside each fold.
        score = cross_val_score(br, X_train, y_train, cv=5).mean()

        # 'w' truncates any report left by a previous run.
        with open(f'{output_path}tuning_results_br_{tag}.txt', 'w') as file:
            file.write("Best parameters: {}\n".format(br.best_params_))
            file.write("Cross-validation score: {}\n".format(score))

        dump(br, model_file)
    elif tun == 2:
        br = load(model_file)
    else:
        br = BayesianRidge()
        br.fit(X_train, y_train)

    y_pred = br.predict(X_test)

    # Evaluate the surrogate on the whole Sobol design in one batched call
    # instead of one predict() per sample row.
    X_sobol = sobol_sampler.sample(problem, N, calc_second_order=True)
    Y_sobol = np.asarray(br.predict(X_sobol)).reshape(-1)
    Si = sobol_analyzer.analyze(problem, Y_sobol, calc_second_order=True, print_to_console=False)

    importances = Si['ST']
    first_order = Si['S1']
    interactions = Si['S2']
    # NOTE(review): the file name depends only on the list length, so calls
    # for different variables/methods write to the same path -- confirm intended.
    np.savetxt(output_path+f'interactions_matrix_{len(importance_list)}.txt', interactions, delimiter='\t', fmt='%f')

    # Once a list already holds `totalhours` entries nothing more is recorded.
    # (The former `else: x = []` branches only rebound local names and never
    # touched the caller's lists, so they are omitted.)
    if len(importance_list) < totalhours:
        importance_list.append(importances / np.sum(importances, axis=0))
        first_ord.append(first_order)

    # Score is the Spearman rank correlation between held-out targets and predictions.
    score, pvalue = spearmanr(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    if len(score_list) < totalhours:
        score_list.append(score)
        pvalue_list.append(pvalue)
        mse_list.append(mse)
        mae_list.append(mae)

    if len(ytest) < totalhours:
        ytest.append(y_test)
        ypred.append(y_pred)

def sa_gaussianreg(X_train, X_test, y_train, y_test,problem,N, importance_list,score_list,pvalue_list,mse_list,mae_list,ytest,ypred,first_ord,f,tun):
    """Fit (or load) a Gaussian-process surrogate and run a Sobol analysis on it.

    Parameters
    ----------
    X_train, X_test, y_train, y_test
        Train/test split, handed unchanged to the scikit-learn estimator.
    problem : dict
        SALib problem definition, passed to the Sobol sampler and analyser.
    N : int
        Base sample count passed to the Sobol sampler.
    importance_list, first_ord : list
        Receive the normalised total-order indices and the first-order
        indices, while ``importance_list`` holds fewer than ``totalhours``
        entries. Total-order indices that are not below 1 are stored as NaN.
    score_list, pvalue_list, mse_list, mae_list, ytest, ypred : list
        Receive Spearman correlation, its p-value, MSE, MAE, the test targets
        and the predictions, under the same length limit.
    f : str
        Data file name; ``f[:-4]`` tags the tuning report and the saved model.
    tun : int
        1 -> Bayesian hyper-parameter search, saved to disk;
        2 -> load the previously saved search object;
        anything else -> GP with an ``RBF(length_scale=1.0)`` kernel.

    Side effects: writes the second-order index matrix to
    ``interactions_matrix_<k>.txt`` where k is the current length of
    ``importance_list``. Returns None.
    """
    from joblib import dump, load
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    # Both SALib modules are called `sobol`; alias them so neither shadows the other.
    from SALib.analyze import sobol as sobol_analyzer
    from SALib.sample import sobol as sobol_sampler

    tag = f[:-4]
    model_file = output_path + 'gp_model_' + tag + '.joblib'

    if tun == 1:
        from skopt import BayesSearchCV
        from skopt.space import Real, Integer
        from sklearn.model_selection import cross_val_score

        gp_model = GaussianProcessRegressor(kernel=RBF())

        params = {
            'alpha': Real(1e-8, 1e+3, prior='log-uniform'),
            'n_restarts_optimizer': Integer(0, 20),
            'kernel__length_scale': Real(1e-2, 1e+2, prior='log-uniform'),
        }

        gp = BayesSearchCV(gp_model, params, n_iter=tun_iter, cv=5, n_jobs=-1)
        gp.fit(X_train, y_train)

        # Nested cross-validation: the whole search is re-run inside each fold.
        score = cross_val_score(gp, X_train, y_train, cv=5).mean()

        # 'w' truncates any report left by a previous run.
        with open(f'{output_path}tuning_results_gp_{tag}.txt', 'w') as file:
            file.write("Best parameters: {}\n".format(gp.best_params_))
            file.write("Cross-validation score: {}\n".format(score))

        dump(gp, model_file)
    elif tun == 2:
        gp = load(model_file)
    else:
        gp = GaussianProcessRegressor(kernel=RBF(length_scale=1.0))
        gp.fit(X_train, y_train)

    y_pred = gp.predict(X_test)

    # Evaluate the surrogate on the whole Sobol design in one batched call
    # instead of one predict() per sample row.
    X_sobol = sobol_sampler.sample(problem, N, calc_second_order=True)
    Y_sobol = np.asarray(gp.predict(X_sobol)).reshape(-1)
    Si = sobol_analyzer.analyze(problem, Y_sobol, calc_second_order=True, print_to_console=False)

    # Total-order indices that are not below 1 are masked out as NaN.
    importances = np.where(Si['ST'] < 1, Si['ST'], np.nan)
    first_order = Si['S1']
    interactions = Si['S2']
    # NOTE(review): the file name depends only on the list length, so calls
    # for different variables/methods write to the same path -- confirm intended.
    np.savetxt(output_path+f'interactions_matrix_{len(importance_list)}.txt', interactions, delimiter='\t', fmt='%f')

    # Once a list already holds `totalhours` entries nothing more is recorded.
    # (The former `else: x = []` branches only rebound local names and never
    # touched the caller's lists, so they are omitted.)
    if len(importance_list) < totalhours:
        # nansum skips the masked entries; a plain sum would be NaN as soon as
        # one index is masked and would wipe out every importance in the row.
        sum_importances = np.nansum(importances, axis=0)
        importance_list.append(importances / sum_importances)
        first_ord.append(first_order)

    # Score is the Spearman rank correlation between held-out targets and predictions.
    score, pvalue = spearmanr(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    if len(score_list) < totalhours:
        score_list.append(score)
        pvalue_list.append(pvalue)
        mse_list.append(mse)
        mae_list.append(mae)

    if len(ytest) < totalhours:
        ytest.append(y_test)
        ypred.append(y_pred)

  

#############################################################################################
import ipywidgets as widgets
from IPython.display import display


import shutil


# Driver: for every requested method / sample size / variable / level / region,
# fit one surrogate per hourly file and write the collected importances and
# scores to text files under `output_path`.
import pandas as pd
from joblib import dump
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

file_loop = "C:/Users/dario/Documents/MATLABdott/WRF_IDEAL_SIMUL/loopconfigseabreezeSCALED.json"

with open(file_loop, 'r') as json_file:
    config_data = json.load(json_file)


hour = config_data['hour']
# Original author's note on Nsobol: "mod nparam+2 == 0"
# (presumably a constraint on the Sobol sample count -- confirm).
n_sample = config_data['Nsobol']

tun = config_data['tun']

# Method code from the loop config -> tag used in the output file names.
method_tags = {
    1: 'randomforest',
    2: 'lasso',
    3: 'svm',
    4: 'br',
    5: 'gp',
    6: 'xgboost',
    7: 'cart',
}

# Sample sizes that trigger truncation of the data set to its first N rows:
# totalsim, totalsim-10, ..., totalsim-190. Any other N uses all rows.
sample_cutoffs = {int(totalsim - k) for k in range(0, 200, 10)}

# Fraction of the samples held out for testing.
partition = 0.3


for meth in config_data.get("methh", []):
    # Fail early on an unknown method code instead of hitting a NameError
    # (or silently re-using the previous method tag) when results are written.
    if meth not in method_tags:
        raise ValueError(f"Unknown method code {meth!r}; expected one of {sorted(method_tags)}")
    method = method_tags[meth]

    for N in config_data.get("NN", []):
        for var in config_data.get("varr", []):
            # `var` is a 1-based index into `variables`.
            if not 1 <= var <= len(variables):
                raise ValueError(f"Invalid var value {var!r}; expected 1..{len(variables)}")
            nam1 = variables[var - 1]

            for vpoint in config_data.get("vpointt", []):
                for hpoint in config_data.get("hpointt", []):
                    # `hpoint` is a 1-based index into `regions`.
                    if not 1 <= hpoint <= len(regions):
                        raise ValueError(f"Invalid hpoint value {hpoint!r}; expected 1..{len(regions)}")
                    nam2 = regions[hpoint - 1]

                    name = nam1+'_'+nam2+'_lev'+str(vpoint)
                    file_list = [name+'_'+str(i)+'.txt' for i in range(1, totalhours+1)]

                    Xnonscaled = np.loadtxt(output_path+'X.txt')
                    if N in sample_cutoffs:
                        Xnonscaled = Xnonscaled[:int(N), :]

                    for file in file_list:

                        f = file
                        ynonscaled = np.loadtxt(output_path+file, delimiter=',')
                        if N in sample_cutoffs:
                            ynonscaled = ynonscaled[:int(N)]

                        y = ynonscaled
                        X = Xnonscaled

                        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=partition, random_state=42)

                        # Scalers are fitted on the training split only and
                        # then applied to the test split.
                        scalerX = StandardScaler()
                        X_train = scalerX.fit_transform(X_train)
                        X_test = scalerX.transform(X_test)

                        scalery = StandardScaler()
                        y_train = scalery.fit_transform(y_train.reshape(-1, 1)).ravel()
                        y_test = scalery.transform(y_test.reshape(-1, 1)).ravel()

                        # SALib problem definition: bounds are the per-feature
                        # min/max of the scaled training inputs.
                        Xlow = np.min(X_train, axis=0)
                        Xup = np.max(X_train, axis=0)
                        Nn = n_sample
                        bounds = [(Xlow[i], Xup[i]) for i in range(Xlow.shape[0])]
                        problem = {'num_vars': X.shape[1], 'names': parameter_names, 'bounds': bounds}

                        # The sa_* routines return None; `Si` is kept only
                        # because the original notebook bound it.
                        if meth == 1:
                            Si = sa_randomforest(X_train, X_test, y_train, y_test, importance_list, score_list, pvalue_list, mse_list, mae_list, ytest, ypred, f, tun)
                        elif meth == 2:
                            Si = sa_lassoregression(X_train, X_test, y_train, y_test, importance_list, score_list, pvalue_list, mse_list, mae_list, ytest, ypred, f, tun)
                        elif meth == 3:
                            Si = sa_svm(X_train, X_test, y_train, y_test, problem, Nn, importance_list, score_list, pvalue_list, mse_list, mae_list, ytest, ypred, f, tun)
                        elif meth == 4:
                            Si = sa_baesyanreg(X_train, X_test, y_train, y_test, problem, Nn, importance_list, score_list, pvalue_list, mse_list, mae_list, ytest, ypred, first_ord, f, tun)
                        elif meth == 5:
                            Si = sa_gaussianreg(X_train, X_test, y_train, y_test, problem, Nn, importance_list, score_list, pvalue_list, mse_list, mae_list, ytest, ypred, first_ord, f, tun)
                        elif meth == 6:
                            Si = sa_xgboost(X_train, X_test, y_train, y_test, importance_list, score_list, pvalue_list, mse_list, mae_list, ytest, ypred, f, tun)
                        elif meth == 7:
                            Si = sa_cart(X_train, X_test, y_train, y_test, importance_list, score_list, pvalue_list, mse_list, mae_list, ytest, ypred, f, tun)

                        data = np.array([score_list, pvalue_list, mse_list, mae_list]).T
                        df = pd.DataFrame(data, columns=['score', 'pvalue', 'mse', 'mae'])
                        importance_df = pd.DataFrame(importance_list)
                        if hour == totalhours:
                            # The files are rewritten after every hourly file,
                            # so the last write holds the complete series.
                            # They are named after `name`: slicing the hourly
                            # file name with [:-7] cut into the level number
                            # for one-digit hour indices and left stray
                            # partial files behind.
                            importance_df.to_csv(output_path+'importance'+method+str(N)+name+'.txt', header=False, index=False, sep=' ')
                            df.to_csv(output_path+'df'+method+str(N)+name+'.txt', header=False, index=False, sep=' ')
                            yt = np.array(ytest)
                            yp = np.array(ypred)

                    # Start the next series with fresh accumulators.
                    importance_list = []
                    first_ord = []
                    score_list = []
                    pvalue_list = []
                    mse_list = []
                    mae_list = []
                    ypred = []
                    ytest = []
