In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import warnings
warnings.filterwarnings('ignore')

from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from scipy.stats import shapiro,f_oneway,kruskal

data = pd.read_csv("Electrical Grid.csv")

data_x = data.drop(['stab','stabf'],axis=1)
data_y = data['stab']

scaler = MinMaxScaler()
scaled_x = scaler.fit_transform(data_x)
scaled_x = pd.DataFrame(scaled_x, columns=data_x.columns)

In [2]:
depth_data = pd.read_csv("depth.csv")
tree_data = pd.read_csv("tree.csv")
max_feature_data = pd.read_csv("max feature.csv")
bootstrap_size_data = pd.read_csv("bootstrap size.csv")
batch_data = pd.read_csv("batch size.csv")
iteration_data = pd.read_csv("iteration.csv")

best_parameter = pd.read_csv("bast parameter.csv")
anova_data = pd.read_csv("anova result.csv")

In [3]:
def normal_test(data):
    test_stat,p_value = shapiro(data)
    return np.round(p_value,3)

In [4]:
rotf_paremater_set = {"trees":[100,300,500,700],"sampling_sizes":[0.5,0.7,0.9],'depths':[10,20,30,40,50]}

# RotF

In [5]:
def get_random_subset(iterable,k):
    subsets = []
    iteration = 0
    np.random.shuffle(iterable)
    subset = 0
    limit = len(iterable)/k
    while iteration < limit:
        if k <= len(iterable):
            subset = k
        else:
            subset = len(iterable)
        subsets.append(iterable[-subset:])
        del iterable[-subset:]
        iteration+=1
    return subsets

def Rotation_Forest(X , Y, test_x, max_depth, size, n_trees, k):
  r_matrices , models = [],[]

  for tree in range(n_trees):
    feature_index = list(range(X.shape[1]))
    k_subset = get_random_subset(feature_index,k) #每個子集有k個特徵，每個子集特徵不重複 #將訓練集中的屬性拆分為大小相等的 K 個非重疊子集。
    rotation_matrix = np.zeros((X.shape[1],X.shape[1]),dtype=float) #591*591大小的矩陣
    
    for each_subset in k_subset:
      pca = PCA()
      X_train, X_test, y_train, y_test = train_test_split(X, Y, train_size=size)
      X_subset = X_train.iloc[:,each_subset]
      pca.fit(X_subset)
      for i in range(0,len(pca.components_)):
        for j in range(0,len(pca.components_)):
          rotation_matrix[each_subset[i],each_subset[j]] = pca.components_[i,j]

    x_transformed = X.dot(rotation_matrix)
    model = DecisionTreeRegressor(max_depth = max_depth)
    model.fit(x_transformed,Y)
    models.append(model) #存放每個樹的模型
    r_matrices.append(rotation_matrix) #存放每個樹的旋轉矩陣

  return models,r_matrices

def model_predict(models,r_matrices,x):
    predicted_ys = []
    for i,model in enumerate(models):
        x_mod =  x.dot(r_matrices[i])  
        predicted_y = model.predict(x_mod)
        predicted_ys.append(predicted_y)
        
    final_results = []
    for i in range(len(predicted_ys[0])): #總共要預測的Y有幾個 #2000個預測值
        predict_result = [] #存放每棵樹的預測值 #100棵樹有100個預測值
        for j in range(len(predicted_ys)): #每棵樹的預測值 #100棵樹
            predict_result.append( (predicted_ys[j][i]) )
        final_results.append(np.mean(predict_result))

    return final_results

def RotF_kfold(tree,depth,max_sample,seed):
    RMSE_set_rotf = []
    kf = KFold(n_splits=10, shuffle=True, random_state=seed)
    for train_index, valid_index in kf.split(scaled_x, data_y): # split()  method generate indices to split data intSo training and test set.
        #print('fold',cnt)
        train_x = scaled_x.iloc[train_index,:]
        train_y = data.iloc[train_index,:]['stab']
        valid_x = scaled_x.iloc[valid_index,:]
        valid_y = data.iloc[valid_index,:]['stab']
        models,r_matrices = Rotation_Forest(X=train_x,Y=train_y,test_x=valid_x, max_depth= depth, size= max_sample, n_trees=tree, k =3 )
        rot_pred = model_predict(models,r_matrices,valid_x)
        RMSE_set_rotf.append( np.sqrt( mean_squared_error(valid_y,rot_pred)))

    mean_rmse = np.mean(RMSE_set_rotf)
    return RMSE_set_rotf

In [6]:
num = 1

for param_name, parameter in rotf_paremater_set.items():
    print('================',param_name,'================')
    if param_name == 'trees':
        tree_rmse  = []
        best_rmse,best_para = 100,1000
        min_normality_p = 100
        for para in parameter:
            parm_set_2 = RotF_kfold(tree=para,depth=None,max_sample=0.7,seed = 47 ) #23 31 47 52
            print(para,normal_test(parm_set_2),parm_set_2)

            # 找出最好的樹參數
            if np.mean(parm_set_2) < best_rmse:
              best_tree = para
              best_rmse = np.mean(parm_set_2)
            tree_rmse.append(parm_set_2)
            
            # 紀錄每個參數的常態檢定p-value
            tree_data[str(para)].iloc[num] = normal_test(parm_set_2)

            if normal_test(parm_set_2) < min_normality_p:
              min_normality_p = normal_test(parm_set_2)

        if min_normality_p < 0.05 :
          test_stat,anova_p = kruskal(tree_rmse[0],tree_rmse[1],tree_rmse[2],tree_rmse[3])
          print('kruskal_oneway:',test_stat,anova_p)              
        else:
          test_stat,anova_p = f_oneway(tree_rmse[0],tree_rmse[1],tree_rmse[2],tree_rmse[3])
          print('f_oneway:',test_stat,anova_p)

        anova_data['trees'].iloc[num] = np.round(anova_p,3)    # 紀錄該參數的anova result
        best_parameter['trees'].iloc[num] = best_tree   # 紀錄該參數的最佳參數
        tree_result = [np.mean(tree_rmse[0]),np.mean(tree_rmse[1]),np.mean(tree_rmse[2]),np.mean(tree_rmse[3])]   # 紀錄每個參數之平均rmse

########################################################################################################################################

    elif param_name == 'sampling_sizes':
        sample_rmse = []
        best_rmse,best_para = 100,1000
        min_normality_p = 100
        for para in parameter:
            parm_set_2 = RotF_kfold(tree=300,depth=20,max_sample=para,seed = 47)
            print(para,normal_test(parm_set_2),parm_set_2)

            # 找出最好的參數
            if np.mean(parm_set_2) < best_rmse:
              best_para = para
              best_rmse = np.mean(parm_set_2)
            sample_rmse.append(parm_set_2)

            # 紀錄每個參數的常態檢定p-value
            bootstrap_size_data[str(para)].iloc[num] = normal_test(parm_set_2)

            if normal_test(parm_set_2) < min_normality_p:
              min_normality_p = normal_test(parm_set_2)
              
        if min_normality_p < 0.05 :
          test_stat,anova_p = kruskal(sample_rmse[0],sample_rmse[1],sample_rmse[2])
          print('kruskal_oneway:',test_stat,anova_p)              
        else:
          test_stat,anova_p = f_oneway(sample_rmse[0],sample_rmse[1],sample_rmse[2])
          print('f_oneway:',test_stat,anova_p)

        anova_data['bootstrap size'].iloc[num] = np.round(anova_p,3)

        best_parameter['bootstrap size'].iloc[num] = best_para   # 紀錄該參數的最佳參數
        bootstrap_result = [np.mean(sample_rmse[0]),np.mean(sample_rmse[1]),np.mean(sample_rmse[2])]  # 紀錄每個參數之平均rmse

########################################################################################################################################

    elif param_name == 'depths':
        depth_rmse = []
        best_rmse,best_para = 100,1000
        for para in parameter:
            parm_set_2 = RotF_kfold(tree=300,depth=para,max_sample=0.7,seed = 47)
            print(para,normal_test(parm_set_2),parm_set_2)

            # 找出最好的參數
            if np.mean(parm_set_2) < best_rmse:
              best_para = para
              best_rmse = np.mean(parm_set_2)
            depth_rmse.append(parm_set_2)

            # 紀錄每個參數的常態檢定p-value
            depth_data[str(para)].iloc[num] = normal_test(parm_set_2)

        print(f_oneway(depth_rmse[0],depth_rmse[1],depth_rmse[2],depth_rmse[3],depth_rmse[4]))

        # 紀錄該參數的anova result
        test_stat,anova_p = f_oneway(depth_rmse[0],depth_rmse[1],depth_rmse[2],depth_rmse[3],depth_rmse[4])
        anova_data['max depth'].iloc[num] = np.round(anova_p,3)

        # 紀錄該參數的最佳參數
        best_parameter['max depth'].iloc[num] = best_para

        # 紀錄每個參數之平均rmse
        depth_result = [np.mean(depth_rmse[0]),np.mean(depth_rmse[1]),np.mean(depth_rmse[2]),np.mean(depth_rmse[3]),np.mean(depth_rmse[4])]
        
print(tree_result)
print(bootstrap_result)
print(depth_result)

100 0.689 [0.011987311248332732, 0.011994282619423263, 0.012156725625959028, 0.011741502520151673, 0.011802461165136769, 0.011290746936678092, 0.012265751589777264, 0.01221038428796153, 0.0118329477529619, 0.011606115711842988]
300 0.522 [0.011790129811796238, 0.011879722998698933, 0.011791229222268525, 0.011585420298305242, 0.011720585062500908, 0.011152059870397657, 0.012130149457771888, 0.01210696968114149, 0.011791788658921783, 0.01147863066979329]
500 0.449 [0.011844433450245558, 0.011901014538889731, 0.011762945102513061, 0.011605731212221145, 0.011690586440036968, 0.01100501485885767, 0.012175693041185579, 0.012062931350984228, 0.011699406362178082, 0.01151764711023444]
700 0.809 [0.01181246107052202, 0.011878597762751008, 0.011771811131557501, 0.011507374292432546, 0.01175684651615985, 0.01109771748342525, 0.012135258348796073, 0.01199898043812453, 0.011581845693150353, 0.01148579264447762]
f_oneway: 0.7723198627800278 0.517110938389744
0.5 0.572 [0.011739174604965565, 0.011730

# TWRotF

In [7]:
def weighted_average(prediction_set,oobrmse):
    final_results = []
    for i in range(len(prediction_set[0])): #總共要預測的Y有幾個 #2000個預測值
        predict_result = [] #存放每棵樹的預測值 #100棵樹有100個預測值
        final_result = 0
        for j in range(len(prediction_set)): #每棵樹的預測值 #100棵樹
            predict_result.append( (prediction_set[j][i]) )

        for k in range(len(predict_result)): #加權預測值
            final_result =  final_result + predict_result[k] * oobrmse[k]
        final_results.append(final_result)
    return final_results

def Tree_Weighting_Rotation_Forest(X , Y, test_x, max_depth, size, n_trees, k):
    strength_set = []
    Prediction_set = []
    OOB_MSE = []
    r_matrices , models = [],[]
    for tree in range(n_trees):
        feature_index = list(range(X.shape[1]))
        k_subset = get_random_subset(feature_index,k) #每個子集有k個特徵，每個子集特徵不重複 #將訓練集中的屬性拆分為大小相等的 K 個非重疊子集。
        rotation_matrix = np.zeros((X.shape[1],X.shape[1]),dtype=float) #591*591大小的矩陣
        X_train, X_valid, y_train, y_valid = train_test_split(X, Y, train_size = size)

        for each_subset in k_subset:
            pca = PCA()
            x_train,_,_,_ = train_test_split(X_train, y_train, train_size = 0.7)
            X_subset = x_train.iloc[:,each_subset]
            pca.fit(X_subset)
            for i in range(0,len(pca.components_)):
                for j in range(0,len(pca.components_)):
                    rotation_matrix[ each_subset[i],each_subset[j] ] = pca.components_[i,j]

        x_transformed = X_train.dot(rotation_matrix)
        model = DecisionTreeRegressor(max_depth = max_depth).fit(x_transformed,y_train)

        x_valid_transformed = X_valid.dot(rotation_matrix)
        valid_prediction = model.predict(x_valid_transformed)

        models.append(model) #存放每個樹的模型
        r_matrices.append(rotation_matrix) #存放每個樹的旋轉矩陣

        OOB_MSE.append(mean_squared_error(y_valid,valid_prediction) )
        
    oob_mse_prop = OOB_MSE/np.sum(OOB_MSE)
    
    predicted_ys = [] #測試階段預測
    for i,model in enumerate(models): 
        x_mod =  test_x.dot(r_matrices[i])  
        predicted_y = model.predict(x_mod)
        predicted_ys.append(predicted_y)

    weighted_result = weighted_average(predicted_ys,oob_mse_prop)
    return weighted_result

def TWRotF_kfold(tree,depth,max_sample,seed):
    RMSE_set_twrotf = []
    kf = KFold(n_splits=10, shuffle=True, random_state=seed)
    for train_index, valid_index in kf.split(scaled_x, data_y): # split()  method generate indices to split data intSo training and test set.
        #print('fold',cnt)
        train_x = scaled_x.iloc[train_index,:]
        train_y = data.iloc[train_index,:]['stab']
        valid_x = scaled_x.iloc[valid_index,:]
        valid_y = data.iloc[valid_index,:]['stab']
        TWRotF_pred = Tree_Weighting_Rotation_Forest(X=train_x,Y=train_y,test_x=valid_x, max_depth= depth, size= max_sample, n_trees=tree, k =3 )
        RMSE_set_twrotf.append( np.sqrt( mean_squared_error(valid_y,TWRotF_pred)))

    mean_rmse = np.mean(RMSE_set_twrotf)
    return RMSE_set_twrotf

In [9]:
num = 7

for param_name, parameter in rotf_paremater_set.items():
    print('================',param_name,'================')
    if param_name == 'trees':
        tree_rmse  = []
        best_rmse,best_para = 100,1000
        min_normality_p = 100
        for para in parameter:
            parm_set_2 = TWRotF_kfold(tree=para,depth=None,max_sample=0.7,seed = 47 ) #23 31 47 52
            print(para,normal_test(parm_set_2),parm_set_2)

            # 找出最好的樹參數
            if np.mean(parm_set_2) < best_rmse:
              best_tree = para
              best_rmse = np.mean(parm_set_2)
            tree_rmse.append(parm_set_2)
            
            # 紀錄每個參數的常態檢定p-value
            tree_data[str(para)].iloc[num] = normal_test(parm_set_2)

            if normal_test(parm_set_2) < min_normality_p:
              min_normality_p = normal_test(parm_set_2)

        if min_normality_p < 0.05 :
          test_stat,anova_p = kruskal(tree_rmse[0],tree_rmse[1],tree_rmse[2],tree_rmse[3])
          print('kruskal_oneway:',test_stat,anova_p)              
        else:
          test_stat,anova_p = f_oneway(tree_rmse[0],tree_rmse[1],tree_rmse[2],tree_rmse[3])
          print('f_oneway:',test_stat,anova_p)

        anova_data['trees'].iloc[num] = np.round(anova_p,3)    # 紀錄該參數的anova result
        best_parameter['trees'].iloc[num] = best_tree   # 紀錄該參數的最佳參數
        tree_result = [np.mean(tree_rmse[0]),np.mean(tree_rmse[1]),np.mean(tree_rmse[2]),np.mean(tree_rmse[3])]   # 紀錄每個參數之平均rmse

########################################################################################################################################

    elif param_name == 'sampling_sizes':
        sample_rmse = []
        best_rmse,best_para = 100,1000
        min_normality_p = 100
        for para in parameter:
            parm_set_2 = TWRotF_kfold(tree=300,depth=20,max_sample=para,seed = 47)
            print(para,normal_test(parm_set_2),parm_set_2)

            # 找出最好的參數
            if np.mean(parm_set_2) < best_rmse:
              best_para = para
              best_rmse = np.mean(parm_set_2)
            sample_rmse.append(parm_set_2)

            # 紀錄每個參數的常態檢定p-value
            bootstrap_size_data[str(para)].iloc[num] = normal_test(parm_set_2)

            if normal_test(parm_set_2) < min_normality_p:
              min_normality_p = normal_test(parm_set_2)
              
        if min_normality_p < 0.05 :
          test_stat,anova_p = kruskal(sample_rmse[0],sample_rmse[1],sample_rmse[2])
          print('kruskal_oneway:',test_stat,anova_p)              
        else:
          test_stat,anova_p = f_oneway(sample_rmse[0],sample_rmse[1],sample_rmse[2])
          print('f_oneway:',test_stat,anova_p)

        anova_data['bootstrap size'].iloc[num] = np.round(anova_p,3)

        best_parameter['bootstrap size'].iloc[num] = best_para   # 紀錄該參數的最佳參數
        bootstrap_result = [np.mean(sample_rmse[0]),np.mean(sample_rmse[1]),np.mean(sample_rmse[2])]  # 紀錄每個參數之平均rmse

########################################################################################################################################

    elif param_name == 'depths':
        depth_rmse = []
        best_rmse,best_para = 100,1000
        for para in parameter:
            parm_set_2 = TWRotF_kfold(tree=300,depth=para,max_sample=0.7,seed = 47)
            print(para,normal_test(parm_set_2),parm_set_2)

            # 找出最好的參數
            if np.mean(parm_set_2) < best_rmse:
              best_para = para
              best_rmse = np.mean(parm_set_2)
            depth_rmse.append(parm_set_2)

            # 紀錄每個參數的常態檢定p-value
            depth_data[str(para)].iloc[num] = normal_test(parm_set_2)

        print(f_oneway(depth_rmse[0],depth_rmse[1],depth_rmse[2],depth_rmse[3],depth_rmse[4]))

        # 紀錄該參數的anova result
        test_stat,anova_p = f_oneway(depth_rmse[0],depth_rmse[1],depth_rmse[2],depth_rmse[3],depth_rmse[4])
        anova_data['max depth'].iloc[num] = np.round(anova_p,3)

        # 紀錄該參數的最佳參數
        best_parameter['max depth'].iloc[num] = best_para

        # 紀錄每個參數之平均rmse
        depth_result = [np.mean(depth_rmse[0]),np.mean(depth_rmse[1]),np.mean(depth_rmse[2]),np.mean(depth_rmse[3]),np.mean(depth_rmse[4])]
        
print(tree_result)
print(bootstrap_result)
print(depth_result)

100 0.722 [0.01274842327668985, 0.01245300873637978, 0.01237178823108606, 0.012278316821634458, 0.012314440066182875, 0.011804550530478597, 0.012737580177606714, 0.0128233151131707, 0.012188568253804061, 0.01203662807516986]
300 0.954 [0.012476672063913317, 0.012314374258830717, 0.012359000162953365, 0.011942074029665595, 0.012161392338562359, 0.011664732622128908, 0.012690788950087224, 0.012513426823061564, 0.012134316302049232, 0.011870465316904704]
500 0.895 [0.012337093847375311, 0.012369026792481017, 0.012384812407393272, 0.012040163173408096, 0.012189967442361883, 0.011640540999445913, 0.012648125405935686, 0.012559175678011007, 0.012088467129180557, 0.011861593074169676]
700 0.845 [0.012327273710538406, 0.01229307864497304, 0.012233705177152584, 0.012117024270366395, 0.012201711706010634, 0.011614434491118285, 0.012664704575175338, 0.01257028063958359, 0.012100300528311105, 0.01187650672620669]
f_oneway: 0.703864943673035 0.5559642710439059
0.5 0.987 [0.012953754293490272, 0.012

# SRotF

In [10]:
def Strength_Rotation_Forest(X , Y, test_x, max_depth, size, n_trees, k):
    strength_set = []
    Prediction_set = []
    r_matrices , models = [],[]
    for tree in range(n_trees):
        feature_index = list(range(X.shape[1]))
        k_subset = get_random_subset(feature_index,k) #每個子集有k個特徵，每個子集特徵不重複 #將訓練集中的屬性拆分為大小相等的 K 個非重疊子集。
        rotation_matrix = np.zeros((X.shape[1],X.shape[1]),dtype=float) #591*591大小的矩陣
        X_train, X_valid, y_train, y_valid = train_test_split(X, Y, train_size = size)

        for each_subset in k_subset:
            pca = PCA()
            x_train,_,_,_ = train_test_split(X_train, y_train, train_size = 0.7)
            X_subset = x_train.iloc[:,each_subset]
            pca.fit(X_subset)
            for i in range(0,len(pca.components_)):
                for j in range(0,len(pca.components_)):
                    rotation_matrix[ each_subset[i],each_subset[j] ] = pca.components_[i,j]

        x_transformed = X_train.dot(rotation_matrix)
        model = DecisionTreeRegressor(max_depth = max_depth).fit(x_transformed,y_train)

        x_valid_transformed = X_valid.dot(rotation_matrix)
        valid_prediction = model.predict(x_valid_transformed)

        models.append(model) #存放每個樹的模型
        r_matrices.append(rotation_matrix) #存放每個樹的旋轉矩陣

        confidence = []
        margin = np.abs(valid_prediction - y_valid)
        for j in range(len(margin)):
          confidence.append(1/ math.exp(margin.values[j]))
        strength = np.sum(confidence)/len(confidence)
        strength_set.append(strength)
    
    predicted_ys = [] #測試階段預測
    for i,model in enumerate(models): 
        x_mod =  test_x.dot(r_matrices[i])  
        predicted_y = model.predict(x_mod)
        predicted_ys.append(predicted_y)
    
    final_result = []
    for i in range(len(predicted_ys[0])):
      predict_result = [] #存放每棵樹的預測值 #100棵樹有100個預測值
      for j in range(len(predicted_ys)):
        predict_result.append( (predicted_ys[j][i]) )
      strength_predict_result = np.array(predict_result) * np.array(strength_set) #每顆樹的預測值*每個樹的strength
      final_result.append( np.mean(strength_predict_result)  ) #存放最後的2000個預測值

    return final_result

def SRotF_kfold(tree,depth,max_sample,seed):
    RMSE_set_srotf = []
    kf = KFold(n_splits=10, shuffle=True, random_state=seed)
    for train_index, valid_index in kf.split(scaled_x, data_y): # split()  method generate indices to split data intSo training and test set.
        #print('fold',cnt)
        train_x = scaled_x.iloc[train_index,:]
        train_y = data.iloc[train_index,:]['stab']
        valid_x = scaled_x.iloc[valid_index,:]
        valid_y = data.iloc[valid_index,:]['stab']
        SRotF_pred = Strength_Rotation_Forest(X=train_x,Y=train_y,test_x=valid_x, max_depth= depth, size= max_sample, n_trees=tree, k =3 )
        RMSE_set_srotf.append( np.sqrt( mean_squared_error(valid_y,SRotF_pred)))

    mean_rmse = np.mean(RMSE_set_srotf)
    return RMSE_set_srotf

In [11]:
num = 4

for param_name, parameter in rotf_paremater_set.items():
    print('================',param_name,'================')
    if param_name == 'trees':
        tree_rmse  = []
        best_rmse,best_para = 100,1000
        min_normality_p = 100
        for para in parameter:
            parm_set_2 = SRotF_kfold(tree=para,depth=None,max_sample=0.7,seed = 47 ) #23 31 47 52
            print(para,normal_test(parm_set_2),parm_set_2)

            # 找出最好的樹參數
            if np.mean(parm_set_2) < best_rmse:
              best_tree = para
              best_rmse = np.mean(parm_set_2)
            tree_rmse.append(parm_set_2)
            
            # 紀錄每個參數的常態檢定p-value
            tree_data[str(para)].iloc[num] = normal_test(parm_set_2)

            if normal_test(parm_set_2) < min_normality_p:
              min_normality_p = normal_test(parm_set_2)

        if min_normality_p < 0.05 :
          test_stat,anova_p = kruskal(tree_rmse[0],tree_rmse[1],tree_rmse[2],tree_rmse[3])
          print('kruskal_oneway:',test_stat,anova_p)              
        else:
          test_stat,anova_p = f_oneway(tree_rmse[0],tree_rmse[1],tree_rmse[2],tree_rmse[3])
          print('f_oneway:',test_stat,anova_p)

        anova_data['trees'].iloc[num] = np.round(anova_p,3)    # 紀錄該參數的anova result
        best_parameter['trees'].iloc[num] = best_tree   # 紀錄該參數的最佳參數
        tree_result = [np.mean(tree_rmse[0]),np.mean(tree_rmse[1]),np.mean(tree_rmse[2]),np.mean(tree_rmse[3])]   # 紀錄每個參數之平均rmse

########################################################################################################################################

    elif param_name == 'sampling_sizes':
        sample_rmse = []
        best_rmse,best_para = 100,1000
        min_normality_p = 100
        for para in parameter:
            parm_set_2 = SRotF_kfold(tree=300,depth=20,max_sample=para,seed = 47)
            print(para,normal_test(parm_set_2),parm_set_2)

            # 找出最好的參數
            if np.mean(parm_set_2) < best_rmse:
              best_para = para
              best_rmse = np.mean(parm_set_2)
            sample_rmse.append(parm_set_2)

            # 紀錄每個參數的常態檢定p-value
            bootstrap_size_data[str(para)].iloc[num] = normal_test(parm_set_2)

            if normal_test(parm_set_2) < min_normality_p:
              min_normality_p = normal_test(parm_set_2)
              
        if min_normality_p < 0.05 :
          test_stat,anova_p = kruskal(sample_rmse[0],sample_rmse[1],sample_rmse[2])
          print('kruskal_oneway:',test_stat,anova_p)              
        else:
          test_stat,anova_p = f_oneway(sample_rmse[0],sample_rmse[1],sample_rmse[2])
          print('f_oneway:',test_stat,anova_p)

        anova_data['bootstrap size'].iloc[num] = np.round(anova_p,3)

        best_parameter['bootstrap size'].iloc[num] = best_para   # 紀錄該參數的最佳參數
        bootstrap_result = [np.mean(sample_rmse[0]),np.mean(sample_rmse[1]),np.mean(sample_rmse[2])]  # 紀錄每個參數之平均rmse

########################################################################################################################################

    elif param_name == 'depths':
        depth_rmse = []
        best_rmse,best_para = 100,1000
        for para in parameter:
            parm_set_2 = SRotF_kfold(tree=300,depth=para,max_sample=0.7,seed = 47)
            print(para,normal_test(parm_set_2),parm_set_2)

            # 找出最好的參數
            if np.mean(parm_set_2) < best_rmse:
              best_para = para
              best_rmse = np.mean(parm_set_2)
            depth_rmse.append(parm_set_2)

            # 紀錄每個參數的常態檢定p-value
            depth_data[str(para)].iloc[num] = normal_test(parm_set_2)

        print(f_oneway(depth_rmse[0],depth_rmse[1],depth_rmse[2],depth_rmse[3],depth_rmse[4]))

        # 紀錄該參數的anova result
        test_stat,anova_p = f_oneway(depth_rmse[0],depth_rmse[1],depth_rmse[2],depth_rmse[3],depth_rmse[4])
        anova_data['max depth'].iloc[num] = np.round(anova_p,3)

        # 紀錄該參數的最佳參數
        best_parameter['max depth'].iloc[num] = best_para

        # 紀錄每個參數之平均rmse
        depth_result = [np.mean(depth_rmse[0]),np.mean(depth_rmse[1]),np.mean(depth_rmse[2]),np.mean(depth_rmse[3]),np.mean(depth_rmse[4])]
        
print(tree_result)
print(bootstrap_result)
print(depth_result)

100 0.135 [0.012910882451065938, 0.012749531661373431, 0.012602152156850628, 0.012594489803874507, 0.012752207739501455, 0.012013136066066795, 0.012946373683579728, 0.01285554125756261, 0.012632033123593641, 0.01225512713592549]
300 0.987 [0.012638772249958196, 0.012537010194047567, 0.012575923023320447, 0.012368537188935965, 0.012491168541601122, 0.011952905163644565, 0.012916317553755403, 0.012714505550306697, 0.012254712260330586, 0.012232288375531829]
500 0.635 [0.012568886991237819, 0.012523828966075824, 0.012564503297561477, 0.012302539234926751, 0.012491728907457186, 0.011900699075902082, 0.01295057585466183, 0.012792085005767112, 0.012459055846012911, 0.012026258810674414]
700 0.936 [0.012515575288255885, 0.012576428710340046, 0.012529319820194838, 0.012293866358180054, 0.012384162634664712, 0.011933946350771692, 0.01287314671889609, 0.012707334078303848, 0.012437914972545578, 0.012083016666835112]
f_oneway: 0.9529533137208577 0.4253887105471654
0.5 0.692 [0.013113199663131755,

In [12]:
depth_data.to_csv("depth.csv",index=False)
tree_data.to_csv("tree.csv",index=False)
max_feature_data.to_csv("max feature.csv",index=False)
bootstrap_size_data.to_csv("bootstrap size.csv",index=False)
best_parameter.to_csv("bast parameter.csv",index=False)
anova_data.to_csv("anova result.csv",index=False)