In [18]:
import pandas as pd
import numpy as np
import os
import random

In [19]:
# Load the results table that the surrogate-model sections below are built on.
data = pd.read_csv(filepath_or_buffer='../CA/Other_method/result.csv')

## Surrogate model
### data preprocessing

In [310]:
# Average every remaining column over the rows that share the same
# (g_time, act, g_per) combination.
data_agg = (
    data
    .drop(columns=['Unnamed: 0.1', 'Unnamed: 0', 'y1', 'y2', 'y3'])
    .groupby(['g_time', 'act', 'g_per'])
    .mean()
    .reset_index()
)
data_agg.head()

Unnamed: 0,g_time,act,g_per,rmse_Y3,rmse_Y1,rmse_Y2,rmse_Y3_test,rmse_Y1_test,rmse_Y2_test
0,30,0.05,0.1,6.748237,1.227077,1.823021,8.54045,1.854858,3.749179
1,30,0.05,0.2,4.289127,1.396292,2.22012,5.684172,1.170235,2.544679
2,30,0.05,0.3,6.057574,1.847569,2.22112,7.564624,2.307075,3.662435
3,30,0.05,0.4,5.006639,1.812709,2.542025,5.954443,1.582546,2.615103
4,30,0.05,0.5,3.923151,2.086422,2.97078,4.566449,1.628368,2.307886


### random forest

In [311]:
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestRegressor

def randomForest(data_agg, y_column):
    """Fit a random-forest surrogate for one target and store its predictions.

    A 5-fold grid search selects the forest's hyperparameters; the selected
    model, refit on every row, then predicts the target for every row of
    ``data_agg``.

    Parameters
    ----------
    data_agg : pandas.DataFrame
        Must contain the feature columns ``g_time``, ``act``, ``g_per`` and the
        column named by ``y_column``. Modified in place.
    y_column : str
        Name of the target column to model.

    Returns
    -------
    None
        Predictions are written to ``data_agg[y_column + '_pred']`` and the
        best hyperparameters are printed.
    """
    # Features and target.
    X = data_agg[['g_time', 'act', 'g_per']]
    y = data_agg[y_column]

    # Hyperparameter grid to search.
    param_grid = {
        'n_estimators': [10, 50, 100, 200],
        'max_depth': [3, 5, 7, 10],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    # No `scoring` is passed, so candidates are ranked by the estimator's
    # default score (R^2 for regressors), not by RMSE.
    rf = RandomForestRegressor(random_state=42)
    grid_search = GridSearchCV(rf, param_grid, cv=5, n_jobs=-1)
    grid_search.fit(X, y)

    # Report the selected hyperparameters.
    print("Y: ", y_column, ", Best hyperparameters:", grid_search.best_params_)

    # GridSearchCV refits a clone of `rf` (same random_state) with the best
    # parameters on all of X, y by default, so reuse that model instead of
    # training an identical forest a second time.
    y_pred_train = grid_search.best_estimator_.predict(X)

    # NOTE(review): these are predictions on the very rows the model was
    # trained on, not out-of-sample predictions.
    data_agg[y_column + '_pred'] = y_pred_train

In [312]:
# Fit one random-forest surrogate per target; each call adds a
# '<target>_pred' column to data_agg.
for rmse_target in ('rmse_Y1', 'rmse_Y2', 'rmse_Y3'):
    randomForest(data_agg, rmse_target)

Y:  rmse_Y1 , Best hyperparameters: {'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 200}
Y:  rmse_Y2 , Best hyperparameters: {'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 100}
Y:  rmse_Y3 , Best hyperparameters: {'max_depth': 7, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 10}


### result

In [321]:
# Combinations whose predicted RMSE is within each target's selection threshold.
result_Y1_rd = data_agg.loc[data_agg['rmse_Y1_pred'].le(1.59)]
result_Y2_rd = data_agg.loc[data_agg['rmse_Y2_pred'].le(2.22)]
result_Y3_rd = data_agg.loc[data_agg['rmse_Y3_pred'].le(3.57)]

In [322]:
# Stack the three per-target selections; every row gets a count of 1 so the
# number of selections containing a combination can be summed afterwards.
result_rd = pd.concat(
    [result_Y1_rd, result_Y2_rd, result_Y3_rd],
    ignore_index=True,  # already produces a fresh 0..n-1 index
).assign(sum2=1)
print(len(result_rd))

465


In [323]:
# Count how many of the three selections contain each combination and keep
# the ones found in all of them (count > 2).
result_rd = (
    result_rd
    .groupby(['g_time', 'act', 'g_per'], as_index=False)[['sum2']]
    .sum()
)
result_rd_rmsesel = result_rd.loc[result_rd['sum2'].gt(2)]
len(result_rd_rmsesel)

26

In [324]:
# Preview the combinations selected for all three targets.
result_rd_rmsesel.head(n=5)

Unnamed: 0,g_time,act,g_per,sum2
13,120,0.1,0.2,3
24,150,0.1,0.2,3
36,180,0.05,0.4,3
37,180,0.05,0.5,3
43,180,0.1,0.4,3


## accuracy

In [325]:
# Train / test observation files, one pair per target.
Path = '../CA/Data/test_trainData/'

trainY1, trainY2, trainY3 = (
    np.loadtxt(Path + file_name)
    for file_name in (
        "train_data_forY1_80.txt",
        "train_data_forY2_80.txt",
        "train_data_forY3_80.txt",
    )
)

testY1, testY2, testY3 = (
    np.loadtxt(Path + file_name)
    for file_name in (
        "test_data_forY1_20.txt",
        "test_data_forY2_20.txt",
        "test_data_forY3_20_2.txt",  # Y3 reads the file with the extra '_2' suffix
    )
)

In [326]:
# Row-level table (no averaging) used to score the selected combinations;
# reset_index() keeps the previous row labels as an 'index' column.
data_for_accuracy_rd = (
    data
    .drop(columns=['Unnamed: 0.1', 'Unnamed: 0', 'y1', 'y2', 'y3'])
    .reset_index()
)

In [327]:
# Distinct parameter combinations selected by the random-forest surrogate.
unique_combinations = result_rd_rmsesel.loc[:, ['g_time', 'act', 'g_per']].drop_duplicates()

# Inner join: keep only the rows of data_for_accuracy_rd whose parameters
# match one of the selected combinations.
filtered_data_for_accuracy_rd = pd.merge(
    data_for_accuracy_rd,
    unique_combinations,
    how='inner',
    on=['g_time', 'act', 'g_per'],
)

In [328]:
# Train accuracy: flag every row whose train RMSE meets the selection
# threshold of each target.
filtered_data_for_accuracy_rd = filtered_data_for_accuracy_rd.assign(
    all_sum_train=1,  # constant 1, so a group sum gives the number of rows
    calibrated_Y1=filtered_data_for_accuracy_rd['rmse_Y1'].le(1.59).astype(int),
    calibrated_Y2=filtered_data_for_accuracy_rd['rmse_Y2'].le(2.22).astype(int),
    calibrated_Y3=filtered_data_for_accuracy_rd['rmse_Y3'].le(3.57).astype(int),
)

# Per parameter combination: total rows and calibrated rows per target, then
# uncertainty degree = 1 - calibrated / total.
filtered_data_for_accuracy_rd_result = (
    filtered_data_for_accuracy_rd
    .groupby(['g_time', 'act', 'g_per'], as_index=False)[[
        'all_sum_train', 'calibrated_Y1', 'calibrated_Y2', 'calibrated_Y3',
    ]]
    .sum()
    .assign(
        uncertainty_degree_Y1=lambda grp: 1 - (grp['calibrated_Y1'] / grp['all_sum_train']),
        uncertainty_degree_Y2=lambda grp: 1 - (grp['calibrated_Y2'] / grp['all_sum_train']),
        uncertainty_degree_Y3=lambda grp: 1 - (grp['calibrated_Y3'] / grp['all_sum_train']),
    )
)

#Accuracy
def accuracy(df, column, threshold=0.6):
    """Return the share of rows whose uncertainty degree is at most ``threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column ``'uncertainty_degree_' + column``.
    column : str
        Target suffix, e.g. ``'Y1'``.
    threshold : float, optional
        Largest uncertainty degree still counted as accurate (default 0.6).

    Returns
    -------
    float
        Fraction of rows of ``df`` within the threshold, or ``nan`` when
        ``df`` has no rows (instead of raising ZeroDivisionError).
    """
    n_rows = len(df)
    if n_rows == 0:
        # Nothing was selected, so the fraction is undefined.
        return float('nan')
    within = df['uncertainty_degree_' + column] <= threshold
    return int(within.sum()) / n_rows
    
# Score the three targets in Y1, Y2, Y3 order and report them.
accuracy_Y1, accuracy_Y2, accuracy_Y3 = (
    accuracy(filtered_data_for_accuracy_rd_result, target)
    for target in ('Y1', 'Y2', 'Y3')
)

print("train accuracy for Y1: ", accuracy_Y1)
print("train accuracy for Y2: ", accuracy_Y2)
print("train accuracy for Y3: ", accuracy_Y3)

train accuracy for Y1:  0.9230769230769231
train accuracy for Y2:  0.7307692307692307
train accuracy for Y3:  0.7692307692307693


In [331]:
# Test accuracy: flag every row whose test RMSE meets the selection
# threshold of each target.
filtered_data_for_accuracy_rd = filtered_data_for_accuracy_rd.assign(
    all_sum_test=1,  # constant 1, so a group sum gives the number of rows
    calibrated_Y1_test=filtered_data_for_accuracy_rd['rmse_Y1_test'].le(1.59).astype(int),
    calibrated_Y2_test=filtered_data_for_accuracy_rd['rmse_Y2_test'].le(2.22).astype(int),
    calibrated_Y3_test=filtered_data_for_accuracy_rd['rmse_Y3_test'].le(3.57).astype(int),
)

# Per parameter combination: total rows and calibrated rows per target, then
# uncertainty degree = 1 - calibrated / total. This rebinds the name that
# held the train result.
filtered_data_for_accuracy_rd_result = (
    filtered_data_for_accuracy_rd
    .groupby(['g_time', 'act', 'g_per'], as_index=False)[[
        'all_sum_test', 'calibrated_Y1_test', 'calibrated_Y2_test', 'calibrated_Y3_test',
    ]]
    .sum()
    .assign(
        uncertainty_degree_Y1=lambda grp: 1 - (grp['calibrated_Y1_test'] / grp['all_sum_test']),
        uncertainty_degree_Y2=lambda grp: 1 - (grp['calibrated_Y2_test'] / grp['all_sum_test']),
        uncertainty_degree_Y3=lambda grp: 1 - (grp['calibrated_Y3_test'] / grp['all_sum_test']),
    )
)

#Accuracy
def accuracy(df, column, threshold=0.6):
    """Return the share of rows whose uncertainty degree is at most ``threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column ``'uncertainty_degree_' + column``.
    column : str
        Target suffix, e.g. ``'Y1'``.
    threshold : float, optional
        Largest uncertainty degree still counted as accurate (default 0.6).

    Returns
    -------
    float
        Fraction of rows of ``df`` within the threshold, or ``nan`` when
        ``df`` has no rows (instead of raising ZeroDivisionError).
    """
    n_rows = len(df)
    if n_rows == 0:
        # Nothing was selected, so the fraction is undefined.
        return float('nan')
    within = df['uncertainty_degree_' + column] <= threshold
    return int(within.sum()) / n_rows
    
# Score the three targets in Y1, Y2, Y3 order and report them.
accuracy_Y1, accuracy_Y2, accuracy_Y3 = (
    accuracy(filtered_data_for_accuracy_rd_result, target)
    for target in ('Y1', 'Y2', 'Y3')
)

print("test accuracy for Y1: ", accuracy_Y1)
print("test accuracy for Y2: ", accuracy_Y2)
print("test accuracy for Y3: ", accuracy_Y3)

test accuracy for Y1:  1.0
test accuracy for Y2:  0.5
test accuracy for Y3:  0.6923076923076923


## Ridge Regression

In [163]:
# Reload the results and rebuild the per-combination averages, so data_agg
# starts this section without any '*_pred' columns.
data = pd.read_csv(filepath_or_buffer='../CA/Other_method/result.csv')
data_agg = (
    data
    .drop(columns=['Unnamed: 0.1', 'Unnamed: 0', 'y1', 'y2', 'y3'])
    .groupby(['g_time', 'act', 'g_per'])
    .mean()
    .reset_index()
)
data_agg.head()

Unnamed: 0,g_time,act,g_per,rmse_Y3,rmse_Y1,rmse_Y2,rmse_Y3_test,rmse_Y1_test,rmse_Y2_test
0,30,0.05,0.1,6.748237,1.227077,1.823021,8.54045,1.854858,3.749179
1,30,0.05,0.2,4.289127,1.396292,2.22012,5.684172,1.170235,2.544679
2,30,0.05,0.3,6.057574,1.847569,2.22112,7.564624,2.307075,3.662435
3,30,0.05,0.4,5.006639,1.812709,2.542025,5.954443,1.582546,2.615103
4,30,0.05,0.5,3.923151,2.086422,2.97078,4.566449,1.628368,2.307886


In [332]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error

def ridgeRegression(data_agg, y_column):
    """Fit a ridge-regression surrogate for one target and store its predictions.

    A 5-fold grid search selects ``alpha`` from ``[0.1, 1, 10]``; the selected
    model, refit on every row, then predicts the target for every row of
    ``data_agg``.

    Parameters
    ----------
    data_agg : pandas.DataFrame
        Must contain the feature columns ``g_time``, ``act``, ``g_per`` and the
        column named by ``y_column``. Modified in place.
    y_column : str
        Name of the target column to model.

    Returns
    -------
    None
        Predictions are written to ``data_agg[y_column + '_pred']`` and the
        best ``alpha`` is printed.
    """
    # Features and target.
    X = data_agg[['g_time', 'act', 'g_per']]
    Y = data_agg[y_column]

    # NOTE(review): the features are passed unscaled although the displayed
    # rows show very different magnitudes (g_time in the tens/hundreds, act
    # and g_per below 1); the ridge penalty is scale-sensitive - confirm that
    # skipping standardisation is intended.
    ridge = Ridge()

    # Candidate regularisation strengths.
    parameters = {'alpha': [0.1, 1, 10]}

    # No `scoring` is passed, so candidates are ranked by the estimator's
    # default score (R^2 for regressors).
    grid_search = GridSearchCV(ridge, parameters, cv=5)
    grid_search.fit(X, Y)
    best_alpha = grid_search.best_params_['alpha']
    print("Y: ", y_column, 'Best alpha:', best_alpha)

    # GridSearchCV refits the best candidate on all of X, Y by default, so
    # reuse that model instead of fitting the same Ridge a second time.
    # NOTE(review): these are predictions on the training rows themselves.
    data_agg[y_column + '_pred'] = grid_search.best_estimator_.predict(X)

In [333]:
# Fit one ridge surrogate per target; each call adds a '<target>_pred'
# column to data_agg.
for rmse_target in ('rmse_Y1', 'rmse_Y2', 'rmse_Y3'):
    ridgeRegression(data_agg, rmse_target)

Y:  rmse_Y1 Best alpha: 0.1
Y:  rmse_Y2 Best alpha: 0.1
Y:  rmse_Y3 Best alpha: 10


### result

In [345]:
# Combinations whose predicted RMSE is within each target's selection threshold.
result_Y1_rr = data_agg.loc[data_agg['rmse_Y1_pred'].le(1.59)]
result_Y2_rr = data_agg.loc[data_agg['rmse_Y2_pred'].le(2.22)]
result_Y3_rr = data_agg.loc[data_agg['rmse_Y3_pred'].le(3.57)]

In [346]:
# Stack the three per-target selections; every row gets a count of 1 so the
# combinations present in all of Y1, Y2 and Y3 can be found by summing.
result_rr = pd.concat(
    [result_Y1_rr, result_Y2_rr, result_Y3_rr],
    ignore_index=True,  # already produces a fresh 0..n-1 index
).assign(sum2=1)
print(len(result_rr))

207


In [347]:
# Count how many of the three selections contain each combination and keep
# the ones found in all of them (count > 2).
result_rr = (
    result_rr
    .groupby(['g_time', 'act', 'g_per'], as_index=False)[['sum2']]
    .sum()
)
result_rr_rmsesel = result_rr.loc[result_rr['sum2'].gt(2)]
len(result_rr_rmsesel)

37

In [348]:
# Preview the combinations selected for all three targets.
result_rr_rmsesel.head(n=5)

Unnamed: 0,g_time,act,g_per,sum2
11,240,0.05,0.1,3
12,240,0.05,0.2,3
13,240,0.05,0.3,3
14,240,0.05,0.4,3
31,270,0.05,0.1,3


## accuracy

In [349]:
# Train / test observation files, one pair per target.
Path = '../CA/Data/test_trainData/'

trainY1, trainY2, trainY3 = (
    np.loadtxt(Path + file_name)
    for file_name in (
        "train_data_forY1_80.txt",
        "train_data_forY2_80.txt",
        "train_data_forY3_80.txt",
    )
)

testY1, testY2, testY3 = (
    np.loadtxt(Path + file_name)
    for file_name in (
        "test_data_forY1_20.txt",
        "test_data_forY2_20.txt",
        "test_data_forY3_20_2.txt",  # Y3 reads the file with the extra '_2' suffix
    )
)

In [350]:
# Row-level table (no averaging) used to score the selected combinations;
# reset_index() keeps the previous row labels as an 'index' column.
data_for_accuracy_rr = (
    data
    .drop(columns=['Unnamed: 0.1', 'Unnamed: 0', 'y1', 'y2', 'y3'])
    .reset_index()
)

In [351]:
# Distinct parameter combinations selected by the ridge surrogate.
unique_combinations = result_rr_rmsesel.loc[:, ['g_time', 'act', 'g_per']].drop_duplicates()

# Inner join: keep only the rows of data_for_accuracy_rr whose parameters
# match one of the selected combinations.
filtered_data_for_accuracy_rr = pd.merge(
    data_for_accuracy_rr,
    unique_combinations,
    how='inner',
    on=['g_time', 'act', 'g_per'],
)

In [354]:
# Train accuracy: flag every row whose train RMSE meets the selection
# threshold of each target.
filtered_data_for_accuracy_rr = filtered_data_for_accuracy_rr.assign(
    all_sum_train=1,  # constant 1, so a group sum gives the number of rows
    calibrated_Y1=filtered_data_for_accuracy_rr['rmse_Y1'].le(1.59).astype(int),
    calibrated_Y2=filtered_data_for_accuracy_rr['rmse_Y2'].le(2.22).astype(int),
    calibrated_Y3=filtered_data_for_accuracy_rr['rmse_Y3'].le(3.57).astype(int),
)

# Per parameter combination: total rows and calibrated rows per target, then
# uncertainty degree = 1 - calibrated / total.
filtered_data_for_accuracy_rr_result = (
    filtered_data_for_accuracy_rr
    .groupby(['g_time', 'act', 'g_per'], as_index=False)[[
        'all_sum_train', 'calibrated_Y1', 'calibrated_Y2', 'calibrated_Y3',
    ]]
    .sum()
    .assign(
        uncertainty_degree_Y1=lambda grp: 1 - (grp['calibrated_Y1'] / grp['all_sum_train']),
        uncertainty_degree_Y2=lambda grp: 1 - (grp['calibrated_Y2'] / grp['all_sum_train']),
        uncertainty_degree_Y3=lambda grp: 1 - (grp['calibrated_Y3'] / grp['all_sum_train']),
    )
)

#Accuracy
def accuracy(df, column, threshold=0.6):
    """Return the share of rows whose uncertainty degree is at most ``threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column ``'uncertainty_degree_' + column``.
    column : str
        Target suffix, e.g. ``'Y1'``.
    threshold : float, optional
        Largest uncertainty degree still counted as accurate (default 0.6).

    Returns
    -------
    float
        Fraction of rows of ``df`` within the threshold, or ``nan`` when
        ``df`` has no rows (instead of raising ZeroDivisionError).
    """
    n_rows = len(df)
    if n_rows == 0:
        # Nothing was selected, so the fraction is undefined.
        return float('nan')
    within = df['uncertainty_degree_' + column] <= threshold
    return int(within.sum()) / n_rows
    
# Score the three targets in Y1, Y2, Y3 order and report them.
accuracy_Y1, accuracy_Y2, accuracy_Y3 = (
    accuracy(filtered_data_for_accuracy_rr_result, target)
    for target in ('Y1', 'Y2', 'Y3')
)

print("train accuracy for Y1: ", accuracy_Y1)
print("train accuracy for Y2: ", accuracy_Y2)
print("train accuracy for Y3: ", accuracy_Y3)

train accuracy for Y1:  0.8648648648648649
train accuracy for Y2:  0.7837837837837838
train accuracy for Y3:  0.4594594594594595


In [356]:
# Test accuracy: flag every row whose test RMSE meets the selection
# threshold of each target.
filtered_data_for_accuracy_rr = filtered_data_for_accuracy_rr.assign(
    all_sum_test=1,  # constant 1, so a group sum gives the number of rows
    calibrated_Y1_test=filtered_data_for_accuracy_rr['rmse_Y1_test'].le(1.59).astype(int),
    calibrated_Y2_test=filtered_data_for_accuracy_rr['rmse_Y2_test'].le(2.22).astype(int),
    calibrated_Y3_test=filtered_data_for_accuracy_rr['rmse_Y3_test'].le(3.57).astype(int),
)

# Per parameter combination: total rows and calibrated rows per target, then
# uncertainty degree = 1 - calibrated / total. This rebinds the name that
# held the train result.
filtered_data_for_accuracy_rr_result = (
    filtered_data_for_accuracy_rr
    .groupby(['g_time', 'act', 'g_per'], as_index=False)[[
        'all_sum_test', 'calibrated_Y1_test', 'calibrated_Y2_test', 'calibrated_Y3_test',
    ]]
    .sum()
    .assign(
        uncertainty_degree_Y1=lambda grp: 1 - (grp['calibrated_Y1_test'] / grp['all_sum_test']),
        uncertainty_degree_Y2=lambda grp: 1 - (grp['calibrated_Y2_test'] / grp['all_sum_test']),
        uncertainty_degree_Y3=lambda grp: 1 - (grp['calibrated_Y3_test'] / grp['all_sum_test']),
    )
)

#Accuracy
def accuracy(df, column, threshold=0.6):
    """Return the share of rows whose uncertainty degree is at most ``threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column ``'uncertainty_degree_' + column``.
    column : str
        Target suffix, e.g. ``'Y1'``.
    threshold : float, optional
        Largest uncertainty degree still counted as accurate (default 0.6).

    Returns
    -------
    float
        Fraction of rows of ``df`` within the threshold, or ``nan`` when
        ``df`` has no rows (instead of raising ZeroDivisionError).
    """
    n_rows = len(df)
    if n_rows == 0:
        # Nothing was selected, so the fraction is undefined.
        return float('nan')
    within = df['uncertainty_degree_' + column] <= threshold
    return int(within.sum()) / n_rows
    
# Score the three targets in Y1, Y2, Y3 order and report them.
accuracy_Y1, accuracy_Y2, accuracy_Y3 = (
    accuracy(filtered_data_for_accuracy_rr_result, target)
    for target in ('Y1', 'Y2', 'Y3')
)

print("test accuracy for Y1: ", accuracy_Y1)
print("test accuracy for Y2: ", accuracy_Y2)
print("test accuracy for Y3: ", accuracy_Y3)

test accuracy for Y1:  0.918918918918919
test accuracy for Y2:  0.3783783783783784
test accuracy for Y3:  0.3783783783783784


## Traditional Parameter space searching

In [198]:
# This section reads 'result_all.csv' (not 'result.csv') and rebuilds the
# per-combination averages from it.
data = pd.read_csv(filepath_or_buffer='../CA/Other_method/result_all.csv')
data_agg = (
    data
    .drop(columns=['Unnamed: 0.1', 'Unnamed: 0', 'y1', 'y2', 'y3'])
    .groupby(['g_time', 'act', 'g_per'])
    .mean()
    .reset_index()
)
data_agg.head()

Unnamed: 0,g_time,act,g_per,rmse_Y3,rmse_Y1,rmse_Y2,rmse_Y3_test,rmse_Y1_test,rmse_Y2_test
0,30,0.05,0.1,6.748237,1.227077,1.823021,8.54045,1.854858,3.749179
1,30,0.05,0.2,4.289127,1.396292,2.22012,5.684172,1.170235,2.544679
2,30,0.05,0.3,6.057574,1.847569,2.22112,7.564624,2.307075,3.662435
3,30,0.05,0.4,5.006639,1.812709,2.542025,5.954443,1.582546,2.615103
4,30,0.05,0.5,3.923151,2.086422,2.97078,4.566449,1.628368,2.307886


### result

In [216]:
# No surrogate here: select on the averaged RMSE columns themselves.
result_Y1_tr = data_agg.loc[data_agg['rmse_Y1'].le(1.59)]
result_Y2_tr = data_agg.loc[data_agg['rmse_Y2'].le(2.22)]
result_Y3_tr = data_agg.loc[data_agg['rmse_Y3'].le(3.57)]

In [217]:
# Stack the three per-target selections; every row gets a count of 1 so the
# combinations present in all of Y1, Y2 and Y3 can be found by summing.
result_tr = pd.concat(
    [result_Y1_tr, result_Y2_tr, result_Y3_tr],
    ignore_index=True,  # already produces a fresh 0..n-1 index
).assign(sum2=1)
print(len(result_tr))

390


In [219]:
# Count how many of the three selections contain each combination and keep
# the ones found in all of them (count > 2).
result_tr = (
    result_tr
    .groupby(['g_time', 'act', 'g_per'], as_index=False)[['sum2']]
    .sum()
)
result_tr_rmsesel = result_tr.loc[result_tr['sum2'].gt(2)]
len(result_tr_rmsesel)

10

## accuracy

In [222]:
# Train / test observation files, one pair per target.
Path = '../CA/Data/test_trainData/'

trainY1, trainY2, trainY3 = (
    np.loadtxt(Path + file_name)
    for file_name in (
        "train_data_forY1_80.txt",
        "train_data_forY2_80.txt",
        "train_data_forY3_80.txt",
    )
)

testY1, testY2, testY3 = (
    np.loadtxt(Path + file_name)
    for file_name in (
        "test_data_forY1_20.txt",
        "test_data_forY2_20.txt",
        "test_data_forY3_20_2.txt",  # Y3 reads the file with the extra '_2' suffix
    )
)

In [223]:
# Row-level table (no averaging) used to score the selected combinations;
# reset_index() keeps the previous row labels as an 'index' column.
data_for_accuracy_tr = (
    data
    .drop(columns=['Unnamed: 0.1', 'Unnamed: 0', 'y1', 'y2', 'y3'])
    .reset_index()
)

In [225]:
# Distinct parameter combinations selected by the direct search.
unique_combinations = result_tr_rmsesel.loc[:, ['g_time', 'act', 'g_per']].drop_duplicates()

# Inner join: keep only the rows of data_for_accuracy_tr whose parameters
# match one of the selected combinations.
filtered_data_for_accuracy_tr = pd.merge(
    data_for_accuracy_tr,
    unique_combinations,
    how='inner',
    on=['g_time', 'act', 'g_per'],
)

In [238]:
# Train accuracy: flag every row whose train RMSE meets the selection
# threshold of each target.
filtered_data_for_accuracy_tr = filtered_data_for_accuracy_tr.assign(
    all_sum_train=1,  # constant 1, so a group sum gives the number of rows
    calibrated_Y1=filtered_data_for_accuracy_tr['rmse_Y1'].le(1.59).astype(int),
    calibrated_Y2=filtered_data_for_accuracy_tr['rmse_Y2'].le(2.22).astype(int),
    calibrated_Y3=filtered_data_for_accuracy_tr['rmse_Y3'].le(3.57).astype(int),
)

# Per parameter combination: total rows and calibrated rows per target, then
# uncertainty degree = 1 - calibrated / total.
filtered_data_for_accuracy_tr_result = (
    filtered_data_for_accuracy_tr
    .groupby(['g_time', 'act', 'g_per'], as_index=False)[[
        'all_sum_train', 'calibrated_Y1', 'calibrated_Y2', 'calibrated_Y3',
    ]]
    .sum()
    .assign(
        uncertainty_degree_Y1=lambda grp: 1 - (grp['calibrated_Y1'] / grp['all_sum_train']),
        uncertainty_degree_Y2=lambda grp: 1 - (grp['calibrated_Y2'] / grp['all_sum_train']),
        uncertainty_degree_Y3=lambda grp: 1 - (grp['calibrated_Y3'] / grp['all_sum_train']),
    )
)

#Accuracy
def accuracy(df, column, threshold=0.7):
    """Return the share of rows whose uncertainty degree is at most ``threshold``.

    NOTE(review): the default here is 0.7, while the random-forest and ridge
    sections of this notebook use 0.6 for the same metric, so the reported
    accuracies are not measured against the same cut-off - confirm this is
    intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column ``'uncertainty_degree_' + column``.
    column : str
        Target suffix, e.g. ``'Y1'``.
    threshold : float, optional
        Largest uncertainty degree still counted as accurate (default 0.7).

    Returns
    -------
    float
        Fraction of rows of ``df`` within the threshold, or ``nan`` when
        ``df`` has no rows (instead of raising ZeroDivisionError).
    """
    n_rows = len(df)
    if n_rows == 0:
        # Nothing was selected, so the fraction is undefined.
        return float('nan')
    within = df['uncertainty_degree_' + column] <= threshold
    return int(within.sum()) / n_rows
    
# Score the three targets in Y1, Y2, Y3 order and report them.
accuracy_Y1, accuracy_Y2, accuracy_Y3 = (
    accuracy(filtered_data_for_accuracy_tr_result, target)
    for target in ('Y1', 'Y2', 'Y3')
)

print("train accuracy for Y1: ", accuracy_Y1)
print("train accuracy for Y2: ", accuracy_Y2)
print("train accuracy for Y3: ", accuracy_Y3)

train accuracy for Y1:  1.0
train accuracy for Y2:  0.9
train accuracy for Y3:  0.8


In [255]:
# Test accuracy: flag every row whose test RMSE meets the selection
# threshold of each target.
filtered_data_for_accuracy_tr = filtered_data_for_accuracy_tr.assign(
    all_sum_test=1,  # constant 1, so a group sum gives the number of rows
    calibrated_Y1_test=filtered_data_for_accuracy_tr['rmse_Y1_test'].le(1.59).astype(int),
    calibrated_Y2_test=filtered_data_for_accuracy_tr['rmse_Y2_test'].le(2.22).astype(int),
    calibrated_Y3_test=filtered_data_for_accuracy_tr['rmse_Y3_test'].le(3.57).astype(int),
)

# Per parameter combination: total rows and calibrated rows per target, then
# uncertainty degree = 1 - calibrated / total. This rebinds the name that
# held the train result.
filtered_data_for_accuracy_tr_result = (
    filtered_data_for_accuracy_tr
    .groupby(['g_time', 'act', 'g_per'], as_index=False)[[
        'all_sum_test', 'calibrated_Y1_test', 'calibrated_Y2_test', 'calibrated_Y3_test',
    ]]
    .sum()
    .assign(
        uncertainty_degree_Y1=lambda grp: 1 - (grp['calibrated_Y1_test'] / grp['all_sum_test']),
        uncertainty_degree_Y2=lambda grp: 1 - (grp['calibrated_Y2_test'] / grp['all_sum_test']),
        uncertainty_degree_Y3=lambda grp: 1 - (grp['calibrated_Y3_test'] / grp['all_sum_test']),
    )
)

#Accuracy
def accuracy(df, column, threshold=0.55):
    """Return the share of rows whose uncertainty degree is at most ``threshold``.

    NOTE(review): the default here is 0.55, while this section's train
    accuracy uses 0.7 and the random-forest and ridge sections use 0.6, so
    the reported accuracies are not measured against the same cut-off -
    confirm this is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column ``'uncertainty_degree_' + column``.
    column : str
        Target suffix, e.g. ``'Y1'``.
    threshold : float, optional
        Largest uncertainty degree still counted as accurate (default 0.55).

    Returns
    -------
    float
        Fraction of rows of ``df`` within the threshold, or ``nan`` when
        ``df`` has no rows (instead of raising ZeroDivisionError).
    """
    n_rows = len(df)
    if n_rows == 0:
        # Nothing was selected, so the fraction is undefined.
        return float('nan')
    within = df['uncertainty_degree_' + column] <= threshold
    return int(within.sum()) / n_rows
    
# Score the three targets in Y1, Y2, Y3 order and report them.
accuracy_Y1, accuracy_Y2, accuracy_Y3 = (
    accuracy(filtered_data_for_accuracy_tr_result, target)
    for target in ('Y1', 'Y2', 'Y3')
)

print("test accuracy for Y1: ", accuracy_Y1)
print("test accuracy for Y2: ", accuracy_Y2)
print("test accuracy for Y3: ", accuracy_Y3)

test accuracy for Y1:  1.0
test accuracy for Y2:  0.7
test accuracy for Y3:  0.6


In [249]:
# Display the per-combination table that the accuracy figures are computed from.
# NOTE(review): this cell's execution count (249) is lower than that of the
# test-accuracy cell before it (255), so the saved output may not reflect the
# latest run - re-run top to bottom to confirm.
filtered_data_for_accuracy_tr_result

Unnamed: 0,g_time,act,g_per,all_sum_test,calibrated_Y1_test,calibrated_Y2_test,calibrated_Y3_test,uncertainty_degree_Y1,uncertainty_degree_Y2,uncertainty_degree_Y3
0,30,0.1,0.3,1,1,0,0,0.0,1.0,1.0
1,90,0.1,0.1,4,4,3,3,0.0,0.25,0.25
2,150,0.05,0.3,3,3,2,3,0.0,0.333333,0.0
3,150,0.15,0.1,5,5,1,2,0.0,0.8,0.6
4,210,0.05,0.6,14,14,13,6,0.0,0.071429,0.571429
5,240,0.1,0.5,25,23,11,8,0.08,0.56,0.68
6,270,0.05,0.6,24,24,18,12,0.0,0.25,0.5
7,270,0.1,0.4,11,10,5,4,0.090909,0.545455,0.636364
8,270,0.1,0.5,28,26,18,15,0.071429,0.357143,0.464286
9,300,0.1,0.6,20,20,13,8,0.0,0.35,0.6
