In [8]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score

In [18]:
# Directory (with trailing slash) that holds the ground-truth dbi_<category>.csv files.
TEST_DATA_PATH = "./dataset/test/"

# Drawing categories that are evaluated; each name selects one CSV per loader.
HTP_LIST = [
    "house",
    "tree",
    "person",
]

In [19]:
# Per-category lookup tables: {category: {column: {raw label: integer code}}}.
# The inner {column: {...}} dictionaries are passed to DataFrame.replace when
# the ground-truth CSVs are loaded.
label_mapper = {
    "house": {
        "door_yn": {"n": 0, "y": 1},
        "loc": {"left": 0, "center": 1, "right": 2},
        "roof_yn": {"y": 1, "n": 0},
        "window_cnt": {"absence": 0, "1 or 2": 1, "more than 3": 2},
        "size": {"small": 0, "middle": 1, "big": 2},
    },
    "tree": {
        "branch_yn": {"n": 0, "y": 1},
        "root_yn": {"n": 0, "y": 1},
        "crown_yn": {"n": 0, "y": 1},
        "fruit_yn": {"n": 0, "y": 1},
        "gnarl_yn": {"n": 0, "y": 1},
        "loc": {"left": 0, "center": 1, "right": 2},
        "size": {"small": 0, "middle": 1, "big": 2},
    },
    "person": {
        "eye_yn": {"n": 0, "y": 1},
        "leg_yn": {"n": 0, "y": 1},
        "loc": {"left": 0, "center": 1, "right": 2},
        "mouth_yn": {"n": 0, "y": 1},
        "size": {"small": 0, "middle": 1, "big": 2},
        "arm_yn": {"n": 0, "y": 1},
    },
}

In [23]:
class LoadData:
    """CSV loaders for the ground-truth and prediction tables.

    Both loaders are static methods, so they work when called on the class
    (``LoadData.load_test_data(...)``) as well as on an instance.
    """

    @staticmethod
    def load_test_data(path, htp_list, mapper=None):
        """Read one ground-truth CSV per category and encode its labels.

        Args:
            path: Directory containing the ``dbi_<category>.csv`` files. A
                trailing path separator is optional.
            htp_list: Iterable of category names; each must be a key of
                ``mapper``.
            mapper: Nested ``{category: {column: {label: code}}}`` mapping.
                Defaults to the notebook-level ``label_mapper``.

        Returns:
            list[pandas.DataFrame]: One frame per category, in ``htp_list``
            order, with mapped labels replaced by their codes and the first
            CSV column dropped.

        Raises:
            KeyError: If a category is missing from ``mapper``.
            FileNotFoundError: If a category's CSV does not exist.
        """
        if mapper is None:
            # Resolved at call time so the notebook-level table stays the default.
            mapper = label_mapper

        test_data_list = []
        for htp in htp_list:
            raw = pd.read_csv(os.path.join(path, f'dbi_{htp}.csv'))
            # Nested-dict replace is column-scoped; only afterwards is the
            # leading (non-feature) column stripped.
            encoded = raw.replace(mapper[htp]).iloc[:, 1:]
            test_data_list.append(encoded)

        return test_data_list

    @staticmethod
    def load_pred_data(htp_list, path='./'):
        """Read one prediction CSV per category.

        Args:
            htp_list: Iterable of category names.
            path: Directory containing the ``ensemble_<category>_output.csv``
                files. Defaults to the current directory.

        Returns:
            list[pandas.DataFrame]: One frame per category, in ``htp_list``
            order, with the first CSV column dropped.

        Raises:
            FileNotFoundError: If a category's CSV does not exist.
        """
        pred_data_list = []
        for htp in htp_list:
            pred_data = pd.read_csv(os.path.join(path, f'ensemble_{htp}_output.csv'))
            pred_data_list.append(pred_data.iloc[:, 1:])

        return pred_data_list

In [24]:
# Preview the label-encoded ground-truth frames (one DataFrame per entry in HTP_LIST).
LoadData.load_test_data(TEST_DATA_PATH, HTP_LIST)

[     door_yn  loc  roof_yn  window_cnt  size
 0          1    1        0           1     1
 1          0    1        1           2     1
 2          1    1        1           0     1
 3          1    1        1           0     1
 4          0    1        0           2     2
 ..       ...  ...      ...         ...   ...
 995        1    1        1           2     0
 996        1    1        1           2     1
 997        0    1        1           1     2
 998        1    1        1           1     1
 999        1    1        1           1     1
 
 [1000 rows x 5 columns],
      branch_yn root_yn crown_yn fruit_yn gnarl_yn   loc  size
 0            0       0        1        1        0     0     1
 1            0       0        1        0        0     1     0
 2            0       0        1        0        0     1     0
 3            1       0        0        0        0     0     0
 4            0       0        1        1        1     1     2
 ...        ...     ...      ...      ... 

In [22]:
# Preview the prediction frames read from ./ensemble_<category>_output.csv
# (one DataFrame per entry in HTP_LIST).
LoadData.load_pred_data(HTP_LIST)

[   door_yn  loc  roof_yn  window_cnt  size
 0        1    1        1           1     0
 1        1    1        1           1     1
 2        1    0        1           1     0
 3        0    1        1           1     0
 4        1    1        1           0     0
 5        0    1        1           2     0
 6        1    1        1           1     0
 7        1    1        1           1     1
 8        1    1        1           2     0
 9        1    1        1           1     0,
    branch_yn  root_yn  crown_yn  fruit_yn  gnarl_yn  loc  size
 0          0        1         1         0         0    1     0
 1          0        0         1         1         1    1     0
 2          1        1         1         0         1    1     0
 3          1        0         1         0         0    1     0
 4          0        0         1         0         0    1     0
 5          1        1         1         0         0    1     0
 6          0        0         1         0         0    1     0
 7 

In [8]:
# Read every CSV exactly once. The frames do not change between features, so
# reloading them inside the metric calls was loop-invariant I/O.
test_frames = LoadData.load_test_data(TEST_DATA_PATH, HTP_LIST)
pred_frames = LoadData.load_pred_data(HTP_LIST)

# Per-category means of the per-feature accuracy / macro-F1 scores.
accuracy_list = []
f1_list = []
for htp, test_frame, pred_frame in zip(HTP_LIST, test_frames, pred_frames):
    feature_count = len(test_frame.columns)
    if feature_count == 0:
        # Without this guard the mean below would divide by zero.
        raise ValueError(f'No feature columns to score for {htp!r}')

    accuracy_sum = 0
    f1_sum = 0
    print(f'{htp}')
    for column in test_frame.columns:
        # Columns are matched by name; the prediction frame must contain every
        # ground-truth column and the same number of rows.
        y_true = test_frame[column]
        y_pred = pred_frame[column]
        accuracy = accuracy_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred, average='macro')
        accuracy_sum = accuracy_sum + accuracy
        f1_sum = f1_sum + f1
        print(f"Feature: {column}")
        print(f"Accuracy: {accuracy:.4f}")
        print(f"F1 Score (Macro average): {f1:.4f}")
        print("-" * 40)

    # Divide by the explicit column count rather than a leaked loop index.
    accuracy_list.append(accuracy_sum / feature_count)
    f1_list.append(f1_sum / feature_count)

# Unweighted mean over categories: each category counts equally regardless of
# how many features it has.
htp_accuracy_average = sum(accuracy_list) / len(accuracy_list)
htp_f1_average = sum(f1_list) / len(f1_list)
print(f'Total Accuracy Average:{htp_accuracy_average:.4f}')
print(f'Total F1 Score Average:{htp_f1_average:.4f}')

house
Feature: door_yn
Accuracy: 0.9000
F1 Score (Macro average): 0.8667
----------------------------------------
Feature: loc
Accuracy: 1.0000
F1 Score (Macro average): 1.0000
----------------------------------------
Feature: roof_yn
Accuracy: 1.0000
F1 Score (Macro average): 1.0000
----------------------------------------
Feature: window_cnt
Accuracy: 1.0000
F1 Score (Macro average): 1.0000
----------------------------------------
Feature: size
Accuracy: 0.5000
F1 Score (Macro average): 0.2564
----------------------------------------
tree
Feature: branch_yn
Accuracy: 0.8000
F1 Score (Macro average): 0.7917
----------------------------------------
Feature: root_yn
Accuracy: 0.9000
F1 Score (Macro average): 0.8901
----------------------------------------
Feature: crown_yn
Accuracy: 1.0000
F1 Score (Macro average): 1.0000
----------------------------------------
Feature: fruit_yn
Accuracy: 0.9000
F1 Score (Macro average): 0.8667
----------------------------------------
Feature: gnarl_yn

In [None]:
# NOTE(review): scratch arithmetic; nothing in the notebook shows where these
# operands come from. Presumably three per-category scores being summed —
# confirm and label the result, or delete this cell.
83 + 83 + 82

In [None]:
# NOTE(review): scratch arithmetic; nothing in the notebook shows where these
# operands come from. Presumably per-category scores for three runs — confirm
# and label, or delete this cell.
# Only the last expression's value is displayed by the notebook; the first two
# sums are evaluated and discarded.
83 + 83 + 82
84 + 86 + 82
93 + 90 + 89