작성자: 서승아

마지막 수정일: 2023.02.21.

목적: 석사 졸업 논문 코드 공유

Prepare the environment

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import glob
import re

import pandas as pd
from IPython.display import display

Define functions

In [5]:
def ListElementSubtypes(lines):
    """Group consecutive text lines into per-element subsets.

    A new subset starts at every line that begins with ``'Revit'``; the
    lines that follow, up to the next such line, belong to the same subset.
    Lines that appear before the first ``'Revit'`` line form a leading
    subset of their own.

    Args:
        lines: Iterable of text lines (strings).

    Returns:
        A list of subsets in input order, each a non-empty list of lines.
        An empty input yields an empty list (not a list holding one empty
        subset), so callers never receive a subset without a header line.
    """
    elem_subtypes = []
    subset = []

    for line in lines:
        # Only close the running subset when it already holds lines; this
        # keeps a 'Revit' line at the very start from emitting an empty one.
        if line.startswith('Revit') and subset:
            elem_subtypes.append(subset)
            subset = [line]
        else:
            subset.append(line)

    # Flush the last subset, unless the input had no lines at all.
    if subset:
        elem_subtypes.append(subset)

    return elem_subtypes


def _type_code(field):
    """Return the leading type code of a header field.

    The field is cut at the first ':', '-' or '(' (in that order of
    application) and surrounding whitespace is removed.
    """
    return field.split(':')[0].split('-')[0].split('(')[0].strip()


def CalCorrectAnswer(elem_subtypes, df_index):
    """Count elements, expected labels and correctly reported labels.

    For every subset the type code (e.g. ``ABETW``, ``ACOW``) is read from
    its first line: the text after the first ``'#'`` is split on ``'_'`` and
    field 1 is used, or field 2 when field 1 starts with a Hangul syllable.
    The code is passed to ``DetermineGroundTruth``; subsets for which no
    ground-truth label is returned are ignored. A ground-truth label counts
    as correct when the subset contains a line equal to that label.

    Args:
        elem_subtypes: List of subsets (lists of lines); the first line of
            each subset is the element header.
        df_index: Label names used to initialise the counters. It must
            contain ``'Total'`` and every label ``DetermineGroundTruth``
            can return, otherwise a ``KeyError`` is raised while counting.

    Returns:
        A tuple ``(total_elem, total_typ, correct_typ)`` of dicts:
        element counts per ``'Wall'``/``'Slab'``/``'Total'``, expected label
        counts per entry of ``df_index``, and correct label counts per entry
        of ``df_index``.

    Raises:
        IndexError: If a non-empty subset's header does not contain the
            ``'#'`` / ``'_'`` separated fields described above.
    """
    # Last character of the first ground-truth label -> element category.
    elem_key = {'l': 'Wall', 'b': 'Slab', 's': 'Wall'}

    total_elem = {'Wall': 0, 'Slab': 0, 'Total': 0}
    total_typ = {key: 0 for key in df_index}
    correct_typ = {key: 0 for key in df_index}

    for elem_subtype in elem_subtypes:
        # A subset without lines has no header to parse.
        if not elem_subtype:
            continue

        header_fields = elem_subtype[0].split('#')[1].split('_')
        code = _type_code(header_fields[1])

        # When field 1 begins with a Hangul syllable the code is taken
        # from the next field instead.
        if re.match('[가-힣]', code) is not None:
            code = _type_code(header_fields[2])

        # No type code means no ground truth can be derived.
        if not code:
            continue

        gr_truth = DetermineGroundTruth(code)
        if not gr_truth:
            continue

        # Count all elements
        total_elem['Total'] += 1
        total_elem[elem_key[gr_truth[0][-1]]] += 1

        for val in gr_truth:
            # Count all types
            total_typ[val] += 1
            total_typ['Total'] += 1

            # Count only correct answers
            if val in elem_subtype:
                correct_typ[val] += 1
                correct_typ['Total'] += 1

    return total_elem, total_typ, correct_typ


# Substring tags looked up in the subtype part of a type code.
_EXT_GENERIC_TAGS = ('OUT', 'BS', 'FT', 'TT', 'SID')
_OPENING_TAGS = ('LI', 'LE', 'LB')
_INTERNAL_TAGS = ('CI', 'FH', 'FV', 'TH', 'TV', 'TIN', 'HOR', 'VER')
_ASCII_DIGITS = frozenset('0123456789')


def _has_any(text, tags):
    """Return True if at least one of *tags* occurs as a substring of *text*."""
    return any(tag in text for tag in tags)


def DetermineGroundTruth(code):
    """Map a type code to its list of expected labels.

    The last character of ``code`` selects the element type and the
    remaining prefix is searched for subtype tags (substring match).

    Args:
        code: Type code string such as ``'ABETW'``.

    Returns:
        A list of label strings. Wall-like codes (ending in ``W``, ``H``,
        ``B``, ``A`` or an ASCII digit) yield zero, one or two wall labels;
        codes ending in ``S`` always yield two slab labels. An empty code
        or an unrecognised code yields an empty list.
    """
    # Nothing to classify; avoids indexing into an empty string.
    if not code:
        return []

    elem_type = code[-1]
    elem_subtype = code[:-1]
    with_openings = ['External_Wall', 'Wall_with_Openings']

    if elem_type == 'W':
        # The tag groups are checked in priority order; the first hit wins.
        if _has_any(elem_subtype, _EXT_GENERIC_TAGS):
            return ['External_Wall', 'Generic_Wall']
        if _has_any(elem_subtype, _OPENING_TAGS):
            return with_openings
        if _has_any(elem_subtype, _INTERNAL_TAGS):
            return ['Internal_Wall']
        return []

    if elem_type == 'H':
        if 'PA' in elem_subtype:
            return ['External_Wall']
        if _has_any(elem_subtype, _OPENING_TAGS):
            return with_openings
        return []

    if elem_type == 'B':
        return with_openings if _has_any(elem_subtype, _OPENING_TAGS) else []

    if elem_type == 'A':
        return ['External_Wall'] if 'AP' in elem_subtype else []

    if elem_type in _ASCII_DIGITS:
        return with_openings

    if elem_type == 'S':
        # Two independent decisions: slope label first, then usage label.
        slope = 'Sloped Slab' if 'ER' in elem_subtype else 'Flat Slab'
        usage = 'Toilet Slab' if 'TO' in elem_subtype else 'Generic Slab'
        return [slope, usage]

    return []


def CalByModel(model_name, df_index, r_path=None):
    """Read a model's output text files and count its labels.

    Every number in ``model_name`` is turned into the file name
    ``total_process_NNN.txt`` (zero-padded to three digits) under
    ``r_path``. The lines of all files are concatenated, grouped with
    ``ListElementSubtypes`` and counted with ``CalCorrectAnswer``.

    Args:
        model_name: Iterable of integer file numbers belonging to one model.
        df_index: Label names forwarded to ``CalCorrectAnswer``.
        r_path: Root directory that holds the text files. When omitted, the
            placeholder below is used and has to be edited by the user.

    Returns:
        A tuple of three lists of ``(key, count)`` pairs: element counts,
        expected label counts and correct label counts.

    Raises:
        OSError: If one of the text files cannot be opened.
    """
    if r_path is None:
        ## Change r_path
        r_path = r'Write ROOT Directory of GH output text files'

    # Merge all info in files into int_line
    int_line = []
    for xls_n in model_name:
        f_name = f'total_process_{xls_n:03d}.txt'
        file = os.path.join(r_path, f_name)

        with open(file, 'r', encoding='utf-8') as f:
            int_line.extend(f.read().splitlines())

    # Calculate accuracy for the model
    elem_subtypes = ListElementSubtypes(int_line)
    total_elem, total_typ, correct_typ = CalCorrectAnswer(elem_subtypes, df_index)

    return list(total_elem.items()), list(total_typ.items()), list(correct_typ.items())


def CalFinalAccuracy(e_vision, e_milyang, e_seosan, df_index, df_col):
    """Build the count and accuracy tables for three models.

    Each of the three file-number lists is evaluated with ``CalByModel``
    and its counts are written into the column ``df_col[0]``, ``df_col[1]``
    and ``df_col[2]`` respectively. A ``'total'`` column holding the row sum
    is then set on every count table, and the accuracy (correct / expected
    * 100, rounded to one decimal) is computed for every cell of
    ``df_index`` x ``df_col``.

    Args:
        e_vision: File numbers of the first model.
        e_milyang: File numbers of the second model.
        e_seosan: File numbers of the third model.
        df_index: Row labels of the label tables.
        df_col: Column labels; needs at least three entries for the models.
            Accuracy for the row sums is only produced when ``'total'`` is
            one of the entries.

    Returns:
        A tuple ``(df_elem, df_typ, df_correct, df_acc)`` of DataFrames:
        element counts, expected label counts, correct label counts and
        accuracy in percent. Cells whose expected count is zero keep an
        accuracy of 0.0.
    """
    # Define dataframe for data organizing
    df_elem = pd.DataFrame(0, index=['Wall', 'Slab', 'Total'], columns=df_col)
    df_typ = pd.DataFrame(0, index=df_index, columns=df_col)
    df_correct = pd.DataFrame(0, index=df_index, columns=df_col)
    # Float from the start: writing floats into an integer column relies on
    # implicit upcasting, which newer pandas versions deprecate.
    df_acc = pd.DataFrame(0.0, index=df_index, columns=df_col)

    # Fill df by model
    for i, model_name in enumerate([e_vision, e_milyang, e_seosan]):
        total_elem, total_typ, correct_typ = CalByModel(model_name, df_index)
        col = df_col[i]
        for frame, counts in ((df_elem, total_elem),
                              (df_typ, total_typ),
                              (df_correct, correct_typ)):
            for label, count in counts:
                frame.at[label, col] = count

    # Calculate sum of df_row
    df_elem['total'] = df_elem.sum(axis=1)
    df_typ['total'] = df_typ.sum(axis=1)
    df_correct['total'] = df_correct.sum(axis=1)

    # Calculate final accuracy
    for label in df_index:
        for col in df_col:
            total = df_typ.at[label, col]
            # Cells without expected labels stay at the initial 0.0.
            if total != 0:
                answer = df_correct.at[label, col]
                df_acc.at[label, col] = round(answer/total*100, 1)

    return df_elem, df_typ, df_correct, df_acc

Define input values

In [3]:
# Numbers of the output .txt files that belong to each model
## Change numbers
model_2 = [1]
model_3 = [2]
model_4 = [3]

# Row labels (df_index) and column labels (df_col) of the result dataframes
df_index = [
    'External_Wall',
    'Internal_Wall',
    'Generic_Wall',
    'Wall_with_Openings',
    'Sloped Slab',
    'Flat Slab',
    'Toilet Slab',
    'Generic Slab',
    'Total',
]
df_col = ['model_2', 'model_3', 'model_4', 'total']


Calculate final accuracy

In [6]:
# Evaluate the three models and collect the four result tables
df_elem, df_typ, df_correct, df_acc = CalFinalAccuracy(
    model_2, model_3, model_4, df_index, df_col
)

# Show every table under its heading, in the same order as before
for heading, table in (
    ('all elements', df_elem),
    ('\nall types', df_typ),
    ('\ncorrect types', df_correct),
    ('\nfinal accuracy', df_acc),
):
    print(heading)
    display(table)

all elements


Unnamed: 0,model_2,model_3,model_4,total
Wall,2594,4511,2113,9218
Slab,352,520,267,1139
Total,2946,5031,2380,10357



all types


Unnamed: 0,model_2,model_3,model_4,total
External_Wall,2265,3959,1786,8010
Internal_Wall,329,552,327,1208
Generic_Wall,1330,2541,1082,4953
Wall_with_Openings,901,1354,652,2907
Sloped Slab,0,0,0,0
Flat Slab,352,520,267,1139
Toilet Slab,196,286,146,628
Generic Slab,156,234,121,511
Total,5529,9446,4381,19356



correct types


Unnamed: 0,model_2,model_3,model_4,total
External_Wall,2222,3840,1670,7732
Internal_Wall,329,545,293,1167
Generic_Wall,1289,2389,1005,4683
Wall_with_Openings,876,1351,624,2851
Sloped Slab,0,0,0,0
Flat Slab,352,520,267,1139
Toilet Slab,196,284,146,626
Generic Slab,154,227,121,502
Total,5418,9156,4126,18700



final accuracy


Unnamed: 0,model_2,model_3,model_4,total
External_Wall,98.1,97.0,93.5,96.5
Internal_Wall,100.0,98.7,89.6,96.6
Generic_Wall,96.9,94.0,92.9,94.5
Wall_with_Openings,97.2,99.8,95.7,98.1
Sloped Slab,0.0,0.0,0.0,0.0
Flat Slab,100.0,100.0,100.0,100.0
Toilet Slab,100.0,99.3,100.0,99.7
Generic Slab,98.7,97.0,100.0,98.2
Total,98.0,96.9,94.2,96.6
