In [1]:
import os
import glob

from functools import partial

import pandas as pd

In [2]:
# Image file extensions and the per-set sub-directories found under dataset/.
image_types = ['png', 'jpg']
data_dirs = ['OPG Film Set 1', 'OPG Film Set 2', 'OPG Film Set 3']

# Class names are stored one per line; surrounding whitespace (including the
# trailing newline) is stripped from each line.
with open('dataset/classes.txt') as fd:
    classes = list(map(str.strip, fd))
print(classes)

['Lt. Condyle', 'Rt. Condyle', 'Lt. Ramus-Angle', 'Rt. Ramus-Angle', 'Lt. Body', 'Rt. Body', 'Lt. Coronoid', 'Rt. Coronoid', 'Symphysis-Parasymphysis']


In [39]:
def fix_bilateral(fxs):
    """Expand 'bilateral <site>' entries into explicit right- and left-side entries.

    Args:
        fxs: iterable of fracture description strings. The 'bilateral' prefix
            is matched case-sensitively, in lowercase.

    Returns:
        A new list. Every entry starting with 'bilateral' is replaced by
        'rt. <site>' followed by 'lt. <site>', where <site> is the last
        space-separated word of the entry. All other entries are copied
        unchanged, in their original order.
    """
    new_fxs = []
    for fx in fxs:
        if fx.startswith('bilateral'):
            site = fx.split(' ')[-1]
            # A bilateral fracture involves both sides, so emit one entry per
            # side; emitting only the right side drops the left-side label.
            new_fxs.extend((f'rt. {site}', f'lt. {site}'))
        else:
            new_fxs.append(fx)
    return new_fxs

def transform_fxs(fxs, classes):
    """Convert fracture descriptions into a multi-hot label list over `classes`.

    Each description is normalised (whitespace collapsed, 'bilateral' entries
    expanded by `fix_bilateral`, 'subcondyle' rewritten to 'condyle', and a
    leading 'rt.'/'lt.' removed from descriptions containing 'sym'). It is then
    compared word by word against each class name: a description matches a
    class when every one of its words is a substring of the lower-cased class
    word in the same position (the comparison covers only as many words as the
    shorter of the two has). The first matching class is set to 1.

    Args:
        fxs: iterable of fracture description strings.
        classes: list of class names; the result has one slot per class.

    Returns:
        A list of 0/1 ints of length len(classes).

    Side effects:
        Prints 'Error: No mapping for <description>' for a description that
        matches no class, unless it is 'normal' or contains 'alveolar'.
    """
    labels = [0] * len(classes)

    # Collapse runs of whitespace and drop blank entries before matching.
    # An empty token is a substring of every class word, so a blank entry or
    # a stray leading/double space would otherwise match the wrong class.
    cleaned = [' '.join(fx.split()) for fx in fxs]
    cleaned = [fx for fx in cleaned if fx]

    for fx in fix_bilateral(cleaned):
        fx = fx.replace('subcondyle', 'condyle')
        if 'sym' in fx and fx.startswith(('rt.', 'lt.')):
            # The symphysis class has no side prefix; drop the 3-char prefix
            # and whatever spacing follows it.
            fx = fx[3:].lstrip()
        words = fx.split(' ')
        for i, c in enumerate(classes):
            if all(w in part.lower() for w, part in zip(words, c.split(' '))):
                labels[i] = 1
                break
        else:
            # No class matched; 'normal' and alveolar entries are expected to
            # have no class, anything else is reported.
            if fx != 'normal' and 'alveolar' not in fx:
                print('Error: No mapping for', fx)
    return labels

In [43]:
# Build one record per image: [relative image path, *0/1 class labels].
data = []
for data_dir in data_dirs:
    d = partial(os.path.join, 'dataset', data_dir)
    labels = pd.read_csv(d('labels.csv'))

    for _idx, row in labels.iterrows():
        # Rows explicitly flagged as post-operative are excluded.
        if 'Post op' in row and row['Post op'] == 'yes':
            continue
        fx_names = []
        if 'Diagnosis' in row:
            # NOTE(review): the guard tests for a 'Diagnosis' column, but only
            # 'Diagnosis0'..'Diagnosis8' are read below, so the bare
            # 'Diagnosis' column itself is never used. Confirm this matches
            # the CSV layout of each film set.
            for j in range(9):
                c = f'Diagnosis{j}'
                # Missing cells are NaN; anything else is treated as text and
                # normalised the same way as the '+'-separated layout below.
                if c in row and pd.notna(row[c]):
                    fx_names.append(str(row[c]).strip().lower())
        elif 'diagnose' in row:
            # Single column holding '+'-separated diagnoses.
            fx_names = [f.strip().lower() for f in row['diagnose'].split('+')]
        # Blank entries carry no diagnosis and must not reach the matcher.
        fx_names = [f for f in fx_names if f]
        fx = transform_fxs(fx_names, classes)
        fx.insert(0, os.path.join(data_dir, row['image']))
        data.append(fx)



In [45]:
# One row per image: the path column followed by one label column per class.
df = pd.DataFrame(data, columns=['image'] + list(classes))

In [46]:
# Display the assembled table (image path plus one 0/1 column per class).
df

Unnamed: 0,image,Lt. Condyle,Rt. Condyle,Lt. Ramus-Angle,Rt. Ramus-Angle,Lt. Body,Rt. Body,Lt. Coronoid,Rt. Coronoid,Symphysis-Parasymphysis
0,OPG Film Set 1/case1.jpg,0,0,0,0,0,0,0,0,0
1,OPG Film Set 1/case2.jpg,0,0,0,0,0,0,0,0,0
2,OPG Film Set 1/case3.jpg,0,0,0,0,0,0,0,0,0
3,OPG Film Set 1/case4.jpg,0,0,0,0,0,0,0,0,0
4,OPG Film Set 1/case5.jpg,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
96,OPG Film Set 3/KKU case 29.png,1,0,0,0,0,0,0,0,0
97,OPG Film Set 3/KKU case 30.png,0,1,0,0,0,0,0,0,0
98,OPG Film Set 3/KKU case 31.png,1,0,0,0,0,0,0,0,1
99,OPG Film Set 3/KKU case 32.png,0,0,0,0,0,0,0,0,1


In [47]:
# Persist the label table next to the film sets; the row index is not written.
df.to_csv(path_or_buf=os.path.join('dataset', 'data.csv'), index=False)