In [1]:
import csv
import os

import numpy as np

# NOTE(review): absolute, machine-specific path; no cell visible here reads it --
# confirm whether it is still needed.
data_folder = '/media/QNAP/People/GongyuZhang/ROP/data/raw/image/'

# Read the grading table. newline='' is what the csv module asks for so that
# quoted fields containing line breaks are parsed correctly.
with open('../src/data/data_split.csv', 'r', newline='') as f:
    content = list(csv.reader(f))

# Fail with a readable message instead of an IndexError on content[0].
if not content:
    raise ValueError("'../src/data/data_split.csv' is empty: expected a header row")

# First row holds the column names, every following row is one graded file.
grading_header = content[0]
grading_data = content[1:]
 

In [2]:
# Show each grading column name next to its positional index.
[(index, name) for index, name in enumerate(grading_header)]

[(0, 'file'),
 (1, 'subset'),
 (2, 'CH'),
 (3, 'JT'),
 (4, 'KP'),
 (5, 'SB'),
 (6, 'GA'),
 (7, 'group')]

In [3]:
# Lookup of every grading keyed by (file, column name); the first two columns
# of each row (file and subset) are not gradings and are skipped.
all_gradings = {
    (row[0], grader): grading
    for row in grading_data
    for grader, grading in zip(grading_header[2:], row[2:])
}

In [4]:
# Read the model classification table. newline='' is what the csv module asks
# for so that quoted fields containing line breaks are parsed correctly.
with open('model_classification_all_files.csv', 'r', newline='') as f:
    content = list(csv.reader(f))

# Fail with a readable message instead of an IndexError on content[0].
if not content:
    raise ValueError("'model_classification_all_files.csv' is empty: expected a header row")

# First row holds the column names, every following row is one (file, model) result.
model_header = content[0]
model_data = content[1:]

In [5]:
# Show each model-table column name next to its positional index.
[(index, name) for index, name in enumerate(model_header)]

[(0, 'file'), (1, 'model'), (2, 'normal'), (3, 'pre-plus'), (4, 'plus')]

In [6]:
# Flatten the model table into a lookup keyed by (file, model, class label).
# Each row must have exactly five columns, otherwise the unpacking raises.
model_gradings = {}
for file, model, normal, pre_plus, plus in model_data:
    model_gradings.update({
        (file, model, 'normal'): normal,
        (file, model, 'pre-plus'): pre_plus,
        (file, model, 'plus'): plus,
    })

In [6]:
# Per-file "normal" score (column 2 of the model table), keyed by file name.
# The rows live in `model_data`; the bare name `model` is only the loop variable
# left behind while building `model_gradings`, so iterating it breaks on a
# fresh kernel.
# NOTE(review): if a file has rows for several models, the last row wins --
# confirm whether this should be restricted to a single model (e.g. 'ensemble').
model_normal_scores = {r[0]: float(r[2]) for r in model_data}

In [84]:
# Per-label score lookup: label -> {file name: score}. Columns 2, 3 and 4 of the
# model table hold the normal / pre-plus / plus scores. The rows are read from
# `model_data`; the bare name `model` is not a table on a fresh kernel.
# NOTE(review): if a file has rows for several models, the last row wins --
# confirm whether this should be restricted to a single model (e.g. 'ensemble').
model_scores_grading = {
    label: {r[0]: float(r[column]) for r in model_data}
    for column, label in enumerate(('normal', 'pre-plus', 'plus'), start=2)
}
# The model table has no "ungradable" column, so every file gets a constant 0.
model_scores_grading['ungradable'] = {r[0]: 0 for r in model_data}

[(0, 'file'),
 (1, 'subset'),
 (2, 'CH'),
 (3, 'JT'),
 (4, 'KP'),
 (5, 'SB'),
 (6, 'GA'),
 (7, 'group')]

In [86]:
# Reference gradings keyed by file name. The rows come from `grading_data`; the
# bare name `grading` is only a string left behind by the loop that builds
# `all_gradings`, so iterating it breaks (or silently yields nothing) on a
# fresh kernel.
# Columns are located by header name instead of the positional indices 6 and 7.
ga_column = grading_header.index('GA')
group_column = grading_header.index('group')
ga_grading = {r[0]: r[ga_column] for r in grading_data}
group_grading = {r[0]: r[group_column] for r in grading_data}

In [87]:
from sklearn.metrics import roc_auc_score

In [88]:
# Label order used for the per-label metrics and the confusion tables.
label_names = ('normal', 'pre-plus', 'plus', 'ungradable')

In [89]:
# ROC AUC of the model's "normal" score against the GA grading, leaving out
# every file that GA marked as ungradable.
gradable_items = [
    (name, score)
    for name, score in model_normal_scores.items()
    if ga_grading[name] != 'ungradable'
]
ref = [1 if ga_grading[name] == 'normal' else 0 for name, _score in gradable_items]
pred = [score for _name, score in gradable_items]

roc_auc_score(ref, pred)

0.9317875107348792

In [93]:
# One-vs-rest ROC AUC of the model scores against the GA grading, per label.
for label_name in label_names:
    scores = model_scores_grading[label_name]
    # Binary reference: 1 where GA assigned this label, 0 otherwise.
    ref = [1 if ga_grading[k] == label_name else 0 for k in scores]
    pred = list(scores.values())

    try:
        auc = roc_auc_score(ref, pred)
    except ValueError:
        # AUC is undefined when the reference holds a single class (e.g. a label
        # that never occurs); skip that label. Only ValueError is caught so that
        # genuine bugs (missing names, bad keys) are no longer hidden.
        continue
    print(label_name, auc)

normal 0.9317875107348792
pre-plus 0.8174603174603174
plus 0.9405956362478103


In [94]:
# One-vs-rest ROC AUC of the model scores against the group grading, per label.
for label_name in label_names:
    scores = model_scores_grading[label_name]
    # Binary reference: 1 where the group grading is this label, 0 otherwise.
    ref = [1 if group_grading[k] == label_name else 0 for k in scores]
    pred = list(scores.values())

    try:
        auc = roc_auc_score(ref, pred)
    except ValueError:
        # AUC is undefined when the reference holds a single class (e.g. a label
        # that never occurs); skip that label. Only ValueError is caught so that
        # genuine bugs (missing names, bad keys) are no longer hidden.
        continue
    print(label_name, auc)

normal 0.9573999999999999
pre-plus 0.7402351669144479
plus 0.9489406500855375


In [68]:
# Hard predictions of the 'ensemble' model: for each file, the class whose score
# (columns 2 onward) is highest. Rows are read from `model_data`; the bare name
# `model` is not a table on a fresh kernel. The ensemble rows are selected once
# and reused for both the file list and the prediction lookup.
class_names = ['normal', 'pre-plus', 'plus']
ensemble_rows = [r for r in model_data if r[1] == 'ensemble']

filenames = [r[0] for r in ensemble_rows]
ensemble_scores = {
    r[0]: class_names[int(np.argmax([float(x) for x in r[2:]]))]
    for r in ensemble_rows
}

# Parallel arrays, all ordered like `filenames`, ready for element-wise comparison.
model_scores = np.array([ensemble_scores[filename] for filename in filenames])
ga_scores = np.array([ga_grading[filename] for filename in filenames])
group_scores = np.array([group_grading[filename] for filename in filenames])

In [78]:
from IPython.display import Markdown
import numpy as np

def disp_table(reference_scores, obs_scores, labels=None):
    """Display a confusion table of two label sequences as a Markdown table.

    Rows are the reference labels, columns the observed labels. The last column
    holds the row totals and the last row the column totals.

    Parameters
    ----------
    reference_scores : array-like of str
        Reference label per item (table rows).
    obs_scores : array-like of str
        Observed label per item (table columns); same shape as
        ``reference_scores``.
    labels : sequence of str, optional
        Labels to tabulate, in display order. Defaults to the module-level
        ``label_names``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    if labels is None:
        labels = label_names
    labels = list(labels)

    # Plain lists compared with `==` give a single bool, not an element-wise
    # mask, so coerce both inputs to arrays first.
    reference_scores = np.asarray(reference_scores)
    obs_scores = np.asarray(obs_scores)
    if reference_scores.shape != obs_scores.shape:
        raise ValueError(
            'reference_scores and obs_scores must have the same shape, got '
            '{} and {}'.format(reference_scores.shape, obs_scores.shape)
        )

    # counts[i][j]: items with reference label i and observed label j.
    counts = [
        [
            int(np.count_nonzero((reference_scores == ref_name) & (obs_scores == obs_name)))
            for obs_name in labels
        ]
        for ref_name in labels
    ]
    # Integer column totals, so they render as "154" rather than "154.0".
    column_totals = [sum(column) for column in zip(*counts)]

    # One row label cell + one cell per label + one row-total cell.
    template = '| ' + '{:12} | ' * (len(labels) + 2) + '\n'

    # The backslash is escaped explicitly; a bare '\ ' is an invalid escape.
    table = template.format('ref \\ model', *labels, '')
    table += ' :-: '.join(['|'] * (len(labels) + 3)) + '\n'
    for ref_name, row in zip(labels, counts):
        table += template.format(ref_name, *row, sum(row))
    table += template.format('&nbsp;', *column_totals, '')

    # `display` is expected to be supplied by the IPython/Jupyter session.
    display(Markdown(table))

In [72]:
# Confusion table: GA grading (rows) against the ensemble model (columns).
disp_table(reference_scores=ga_scores, obs_scores=model_scores)

| ref \ model  | normal       | pre-plus     | plus         | ungradable   |              | 
| :-: | :-: | :-: | :-: | :-: | :-: |
| normal       |          132 |            0 |           11 |            0 |          143 | 
| pre-plus     |           13 |            0 |            5 |            0 |           18 | 
| plus         |            9 |            0 |           30 |            0 |           39 | 
| ungradable   |            0 |            0 |            0 |            0 |            0 | 
| &nbsp;       |        154.0 |          0.0 |         46.0 |          0.0 |              | 


In [73]:
# Confusion table: group grading (rows) against the ensemble model (columns).
disp_table(reference_scores=group_scores, obs_scores=model_scores)

| ref \ model  | normal       | pre-plus     | plus         | ungradable   |              | 
| :-: | :-: | :-: | :-: | :-: | :-: |
| normal       |          100 |            0 |            0 |            0 |          100 | 
| pre-plus     |           41 |            0 |            8 |            0 |           49 | 
| plus         |           13 |            0 |           38 |            0 |           51 | 
| ungradable   |            0 |            0 |            0 |            0 |            0 | 
| &nbsp;       |        154.0 |          0.0 |         46.0 |          0.0 |              | 


In [74]:
# Confusion table: group grading (rows) against the GA grading (columns).
disp_table(reference_scores=group_scores, obs_scores=ga_scores)

| ref \ model  | normal       | pre-plus     | plus         | ungradable   |              | 
| :-: | :-: | :-: | :-: | :-: | :-: |
| normal       |          100 |            0 |            0 |            0 |          100 | 
| pre-plus     |           29 |           15 |            5 |            0 |           49 | 
| plus         |           14 |            3 |           34 |            0 |           51 | 
| ungradable   |            0 |            0 |            0 |            0 |            0 | 
| &nbsp;       |        143.0 |         18.0 |         39.0 |          0.0 |              | 
