In [22]:
import os

import pandas as pd
from sklearn import metrics

In [23]:
def evaluate(y_pred, y_true):
    """Score predictions against reference labels.

    Note the parameter order: predictions come first and references second,
    which is the reverse of the ``(y_true, y_pred)`` order the sklearn metric
    functions take; the two are swapped when forwarded below.

    Args:
        y_pred: predicted labels.
        y_true: reference labels to score against.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``; precision, recall and F1
        are computed with ``average='macro'``.
    """
    macro = dict(average='macro')
    return (
        metrics.accuracy_score(y_true, y_pred),
        metrics.precision_score(y_true, y_pred, **macro),
        metrics.recall_score(y_true, y_pred, **macro),
        metrics.f1_score(y_true, y_pred, **macro),
    )

In [24]:
def _notebook_default(name):
    """Return the notebook-level variable `name`; raise NameError if it is undefined."""
    namespace = globals()
    if name not in namespace:
        raise NameError(
            "process() needs '{0}': pass {0}=... or define a notebook-level "
            "variable named '{0}'".format(name))
    return namespace[name]


def _metric_record(frame, target_col, method, pct_missing, split, label):
    """Score frame['y_pred'] against frame[target_col] and wrap the result in a record dict."""
    acc, prec, rec, f1 = evaluate(frame['y_pred'], frame[target_col])
    return dict(method=method,
                pct_missing=pct_missing,
                split=split,
                label=label,
                acc=acc,
                prec=prec,
                rec=rec,
                f1=f1)


def process(predictions, method=None, pct_missing=None):
    """Build evaluation records for a collection of prediction CSV files.

    Each CSV is read with ``pd.read_csv`` and must provide the columns
    ``split``, ``y_pred``, ``y_true`` and ``y_label``. For every file three
    records are appended, in this order:

    1. ``split == 'train'`` rows, ``y_pred`` scored against ``y_true``
       (``label='true'``)
    2. ``split == 'train'`` rows, ``y_pred`` scored against ``y_label``
       (``label='label'``)
    3. ``split == 'test'`` rows, ``y_pred`` scored against ``y_true``
       (``label='true'``)

    Args:
        predictions: iterable of CSV paths (anything ``pd.read_csv`` accepts).
        method: value stored under the ``method`` key of every record. When
            left as ``None`` the notebook-level variable ``method`` is used,
            which keeps existing ``process(predictions)`` calls working.
        pct_missing: value stored under the ``pct_missing`` key of every
            record. ``None`` falls back to the notebook-level ``pct_missing``.

    Returns:
        List of dicts with keys ``method``, ``pct_missing``, ``split``,
        ``label``, ``acc``, ``prec``, ``rec`` and ``f1``.

    Raises:
        NameError: if a tag is neither passed in nor defined at notebook level
            (only raised once there is at least one file to process).
    """
    records = []
    for fcsv in predictions:
        # Resolve the tags per file, before any I/O, so an explicit argument
        # always wins and a missing tag fails with a clear message.
        run_method = method if method is not None else _notebook_default('method')
        run_pct = (pct_missing if pct_missing is not None
                   else _notebook_default('pct_missing'))

        df = pd.read_csv(fcsv)
        train = df[df['split'] == 'train']
        test = df[df['split'] == 'test']

        records.append(
            _metric_record(train, 'y_true', run_method, run_pct, 'train', 'true'))
        records.append(
            _metric_record(train, 'y_label', run_method, run_pct, 'train', 'label'))
        records.append(
            _metric_record(test, 'y_true', run_method, run_pct, 'test', 'true'))
    return records

In [11]:
# Evaluate the 'complete' method (pct_missing = 0) and append the resulting
# records to the shared evaluation CSV for this dataset/model pair.
dataset = 'cifar110'
model = 'vgg8'
method = 'complete'
pct_missing = 0.
rounds = ['round1']
predictions = [
    'logs/{}/prediction_{}_{}.csv'.format(rnd, dataset, model)
    for rnd in rounds]
evaluation = 'logs/evaluation_{}_{}.csv'.format(dataset, model)
print(predictions)
records = process(predictions)
dfeval = pd.DataFrame(records)
print(dfeval)
# to_csv defaults to header=True, so a plain mode='a' append repeats the
# column names in the middle of an existing file. Only write the header when
# the file is new or empty.
write_header = not (os.path.isfile(evaluation) and os.path.getsize(evaluation) > 0)
dfeval.to_csv(evaluation, index=False, mode='a', header=write_header)

['logs/round1/prediction_cifar110_vgg8.csv']
       acc        f1  label    method  pct_missing      prec       rec  split
0  0.90655  0.892779   true  complete          0.0  0.929700  0.865649  train
1  0.72402  0.619262  label  complete          0.0  0.510247  0.840993  train
2  0.86665  0.841869   true  complete          0.0  0.888834  0.810482   test


In [28]:
# Evaluate the 'pu' method at pct_missing = 0.5 and append the resulting
# records to the shared evaluation CSV for this dataset/model pair.
dataset = 'cifar110'
model = 'vgg8'
method = 'pu'
pct_missing = 0.5
# Rounds included in this evaluation.
rounds = ['round2', 'round3']
predictions = [
    'logs/{}/prediction_{}_{}_{}_{}.csv'.format(rnd, dataset, model, method, pct_missing)
    for rnd in rounds]
evaluation = 'logs/evaluation_{}_{}.csv'.format(dataset, model)
print(predictions)
records = process(predictions)
dfeval = pd.DataFrame(records)
print(dfeval)
# to_csv defaults to header=True, so a plain mode='a' append repeats the
# column names in the middle of an existing file. Only write the header when
# the file is new or empty.
write_header = not (os.path.isfile(evaluation) and os.path.getsize(evaluation) > 0)
dfeval.to_csv(evaluation, index=False, mode='a', header=write_header)

['logs/round2/prediction_cifar110_vgg8_pu_0.5.csv', 'logs/round3/prediction_cifar110_vgg8_pu_0.5.csv']
       acc        f1  label method  pct_missing      prec       rec  split
0  0.67825  0.531185   true     pu          0.5  0.953374  0.417831  train
1  0.78125  0.510123  label     pu          0.5  0.618192  0.469572  train
2  0.66105  0.497196   true     pu          0.5  0.941629  0.388309   test
3  0.67496  0.529849   true     pu          0.5  0.950470  0.411865  train
4  0.79361  0.534090  label     pu          0.5  0.639168  0.486366  train
5  0.65400  0.486697   true     pu          0.5  0.938322  0.375082   test


In [29]:
# Mean and standard deviation of the metrics over the rows of dfeval, first
# for the test split, then for the train split scored against y_true.
is_test = dfeval['split'] == 'test'
is_train_true = (dfeval['split'] == 'train') & (dfeval['label'] == 'true')
# dfeval also holds string columns (label, method, split). Recent pandas
# raises on them instead of silently dropping them, so ask for numeric
# columns explicitly.
print(dfeval[is_test].mean(numeric_only=True))
print(dfeval[is_test].std(numeric_only=True))
print(dfeval[is_train_true].mean(numeric_only=True))
print(dfeval[is_train_true].std(numeric_only=True))

acc            0.657525
f1             0.491947
pct_missing    0.500000
prec           0.939976
rec            0.381695
dtype: float64
acc            0.004985
f1             0.007423
pct_missing    0.000000
prec           0.002338
rec            0.009353
dtype: float64
acc            0.676605
f1             0.530517
pct_missing    0.500000
prec           0.951922
rec            0.414848
dtype: float64
acc            0.002326
f1             0.000945
pct_missing    0.000000
prec           0.002054
rec            0.004218
dtype: float64


In [97]:
# Evaluate the 'hb' method at pct_missing = 0.5 and append the resulting
# records to the shared evaluation CSV for this dataset/model pair.
dataset = 'cifar110'
model = 'vgg8'
method = 'hb'
pct_missing = 0.5
# Rounds included in this evaluation.
rounds = ['round2', 'round3']
predictions = [
    'logs/{}/prediction_{}_{}_{}_{}.csv'.format(rnd, dataset, model, method, pct_missing)
    for rnd in rounds]
evaluation = 'logs/evaluation_{}_{}.csv'.format(dataset, model)
print(predictions)
records = process(predictions)
dfeval = pd.DataFrame(records)
print(dfeval)
# to_csv defaults to header=True, so a plain mode='a' append repeats the
# column names in the middle of an existing file. Only write the header when
# the file is new or empty.
write_header = not (os.path.isfile(evaluation) and os.path.getsize(evaluation) > 0)
dfeval.to_csv(evaluation, index=False, mode='a', header=write_header)

['logs/round2/prediction_cifar110_vgg8_hb_0.5.csv', 'logs/round3/prediction_cifar110_vgg8_hb_0.5.csv']
       acc        f1  label method  pct_missing      prec       rec  split
0  0.82025  0.807477   true     hb          0.5  0.777370  0.843822  train
1  0.64934  0.566379  label     hb          0.5  0.451396  0.858840  train
2  0.79975  0.779914   true     hb          0.5  0.753525  0.812800   test
3  0.82095  0.807968   true     hb          0.5  0.785751  0.839498  train
4  0.65348  0.569285  label     hb          0.5  0.456205  0.854135  train
5  0.79900  0.778309   true     hb          0.5  0.760227  0.807264   test


In [98]:
# Mean and standard deviation of the metrics over the rows of dfeval, first
# for the test split, then for the train split scored against y_true.
is_test = dfeval['split'] == 'test'
is_train_true = (dfeval['split'] == 'train') & (dfeval['label'] == 'true')
# dfeval also holds string columns (label, method, split). Recent pandas
# raises on them instead of silently dropping them, so ask for numeric
# columns explicitly.
print(dfeval[is_test].mean(numeric_only=True))
print(dfeval[is_test].std(numeric_only=True))
print(dfeval[is_train_true].mean(numeric_only=True))
print(dfeval[is_train_true].std(numeric_only=True))

acc            0.799375
f1             0.779111
pct_missing    0.500000
prec           0.756876
rec            0.810032
dtype: float64
acc            0.000530
f1             0.001135
pct_missing    0.000000
prec           0.004739
rec            0.003915
dtype: float64
acc            0.820600
f1             0.807723
pct_missing    0.500000
prec           0.781560
rec            0.841660
dtype: float64
acc            0.000495
f1             0.000347
pct_missing    0.000000
prec           0.005926
rec            0.003057
dtype: float64


In [25]:
# Score a single prediction file addressed by a bare filename (no logs/<round>/
# prefix) and display the records; nothing is written to disk here.
dataset, model = 'cifar110', 'vgg8'
method, pct_missing = 'pu', 0.5
predictions = [
    'prediction_{}_{}_{}_{}.csv'.format(dataset, model, method, pct_missing),
]
records = process(predictions)
dfeval = pd.DataFrame(records)
print(dfeval)

       acc        f1  label method  pct_missing      prec       rec  split
0  0.67496  0.529849   true     pu          0.5  0.950470  0.411865  train
1  0.79361  0.534090  label     pu          0.5  0.639168  0.486366  train
2  0.65400  0.486697   true     pu          0.5  0.938322  0.375082   test
