In [2]:
import pandas as pd
import numpy as np

from sklearn import metrics
from sklearn import preprocessing
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
def group_f1_b(df):
    """Micro-averaged F1 score of the thresholded binary predictions.

    Parameters
    ----------
    df : object exposing ``Class_b`` and ``Pred_b`` attributes
        In this notebook it is the sub-frame handed over by
        ``df.groupby('MB').apply(...)``. ``Class_b`` must be castable to
        int and ``Pred_b`` to float.

    Returns
    -------
    The value returned by ``metrics.f1_score(..., average='micro')``.
    """
    y_true = np.array(df.Class_b.tolist(), dtype=int)
    y_score = np.array(df.Pred_b.tolist(), dtype=float)
    # Strict threshold: a score of exactly 0.5 is assigned to class 0.
    y_pred = (y_score > 0.5).astype(int)
    # NOTE(review): with average='micro' on binary labels the F1 score is
    # numerically the same as accuracy -- confirm this is the intended variant.
    return metrics.f1_score(y_true, y_pred, average='micro')

def group_kappa_b(df):
    """Cohen's kappa between the binary labels and thresholded predictions.

    Parameters
    ----------
    df : object exposing ``Class_b`` and ``Pred_b`` attributes
        ``Class_b`` must be castable to int and ``Pred_b`` to float.

    Returns
    -------
    The value returned by ``metrics.cohen_kappa_score``.
    """
    y_true = np.array(df.Class_b.tolist(), dtype=int)
    y_score = np.array(df.Pred_b.tolist(), dtype=float)
    # Strict threshold: a score of exactly 0.5 is assigned to class 0.
    y_pred = (y_score > 0.5).astype(int)
    return metrics.cohen_kappa_score(y_true, y_pred)

def group_acc_b(df):
    """Accuracy of the thresholded binary predictions.

    Parameters
    ----------
    df : object exposing ``Class_b`` and ``Pred_b`` attributes
        ``Class_b`` must be castable to int and ``Pred_b`` to float.

    Returns
    -------
    The value returned by ``metrics.accuracy_score``.
    """
    y_true = np.array(df.Class_b.tolist(), dtype=int)
    y_score = np.array(df.Pred_b.tolist(), dtype=float)
    # Strict threshold: a score of exactly 0.5 is assigned to class 0.
    y_pred = (y_score > 0.5).astype(int)
    return metrics.accuracy_score(y_true, y_pred)

def group_auc_b(df):
    """ROC AUC of the binary scores against the binary labels.

    Unlike the other ``group_*_b`` helpers, the scores are passed to the
    metric as-is; no 0.5 threshold is applied.

    Parameters
    ----------
    df : object exposing ``Class_b`` and ``Pred_b`` attributes
        ``Class_b`` must be castable to int and ``Pred_b`` to float.

    Returns
    -------
    The value returned by ``metrics.roc_auc_score``.
    """
    y_true = np.array(df.Class_b.tolist(), dtype=int)
    y_score = np.array(df.Pred_b.tolist(), dtype=float)
    return metrics.roc_auc_score(y_true, y_score)

In [4]:
from scipy.special import softmax

def binary_cls(cls):
    """Collapse a four-way class label into a binary one.

    Labels equal to 0 or 1 become 0, labels equal to 2 or 3 become 1, and
    anything else yields ``None``.
    """
    if cls in (0, 1):
        return 0
    if cls in (2, 3):
        return 1
    # Unrecognised label: signal "no binary class" to the caller.
    return None
    
def binary_pred(pred):
    """Reduce a comma-separated four-value prediction to one binary score.

    The string is split on ``','`` and parsed as floats. Entries 0 and 1
    are summed, entries 2 and 3 are summed, and a softmax is taken over
    the two sums.

    Parameters
    ----------
    pred : str
        Comma-separated numbers; the first four entries are used.

    Returns
    -------
    The softmax weight of the second sum (entries 2 and 3), i.e. the score
    for the greater binary label.
    """
    scores = np.array(pred.split(','), dtype=float)
    lower_sum = scores[0] + scores[1]
    upper_sum = scores[2] + scores[3]
    pair_weights = softmax(np.array([lower_sum, upper_sum]))
    return pair_weights[1]

## 1. Independent training

In [17]:
# Read every per-run result table: 18 values of `mb` times 5 values of `i`
# (presumably the five cross-validation folds, going by "5fold" in the file
# name -- confirm). Files are tab-separated, with the first column as index.
dfs = []
for mb in range(18):
    for i in range(5):
        dfs.append(pd.read_csv('../results/molnet_chirality_cls_etkdg_csp{}-5fold_{}.csv'.format(mb, i),
                               sep='\t', index_col=0))

# Concatenate once, after all files are loaded. Doing this inside the outer
# loop rebuilt the full frame on every iteration for the same final result.
df = pd.concat(dfs, ignore_index=True)

Convert the four-class classification into a binary classification (classes 0 and 1 become 0; classes 2 and 3 become 1):

In [18]:
# Derive the binary target and binary score columns element by element.
df['Class_b'] = df['Class'].map(binary_cls)
df['Pred_b'] = df['Pred'].map(binary_pred)

In [8]:
# One score per distinct value of the 'MB' column, for each of the four metrics.
by_mb = df.groupby('MB')
auc, acc, kappa, f1 = (
    by_mb.apply(group_auc_b),
    by_mb.apply(group_acc_b),
    by_mb.apply(group_kappa_b),
    by_mb.apply(group_f1_b),
)

In [9]:
# Print each metric as a titled column of values, one line per group.
for title, scores in (('AUC:', auc), ('ACC:', acc), ('KAPPA:', kappa), ('F1:', f1)):
    print(title, '\n' + '\n'.join(scores.astype(str).tolist()), '\n')

AUC: 
0.9057808857808858
0.9033212329793456
0.8952841427940758
0.8891244590010527
0.9085682606820935
0.8904499178981937
0.8124318440678137
0.9491153856386517
0.7722950509082925
0.7927883862283661
0.8420768086239215
0.7496803696794311
0.9239179025198657
0.7925037490950462
0.8691141570741012
0.8216904625928041
0.8747088973076201
0.7923774039403078 

ACC: 
0.8956896551724138
0.8615384615384616
0.8739864864864865
0.8186813186813187
0.8808988764044944
0.8827868852459017
0.8618343195266273
0.9010309278350516
0.8664465538588526
0.8818359375
0.8766233766233766
0.8482758620689655
0.9161676646706587
0.847926267281106
0.86
0.93
0.8660869565217392
0.8237410071942446 

KAPPA: 
0.7546582765250831
0.7230127018944135
0.7190295822089393
0.6324771261987634
0.7208244336654708
0.694487251330905
0.6606887108155124
0.8018628551366821
0.626491209751056
0.6424498464805966
0.6825561379230536
0.5873568716821373
0.7988176882104412
0.5744582231001263
0.6863880215277598
0.6737019293020847
0.6952900723234433
0.5265

## 2. Transfer learning

In [5]:
# Read every per-run transfer-learning ("_tl_") result table: 18 values of
# `mb` times 5 values of `i` (presumably the five cross-validation folds,
# going by "5fold" in the file name -- confirm). Files are tab-separated,
# with the first column as index.
dfs = []
for mb in range(18):
    for i in range(5):
        dfs.append(pd.read_csv('../results/molnet_chirality_cls_etkdg_csp{}-5fold_tl_{}.csv'.format(mb, i),
                               sep='\t', index_col=0))

# Concatenate once, after all files are loaded. Doing this inside the outer
# loop rebuilt the full frame on every iteration for the same final result.
df = pd.concat(dfs, ignore_index=True)

In [6]:
# Derive the binary target and binary score columns element by element.
df['Class_b'] = df['Class'].map(binary_cls)
df['Pred_b'] = df['Pred'].map(binary_pred)

In [7]:
# One score per distinct value of the 'MB' column, for each of the four metrics.
by_mb = df.groupby('MB')
auc, acc, kappa, f1 = (
    by_mb.apply(group_auc_b),
    by_mb.apply(group_acc_b),
    by_mb.apply(group_kappa_b),
    by_mb.apply(group_f1_b),
)

In [8]:
# Print each metric as a titled column of values, one line per group.
for title, scores in (('AUC:', auc), ('ACC:', acc), ('KAPPA:', kappa), ('F1:', f1)):
    print(title, '\n' + '\n'.join(scores.astype(str).tolist()), '\n')

AUC: 
0.9210753570130602
0.9056131448592486
0.8902443140231847
0.9222745675018635
0.9052515278290473
0.8986084018676626
0.8570461679887467
0.958475004655865
0.8035248213538786
0.8075753438708568
0.8378784453018445
0.7999471278990895
0.9239899080240069
0.8017578712901089
0.911137516237768
0.8398149568933673
0.8735790494991003
0.7831831038942306 

ACC: 
0.9120689655172414
0.8648351648351649
0.8881756756756757
0.8571428571428571
0.897003745318352
0.8860655737704918
0.8826923076923077
0.9206185567010309
0.8775897647544366
0.8916015625
0.8844155844155844
0.8651542649727768
0.9119760479041916
0.8682027649769585
0.8856
0.9408333333333333
0.8639130434782609
0.8273381294964028 

KAPPA: 
0.7951814319989198
0.7295527581124508
0.7482346411207845
0.7123012495318511
0.7576247922851898
0.7079972724830735
0.718306395420327
0.8408997715423818
0.6617757317894766
0.6739669193912581
0.6954538047906142
0.6443832468694796
0.7910129347405986
0.6356538177036246
0.7482543286171803
0.7394782225810002
0.69142152