In [1]:
import time
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from joblib import dump
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix)
from sklearn.metrics import roc_auc_score, average_precision_score
from RefinedRandomForest import RefinedRandomForest

In [2]:
# Directory holding the pre-extracted ResNext101 feature/label arrays.
# Kept in one place so the location is edited once instead of six times.
FEATURE_DIR = '../feature_extraction/ResNext101'


def _load_split(split):
    """Load the feature and label arrays of one data split.

    Parameters
    ----------
    split : str
        Split suffix used in the file names ('train', 'valid' or 'test').

    Returns
    -------
    (features, labels) : tuple of np.ndarray
        Arrays read from ``features_ResNext101_<split>.npy`` and
        ``labels_ResNext101_<split>.npy`` inside ``FEATURE_DIR``.

    Raises
    ------
    AssertionError
        If the two arrays do not have the same number of rows.
    """
    features = np.load(f'{FEATURE_DIR}/features_ResNext101_{split}.npy')
    labels = np.load(f'{FEATURE_DIR}/labels_ResNext101_{split}.npy')
    # Fail next to the data instead of deep inside a later scoring cell.
    assert features.shape[0] == labels.shape[0], (
        f'{split}: {features.shape[0]} feature rows but {labels.shape[0]} label rows'
    )
    return features, labels


x_train, y_train = _load_split('train')
x_valid, y_valid = _load_split('valid')
x_test, y_test = _load_split('test')

# Append the validation rows to the training arrays (row-wise stack), so
# x_train / y_train hold train + valid from here on; x_valid / y_valid are
# still available unchanged.
x_train = np.vstack((x_train, x_valid))
y_train = np.vstack((y_train, y_valid))

In [6]:
######### per-label random forests: test-set scoring ###########
# One saved model per label column of y_test; column i of rf_results receives
# model i's score for every row of x_test.
#
# Allocate the score matrix as float explicitly. np.zeros_like(y_test) would
# inherit the label dtype, and with integer labels every probability written
# below would be silently truncated toward zero.
rf_results = np.zeros(y_test.shape, dtype=np.float64)
for i in range(y_test.shape[1]):
    # NOTE(review): joblib.load unpickles arbitrary objects; only load model
    # files that come from a trusted source.
    rfc = joblib.load(f'saved_model/rfc_{i}.joblib')
    # Column 1 of predict_proba is used as the positive-class score.
    # Assumes every model was fit with both classes present (two columns,
    # positive class second) -- TODO confirm against the training notebook.
    pred = rfc.predict_proba(x_test)[:, 1]
    rf_results[:, i] = pred

In [7]:
# Score the plain random-forest predictions.
# Label column 0 is scored on its own ("Challenge 1"); all remaining label
# columns are scored together ("Challenge 2").
diseases_label, diseases_pred = y_test[:, 1:], rf_results[:, 1:]

auc1 = roc_auc_score(y_test[:, 0], rf_results[:, 0])
print(f'AUC of Challenge 1: {auc1}')

auc2 = roc_auc_score(diseases_label, diseases_pred)
print(f'AUC of Challenge 2: {auc2}')

mAP = average_precision_score(diseases_label, diseases_pred)
print(f'mAP of Challenge 2: {mAP}')

# Challenge 1 score is its AUC; Challenge 2 score is the mean of mAP and AUC;
# the final score is the mean of the two challenge scores.
C1_Score = auc1
C2_Score = 0.5 * mAP + 0.5 * auc2
final_Score = 0.5 * C2_Score + 0.5 * C1_Score
print(f'C1 Score: {C1_Score} C2 Score: {C2_Score} Final Score: {final_Score}')

AUC of Challenge 1: 0.9736593711285471
AUC of Challenge 2: 0.8654896513860499
mAP of Challenge 2: 0.4997324385575243
C1 Score: 0.9736593711285471 C2 Score: 0.6826110449717872 Final Score: 0.8281352080501672


In [8]:
######### per-label choice: plain vs. refined random forest (test-set scoring) ###########
# For every label column, score x_test with whichever saved model aucs.npy
# ranks best for that label, and store the scores in rrf_results.
#
# Allocate the score matrix as float explicitly. np.zeros_like(y_test) would
# inherit the label dtype, and with integer labels every probability written
# below would be silently truncated toward zero.
rrf_results = np.zeros(y_test.shape, dtype=np.float64)

# optimal[i] is the index of the largest entry in row i of aucs.
# Index 0 selects the plain forest; any other index selects the refined one.
# NOTE(review): presumably each row of aucs is one label and column 0 is the
# plain forest's AUC -- confirm against the cell that wrote aucs.npy, and
# confirm those AUCs were not measured on the test split (that would make the
# selection, and the scores reported from it, optimistic).
aucs = np.load('aucs.npy')
optimal = aucs.argmax(axis=1)

for i in range(y_test.shape[1]):
    # NOTE(review): joblib.load unpickles arbitrary objects; only load model
    # files that come from a trusted source.
    if optimal[i] == 0:
        rfc = joblib.load(f'saved_model/rfc_{i}.joblib')
        pred = rfc.predict_proba(x_test)[:, 1]
    else:
        rrfc = joblib.load(f'saved_model/rrfc/rrfc_{i}.joblib')
        pred = rrfc.predict_proba(x_test)[:, 1]
    # Column 1 of predict_proba is used as the positive-class score
    # (assumes both classes were present when the model was fit -- TODO confirm).
    rrf_results[:, i] = pred
        

In [9]:
# Score the per-label selected predictions held in rrf_results.
# Column 0 feeds the "Challenge 1" AUC; columns 1.. feed the "Challenge 2"
# AUC and mAP.
first_label, first_pred = y_test[:, 0], rrf_results[:, 0]
auc1 = roc_auc_score(first_label, first_pred)
print(f'AUC of Challenge 1: {auc1}')

diseases_label = y_test[:, 1:]
diseases_pred = rrf_results[:, 1:]
auc2 = roc_auc_score(diseases_label, diseases_pred)
print(f'AUC of Challenge 2: {auc2}')
mAP = average_precision_score(diseases_label, diseases_pred)
print(f'mAP of Challenge 2: {mAP}')

# Equal-weight blends: C2 averages mAP with AUC, the final score averages
# C2 with C1.
C1_Score = auc1
C2_Score = 0.5 * mAP + 0.5 * auc2
final_Score = 0.5 * C2_Score + 0.5 * C1_Score
print(f'C1 Score: {C1_Score} C2 Score: {C2_Score} Final Score: {final_Score}')

AUC of Challenge 1: 0.9736593711285471
AUC of Challenge 2: 0.8996801448187943
mAP of Challenge 2: 0.49453484839183764
C1 Score: 0.9736593711285471 C2 Score: 0.697107496605316 Final Score: 0.8353834338669315


In [11]:
# Write both test-set score matrices to the working directory.
# The file names are kept exactly as they were (note the singular
# 'rrf_result.npy'), since other code may read them by name.
for out_name, score_matrix in (
    ('rf_results.npy', rf_results),
    ('rrf_result.npy', rrf_results),
):
    np.save(out_name, score_matrix)