In [1]:
import ml.ML_util as ML_util

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import matthews_corrcoef

In [2]:
# --- Input artefacts ---------------------------------------------------------
# Saved score arrays for the test split and the dev split.
PREDS_PATH = 'ml/preds_rf_with_dev.npy'
DEV_PREDS_PATH = 'ml/preds_dev_rf_with_dev.npy'
# Existing results table that receives one extra column.
CSV_PATH = 'ml/results_rf_with_dev.csv'
# Dataset root; it is concatenated directly into a path, so keep the trailing '/'.
DATASETS_PATH = 'data_sets_dev_170322/'

# --- Output ------------------------------------------------------------------
DEST = 'ml/results_rf_with_dev_up.csv'

# --- Experiment layout -------------------------------------------------------
# Presumably the number of random seeds used upstream -- TODO confirm; neither
# N_SEED nor SEEDS is read by the cells visible here.
N_SEED = 2
SEEDS = np.arange(N_SEED)
# Strides used by get_modality_cs when slicing a result matrix.
N_CHUNKS = 8
N_MOD = 3

# 19 evenly spaced candidate decision thresholds from 0.05 to 0.95; index 9 is
# the (approximately) 0.5 midpoint that choose_thrs favours on ties.
THRS = np.linspace(start=0.05, stop=0.95, num=19)

In [3]:
# Load one dataset directory and keep only its dev and test parts; the other
# four values returned by ML_util.get_dataset are discarded.
data_path = f"{DATASETS_PATH}complete_{0}/morphological/0_0.650.150.2/"
_, dev, test, _, _, _ = ML_util.get_dataset(data_path)

# Target vector per split: the last entry of each row's first element
# (presumably the class label -- confirm against ML_util.get_dataset).
targets = np.asarray([row[0][-1] for row in test])
dev_targets = np.asarray([row[0][-1] for row in dev])

In [5]:
# Read the saved score arrays: the test-split file first, then the dev-split
# file. Later cells iterate over the rows of each array and score every row
# against the matching target vector.
preds, dev_preds = (np.load(path) for path in (PREDS_PATH, DEV_PREDS_PATH))

In [7]:
def get_modality_cs(mat, n_m, n_cs):
    """Pick a strided subset of ``mat`` along its first axis.

    Two slices are applied in sequence: every ``N_CHUNKS``-th entry starting
    at offset ``n_cs``, then every ``N_MOD``-th entry of that result starting
    at offset ``n_m``.

    Args:
        mat: Sliceable sequence or array (presumably one row per
            chunk-size/modality combination -- confirm against the code that
            produced it).
        n_m: Start offset of the second, ``N_MOD``-strided slice.
        n_cs: Start offset of the first, ``N_CHUNKS``-strided slice.

    Returns:
        The doubly sliced ``mat``.
    """
    chunk_stride = slice(n_cs, None, N_CHUNKS)
    modality_stride = slice(n_m, None, N_MOD)
    return mat[chunk_stride][modality_stride]

def choose_thrs(mccs, thrs=None, neutral=0.5):
    """Select the decision threshold whose MCC score is highest.

    When several thresholds share the top score, the one whose index is
    closest to the neutral threshold's index wins; if two are equally close,
    the lower index is kept. When the neutral threshold itself attains the
    top score, ``neutral`` is returned exactly as given (not the grid value,
    which may differ from it by floating-point rounding).

    Args:
        mccs: 1-D array-like of scores, one per candidate threshold.
        thrs: Candidate thresholds aligned with ``mccs``. Defaults to the
            module-level ``THRS`` grid.
        neutral: Threshold to favour on ties. Its position is the entry of
            ``thrs`` nearest to this value (index 9 on the default 19-point
            grid).

    Returns:
        The chosen threshold. NaN scores are ignored; if every score is NaN,
        ``neutral`` is returned.

    Raises:
        ValueError: If ``mccs`` is empty or not the same shape as ``thrs``.
    """
    thrs = np.asarray(THRS if thrs is None else thrs, dtype=float)
    mccs = np.asarray(mccs, dtype=float)
    if mccs.size == 0:
        raise ValueError("mccs must contain at least one score")
    if mccs.shape != thrs.shape:
        raise ValueError(
            f"mccs has shape {mccs.shape} but thresholds have shape {thrs.shape}"
        )

    # Position of the neutral threshold, derived from the grid instead of a
    # hard-coded index so other grid sizes work too.
    mid = int(np.argmin(np.abs(thrs - neutral)))

    # A NaN maximum would match no entry at all; previously that left the
    # "not found" sentinel (-1) in place and silently selected the last
    # threshold. Ignore NaNs and fall back to the neutral threshold instead.
    valid = ~np.isnan(mccs)
    if not valid.any():
        return neutral
    best = mccs[valid].max()

    # Indices come out ascending and argmin keeps the first minimum, so an
    # equal-distance tie resolves to the lower index.
    candidates = np.flatnonzero(mccs == best)
    ind = candidates[np.argmin(np.abs(candidates - mid))]
    if ind == mid:
        return neutral
    return thrs[ind]

In [8]:
# MCC on the dev split for every (threshold, prediction row) pair. The outer
# list walks the candidate thresholds and the inner one walks the rows of the
# binarised `dev_preds`, so the stacked array is (n_thresholds, n_rows); the
# transpose yields one row of per-threshold scores for each row of `dev_preds`.
dev_mccs = np.asarray(
    [
        [matthews_corrcoef(dev_targets, row) for row in (dev_preds >= thr).astype('int8')]
        for thr in THRS
    ],
    dtype=float,
).T

# One selected threshold per prediction row, kept as a column vector so it can
# broadcast against a 2-D score array.
new_thrs = np.asarray([choose_thrs(row_mccs) for row_mccs in dev_mccs])[:, np.newaxis]

In [9]:
# Binarise each row of the test scores with that row's own dev-selected
# threshold (`new_thrs` is a column vector, so it broadcasts across columns).
test_bin_preds = (preds >= new_thrs).astype('int8')

# Score every binarised row against the test targets.
scores = []
for binarized_row in test_bin_preds:
    scores.append(matthews_corrcoef(targets, binarized_row))
test_mccs = np.asarray(scores)

In [11]:
# Add the thresholded-MCC column to the existing results table.
df = pd.read_csv(CSV_PATH).assign(thr_mcc=test_mccs)

# Write to DEST, a different path from CSV_PATH, so the input file is not
# overwritten. The header row is the frame's own column names (the default)
# and the row index is omitted.
df.to_csv(DEST, index=False)