In [None]:
import os
import pickle
import joblib
import numpy as np
import pandas as pd
import warnings
import seaborn as sns
import matplotlib.pyplot as plt
from time import time
from math import sqrt
from sklearn.model_selection import StratifiedKFold, GridSearchCV, cross_val_score
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, LassoCV, Lasso
from sklearn.metrics import f1_score, make_scorer, accuracy_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from feature_selector import FeatureSelector
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectFromModel, SelectPercentile, f_classif
from sklearn.utils import class_weight
from matplotlib import rcParams
from boruta import BorutaPy
#from sklearn.externals import joblib
from imblearn.under_sampling import ClusterCentroids
from imblearn.over_sampling import SMOTE

## Utility functions for machine learning

In [None]:
def get_param_grid(clf_method):
    """Return an untuned classifier together with its hyper-parameter grid.

    Parameters
    ----------
    clf_method : str
        'svm' or 'randomforest'.

    Returns
    -------
    tuple or None
        ``(clf, params)`` where ``params`` is a list of grids in the format
        accepted by ``GridSearchCV``. For any other ``clf_method`` a message
        is printed and ``None`` is returned.
    """
    # No blanket try/except here: a failure while building the estimator
    # (e.g. a missing import) should surface instead of being turned into None.
    if clf_method == 'svm':
        clf = svm.SVC()
        params = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4, 1e-2], 'C': [1, 5, 10, 100]},
                  {'kernel': ['linear'], 'C': [1, 5, 10, 100]}]

    elif clf_method == 'randomforest':
        clf = RandomForestClassifier()
        params = [{
            'n_estimators': [100, 150, 200],
            'max_depth': [1, 2, 3],
            'bootstrap': [True],
            # 'auto' was an alias of 'sqrt' for classifiers (so it only doubled
            # the search) and newer scikit-learn releases reject it.
            'max_features': ['sqrt'],
            'min_samples_split': [5, 10]
            }]

    else:
        print ('model type unkown!')
        return None

    return (clf, params)

def perform_cv(X, y, sampling_method, ml_method, num_folds, random_state):
    """Run one round of nested cross validation.

    The outer loop (``num_folds`` stratified folds) estimates performance; the
    inner loop (3 stratified folds inside ``GridSearchCV``) tunes the
    hyper-parameters on the (optionally resampled) training part only.
    The metric is read from the module-level ``scoring`` variable.

    Parameters
    ----------
    X : pandas DataFrame
        feature matrix.
    y : pandas Series
        target labels, row-aligned with ``X``.
    sampling_method : str
        'ccmut', 'under+over', or anything else for no resampling.
    ml_method : str
        model key understood by ``get_param_grid``.
    num_folds : int
        number of outer folds.
    random_state : int
        seed for the shuffling of both the outer and the inner splitter.

    Returns
    -------
    fold_scores : list of float
        outer-fold score, one entry per fold.
    pred_df : pandas DataFrame
        indexed like ``X`` with columns 'truth' and 'prediction' holding the
        out-of-fold predictions.

    Raises
    ------
    ValueError
        if ``ml_method`` is not supported by ``get_param_grid``.
    """
    grid = get_param_grid(ml_method)
    if grid is None:
        raise ValueError('unsupported ml_method: %r' % (ml_method,))
    clf, params = grid

    # honour the requested number of outer folds (was hard-coded to 5, which
    # silently disagreed with the fold bookkeeping done by the caller)
    cv_outer = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=random_state)
    cv_inner = StratifiedKFold(n_splits=3, shuffle=True, random_state=random_state)

    # enumerate splits
    fold_count = 0
    fold_scores = []
    pred_df = pd.DataFrame(index=X.index.values, columns=['truth', 'prediction'])

    for train_ix, test_ix in cv_outer.split(X, y):

        print('fold', fold_count)
        # train_ix / test_ix are positions, so index positionally on both X and y
        X_train, X_test = X.iloc[train_ix], X.iloc[test_ix]
        y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]

        # resample the training part only
        if sampling_method == 'ccmut':
            print('\nccmut sampling')
            X_train, y_train = run_ccmut(X_train, y_train)
        elif sampling_method == 'under+over':
            print('\nunder+over sampling')
            X_train, y_train = under_and_over_sample(X_train, y_train)
        else:
            print('\nno resampling')

        print('trainset distribution of classes:', y_train.value_counts().to_dict())

        # tune and train
        print('tuning and training...')
        search = GridSearchCV(clf, params, scoring=scoring, cv=cv_inner, refit=True)
        gs = search.fit(X_train, y_train)
        print('tuning done..')
        best_clf = gs.best_estimator_
        inner_score = gs.best_score_
        print(gs.best_params_)

        # test on the untouched outer fold with the same metric family
        print('predicting..')
        yhat = best_clf.predict(X_test)
        if scoring == 'f1_macro':
            outer_score = f1_score(y_test, yhat, average='macro')
        elif scoring == 'f1_weighted':
            outer_score = f1_score(y_test, yhat, average='weighted')
        elif scoring == 'f1':
            outer_score = f1_score(y_test, yhat)
        else:
            outer_score = accuracy_score(y_test, yhat)

        # report progress
        print('inner %s=%.3f, outer %s=%.3f\n' % (scoring, inner_score, scoring, outer_score))
        fold_scores.append(outer_score)
        fold_count += 1

        # store out-of-fold truth / prediction by row label
        test_labels = X.index.values[test_ix]
        pred_df.loc[test_labels, 'truth'] = y_test.values
        pred_df.loc[test_labels, 'prediction'] = yhat

    return fold_scores, pred_df

def train_on_all_data(X, y, sampling_method, ml_method, random_state=0):
    """Tune and fit the final model on the complete dataset.

    Parameters
    ----------
    X : pandas DataFrame
        feature matrix.
    y : pandas Series
        target labels, row-aligned with ``X``.
    sampling_method : str
        'ccmut', 'under+over', or anything else for no resampling.
    ml_method : str
        model key understood by ``get_param_grid``.
    random_state : int, optional
        seed for the shuffling of the tuning folds (default 0).

    Returns
    -------
    fitted estimator
        the best estimator found by the grid search; ``refit=True`` means it
        is already refitted on all of the (optionally resampled) data.

    Raises
    ------
    ValueError
        if ``ml_method`` is not supported by ``get_param_grid``.
    """
    grid = get_param_grid(ml_method)
    if grid is None:
        raise ValueError('unsupported ml_method: %r' % (ml_method,))
    clf, params = grid
    cv_inner = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)

    # resample data if specified; start from the full data so that every
    # branch leaves X_train / y_train defined
    X_train, y_train = X, y
    if sampling_method == 'ccmut':
        print('ccmut sampling')
        X_train, y_train = run_ccmut(X, y)
    elif sampling_method == 'under+over':
        print('under+over sampling')
        X_train, y_train = under_and_over_sample(X, y)
    else:
        print('no resampling')

    # tune and train on the data that was actually prepared above
    search = GridSearchCV(clf, params, scoring=scoring, cv=cv_inner, refit=True)
    gs = search.fit(X_train, y_train)

    # report progress
    print('best model, trained on all data, inner cv score f1=%.3f\n' % gs.best_score_)

    # refit=True already refitted the winner on the whole training data
    return gs.best_estimator_

def run_repeated_cv(df, scale, fsmethod, sampling_method, ml_method, num_folds=5, num_trials=10):
    """Repeat nested cross validation ``num_trials`` times.

    Each trial re-runs feature preprocessing and then ``perform_cv`` with the
    trial number as random seed.

    Parameters
    ----------
    df : pandas DataFrame
        input data; must hold the feature columns and the target column.
    scale : str
        key into ``scaling_methods``.
    fsmethod : str
        key into ``fs_methods``.
    sampling_method : str
        resampling option forwarded to ``perform_cv``.
    ml_method : str
        model key forwarded to ``perform_cv``.
    num_folds : int, optional
        number of outer folds per trial.
    num_trials : int, optional
        number of repetitions.

    Returns
    -------
    res_df : pandas DataFrame
        one row per evaluated fold with columns 'score', 'fold', 'trial'
        ('fold' and 'trial' are stored as strings).
    pred_df : pandas DataFrame
        out-of-fold predictions of the last trial that ran; an empty frame
        with columns 'truth' and 'prediction' if every trial was skipped.
    """
    all_scores = []
    all_trial_counts = []
    all_fold_counts = []
    # defined up front so the return below is valid even if no trial runs
    pred_df = pd.DataFrame(columns=['truth', 'prediction'])

    for tr in range(num_trials):

        print('- trial %d' % (tr+1))

        # dimensionality reduction, scaling & selecting features
        select_df, _ = feature_preprocessing(df, scale, fsmethod)

        # separate X and y
        y = select_df.loc[:, targetcol]
        X = select_df.drop(targetcol, axis=1)

        if X.shape[1] == 0:
            print('--> no features passed feature preprocessing!')
            continue

        # run cv
        scores, pred_df = perform_cv(X, y, sampling_method, ml_method, num_folds, tr)
        # book-keep by the number of scores actually returned, so the three
        # lists always stay the same length
        all_scores += scores
        all_trial_counts += [str(tr)] * len(scores)
        all_fold_counts += [str(i) for i in range(len(scores))]

    # build the result table from what was collected; a skipped trial no
    # longer causes a length mismatch against a pre-sized frame
    res_df = pd.DataFrame({'score': all_scores,
                           'fold': all_fold_counts,
                           'trial': all_trial_counts})
    return res_df, pred_df



## Utility functions for feature selection

In [None]:
def get_correlations_df(df, labelvals):
    """Correlate every column of ``df`` with the target.

    Parameters
    ----------
    df : pandas DataFrame
        feature columns.
    labelvals : array-like
        target values, one per row of ``df``.

    Returns
    -------
    pandas DataFrame
        columns 'feature' and 'correlation', sorted by ascending correlation,
        without features whose correlation is undefined (NaN), and with a
        fresh 0..n-1 index.
    """
    ftrnames = df.columns.values
    temp_df = df.copy()
    temp_df['target'] = labelvals
    cors = temp_df.corr()

    plt_df = pd.DataFrame({'feature': ftrnames,
                           'correlation': cors.loc[ftrnames, 'target'].values})

    # The original called reindex() and threw the result away; the intended
    # clean positional index after dropping/sorting is produced here.
    plt_df = plt_df.dropna().sort_values(by=['correlation']).reset_index(drop=True)
    return plt_df
    
def standardize_features(df, featurelist):
    """Scale every feature to zero mean and unit standard deviation.

    Standardization does not bound the values to a fixed range.

    Returns
    -------
    (DataFrame, StandardScaler)
        the scaled values with ``featurelist`` as column names, and the
        fitted scaler (for use on a test set).
    """
    print('standardization feature scaling')

    fitted_scaler = StandardScaler().fit(df)
    scaled_values = fitted_scaler.transform(df)

    return pd.DataFrame(scaled_values, columns=featurelist), fitted_scaler

def normalize_features(df, featurelist):
    """Rescale every feature to the 0-1 range (min-max scaling).

    Returns
    -------
    (DataFrame, MinMaxScaler)
        the scaled values with ``featurelist`` as column names, and the
        fitted scaler (for use on a test set).
    """
    print('min-max feature scaling')

    fitted_scaler = MinMaxScaler().fit(df)
    scaled_values = fitted_scaler.transform(df)

    return pd.DataFrame(scaled_values, columns=featurelist), fitted_scaler

def do_nothing_scaling(df, featurelist):
    """Pass-through counterpart of the scaling functions.

    Wraps the input in a DataFrame with ``featurelist`` as columns so that it
    has the same return signature as the real scalers.

    Returns
    -------
    (DataFrame, None)
        the unscaled data and ``None`` in place of a scaler.
    """
    print('no feature scaling')
    return pd.DataFrame(df, columns=featurelist), None

def do_nothing_selection(df, y):
    """Pass-through feature selector: keep every column of ``df``.

    ``y`` is accepted only so the signature matches the other selectors.

    Returns
    -------
    numpy array
        all column names of ``df``.
    """
    # this is the feature-selection no-op; the old message said 'no sampling',
    # which read like a statement about the resampling step
    print('no feature selection')
    return df.columns.values
    
def remove_collinear_features(df, labelvals, missing_threshold=0.5, correlation_threshold=0.75):
    '''
    Drop redundant features based on their collinearity.

    Uses FeatureSelector to identify collinear features at
    ``correlation_threshold`` and removes them. ``missing_threshold`` is
    accepted but currently unused (the missing-value step is disabled).

    Returns the reduced dataframe.
    '''
    selector = FeatureSelector(data=df, labels=labelvals)
    selector.identify_collinear(correlation_threshold)

    reduced_df = selector.remove(methods=['collinear'])
    n_kept = len(reduced_df.columns.values)
    print('%d non redundant features.' % n_kept)
    return reduced_df

def calculate_PCs(x, num):
    """Perform PCA on a matrix of features.

    Parameters
    ----------
    x : array-like
        feature matrix.
    num : int or float
        forwarded as ``n_components`` to ``PCA`` (with ``svd_solver='full'``).

    Returns
    -------
    dict
        'pca' -> the fitted PCA object, 'pc_vals' -> the transformed data.
    """
    print ('calculating pcs..')
    pca = PCA(n_components=num, svd_solver='full')
    x_pca = pca.fit_transform(x)

    res = {'pca': pca, 'pc_vals': x_pca}

    # report the variance actually explained by the kept components
    # (the old message referenced an undefined name and raised NameError)
    explained = float(np.sum(pca.explained_variance_ratio_))
    print('%d pcs were calculated to explain %.2f variance'%(x_pca.shape[1], explained))
    return res

def select_features_boruta(df, y):
    """
    Feature selection with a random forest wrapped in the Boruta method.

    A grid search (5-fold cv, metric taken from the module-level ``scoring``)
    picks the best class-balanced random forest; Boruta then uses that forest
    to decide which features to keep.

    Parameters
    ----------
    df : pandas dataframe
        the dataset. Note: all columns will be considered in feature selection
    y : list
        target variable for optimization.

    Returns
    ------
    list of the names of the features that Boruta marked as supported

    """
    print('feature selection using random forest and boruta..')

    feat_labels = df.columns.values

    # hyper-parameter grid shared with the random forest classifier
    _, params = get_param_grid('randomforest')

    # parameter grid search
    balanced_rf = RandomForestClassifier(class_weight='balanced')
    gs = GridSearchCV(estimator=balanced_rf,
                      param_grid=params,
                      n_jobs=-1,
                      scoring=scoring,
                      cv=5)
    gs.fit(df, y)

    print('best model:', gs.best_params_)

    # define Boruta feature selection method around the tuned forest
    feat_selector = BorutaPy(gs.best_estimator_, n_estimators='auto', verbose=0, random_state=1)

    # find all relevant features
    feat_selector.fit(np.array(df.values), y)

    # keep (and report) the supported features, in column order
    features = []
    for name, rank, supported in zip(feat_labels, feat_selector.ranking_, feat_selector.support_):
        if not supported:
            continue
        print('Feature: {:<25} Rank: {}'.format(name, rank))
        features.append(name)

    print('\n%d features were selected.'%len(features))
    return features
    
def select_features_randomforest(df, y):
    """
    Feature selection based on random forest feature importance.

    A grid search (5-fold cv, 'f1_micro') finds the best forest settings; a
    fresh forest with those settings is then used by SelectFromModel with the
    median importance as threshold.

    Parameters
    ----------
    df : pandas dataframe
        the dataset. Note: all columns will be considered in feature selection
    y : list
        target variable for optimization.

    Returns
    ------
    list of the names of the selected features

    """
    print('feature selection using random forest..')

    feat_labels = df.columns.values

    # grid: 5 evenly spaced forest sizes between 50 and 500, four depths
    tree_counts = np.linspace(start=50, stop=500, num=5).astype(int).tolist()
    params = [{'n_estimators': tree_counts,
               'max_depth': [3, 5, 7, 10],
               'bootstrap': [True]}]

    # parameter grid search
    gs = GridSearchCV(estimator=RandomForestClassifier(),
                      param_grid=params,
                      n_jobs=-1,
                      scoring='f1_micro',
                      cv=5)
    gs.fit(df, y)

    print('best model:', gs.best_params_)

    # SelectFromModel needs an importance threshold; the median is used here
    print('select features using randomforest..')
    sfm = SelectFromModel(RandomForestClassifier(**gs.best_params_), threshold='median')
    sfm.fit(df, y)

    # map the supported column positions back to feature names
    selected_ftrs = [feat_labels[ftr_ind] for ftr_ind in sfm.get_support(indices=True)]

    print('%d features were selected by Random Forest.'%len(selected_ftrs))
    return selected_ftrs


def select_features_univariate(df, y, n=20):
    """Select features with a univariate F-test.

    The F-test p-value of every column is computed (via ``SelectPercentile``
    with ``f_classif``); features with a p-value below ``alpha = 0.05`` are
    returned, ordered from most to least significant.

    Parameters
    ----------
    df : pandas dataframe
        the dataset; all columns are tested.
    y : array-like
        target variable.
    n : int, optional
        unused; kept so existing callers keep working.

    Returns
    -------
    list of the names of the significant features
    """
    print('select features based on univariate F-test..')

    feature_names = df.columns.values

    selector = SelectPercentile(f_classif, percentile=10)
    selector.fit(df.values, y)
    pvalues = np.asarray(selector.pvalues_)

    alpha = 0.05 # pvalue threshold (ideally should be adjusted for multiple comparison)

    # Stable argsort keeps ties in column order and pushes NaN p-values to the
    # end; NaNs also fail the `< alpha` test, so they are never selected.
    # (The unused normalized -log10 score was dropped: it only produced
    # divide-by-zero warnings when a p-value was exactly 0.)
    order = np.argsort(pvalues, kind='stable')
    selected_ftrs = [feature_names[i] for i in order if pvalues[i] < alpha]

    print('%d features were selected using univariate analysis w threshold %.2f'%(len(selected_ftrs), alpha))
    return selected_ftrs

def select_features_coeffs(df, y):
    """
    Select features using Lasso regression.

    The regularization strength alpha controls how strictly features are
    pruned. ``LassoCV`` picks the alpha with the best cross-validated
    performance and is refitted on all rows with it; features with a
    non-zero coefficient in that model are kept.

    Parameters
    ----------
    df : pandas dataframe
        the dataset; all columns are candidate features.
    y : array-like
        target variable (also used to stratify the cv folds).

    Returns
    -------
    list of the names of the features with a non-zero coefficient
    """
    print('select features using Lasso regression..')
    # construct cv to find best alpha
    skf = StratifiedKFold(n_splits=5)

    # Scale explicitly instead of LassoCV(normalize=True): that argument was
    # removed from scikit-learn, and the old code then re-fitted a plain Lasso
    # on UN-normalized data with an alpha that had been tuned on normalized
    # data, so the penalty did not match the data it was applied to.
    X_scaled = StandardScaler().fit_transform(df)
    model = LassoCV(cv=skf, max_iter=20000).fit(X_scaled, y)
    print('best alpha:', model.alpha_)

    # LassoCV is already refitted on all rows with the best alpha, so its
    # coefficients are consistent with the alpha reported above
    coeffs = model.coef_

    features = [f for f, c in zip(df.columns.values, coeffs) if c != 0]

    print('%d features were selected by Lasso.'%len(features))
    return features

def feature_preprocessing(df, scale, fs):
    """Scale, impute, de-correlate and select features.

    Reads the module-level ``ftrcols``, ``targetcol``, ``scaling_methods``
    and ``fs_methods``.

    Parameters
    ----------
    df : pandas dataframe
        input data holding the feature columns and the target column.
    scale : str
        key into ``scaling_methods``.
    fs : str
        key into ``fs_methods``.

    Returns
    -------
    (DataFrame, scaler)
        the selected features plus the target column, indexed by the original
        index values of ``df`` (index named 'index'), and the scaler returned
        by the scaling step.
    """
    # features to append unscaled; none at the moment
    othercols = []

    indices = df.index.values

    # get y, process y here right after (categorical to binary, etc)
    y = df[targetcol].values

    # scale X (passed as a plain array of the feature columns)
    scale_fn = scaling_methods[scale]
    data_df, scaler = scale_fn(df[ftrcols].values, ftrcols)

    # add other columns to X (features to include without scaling)
    for feature in othercols:
        data_df[feature] = df[feature].values

    # impute missing values using the column median
    column_medians = data_df.median()
    data_df = data_df.fillna(column_medians)

    # remove redundant features
    data_df = remove_collinear_features(data_df, y)

    # remove unimportant features
    select_fn = fs_methods[fs]
    selected_features = select_fn(data_df, y)

    # assemble output: selected features + target, original index restored
    out_df = data_df[selected_features].copy()
    out_df[targetcol] = y
    out_df['index'] = indices
    out_df = out_df.set_index('index')
    return out_df, scaler


## Utility functions for sampling

In [None]:

def CCMUT(df, f):
    """Cluster Centroid based Majority Under-sampling Technique (CCMUT).

    Computes the centroid of all rows, ranks the rows by Euclidean distance
    to it, and drops the ``int(f * n_rows)`` rows farthest from the centroid.
    NOTE(review): the original step comment spoke of removing the "low score"
    instances; confirm that dropping the farthest rows is the intended end.

    WORKS ONLY ON BINARY problems (it is meant to be called on the majority
    class rows only).

    Parameters
    ----------
    df : pandas DataFrame
        feature rows of the majority class.
    f : float
        fraction (0-1) of rows that should be removed.

    Returns
    -------
    pandas DataFrame
        the remaining rows, in their original order, each still carrying its
        own original index label.
    """
    print('CCMUT..')
    # keep track of the original index values
    anon_ids = df.index.values
    X = df.values

    # 1. find cluster centroid : average feature vector
    cluster_centroid = np.sum(X, axis=0) / X.shape[0]

    # 2. Euclidean distance from the centroid to every sample
    diffs = np.asarray(X, dtype=float) - np.asarray(cluster_centroid, dtype=float)
    euclidean = np.sqrt((diffs ** 2).sum(axis=1))

    # 3. row positions in descending order of distance
    order = np.argsort(euclidean, kind='stable')[::-1]

    # 4. positions of the rows to drop
    drop_positions = order[:int(f * X.shape[0])]

    # 5. drop the same positions from the data AND from the ids; the ids used
    #    to be deleted from a re-ordered copy, which attached wrong labels to
    #    the surviving rows
    X_f = np.delete(X, drop_positions, axis=0)
    anon_ids_f = np.delete(anon_ids, drop_positions, axis=0)

    # 6. return the under-sampled majority sample matrix
    return pd.DataFrame(index=anon_ids_f, data=X_f, columns=df.columns.values)

def run_ccmut(df, y):
    """
    Balance a BINARY training set by under-sampling its majority class.

    df is the train dataframe, y is y_train:

    - identify majority and minority groups
    - undersample the majority with CCMUT
    - merge with the minority again
    - return the balanced trainset dataframe and its labels

    If the fraction to remove is at most 0.1 the inputs are returned as is.
    """

    # class label -> count, ordered from the smaller to the larger class
    counts = y.value_counts().to_dict()
    by_size = sorted(counts.items(), key=lambda item: item[1])
    [(min_class, min_n), (maj_class, maj_n)] = by_size
    print(min_class, min_n, '-', maj_class, maj_n)

    # fraction of the majority class that has to go to reach balance
    f = float((maj_n - min_n) / maj_n)

    # skip resampling if class imbalance is negligible
    if f <= 0.1:
        print('classes are balanced (f = %.3f). skip resampling.' % f)
        return df, y

    print('%.3f perc of samples from group %d will be droped..' % (f, maj_class))

    # index labels of each group
    maj_ids = y[y == maj_class].index.values
    min_ids = y[y == min_class].index.values

    n_shared = sum(1 for a in maj_ids if a in min_ids)
    print('overlap of indices between min and maj groups', n_shared)

    # under-sample the majority rows
    maj_r_df = CCMUT(df.loc[maj_ids, :], f)
    maj_r_df = maj_r_df.set_index(maj_r_df.index.values)
    print('majority df after ccmut', maj_r_df.shape, 'has nans:', maj_r_df.isnull().values.any())

    # minority rows are kept untouched
    min_df = df.loc[min_ids, :].set_index(min_ids)
    print('minority df', min_df.shape, 'has nans:', min_df.isnull().values.any())

    # stack reduced majority on top of the minority
    df_r = pd.concat([maj_r_df, min_df])
    print('\nfinal df :', df_r.shape, 'has nans:', df_r.isnull().values.any())

    # labels in the same order as the rows of df_r
    y_r = pd.concat([y[maj_r_df.index.values], y[min_ids]])
    print('final y :', y_r.value_counts().to_dict())

    return df_r , y_r

def sampling_strategy(X,y,n_samples, t='majority'):
    '''Build the per-class target-count dict expected by imblearn samplers.

    not my code. Got it from:
    https://towardsdatascience.com/how-to-deal-with-imbalanced-multiclass-datasets-in-python-fe0bb3f2b669

    Parameters
    ----------
    X : unused; kept for signature compatibility.
    y : pandas Series
        class labels.
    n_samples : int
        number of samples every selected class should end up with.
    t : str
        'majority' selects classes with more than ``n_samples`` rows,
        'minority' selects classes with fewer than ``n_samples`` rows.

    Returns
    -------
    dict
        selected class label -> ``n_samples``.

    Raises
    ------
    ValueError
        if ``t`` is neither 'majority' nor 'minority' (this used to fail with
        an unrelated IndexError).
    '''
    counts = y.value_counts()
    if t == 'majority':
        selected = counts[counts > n_samples]
    elif t == 'minority':
        selected = counts[counts < n_samples]
    else:
        raise ValueError("t must be 'majority' or 'minority', got %r" % (t,))

    return {target: n_samples for target in selected.index}

def under_and_over_sample(X, y):
    """Bring every class to the median class size.

    Classes larger than the median count are under-sampled with
    ClusterCentroids; classes smaller than it are then over-sampled with
    SMOTE (k_neighbors=2).

    Returns the resampled X and y.
    """
    count = y.value_counts()
    n_samples = int(count.median())

    print('counts before resampling:', count.values)

    # step 1: shrink the classes above the median
    shrink_targets = sampling_strategy(X, y, n_samples, t='majority')
    under_sampler = ClusterCentroids(sampling_strategy=shrink_targets)
    X_under, y_under = under_sampler.fit_resample(X, y)

    # step 2: grow the classes below the median
    grow_targets = sampling_strategy(X_under, y_under, n_samples, t='minority')
    over_sampler = SMOTE(sampling_strategy=grow_targets, k_neighbors=2)
    X_bal, y_bal = over_sampler.fit_resample(X_under, y_under)

    count = y_bal.value_counts()
    print('counts after resampling:', count.values, '\n')

    return X_bal, y_bal

# main 

#### select settings that you'd like to run

In [None]:
## options for feature processing and feature selection
# Both tables map the option name used in the settings cell to the function
# that implements it.

# feature scaling options
scaling_methods = dict(
    none=do_nothing_scaling,
    normalize=normalize_features,
    standardize=standardize_features,
)

# feature selection options
fs_methods = dict(
    rf=select_features_randomforest,
    lasso=select_features_coeffs,
    univariate=select_features_univariate,
    boruta=select_features_boruta,
    none=do_nothing_selection,
)


In [None]:
# settings that we will use (you can select from the above lists)
# this will define the folder name

# start the wall-clock timer that the last cell reports against
# (t0 was read there but never assigned anywhere)
t0 = time()

compute_pca = False
scaling_method = 'none'
featureselection_method = 'none'
sampling_method = 'under+over'
ml_method = 'randomforest'
# `scoring` is read as a module-level variable by the cv helpers; a `global`
# statement at module level has no effect, so it is simply assigned here
scoring = 'f1_weighted' # can also try f1_macro, f1, accuracy
num_folds = 5
num_trials = 10 # for repeated cross validation to get a more accurate estimate of performance

# name of the output sub-folder, built from the chosen options
runname = '-'.join([scaling_method, featureselection_method, sampling_method, ml_method, scoring])

### Load data

In [None]:
foldername = 'classification-BulkCluster'

# name of the label column; read as a module-level variable by the helpers
targetcol = 'BulkCluster'

# disable warnings
# (the previous `with warnings.catch_warnings()` block restored the filters
# as soon as it exited, so nothing was actually suppressed)
warnings.filterwarnings('ignore')

# paths
csvname = '__.csv'
input_dir = os.getcwd()
csvpath = os.path.join(input_dir, csvname)
output_dir = os.path.join(input_dir, foldername, runname)
os.makedirs(output_dir, exist_ok=True)
print('input_dir:', input_dir)
print('output_dir:', output_dir)

# create sex specific folders for outputs
cohorts = ['all', 'M', 'F']
for sex in cohorts:
    sex_spec_output_dir = os.path.join(output_dir, sex)
    os.makedirs(sex_spec_output_dir, exist_ok=True)

# read dataframe
indexcol = 'SampleID'
sexcol = 'Sex'
# feature columns; read as a module-level variable by feature_preprocessing
ftrcols = ['Enhancing','RCBV.Raw_Mean', 'RCBV.Raw_Std', 'FA.Raw_Std',
           'FA.Raw_Mean', 'MD.Raw_Mean', 'MD.Raw_Std', 'EPI.Raw_Mean',
           'EPI.Raw_Std', 'CenterFecsT2', 'MeanFecsT2', 'StdFecsT2']
df = pd.read_csv(csvpath)

# encode the categorical columns as integers; assign the result back instead
# of calling replace(inplace=True) on a column selection, which is not
# guaranteed to write through to `df`
df[targetcol] = df[targetcol].replace(['A', 'B', 'C'], [0, 1, 2])
df['Enhancing'] = df['Enhancing'].replace(['ENH', 'BAT', 'NEC', 'Central Cyst/Cavity'], [0, 1, 2, 3])

# exclude additional columns, index by sample id, drop incomplete rows
df = df.loc[:, ftrcols+[sexcol, targetcol, indexcol]]
df = df.set_index(indexcol).dropna()

print(df.shape)
print(df.head())

### ----- repeated cv for evaluation of machine learning performance -----

#### step 1 - training and testing

In [None]:
# run modeling for cohorts
# For every cohort: skip it if its cv result file already exists, otherwise
# run repeated cv on the matching rows and write scores + predictions to csv.
warnings.filterwarnings('ignore')
for sex in cohorts:
    print('\n', '-'*10, sex, '-'*10)

    sexed_input_dir = os.path.join(output_dir, sex)
    results_dir = os.path.join(sexed_input_dir, 'ml_results')
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    savecvpredpath = os.path.join(results_dir, 'cvpredictions.csv')
    savecvrespath = os.path.join(results_dir, 'cvresult.csv')

    # results from an earlier run act as a cache
    if os.path.exists(savecvrespath):
        print('cvresult.csv exists!')
        continue

    # select cohort
    sex_df = df.copy() if sex == 'all' else df[df[sexcol] == sex]

    results_df, prediction_df = run_repeated_cv(
        sex_df, scaling_method, featureselection_method,
        sampling_method, ml_method, num_folds, num_trials)

    results_df.to_csv(savecvrespath, index=False)
    prediction_df.to_csv(savecvpredpath, index=True)

    print(savecvrespath)

#### step 2 - merge all results and plot

In [None]:
# merge results
# Read the per-cohort cv scores and stack them into one long table.
# Concatenating avoids the old pre-sized frame, whose slice arithmetic
# (`start_index = end_index`) made each cohort overwrite the last row of the
# previous one. Local names are used so the notebook-level `df` and `csvpath`
# from the load-data cell are not clobbered.
cohort_frames = []
for sex in cohorts:
    print('merging %s' % sex)
    cohort_csvpath = os.path.join(output_dir, sex, 'ml_results', 'cvresult.csv')
    cohort_df = pd.read_csv(cohort_csvpath)
    print(cohort_df.shape)

    cohort_df['cohort'] = sex
    cohort_frames.append(cohort_df)

merged_results_df = pd.concat(cohort_frames, ignore_index=True)
merged_results_df = merged_results_df[['cohort', 'fold', 'trial', 'score']].dropna()
print(merged_results_df.shape)

# plot: one box per cohort, showing the distribution of outer-fold scores
fig_save_dir = output_dir
sns.set_theme(style="whitegrid")

fig = plt.figure(figsize=(len(cohorts)*2, 3))
ax = fig.add_subplot(1, 1, 1)
ax = sns.boxplot(y='score', x='cohort', data=merged_results_df, palette='Blues', ax=ax)
ax.set_ylabel(scoring, fontsize=12)
ax.set_xlabel('cohort', fontsize=12)
ax.set_title('random forest performance in predicting %s'%targetcol, fontsize=15)
ax.set_ylim([0., 1.0])

# save before showing so the file is written regardless of what the backend
# does with the figure on show()
fig.savefig(os.path.join(fig_save_dir, 'cv_summary.jpg'), bbox_inches='tight', pad_inches=1, dpi=300)
plt.show()
plt.close(fig)

#### plot per class classification performance results

In [None]:
# `plot_confusion_matrix` was imported here but never used, and it no longer
# exists in newer scikit-learn releases, so only the two used names remain.
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

def accuracy (truth, pred):
    """Return the fraction of positions at which ``pred`` equals ``truth``."""
    correct = sum(1 for t, p in zip(truth, pred) if p == t)
    return float(correct) / len(truth)


# integer codes used for the target and the class names they stand for
class_codes = [0, 1, 2]
class_names = ['A', 'B', 'C']

fig = plt.figure(figsize=(len(cohorts)*4, 3))

for n, sex in enumerate(cohorts):
    # one panel per cohort (the column count follows the cohort list)
    ax = fig.add_subplot(1, len(cohorts), n+1)
    predictioncsvpath = os.path.join(output_dir, sex, 'ml_results','cvpredictions.csv')
    pred_df = pd.read_csv(predictioncsvpath)

    y_test = list(pred_df['truth'].values)
    y_pred = list(pred_df['prediction'].values)
    ax.set_title('%s samples\noverall perc acc = %.1f' % (sex, accuracy(y_test, y_pred) * 100))

    # fix the label set so the matrix is always 3x3 and matches the display
    # labels even if a cohort never contains / predicts one of the classes
    cm = confusion_matrix(y_test, y_pred, labels=class_codes)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
    disp = disp.plot(include_values=True, cmap='Purples', xticks_rotation='horizontal', ax=ax, colorbar=False)
    # switch the grid off on this panel; the old plt.grid(b=None) relied on a
    # keyword that newer matplotlib releases no longer accept
    ax.grid(False)

    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')

# save before showing so the file is written regardless of what the backend
# does with the figure on show()
fig.savefig(os.path.join(fig_save_dir, 'confmat.jpg'), bbox_inches='tight', pad_inches=0.1, dpi=300)
plt.show()

plt.close('all')
    
# Report the elapsed wall-clock time: current time minus the start time t0.
# NOTE(review): t0 is only read here; it must hold a time() value captured at
# the start of the run - confirm it is assigned in an earlier cell.
t1 = time()
print('\n\n------------time taken for this run:', t1 - t0)